Repository: tskit-dev/tskit
Branch: main
Commit: 40698f504b6e
Files: 220
Total size: 7.3 MB
Directory structure:
gitextract_7z1tql5y/
├── .clang-format
├── .github/
│ ├── PULL_REQUEST_TEMPLATE.md
│ └── workflows/
│ ├── docs.yml
│ ├── lint.yml
│ ├── release-c.yml
│ ├── tests.yml
│ └── wheels.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── c/
│ ├── .gitignore
│ ├── CHANGELOG.rst
│ ├── VERSION.txt
│ ├── examples/
│ │ ├── Makefile
│ │ ├── api_structure.c
│ │ ├── cpp_sorting_example.cpp
│ │ ├── error_handling.c
│ │ ├── haploid_wright_fisher.c
│ │ ├── json_struct_metadata.c
│ │ ├── multichrom_wright_fisher.c
│ │ ├── multichrom_wright_fisher_singlethreaded.c
│ │ ├── streaming.c
│ │ ├── take_ownership.c
│ │ ├── tree_iteration.c
│ │ └── tree_traversal.c
│ ├── meson.build
│ ├── meson_options.txt
│ ├── subprojects/
│ │ └── kastore/
│ │ ├── README.md
│ │ ├── VERSION.txt
│ │ ├── kastore.c
│ │ ├── kastore.h
│ │ └── meson.build
│ ├── tests/
│ │ ├── meson-subproject/
│ │ │ ├── example.c
│ │ │ └── meson.build
│ │ ├── test_convert.c
│ │ ├── test_core.c
│ │ ├── test_file_format.c
│ │ ├── test_genotypes.c
│ │ ├── test_haplotype_matching.c
│ │ ├── test_minimal_cpp.cpp
│ │ ├── test_stats.c
│ │ ├── test_tables.c
│ │ ├── test_trees.c
│ │ ├── testlib.c
│ │ └── testlib.h
│ ├── tskit/
│ │ ├── convert.c
│ │ ├── convert.h
│ │ ├── core.c
│ │ ├── core.h
│ │ ├── genotypes.c
│ │ ├── genotypes.h
│ │ ├── haplotype_matching.c
│ │ ├── haplotype_matching.h
│ │ ├── stats.c
│ │ ├── stats.h
│ │ ├── tables.c
│ │ ├── tables.h
│ │ ├── trees.c
│ │ └── trees.h
│ └── tskit.h
├── codecov.yml
├── docs/
│ ├── .gitignore
│ ├── Makefile
│ ├── _config.yml
│ ├── _static/
│ │ ├── README
│ │ └── bespoke.css
│ ├── _toc.yml
│ ├── build.sh
│ ├── c-api.rst
│ ├── changelogs.rst
│ ├── citation.md
│ ├── cli.md
│ ├── data/
│ │ └── basic_tree_seq.trees
│ ├── data-model.md
│ ├── development.md
│ ├── doxygen/
│ │ └── Doxyfile
│ ├── export.md
│ ├── file-formats.md
│ ├── glossary.md
│ ├── ibd.md
│ ├── installation.md
│ ├── introduction.md
│ ├── metadata.md
│ ├── numba.md
│ ├── provenance.md
│ ├── python-api.md
│ ├── quickstart.md
│ ├── stats.md
│ ├── substitutions/
│ │ ├── linear_traversal_warning.rst
│ │ ├── table_edit_warning.rst
│ │ ├── table_keep_rows_main.rst
│ │ ├── tree_array_warning.rst
│ │ └── virtual_root_array_note.rst
│ └── topological-analysis.md
├── prek.toml
└── python/
├── .gitignore
├── CHANGELOG.rst
├── MANIFEST.in
├── Makefile
├── README.rst
├── _tskitmodule.c
├── benchmark/
│ ├── config.yaml
│ ├── run-for-all-releases.py
│ └── run.py
├── lwt_interface/
│ ├── CHANGELOG.rst
│ ├── Makefile
│ ├── README.md
│ ├── cython_example/
│ │ ├── Makefile
│ │ ├── _lwtc.c
│ │ ├── example.pyx
│ │ ├── pyproject.toml
│ │ └── setup.py
│ ├── dict_encoding_testlib.py
│ ├── example_c_module.c
│ ├── setup.py
│ ├── test_example_c_module.py
│ └── tskit_lwt_interface.h
├── pyproject.toml
├── setup.py
├── stress_lowlevel.py
├── tests/
│ ├── __init__.py
│ ├── conftest.py
│ ├── data/
│ │ ├── SLiM/
│ │ │ ├── README
│ │ │ ├── minimal-example.trees
│ │ │ ├── minimal-example.txt
│ │ │ ├── single-locus-example.trees
│ │ │ └── single-locus-example.txt
│ │ ├── dict-encodings/
│ │ │ ├── generate_msprime.py
│ │ │ └── msprime-0.7.4.pkl
│ │ ├── hdf5-formats/
│ │ │ ├── msprime-0.3.0_v2.0.hdf5
│ │ │ ├── msprime-0.4.0_v3.1.hdf5
│ │ │ └── msprime-0.5.0_v10.0.hdf5
│ │ ├── old-formats/
│ │ │ └── tskit-0.3.3.trees
│ │ └── simplify-bugs/
│ │ ├── 01-edges.txt
│ │ ├── 01-mutations.txt
│ │ ├── 01-nodes.txt
│ │ ├── 01-sites.txt
│ │ ├── 02-edges.txt
│ │ ├── 02-mutations.txt
│ │ ├── 02-nodes.txt
│ │ ├── 02-sites.txt
│ │ ├── 03-edges.txt
│ │ ├── 03-mutations.txt
│ │ ├── 03-nodes.txt
│ │ ├── 03-sites.txt
│ │ ├── 04-edges.txt
│ │ ├── 04-mutations.txt
│ │ ├── 04-nodes.txt
│ │ ├── 04-sites.txt
│ │ ├── 05-edges.txt
│ │ ├── 05-mutations.txt
│ │ ├── 05-nodes.txt
│ │ └── 05-sites.txt
│ ├── ibd.py
│ ├── simplify.py
│ ├── test_avl_tree.py
│ ├── test_balance_metrics.py
│ ├── test_cli.py
│ ├── test_coalrate.py
│ ├── test_combinatorics.py
│ ├── test_dict_encoding.py
│ ├── test_distance_metrics.py
│ ├── test_divmat.py
│ ├── test_drawing.py
│ ├── test_extend_haplotypes.py
│ ├── test_file_format.py
│ ├── test_fileobj.py
│ ├── test_genotype_matching.py
│ ├── test_genotypes.py
│ ├── test_haplotype_matching.py
│ ├── test_highlevel.py
│ ├── test_ibd.py
│ ├── test_immutable_table_collection.py
│ ├── test_intervals.py
│ ├── test_jit.py
│ ├── test_ld_matrix.py
│ ├── test_metadata.py
│ ├── test_ms.py
│ ├── test_parsimony.py
│ ├── test_phylo_formats.py
│ ├── test_provenance.py
│ ├── test_python_c.py
│ ├── test_reference_sequence.py
│ ├── test_relatedness_vector.py
│ ├── test_stats.py
│ ├── test_table_transforms.py
│ ├── test_tables.py
│ ├── test_text_formats.py
│ ├── test_threads.py
│ ├── test_topology.py
│ ├── test_tree_positioning.py
│ ├── test_tree_stats.py
│ ├── test_util.py
│ ├── test_utilities.py
│ ├── test_vcf.py
│ ├── test_version.py
│ ├── test_wright_fisher.py
│ └── tsutil.py
└── tskit/
├── __init__.py
├── __main__.py
├── _version.py
├── cli.py
├── combinatorics.py
├── drawing.py
├── exceptions.py
├── genotypes.py
├── intervals.py
├── jit/
│ ├── __init__.py
│ └── numba.py
├── metadata.py
├── metadata_schema.schema.json
├── provenance.py
├── provenance.schema.json
├── stats.py
├── tables.py
├── text_formats.py
├── trees.py
├── util.py
└── vcf.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .clang-format
================================================
Language: Cpp
BasedOnStyle: GNU
SortIncludes: false
AllowShortIfStatementsOnASingleLine: false
BreakBeforeBraces: Linux
TabWidth: 4
IndentWidth: 4
ColumnLimit: 89
SpaceBeforeParens:
ControlStatements
SpacesInCStyleCastParentheses: false
SpaceAfterCStyleCast: true
IndentCaseLabels: true
AlignAfterOpenBracket: DontAlign
BinPackArguments: true
BinPackParameters: true
AlwaysBreakAfterReturnType: AllDefinitions
StatementMacros: ["PyObject_HEAD", "Py_BEGIN_ALLOW_THREADS", "Py_END_ALLOW_THREADS"]
AlignConsecutiveMacros: true
================================================
FILE: .github/PULL_REQUEST_TEMPLATE.md
================================================
## Description
Thanks for contributing to tskit! :heart:
A guide to the PR process is [here](https://tskit.dev/tskit/docs/stable/development.html#git-workflow)
Please replace this text with a summary of the change and which issue is fixed, if any. Please also include relevant motivation and context.
Fixes #(issue) <- Putting the issue number here will auto-close the issue when this PR is merged
# PR Checklist:
- [ ] Tests that fully cover new/changed functionality.
- [ ] Documentation including tutorial content if appropriate.
- [ ] Changelogs, if there are API changes.
================================================
FILE: .github/workflows/docs.yml
================================================
name: Build Docs
on:
pull_request:
merge_group:
push:
branches: [main]
tags:
- '*'
env:
FORCE_COLOR: 1
jobs:
Docs:
uses: tskit-dev/.github/.github/workflows/docs.yml@v15
with:
pyproject-directory: python
additional-apt-packages: doxygen
pre-build-command: cd docs/doxygen && doxygen
================================================
FILE: .github/workflows/lint.yml
================================================
name: Lint
on:
pull_request:
merge_group:
jobs:
Lint:
uses: tskit-dev/.github/.github/workflows/lint.yml@v15
with:
pyproject-directory: python
================================================
FILE: .github/workflows/release-c.yml
================================================
name: Publish C API release
on:
push:
branches: [main, test]
tags: ['*']
env:
FORCE_COLOR: 1
jobs:
build:
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@v6.0.2
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.10.0"
- name: Install system deps
run: |
sudo apt-get update
sudo apt-get install -y ninja-build libcunit1-dev
- name: Install meson
run: uv tool install meson==1.10.1
- name: Build tarball
run: |
git rm -rf c/tests/meson-subproject
git config --global user.email "CI@CI.com"
git config --global user.name "Mr Robot"
git add -A
git commit -m "dummy commit to make meson not add in the symlinked directory"
meson c build-gcc
meson dist -C build-gcc
- name: C Release
uses: softprops/action-gh-release@v2.5.0
if: startsWith(github.ref, 'refs/tags/') && contains(github.event.ref, 'C_')
with:
draft: True
files: build-gcc/meson-dist/*
================================================
FILE: .github/workflows/tests.yml
================================================
name: Tests
on:
pull_request:
merge_group:
push:
branches: [main, test]
env:
FORCE_COLOR: 1
jobs:
packaging:
name: Python packaging
uses: tskit-dev/.github/.github/workflows/python-packaging.yml@v15
with:
pyproject-directory: python
cli-test-cmd: tskit --help
test-c:
name: C tests
uses: tskit-dev/.github/.github/workflows/c-tests.yml@v15
with:
library-directory: c
secrets: inherit
test-python-c:
name: Python-C tests
uses: tskit-dev/.github/.github/workflows/python-c-tests.yml@v15
with:
tests: python/tests/test_python_c.py python/tests/test_dict_encoding.py
pyproject-directory: python
secrets: inherit
test:
name: Python
uses: tskit-dev/.github/.github/workflows/python-tests.yml@v15
with:
os: ${{ matrix.os }}
python-version: ${{ matrix.python }}
pyproject-directory: python
coverage-directory: python/tskit
secrets: inherit
strategy:
matrix:
python: [ 3.11, 3.13 ]
os: [ macos-latest, ubuntu-24.04, windows-latest ]
msys2:
runs-on: windows-latest
strategy:
matrix:
include:
- { sys: mingw32, env: i686 }
- { sys: mingw64, env: x86_64 }
name: Windows (${{ matrix.sys }}, ${{ matrix.env }})
defaults:
run:
shell: msys2 {0}
steps:
- name: Cancel Previous Runs
uses: styfle/cancel-workflow-action@0.13.0
with:
access_token: ${{ github.token }}
- name: 'Checkout'
uses: actions/checkout@v6.0.2
- name: Setup MSYS2 ${{matrix.sys}}
uses: msys2/setup-msys2@v2.27.0
with:
msystem: ${{matrix.sys}}
update: true
install: >-
git
mingw-w64-${{matrix.env}}-toolchain
mingw-w64-${{matrix.env}}-ninja
mingw-w64-${{matrix.env}}-meson
mingw-w64-${{matrix.env}}-cunit
- name: Build
working-directory: c
run: |
meson build -Dbuild_examples=false
ninja -C build
- name: Run tests
working-directory: c
run: |
ninja -C build test
bespoke-python-test:
name: Bespoke Python tests
runs-on: ubuntu-24.04
steps:
- name: Cancel Previous Runs
uses: styfle/cancel-workflow-action@0.13.0
with:
access_token: ${{ github.token }}
- name: Checkout
uses: actions/checkout@v6.0.2
with:
submodules: true
- name: Install uv and set the python version
uses: astral-sh/setup-uv@v6
with:
python-version: 3.11
version: "0.10.0"
- name: Install Python dependencies
working-directory: python
run: uv sync --locked --group test --no-default-groups
- name: Minidom test
working-directory: python
# Importing either IPython or pytest causes import of xml.dom.minidom
# So to actually test that tskit imports it, we need a minimal test
run: |
uv run --locked --group test --no-default-groups \
python -c "import tskit;tskit.Tree.generate_star(5).tree_sequence.draw_svg(path='test.svg')"
- name: Run JIT code coverage
run: |
NUMBA_DISABLE_JIT=1 uv run --locked --project=python --no-default-groups\
pytest --cov=python/tskit --cov-report=xml --cov-branch \
python/tests/test_jit.py
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v5.5.2
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
files: coverage.xml
disable_search: true
verbose: true
flags: python-tests-no-jit
- name: Build example LWT interface code and test
working-directory: python/lwt_interface/
run: |
make allchecks
uv run --project=../ --group=test pytest -vs
- name: Build cython example LWT interface code and run
working-directory: python/lwt_interface/cython_example
run: make
bespoke-c-test:
name: Bespoke C tests
runs-on: ubuntu-24.04
steps:
- name: Cancel Previous Runs
uses: styfle/cancel-workflow-action@0.13.0
with:
access_token: ${{ github.token }}
- name: Checkout
uses: actions/checkout@v6.0.2
with:
submodules: true
- name: Install system deps
run: |
sudo apt-get update
sudo apt-get install -y libcunit1-dev ninja-build clang
- name: Install uv
uses: astral-sh/setup-uv@v6
with:
version: "0.10.0"
- name: Install uv deps
run: |
uv tool install meson==1.10.1
- name: Configure code
run: CFLAGS=-D_TSK_BIG_TABLES CPPFLAGS=-D_TSK_BIG_TABLES meson setup build-bt c/
- name: Compile
run: ninja -C build-bt
- name: Run tests
run: ninja -C build-bt test
- name: Test building with meson subproject
run: |
meson build-subproject c/tests/meson-subproject
ninja -C build-subproject
./build-subproject/example
- name: Install shared library and hand-compile program.
run: |
meson build-install c --prefix=/usr
sudo ninja -C build-install install
clang c/examples/api_structure.c -I c/subprojects/kastore -o api_structure -ltskit
./api_structure
- name: Run example make file
run: |
make -C c/examples
================================================
FILE: .github/workflows/wheels.yml
================================================
name: Publish Python release
on:
push:
branches: [test-publish]
release:
types: [published]
jobs:
build-wheels:
if: "!startsWith(github.ref, 'refs/tags/C_')"
uses: tskit-dev/.github/.github/workflows/build-wheels.yml@v15
with:
pyproject-directory: python
publish:
runs-on: ubuntu-24.04
environment: release
needs: [ 'build-wheels' ]
permissions:
id-token: write
steps:
- name: Download artifacts
uses: actions/download-artifact@v7.0.0
with:
pattern: build-*
path: dist
merge-multiple: true
- name: Show artifacts
run: ls -lah dist
- name: Publish distribution to Test PyPI
if: github.event_name == 'push' && github.ref_name == 'test-publish'
uses: pypa/gh-action-pypi-publish@v1.13.0
with:
repository-url: https://test.pypi.org/legacy/
verbose: true
- name: Publish distribution to Production PyPI
if: github.event_name == 'release'
uses: pypa/gh-action-pypi-publish@v1.13.0
================================================
FILE: .gitignore
================================================
build-gcc
.DS_Store
python/benchmark/*.trees
python/benchmark/*.json
python/benchmark/*.html
.venv
.env
.vscode
env
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing
Tskit is a free and open-source project that welcomes contributions from everyone.
The [Developer documentation](https://tskit.dev/tskit/docs/latest/development.html)
will help you get started.
We have an active Slack group where tskit and associated projects are discussed.
If you wish to join, email [admin@tskit.dev](mailto:admin@tskit.dev).
We ask all users to follow our [code of conduct](https://github.com/tskit-dev/.github/blob/main/CODE_OF_CONDUCT.md)
when interacting with the project.
================================================
FILE: LICENSE
================================================
MIT License
Copyright (c) 2018-2019 Tskit Developers
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# tskit
[](https://github.com/tskit-dev/tskit/blob/main/LICENSE)
[](https://github.com/tskit-dev/tskit/graphs/contributors)
[](https://github.com/tskit-dev/tskit/commits/main)
[](https://codecov.io/gh/tskit-dev/tskit)

[Documentation (stable)](https://tskit.dev/tskit/docs/stable/) • [Documentation (latest)](https://tskit.dev/tskit/docs/latest/)
[](https://github.com/tskit-dev/tskit/actions/workflows/docs.yml)[](https://github.com/tskit-dev/tskit/actions/workflows/tests.yml)
The succinct tree sequence (`tskit`) format is an efficient way of representing
the genetic history - sometimes known as an
[Ancestral Recombination Graph or ARG](https://doi.org/10.1093/genetics/iyae100) -
of a set of related DNA sequences. `Tskit` is used
by a number of software libraries and programs (such as
[msprime](https://github.com/tskit-dev/msprime),
[SLiM](https://github.com/MesserLab/SLiM),
[fwdpp](http://molpopgen.github.io/fwdpp/), and
[tsinfer](https://tskit.dev/tsinfer/docs/stable/)) that either simulate or infer
the evolutionary ancestry of genetic sequences.
The `tskit` library provides the underlying functionality used to load, examine, and
manipulate ARGs in the tree sequence format, including efficient access to the
sequence of correlated trees along a genome and general methods to calculate
genetic statistics. `Tskit` often forms part of an installation of other
software packages such as those listed above. Please see the
[documentation](https://tskit.dev/tskit/docs/stable/) for further details, which
includes
[installation instructions](https://tskit.dev/tskit/docs/stable/installation.html).
To get started with tskit, tutorials and other content are at http://tskit.dev. For help
and support from the community you can use
[discussions](https://github.com/tskit-dev/tskit/discussions) here on GitHub, or raise an
issue for a specific bug or feature request.
We warmly welcome contributions from the community. Raise an issue if you have an
idea you'd like to work on, or submit a PR for comments and help.
The base `tskit` library provides both a [Python](https://tskit.dev/tskit/docs/stable/python-api.html)
and [C](https://tskit.dev/tskit/docs/stable/c-api.html) API. A Rust API is provided in the
[tskit-rust](https://github.com/tskit-dev/tskit-rust) repository.
#### Python API
[](https://pypi.org/project/tskit/)
[](https://pypi.org/project/tskit/)
[](https://pypi.org/project/tskit/)
[](https://github.com/psf/black)
Most users of `tskit` will use the python API as it provides a convenient, high-level API
to access, analyse and create tree sequences. Full documentation is
[here](https://tskit.dev/tskit/docs/stable/python-api.html).
#### C API
[](https://en.wikipedia.org/wiki/C99)
The `tskit` C API provides comprehensive, low-level methods for manipulating and
processing tree-sequences. Written to the C99 standard and fully thread-safe, it can be
used with either C or C++. Full documentation is
[here](https://tskit.dev/tskit/docs/stable/c-api.html).
## Installation
```bash
python -m pip install tskit
# or
conda install -c conda-forge tskit
```
================================================
FILE: c/.gitignore
================================================
build
.*.swp
.*.swo
================================================
FILE: c/CHANGELOG.rst
================================================
--------------------
[1.3.2] - 2026-XX-XX
--------------------
In development
- Add ``tsk_json_struct_metadata_get_blob`` function
(:user:`benjeffery`, :pr:`3306`)
--------------------
[1.3.1] - 2026-03-06
--------------------
Maintenance release.
- Update to kastore 2.1.2
- Fix doc typo for file uuid (:pr:`3399`)
- Migrate linting to clang-format 21.1.8 (:pr:`3389`)
- Support compile time setting of debug stream (:pr:`3364`)
--------------------
[1.3.0] - 2025-11-27
--------------------
**Breaking changes**
- ``trees.c`` now depends on ``genotypes.c`` (via ``tskit/genotypes.h``) and must
be built and linked together with it.
(:user:`benjeffery`, :pr:`3324`)
**Features**
- ``tsk_variant_init`` and associated variant decoding methods now
fully support ``TSK_ISOLATED_NOT_MISSING`` not being set for internal nodes.
(:user:`benjeffery`, :pr:`3313`)
- Add ``tsk_treeseq_decode_alignments`` to decode full-length reference-based
sequence alignments for specified nodes over a genomic interval, respecting
``TSK_ISOLATED_NOT_MISSING`` semantics.
(:user:`benjeffery`, :pr:`3324`, :issue:`3319`)
--------------------
[1.2.0] - 2025-09-24
--------------------
**Breaking changes**
- Remove ``tsk_diff_iter_t`` and associated functions.
(:user:`benjeffery`, :pr:`3221`, :issue:`2797`).
- ``tsk_treeseq_init`` now requires that mutation parents in the table collection
are correct and consistent with the topology of the tree at each mutation site.
Returns ``TSK_ERR_BAD_MUTATION_PARENT`` if this is not the case, or
``TSK_ERR_MUTATION_PARENT_AFTER_CHILD`` if the mutations are not in an order
compatible with the correct mutation parent.
(:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).
**Features**
- Add ``TSK_TS_INIT_COMPUTE_MUTATION_PARENTS`` to ``tsk_treeseq_init``
to compute mutation parents from the tree sequence topology.
Note that the mutations must be in the correct order.
(:user:`benjeffery`, :issue:`2757`, :pr:`3212`).
- Add ``TSK_CHECK_MUTATION_PARENTS`` option to ``tsk_table_collection_check_integrity``
to check that mutation parents are consistent with the tree sequence topology.
This option implies ``TSK_CHECK_TREES``.
(:user:`benjeffery`, :issue:`2729`, :issue:`2732`, :pr:`3212`).
- Add the ``TSK_NO_CHECK_INTEGRITY`` option to ``tsk_table_collection_compute_mutation_parents``
to skip the integrity checks that are normally run when computing mutation parents.
This is useful for speeding up the computation of mutation parents when the
tree sequence is certainly known to be valid.
(:user:`benjeffery`, :pr:`3212`).
- Mutations returned by ``tsk_treeseq_get_mutation`` now include pre-computed
``inherited_state`` and ``inherited_state_length`` fields. The inherited state
is computed during tree sequence initialization and represents the state that
existed at the site before each mutation occurred (either the ancestral state
if the mutation is the root mutation or the derived state of the parent mutation).
Note that this breaks ABI compatibility due to the addition of these fields
to the ``tsk_mutation_t`` struct.
(:user:`benjeffery`, :pr:`3277`, :issue:`2631`).
--------------------
[1.1.4] - 2025-03-31
--------------------
**Changes**
- Added the ``TSK_TRACE_ERRORS`` macro to enable tracing of errors in the C library.
This is useful for debugging as errors will print to stderr when set.
(:user:`jeromekelleher`, :pr:`3095`).
--------------------
[1.1.3] - 2024-10-16
--------------------
**Features**
- Add the ``tsk_treeseq_extend_haplotypes`` method that can compress a tree sequence
by extending edges into adjacent trees and thus creating unary nodes in those
trees (:user:`petrelharp`, :user:`hfr1tze`, :user:`avabamf`, :pr:`2651`, :pr:`2938`).
--------------------
[1.1.2] - 2023-05-17
--------------------
**Performance improvements**
- tsk_tree_seek is now much faster at seeking to arbitrary points along
the sequence from the null tree (:user:`molpopgen`, :pr:`2661`).
**Features**
- The struct ``tsk_treeseq_t`` now has the variables ``min_time`` and ``max_time``,
which are the minimum and maximum among the node times and mutation times,
respectively. ``min_time`` and ``max_time`` can be accessed using the functions
``tsk_treeseq_get_min_time`` and ``tsk_treeseq_get_max_time``, respectively.
(:user:`szhan`, :pr:`2612`, :issue:`2271`)
- Add the ``TSK_SIMPLIFY_NO_FILTER_NODES`` option to simplify to allow unreferenced
nodes to be kept in the output (:user:`jeromekelleher`, :user:`hyanwong`,
:issue:`2606`, :pr:`2619`).
- Add the ``TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS`` option to simplify which ensures
no node sample flags are changed to allow calling code to manage sample status.
(:user:`jeromekelleher`, :issue:`2662`, :pr:`2663`).
- Guarantee that unfiltered tables are not written to unnecessarily
during simplify (:user:`jeromekelleher`, :pr:`2619`).
- Add ``x_table_keep_rows`` methods to provide efficient in-place table subsetting
(:user:`jeromekelleher`, :pr:`2700`).
- Add ``tsk_tree_seek_index`` function
--------------------
[1.1.1] - 2022-07-29
--------------------
**Bug fixes**
- Fix segfault in tsk_variant_restricted_copy in tree sequences with large
numbers of alleles or very long alleles
(:user:`jeromekelleher`, :pr:`2437`, :issue:`2429`).
--------------------
[1.1.0] - 2022-07-14
--------------------
**Features**
- Add ``num_children`` to ``tsk_tree_t``, an array which contains counts of the number of child
nodes of each node in the tree. (:user:`GertjanBisschop`, :issue:`2274`, :pr:`2316`)
- Add ``edge`` to ``tsk_tree_t``, an array which contains the ``edge_id`` of the edge encoding
the relationship between the child node and its parent for each (child) node in the tree.
(:user:`GertjanBisschop`, :issue:`2304`, :pr:`2340`)
**Changes**
- Reduce the maximum number of rows in a table by 1. This removes edge cases so that a ``tsk_id_t`` can be
used to count the number of rows. (:user:`benjeffery`, :issue:`2336`, :pr:`2337`)
- Samples are now copied by ``tsk_variant_restricted_copy``. (:user:`benjeffery`, :issue:`2400`, :pr:`2401`)
--------------------
[1.0.0] - 2022-05-24
--------------------
This major release marks the point at which the documented API becomes stable and supported.
**Breaking changes**
- Change the type of genotypes to ``int32_t``, removing the TSK_16_BIT_GENOTYPES flag option.
(:user:`benjeffery`, :issue:`463`, :pr:`2108`)
- ``tsk_variant_t`` now includes its ``tsk_site_t`` rather than pointing to it.
(:user:`benjeffery`, :issue:`2161`, :pr:`2162`)
- Rename ``TSK_TAKE_TABLES`` to ``TSK_TAKE_OWNERSHIP``.
(:user:`benjeffery`, :issue:`2221`, :pr:`2222`)
- ``TSK_DEBUG``, ``TSK_NO_INIT``, ``TSK_NO_CHECK_INTEGRITY`` and ``TSK_TAKE_OWNERSHIP`` have moved to ``core.h``
(:user:`benjeffery`, :issue:`2218`, :pr:`2230`)
- Rename several flags:
- All flags to ``simplify`` for example ``TSK_KEEP_INPUT_ROOTS`` becomes ``TSK_SIMPLIFY_KEEP_INPUT_ROOTS``.
- All flags to ``subset`` for example ``TSK_KEEP_UNREFERENCED`` becomes ``TSK_SUBSET_KEEP_UNREFERENCED``.
- ``TSK_BUILD_INDEXES`` -> ``TSK_TS_INIT_BUILD_INDEXES``
- ``TSK_NO_METADATA`` -> ``TSK_TABLE_NO_METADATA``
- ``TSK_NO_EDGE_METADATA`` -> ``TSK_TC_NO_EDGE_METADATA``
(:user:`benjeffery`, :issue:`1720`, :pr:`2226`, :pr:`2229`, :pr:`2224`)
- Remove the generic ``TSK_ERR_OUT_OF_BOUNDS`` - replacing with specific errors.
Remove ``TSK_ERR_NON_SINGLE_CHAR_MUTATION`` which was unused.
(:user:`benjeffery`, :pr:`2260`)
- Reorder stats API methods to place ``result`` as the last argument. (:user:`benjeffery`, :pr:`2292`, :issue:`2285`)
**Features**
- Make dumping of tables and tree sequences to disk a zero-copy operation.
(:user:`benjeffery`, :issue:`2111`, :pr:`2124`)
- Add ``edge`` attribute to ``mutation_t`` struct and make available in tree sequence.
(:user:`jeromekelleher`, :issue:`685`, :pr:`2279`)
- Reduce peak memory usage in ``tsk_treeseq_simplify``.
(:user:`jeromekelleher`, :issue:`2287`, :pr:`2288`)
----------------------
[0.99.15] - 2021-12-07
----------------------
**Breaking changes**
- The ``tables`` argument to ``tsk_treeseq_init`` is no longer ``const``, to allow for future no-copy tree sequence creation.
(:user:`benjeffery`, :issue:`1718`, :pr:`1719`)
- Additional consistency checks for mutation tables are now run by ``tsk_table_collection_check_integrity``
even when ``TSK_CHECK_MUTATION_ORDERING`` is not passed in. (:user:`petrelharp`, :issue:`1713`, :pr:`1722`)
- ``num_tracked_samples`` and ``num_samples`` in ``tsk_tree_t`` are now typed as ``tsk_size_t``
(:user:`benjeffery`, :issue:`1723`, :pr:`1727`)
- The previously deprecated option ``TSK_SAMPLE_COUNTS`` has been removed. (:user:`benjeffery`, :issue:`1744`, :pr:`1761`).
- Individuals are no longer guaranteed or required to be topologically sorted in a tree sequence.
``tsk_table_collection_sort`` no longer sorts individuals.
(:user:`benjeffery`, :issue:`1774`, :pr:`1789`)
- The ``tsk_tree_t.left_root`` member has been removed. Client code can be updated
most easily by using the equivalent ``tsk_tree_get_left_root`` function. However,
it may be worth considering updating code to use either the standard traversal
functions (which automatically iterate over roots) or to use the ``virtual_root``
member (which may lead to more concise code). (:user:`jeromekelleher`, :issue:`1796`,
:pr:`1862`)
- Rename ``tsk_tree_t.left`` and ``tsk_tree_t.right`` members to
``tsk_tree_t.interval.left`` and ``tsk_tree_t.interval.right`` respectively.
(:user:`jeromekelleher`, :issue:`1686`, :pr:`1913`)
- ``kastore`` is now vendored into this repo instead of being a git submodule. Developers need to run
``git submodule update``. (:user:`jeromekelleher`, :issue:`1687`, :pr:`1973`)
- ``Tree`` arrays such as ``left_sib``, ``right_child`` etc. now have an additional
"virtual root" node at the end. (:user:`jeromekelleher`, :issue:`1691`, :pr:`1704`)
- ``marked`` and ``mark`` have been removed from ``tsk_tree_t``. (:user:`jeromekelleher`, :pr:`1936`)
**Features**
- Add ``tsk_table_collection_individual_topological_sort`` to sort the individuals as this is no longer done by the
default sort. (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)
- The default behaviour for table size growth is now to double the current size of the table,
up to a threshold. To keep the previous behaviour, use (e.g.)
``tsk_edge_table_set_max_rows_increment(tables->edges, 1024)``, which results in adding
space for 1024 additional rows each time we run out of space in the edge table.
(:user:`benjeffery`, :issue:`5`, :pr:`1683`)
- ``tsk_table_collection_check_integrity`` now has a ``TSK_CHECK_MIGRATION_ORDERING`` flag. (:user:`petrelharp`, :pr:`1722`)
- The default behaviour for ragged column growth is now to double the current size of the column,
up to a threshold. To keep the previous behaviour, use (e.g.)
``tsk_node_table_set_max_metadata_length_increment(tables->nodes, 1024)``, which results in adding
space for 1024 additional entries each time we run out of space in the ragged column.
(:user:`benjeffery`, :issue:`1703`, :pr:`1709`)
- Support for compiling the C library on Windows using msys2 (:user:`jeromekelleher`,
:pr:`1742`).
- Add ``time_units`` to ``tsk_table_collection_t`` to describe the units of the time dimension of the
tree sequence. This is then used to generate an error if ``time_units`` is ``uncalibrated`` when
using the branch lengths in statistics. (:user:`benjeffery`, :issue:`1644`, :pr:`1760`)
- Add the ``TSK_LOAD_SKIP_TABLES`` option to load just the top-level information from a
file. Also add the ``TSK_CMP_IGNORE_TABLES`` option to compare only the top-level
information in two table collections. (:user:`clwgg`, :pr:`1882`, :issue:`1854`).
- Add reference sequence.
(:user:`jeromekelleher`, :user:`benjeffery`, :issue:`146`, :pr:`1911`, :pr:`1944`, :pr:`1911`)
- Add the ``TSK_LOAD_SKIP_REFERENCE_SEQUENCE`` option to load a table collection
without the reference sequence. Also add the ``TSK_CMP_IGNORE_REFERENCE_SEQUENCE``
option to compare two table collections without comparing their reference
sequence. (:user:`clwgg`, :pr:`2019`, :issue:`1971`).
- Add a "virtual root" to ``Tree`` arrays such as ``left_sib``, ``right_child`` etc.
The virtual root is appended to each array, has all real roots as its children,
but is not the parent of any node. Simplifies traversal algorithms.
(:user:`jeromekelleher`, :issue:`1691`, :pr:`1704`)
- Add ``num_edges`` to ``tsk_tree_t`` to count the edges that define the topology of
the tree. (:user:`jeromekelleher`, :pr:`1704`)
- Add the ``tsk_tree_get_size_bound`` function which returns an upper bound on the number of nodes reachable from
the roots of a tree. Useful for tree stack allocations (:user:`jeromekelleher`, :pr:`1704`).
- Add ``MetadataSchema.permissive_json`` for an easy way to get the simplest schema.
----------------------
[0.99.14] - 2021-09-03
----------------------
**Breaking changes**
- 64 bits are now used to store the sizes of ragged table columns such as metadata,
allowing them to hold more data. As such ``tsk_size_t`` is now 64 bits wide.
This change is fully backwards and forwards compatible for all tree-sequences whose
ragged column sizes fit into 32 bits. New tree-sequences with
large offset arrays that require 64 bits will fail to load in previous versions with
error ``TSK_ERR_BAD_COLUMN_TYPE``.
(:user:`jeromekelleher`, :issue:`343`, :issue:`1527`, :issue:`1528`, :issue:`1530`,
:issue:`1554`, :issue:`1573`, :issue:`1589`,:issue:`1598`,:issue:`1628`, :pr:`1571`,
:pr:`1579`, :pr:`1585`, :pr:`1590`, :pr:`1602`, :pr:`1618`, :pr:`1620`, :pr:`1652`).
**Features**
- Add ``tsk_X_table_update_row`` methods which allow modifying single rows of tables
(:user:`jeromekelleher`, :issue:`1545`, :pr:`1552`).
----------------------
[0.99.13] - 2021-07-08
----------------------
**Fixes**
- Fix segfault when very large columns overflow
(:user:`bhaller`, :user:`benjeffery`, :issue:`1509`, :pr:`1511`).
----------------------
[0.99.12] - 2021-05-14
----------------------
**Breaking changes**
- Removed ``TSK_NO_BUILD_INDEXES``.
Not building indexes is now the default behaviour of ``tsk_table_collection_dump`` and related functions.
(:user:`molpopgen`, :issue:`1327`, :pr:`1337`).
**Features**
- Add ``tsk_*_table_extend`` methods to append to a table from another
(:user:`benjeffery`, :issue:`1271`, :pr:`1287`).
**Fixes**
----------------------
[0.99.11] - 2021-03-16
----------------------
**Features**
- Add ``parents`` to the individual table to enable recording of pedigrees
(:user:`ivan-krukov`, :user:`benjeffery`, :issue:`852`, :pr:`1125`, :pr:`866`, :pr:`1153`, :pr:`1177`, :pr:`1199`).
- Added a ``tsk_table_collection_canonicalise`` method, that allows checking for equality between
tables that are equivalent up to reordering (:user:`petrelharp`, :user:`mufernando`, :pr:`1108`).
- Removed a previous requirement on ``tsk_table_collection_union``, allowing for unioning of
new information both above and below shared history (:user:`petrelharp`, :user:`mufernando`, :pr:`1108`).
- Support migrations in tsk_table_collection_sort. (:user:`jeromekelleher`,
:issue:`22`, :issue:`117`, :pr:`1131`).
**Breaking changes**
- Method ``tsk_individual_table_add_row`` has extra arguments ``parents`` and ``parents_length``.
- Add an ``options`` argument to ``tsk_table_collection_subset`` (:user:`petrelharp`, :pr:`1108`),
to allow for retaining the order of populations.
- Mutation error codes have changed
**Changes**
- Allow mutations that have the same derived state as their parent mutation.
(:user:`benjeffery`, :issue:`1180`, :pr:`1233`)
- File minor version change to support individual parents
----------------------
[0.99.10] - 2021-01-25
----------------------
Minor bugfix on internal APIs
---------------------
[0.99.9] - 2021-01-22
---------------------
**Features**
- Add ``TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS`` flag to simplify, which allows the user to
keep unary nodes only if they belong to a tabled individual. This is useful for
simplification in forwards simulations (:user:`hyanwong`, :issue:`1113`, :pr:`1119`).
---------------------
[0.99.8] - 2020-11-27
---------------------
**Features**
- Add ``tsk_treeseq_genetic_relatedness`` for calculating genetic relatedness between
pairs of sets of nodes (:user:`brieuclehmann`, :issue:`1021`, :pr:`1023`, :issue:`974`,
:issue:`973`, :pr:`898`).
- Exposed ``tsk_table_collection_set_indexes`` to the API
(:user:`benjeffery`, :issue:`870`, :pr:`921`).
**Breaking changes**
- Added an ``options`` argument to ``tsk_table_collection_equals``
and table equality methods to allow for more flexible equality criteria
(e.g., ignore top-level metadata and schema or provenance tables).
Existing code should add an extra final parameter ``0`` to retain the
current behaviour (:user:`mufernando`, :user:`jeromekelleher`,
:issue:`896`, :pr:`897`, :issue:`913`, :pr:`917`).
- Changed default behaviour of ``tsk_table_collection_clear`` to not clear
provenances and added ``options`` argument to optionally clear provenances
and schemas (:user:`benjeffery`, :issue:`929`, :pr:`1001`).
- Renamed ``ts.trait_regression`` to ``ts.trait_linear_model``.
---------------------
[0.99.7] - 2020-09-29
---------------------
- Added ``TSK_INCLUDE_TERMINAL`` option to ``tsk_diff_iter_init`` to output the last edges
at the end of a tree sequence (:user:`hyanwong`, :issue:`783`, :pr:`787`).
- Added ``tsk_bug_assert`` for assertions that should be compiled into release binaries
(:user:`benjeffery`, :pr:`860`).
---------------------
[0.99.6] - 2020-09-04
---------------------
**Bugfixes**
- :issue:`823` - Fix mutation time error when using
``tsk_table_collection_simplify`` with ``TSK_SIMPLIFY_KEEP_INPUT_ROOTS``
(:user:`petrelharp`, :pr:`823`).
---------------------
[0.99.5] - 2020-08-27
---------------------
**Breaking changes**
- The macro ``TSK_IMPUTE_MISSING_DATA`` is renamed to ``TSK_ISOLATED_NOT_MISSING``
(:user:`benjeffery`, :issue:`716`, :pr:`794`)
**New features**
- Add a ``TSK_SIMPLIFY_KEEP_INPUT_ROOTS`` option to simplify which, if enabled, adds edges
from the MRCAs of samples in the simplified tree sequence back to the roots
in the input tree sequence (:user:`jeromekelleher`, :issue:`775`, :pr:`782`).
**Bugfixes**
- :issue:`777` - Mutations over isolated samples were incorrectly decoded as
missing data. (:user:`jeromekelleher`, :pr:`778`)
- :issue:`776` - Fix a segfault when a partial list of samples
was provided to the ``variants`` iterator. (:user:`jeromekelleher`, :pr:`778`)
---------------------
[0.99.4] - 2020-08-12
---------------------
**Note**
- The ``TSK_VERSION_PATCH`` macro was incorrectly set to ``4`` for 0.99.3, so both
0.99.4 and 0.99.3 have the same value.
**Changes**
- Mutation times can be a mixture of known and unknown as long as for each
individual site they are either all known or all unknown (:user:`benjeffery`, :pr:`761`).
**Bugfixes**
- Fix for including core.h under C++ (:user:`petrelharp`, :pr:`755`).
---------------------
[0.99.3] - 2020-07-27
---------------------
**Breaking changes**
- ``tsk_mutation_table_add_row`` has an extra ``time`` argument. If the time
is unknown ``TSK_UNKNOWN_TIME`` should be passed.
(:user:`benjeffery`, :pr:`672`)
- Change genotypes from unsigned to signed to accommodate missing data
(see :issue:`144` for discussion). This only affects users of the
``tsk_vargen_t`` class. Genotypes are now stored as int8_t and int16_t
types rather than the former unsigned types. The field names in the
genotypes union of the ``tsk_variant_t`` struct returned by ``tsk_vargen_next``
have been renamed to ``i8`` and ``i16`` accordingly; care should be
taken when updating client code to ensure that types are correct. The number
of distinct alleles supported by 8 bit genotypes has therefore dropped
from 255 to 127, with a similar reduction for 16 bit genotypes.
- Change the ``tsk_vargen_init`` method to take an extra parameter ``alleles``.
To keep the current behaviour, set this parameter to NULL.
- Edges can now have metadata. Hence edge methods now take two extra arguments:
metadata and metadata length. The file format has also changed to accommodate this,
but is backwards compatible. Edge metadata can be disabled for a table collection with
the TSK_NO_EDGE_METADATA flag.
(:user:`benjeffery`, :pr:`496`, :pr:`712`)
- Migrations can now have metadata. Hence migration methods now take two extra arguments:
metadata and metadata length. The file format has also changed to accommodate this,
but is backwards compatible.
(:user:`benjeffery`, :pr:`505`)
- The text dump of tables with metadata now includes the metadata schema as a header.
(:user:`benjeffery`, :pr:`493`)
- Bad tree topologies are detected earlier, so that it is no longer possible
to create a tsk_treeseq_t object which contains a parent with contradictory
children on an interval. Previously an error occurred when some operation
building the trees was attempted (:user:`jeromekelleher`, :pr:`709`).
**New features**
- New methods to perform set operations on table collections.
``tsk_table_collection_subset`` subsets and reorders table collections by nodes
(:user:`mufernando`, :user:`petrelharp`, :pr:`663`, :pr:`690`).
``tsk_table_collection_union`` forms the node-wise union of two table collections
(:user:`mufernando`, :user:`petrelharp`, :issue:`381`, :pr:`623`).
- Mutations now have an optional double-precision floating-point ``time`` column.
If not specified, this defaults to a particular NaN value (``TSK_UNKNOWN_TIME``)
indicating that the time is unknown. For a tree sequence to be considered valid
it must meet new criteria for mutation times, see :ref:`sec_mutation_requirements`.
Add ``tsk_table_collection_compute_mutation_times`` and new flag to
``tsk_table_collection_check_integrity``:``TSK_CHECK_MUTATION_TIME``. Table sorting
orders mutations by non-increasing time per-site, which is also a requirement for a
valid tree sequence.
(:user:`benjeffery`, :pr:`672`)
- Add ``metadata`` and ``metadata_schema`` fields to table collection, with accessors on
tree sequence. These store arbitrary bytes and are optional in the file format.
(:user:`benjeffery`, :pr:`641`)
- Add the ``TSK_SIMPLIFY_KEEP_UNARY`` option to simplify (:user:`gtsambos`). See :issue:`1`
and :pr:`143`.
- Add a ``set_root_threshold`` option to tsk_tree_t which allows us to set the
number of samples a node must be an ancestor of to be considered a root
(:pr:`462`).
- Change the semantics of tsk_tree_t so that sample counts are always
computed, and add a new ``TSK_NO_SAMPLE_COUNTS`` option to turn this
off (:pr:`462`).
- Tables with metadata now have an optional ``metadata_schema`` field that can contain
arbitrary bytes. (:user:`benjeffery`, :pr:`493`)
- Tables loaded from a file can now be edited in the same way as any other
table collection (:user:`jeromekelleher`, :issue:`536`, :pr:`530`).
- Support for reading/writing to arbitrary file streams with the loadf/dumpf
variants for tree sequence and table collection load/dump
(:user:`jeromekelleher`, :user:`grahamgower`, :issue:`565`, :pr:`599`).
- Add low-level sorting API and ``TSK_NO_CHECK_INTEGRITY`` flag
(:user:`jeromekelleher`, :pr:`627`, :issue:`626`).
- Add extension of Kendall-Colijn tree distance metric for tree sequences
computed by ``tsk_treeseq_kc_distance``
(:user:`daniel-goldstein`, :pr:`548`)
**Deprecated**
- The ``TSK_SAMPLE_COUNTS`` option is now ignored and will print out a warning
if used (:pr:`462`).
---------------------
[0.99.2] - 2019-03-27
---------------------
Bugfix release. Changes:
- Fix incorrect errors on tbl_collection_dump (#132)
- Catch table overflows (#157)
---------------------
[0.99.1] - 2019-01-24
---------------------
Refinements to the C API as we move towards 1.0.0. Changes:
- Change the ``_tbl_`` abbreviation to ``_table_`` to improve readability.
Hence, we now have, e.g., ``tsk_node_table_t`` etc.
- Change ``tsk_tbl_size_t`` to ``tsk_size_t``.
- Standardise public API to use ``tsk_size_t`` and ``tsk_id_t`` as appropriate.
- Add ``tsk_flags_t`` typedef and consistently use this as the type used to
encode bitwise flags. To avoid confusion, functions now have an ``options``
parameter.
- Rename ``tsk_table_collection_position_t`` to ``tsk_bookmark_t``.
- Rename ``tsk_table_collection_reset_position`` to ``tsk_table_collection_truncate``
and ``tsk_table_collection_record_position`` to ``tsk_table_collection_record_num_rows``.
- Generalise ``tsk_table_collection_sort`` to take a bookmark as start argument.
- Relax restriction that nodes in the ``samples`` argument to simplify must
currently be marked as samples. (https://github.com/tskit-dev/tskit/issues/72)
- Allow ``tsk_table_collection_simplify`` to take a NULL samples argument to
specify "all samples in the current tables".
- Add support for building as a meson subproject.
---------------------
[0.99.0] - 2019-01-14
---------------------
Initial alpha version of the tskit C API tagged. Version 0.99.x
represents the series of releases leading to version 1.0.0 which
will be the first stable release. After 1.0.0, semver rules
regarding API/ABI breakage will apply; however, in the 0.99.x
series arbitrary changes may happen.
--------------------
[0.0.0] - 2019-01-10
--------------------
Initial extraction of tskit code from msprime. Relicense to MIT.
Code copied at hash 29921408661d5fe0b1a82b1ca302a8b87510fd23
================================================
FILE: c/VERSION.txt
================================================
1.3.1
================================================
FILE: c/examples/Makefile
================================================
# Simple Makefile for building examples.
# This will build the examples in the current directory by compiling in the
# full tskit source into each of the examples. This is *not* recommended for
# real projects!
#
# To use, type "make" in this directory. If you have GSL installed you
# should then get two example programs built.
#
# **Note**: This repo uses git submodules, and these must be checked out
# correctly for this makefile to work, e.g.:
#
# $ git clone git@github.com:tskit-dev/tskit.git --recurse-submodules
#
# See the documentation (https://tskit.dev/tskit/docs/stable/c-api.html)
# for more details on how to use the C API, and the tskit build examples
# repo (https://github.com/tskit-dev/tskit-build-examples) for examples
# of how to set up a production-ready build with tskit.
#
# Header search paths: the parent directory and the vendored kastore sources.
CFLAGS=-I../ -I../subprojects/kastore
# Library sources compiled directly into every example binary.
TSKIT_SOURCE=../tskit/*.c ../subprojects/kastore/kastore.c
# Programs built by "make"; each name NAME is built from NAME.c.
targets = api_structure error_handling \
	haploid_wright_fisher streaming \
	tree_iteration tree_traversal \
	take_ownership \
	json_struct_metadata
# Default goal: build every example listed in $(targets).
all: $(targets)
# Static pattern rule: compile the example source ($<) together with the
# library sources into an executable named after the target ($@), linking libm.
$(targets): %: %.c
	${CC} ${CFLAGS} -o $@ $< ${TSKIT_SOURCE} -lm
# Remove the built example binaries.
clean:
	rm -f $(targets)
================================================
FILE: c/examples/api_structure.c
================================================
#include
#include
#include
/*
 * Exit the program with a diagnostic on stderr if ``val`` is negative,
 * i.e. a tskit error code; the message is the source line number followed
 * by the text returned by tsk_strerror().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely in an unbraced if/else.
 * The argument is parenthesised so that expressions can be passed.
 */
#define check_tsk_error(val)                                                            \
    do {                                                                                \
        if ((val) < 0) {                                                                \
            fprintf(stderr, "line %d: %s", __LINE__, tsk_strerror(val));                \
            exit(EXIT_FAILURE);                                                         \
        }                                                                               \
    } while (0)
/*
 * Minimal tour of the table API life cycle: initialise an edge table,
 * append five rows, print the table's state to stdout and free it.
 * Any tskit error terminates the program via check_tsk_error.
 */
int
main(int argc, char **argv)
{
    tsk_edge_table_t edges;
    int row = 0;
    int ret;

    ret = tsk_edge_table_init(&edges, 0);
    check_tsk_error(ret);

    /* Each row has left=0, right=1, parent=row+1, child=row and no metadata. */
    while (row < 5) {
        ret = tsk_edge_table_add_row(&edges, 0, 1, row + 1, row, NULL, 0);
        check_tsk_error(ret);
        row++;
    }

    tsk_edge_table_print_state(&edges, stdout);
    tsk_edge_table_free(&edges);
    return EXIT_SUCCESS;
}
================================================
FILE: c/examples/cpp_sorting_example.cpp
================================================
#include
#include
#include
#include
#include
#include
#include
#include
// Translate a tskit return code into a C++ exception. A code of zero is
// success and returns normally; any other value throws std::runtime_error
// whose what() text is the tsk_strerror() description of the code.
static void
handle_tskit_return_code(int code)
{
    if (code == 0) {
        return;
    }
    std::ostringstream message;
    message << tsk_strerror(code);
    throw std::runtime_error(message.str());
}
// One edge row bundled with the time of its parent node, so that edges can
// be ordered by (time, parent, child, left) without consulting the node
// table during the sort. The member order matters: sort_edges fills this
// struct with positional aggregate initialisation.
struct edge_plus_time {
    // Time of the parent node, read from the node table.
    double time;
    // Node IDs copied from the edge row.
    tsk_id_t parent, child;
    // Coordinates of the edge copied from the edge row.
    double left, right;
};
// Custom edge sorting callback for tsk_table_sorter_t.
//
// Copies every edge (together with its parent's time) into a temporary
// vector, sorts it by (parent time, parent, child, left) using std::sort
// and writes the sorted rows back into the edge table columns.
//
// Parameters:
//   sorter - the table sorter whose ``tables`` member holds the edge and
//            node tables to operate on.
//   start  - first edge row to sort; only 0 is supported.
//
// Returns 0 on success.
// Throws std::invalid_argument if the edge table has any metadata or if
// ``start`` is nonzero.
int
sort_edges(tsk_table_sorter_t *sorter, tsk_size_t start)
{
    if (sorter->tables->edges.metadata_length != 0) {
        throw std::invalid_argument(
            "the sorter does not currently handle edge metadata");
    }
    if (start != 0) {
        throw std::invalid_argument("the sorter requires start==0");
    }

    auto edges = &sorter->tables->edges;
    auto nodes = &sorter->tables->nodes;

    // The element type and the cast target must be spelled out explicitly:
    // neither can be deduced from an empty declaration.
    std::vector<edge_plus_time> temp;
    temp.reserve(static_cast<std::size_t>(edges->num_rows));

    for (tsk_size_t i = 0; i < edges->num_rows; ++i) {
        temp.push_back(edge_plus_time{ nodes->time[edges->parent[i]], edges->parent[i],
            edges->child[i], edges->left[i], edges->right[i] });
    }

    // Lexicographic order on (time, parent, child, left).
    std::sort(begin(temp), end(temp),
        [](const edge_plus_time &lhs, const edge_plus_time &rhs) {
            if (lhs.time == rhs.time) {
                if (lhs.parent == rhs.parent) {
                    if (lhs.child == rhs.child) {
                        return lhs.left < rhs.left;
                    }
                    return lhs.child < rhs.child;
                }
                return lhs.parent < rhs.parent;
            }
            return lhs.time < rhs.time;
        });

    // Write the sorted rows back over the original columns.
    for (std::size_t i = 0; i < temp.size(); ++i) {
        edges->left[i] = temp[i].left;
        edges->right[i] = temp[i].right;
        edges->parent[i] = temp[i].parent;
        edges->child[i] = temp[i].child;
    }
    return 0;
}
int
main(int argc, char **argv)
{
if (argc != 3) {
std::cerr << "Usage: " << argv[0] << " input.trees output.trees\n";
std::exit(0);
}
const char *infile = argv[1];
const char *outfile = argv[2];
tsk_table_collection_t tables;
auto ret = tsk_table_collection_load(&tables, infile, 0);
handle_tskit_return_code(ret);
tsk_table_sorter_t sorter;
ret = tsk_table_sorter_init(&sorter, &tables, 0);
handle_tskit_return_code(ret);
sorter.sort_edges = sort_edges;
try {
ret = tsk_table_sorter_run(&sorter, NULL);
} catch (std::exception &e) {
std::cerr << e.what() << '\n';
std::exit(1);
}
handle_tskit_return_code(ret);
ret = tsk_table_collection_dump(&tables, outfile, 0);
handle_tskit_return_code(ret);
ret = tsk_table_collection_free(&tables);
handle_tskit_return_code(ret);
}
================================================
FILE: c/examples/error_handling.c
================================================
#include
#include
#include
/*
 * Load the tree sequence file named by argv[1] and print its node and edge
 * counts, demonstrating the basic tskit error handling pattern: a negative
 * return value is an error code that tsk_strerror() turns into a message.
 *
 * Returns EXIT_SUCCESS on success; exits with EXIT_FAILURE on a usage
 * error or if the file cannot be loaded.
 */
int
main(int argc, char **argv)
{
    int ret;
    tsk_treeseq_t ts;

    if (argc != 2) {
        /* Name the expected argument so the message is actionable. */
        fprintf(stderr, "usage: %s <tree sequence file>\n",
            argc > 0 ? argv[0] : "error_handling");
        exit(EXIT_FAILURE);
    }
    ret = tsk_treeseq_load(&ts, argv[1], 0);
    if (ret < 0) {
        /* Error condition. Free and exit */
        tsk_treeseq_free(&ts);
        fprintf(stderr, "%s\n", tsk_strerror(ret));
        exit(EXIT_FAILURE);
    }
    printf("Loaded tree sequence with %lld nodes and %lld edges from %s\n",
        (long long) tsk_treeseq_get_num_nodes(&ts),
        (long long) tsk_treeseq_get_num_edges(&ts), argv[1]);
    tsk_treeseq_free(&ts);
    return EXIT_SUCCESS;
}
================================================
FILE: c/examples/haploid_wright_fisher.c
================================================
#include
#include
#include
#include
#include
/*
 * Terminate the program through errx() if ``val`` is negative, i.e. a
 * tskit error code; the message is the source line number followed by the
 * text returned by tsk_strerror().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely in an unbraced if/else.
 * The argument is parenthesised so that expressions can be passed.
 */
#define check_tsk_error(val)                                                            \
    do {                                                                                \
        if ((val) < 0) {                                                                \
            errx(EXIT_FAILURE, "line %d: %s", __LINE__, tsk_strerror(val));             \
        }                                                                               \
    } while (0)
/*
 * Forward-time haploid Wright-Fisher simulation recorded into ``tables``.
 *
 * N nodes are created at time T, then for each time t = T-1 .. 0 a new
 * generation of N nodes is added. Every child picks two parents uniformly
 * from the previous generation and a breakpoint in (0, 1); it inherits
 * [0, breakpoint) from the first parent and [breakpoint, 1) from the second.
 * Whenever t is a multiple of ``simplify_interval`` the tables are sorted
 * and simplified with the current generation as the samples.
 *
 * Parameters:
 *   tables            - initialised table collection to append to.
 *   N                 - number of nodes per generation.
 *   T                 - number of generations to simulate.
 *   simplify_interval - simplify every this many generations; must be nonzero.
 *
 * Exits the program on allocation failure, on a zero simplify_interval, or
 * on any tskit error.
 */
void
simulate(tsk_table_collection_t *tables, int N, int T, int simplify_interval)
{
    tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;
    double breakpoint;
    int ret, j, t, b;

    /* Checked at run time rather than with assert(): "t % 0" is undefined
     * behaviour and assert() is compiled out when NDEBUG is defined. */
    if (simplify_interval == 0) {
        errx(EXIT_FAILURE, "simplify-interval must be nonzero");
    }

    /* Do the size arithmetic in size_t so that 2 * N cannot overflow int. */
    buffer = malloc(2 * (size_t) N * sizeof(*buffer));
    if (buffer == NULL) {
        errx(EXIT_FAILURE, "Out of memory");
    }
    tables->sequence_length = 1.0;

    parents = buffer;
    for (j = 0; j < N; j++) {
        parents[j]
            = tsk_node_table_add_row(&tables->nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);
        check_tsk_error(parents[j]);
    }

    b = 0;
    for (t = T - 1; t >= 0; t--) {
        /* Alternate between using the first and last N values in the buffer */
        parents = buffer + (b * N);
        b = (b + 1) % 2;
        children = buffer + (b * N);
        for (j = 0; j < N; j++) {
            child = tsk_node_table_add_row(
                &tables->nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);
            check_tsk_error(child);
            /* NOTE: the use of rand() is discouraged for
             * research code and proper random number generator
             * libraries should be preferred.
             */
            left_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];
            right_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];
            do {
                breakpoint = rand() / (1. + RAND_MAX);
            } while (breakpoint == 0); /* tiny proba of breakpoint being 0 */
            ret = tsk_edge_table_add_row(
                &tables->edges, 0, breakpoint, left_parent, child, NULL, 0);
            check_tsk_error(ret);
            ret = tsk_edge_table_add_row(
                &tables->edges, breakpoint, 1, right_parent, child, NULL, 0);
            check_tsk_error(ret);
            children[j] = child;
        }
        if (t % simplify_interval == 0) {
            printf("Simplify at generation %lld: (%lld nodes %lld edges)", (long long) t,
                (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows);
            /* Note: Edges must be sorted for simplify to work, and we use a brute force
             * approach of sorting each time here for simplicity. This is inefficient. */
            ret = tsk_table_collection_sort(tables, NULL, 0);
            check_tsk_error(ret);
            ret = tsk_table_collection_simplify(tables, children, N, 0, NULL);
            check_tsk_error(ret);
            printf(" -> (%lld nodes %lld edges)\n", (long long) tables->nodes.num_rows,
                (long long) tables->edges.num_rows);
            /* The current generation is renumbered to IDs 0 .. N-1 here. */
            for (j = 0; j < N; j++) {
                children[j] = j;
            }
        }
    }
    free(buffer);
}
/*
 * Command line driver: "N T simplify-interval output-file seed".
 * Runs the simulation, then sorts and indexes the tables and writes them
 * to the output file.
 */
int
main(int argc, char **argv)
{
    tsk_table_collection_t tables;
    int population_size, generations, interval;
    unsigned seed;
    int ret;

    if (argc != 6) {
        errx(EXIT_FAILURE, "usage: N T simplify-interval output-file seed");
    }
    population_size = atoi(argv[1]);
    generations = atoi(argv[2]);
    interval = atoi(argv[3]);
    seed = (unsigned) atoi(argv[5]);

    ret = tsk_table_collection_init(&tables, 0);
    check_tsk_error(ret);

    srand(seed);
    simulate(&tables, population_size, generations, interval);

    /* Sort and index so that the result can be opened as a tree sequence */
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    check_tsk_error(ret);
    ret = tsk_table_collection_build_index(&tables, 0);
    check_tsk_error(ret);
    ret = tsk_table_collection_dump(&tables, argv[4], 0);
    check_tsk_error(ret);

    tsk_table_collection_free(&tables);
    return 0;
}
================================================
FILE: c/examples/json_struct_metadata.c
================================================
#include
#include
#include
#include
#include
// these are properties of the ``json+struct`` codec, documented in tskit
// Header layout as read and written by the functions in this file:
//   bytes 0-3   magic bytes
//   byte  4     codec version
//   bytes 5-12  JSON payload length, little-endian uint64
//   bytes 13-20 binary payload length, little-endian uint64
// giving a fixed header size of 21 bytes.
#define JSON_STRUCT_HEADER_SIZE 21
// Magic bytes expected at the very start of an encoded buffer.
const uint8_t json_struct_codec_magic[4] = { 'J', 'B', 'L', 'B' };
// The only codec version accepted by the decoder in this example.
const uint8_t json_struct_codec_version = 1;
// Read eight bytes starting at ``p`` as a little-endian uint64_t:
// p[0] is the least significant byte and p[7] the most significant.
static uint64_t
load_u64_le(const uint8_t *p)
{
    uint64_t result = 0;
    int idx;

    // Fold from the most significant byte down, shifting in one byte per step.
    for (idx = 7; idx >= 0; idx--) {
        result = (result << 8) | (uint64_t) p[idx];
    }
    return result;
}
// Store ``value`` into the eight bytes starting at ``dest`` in little-endian
// order: dest[0] receives the least significant byte.
static void
set_u64_le(uint8_t *dest, uint64_t value)
{
    unsigned idx;

    // Emit the low byte, then shift the next byte into the low position.
    for (idx = 0; idx < 8; idx++) {
        dest[idx] = (uint8_t) (value & 0xFF);
        value >>= 8;
    }
}
// Split a ``json+struct`` codec buffer into its JSON and binary payloads.
//
// The header (magic bytes, version, two little-endian lengths) is validated,
// the total size implied by the header is checked against
// ``metadata_length``, and the padding bytes between the two payloads are
// required to be zero. Any violation terminates the program via errx().
//
// On return ``*json`` / ``*binary`` point INTO ``metadata`` (no copy is
// made) and ``*json_length`` / ``*binary_length`` hold the payload sizes.
void
json_struct_codec_get_components(uint8_t *metadata, tsk_size_t metadata_length,
    uint8_t **json, tsk_size_t *json_length, uint8_t **binary, tsk_size_t *binary_length)
{
    uint64_t json_size, binary_size, pad_size, expected_size, k;
    const uint8_t *pad;

    // check the structure of the codec header and the sizes it specifies
    if (!(metadata != NULL && json != NULL && json_length != NULL && binary != NULL
            && binary_length != NULL)) {
        errx(EXIT_FAILURE, "bad parameter value.");
    }
    if (metadata_length < JSON_STRUCT_HEADER_SIZE) {
        errx(EXIT_FAILURE, "metadata truncated.");
    }
    if (memcmp(metadata, json_struct_codec_magic, sizeof(json_struct_codec_magic)) != 0) {
        errx(EXIT_FAILURE, "bad magic bytes.");
    }
    if (metadata[4] != json_struct_codec_version) {
        errx(EXIT_FAILURE, "bad version number.");
    }

    json_size = load_u64_le(metadata + 5);
    binary_size = load_u64_le(metadata + 13);

    // accumulate header + json + padding + binary, refusing any sum that
    // would wrap around a uint64_t
    if (json_size > UINT64_MAX - (uint64_t) JSON_STRUCT_HEADER_SIZE) {
        errx(EXIT_FAILURE, "invalid length.");
    }
    expected_size = (uint64_t) JSON_STRUCT_HEADER_SIZE + json_size;

    // padding rounds the end of the JSON payload up to a multiple of 8
    pad_size = (8 - (expected_size & 0x07)) % 8;
    if (pad_size > UINT64_MAX - expected_size) {
        errx(EXIT_FAILURE, "invalid length.");
    }
    expected_size += pad_size;

    if (binary_size > UINT64_MAX - expected_size) {
        errx(EXIT_FAILURE, "invalid length.");
    }
    expected_size += binary_size;

    if ((uint64_t) metadata_length != expected_size) {
        errx(EXIT_FAILURE, "unexpected size.");
    }

    // only inspected once the total size is known to match the buffer
    pad = metadata + JSON_STRUCT_HEADER_SIZE + json_size;
    for (k = 0; k < pad_size; k++) {
        if (pad[k] != 0) {
            errx(EXIT_FAILURE, "padding bytes are nonzero.");
        }
    }

    // the structure of the codec data seems valid; return components
    *json = metadata + JSON_STRUCT_HEADER_SIZE;
    *json_length = (tsk_size_t) json_size;
    *binary = metadata + JSON_STRUCT_HEADER_SIZE + json_size + pad_size;
    *binary_length = (tsk_size_t) binary_size;
}
// malloc and return a data buffer for the `json+struct` codec
// that contains the given components.
//
// Layout written: magic bytes, version byte, JSON length and binary length
// (both little-endian uint64), the JSON payload, zero padding so that the
// binary payload starts at a multiple of 8 bytes from the start of the
// buffer, then the binary payload.
//
// Parameters:
//   json, json_length     - JSON payload; ``json`` may be NULL only if the
//                           length is 0.
//   binary, binary_length - binary payload; ``binary`` may be NULL only if
//                           the length is 0.
//   buffer, buffer_length - outputs; receive the new buffer and its size.
//                           The caller owns the buffer and must free() it.
//
// Terminates the program via errx() on a bad parameter, if the total size
// cannot be represented, or if allocation fails.
void
json_struct_codec_create_buffer(const uint8_t *json, tsk_size_t json_length,
    const uint8_t *binary, tsk_size_t binary_length, uint8_t **buffer,
    tsk_size_t *buffer_length)
{
    const uint64_t header_length = JSON_STRUCT_HEADER_SIZE;
    uint64_t padding_length, total_length;
    uint8_t *bytes;

    // validate parameters in the same way as the decoding function does
    if (buffer == NULL || buffer_length == NULL || (json == NULL && json_length > 0)
        || (binary == NULL && binary_length > 0)) {
        errx(EXIT_FAILURE, "bad parameter value.");
    }

    // figure out the total length of the codec's data, refusing any sum
    // that would wrap around, and allocate the buffer for it
    if ((uint64_t) json_length > UINT64_MAX - header_length) {
        errx(EXIT_FAILURE, "invalid length.");
    }
    total_length = header_length + (uint64_t) json_length;
    padding_length = (8 - (total_length & 0x07)) % 8;
    if (padding_length > UINT64_MAX - total_length) {
        errx(EXIT_FAILURE, "invalid length.");
    }
    total_length += padding_length;
    if ((uint64_t) binary_length > UINT64_MAX - total_length) {
        errx(EXIT_FAILURE, "invalid length.");
    }
    total_length += (uint64_t) binary_length;
    // malloc takes a size_t, which may be narrower than 64 bits
    if (total_length > (uint64_t) SIZE_MAX) {
        errx(EXIT_FAILURE, "invalid length.");
    }

    bytes = malloc((size_t) total_length);
    if (!bytes) {
        errx(EXIT_FAILURE, "memory for buffer could not be allocated.");
    }

    // then set up the bytes for the codec header
    memcpy(bytes, json_struct_codec_magic, 4);
    bytes[4] = json_struct_codec_version;
    set_u64_le(bytes + 5, (uint64_t) json_length);
    set_u64_le(bytes + 13, (uint64_t) binary_length);

    // copy in the JSON and binary data, separated by the padding bytes; the goal of the
    // padding bytes is to ensure that the binary data is 8-byte-aligned relative to the
    // start of the buffer. memcpy must not be given a NULL source, even for
    // a zero-length copy, so empty payloads are skipped.
    if (json_length > 0) {
        memcpy(bytes + header_length, json, (size_t) json_length);
    }
    memset(bytes + header_length + json_length, 0, (size_t) padding_length);
    if (binary_length > 0) {
        memcpy(bytes + header_length + json_length + padding_length, binary,
            (size_t) binary_length);
    }

    // return the buffer and its length; the caller takes ownership of the buffer
    *buffer = bytes;
    *buffer_length = (tsk_size_t) total_length;
}
// Round-trip demonstration: encode a small JSON payload and a small binary
// payload into a ``json+struct`` buffer, decode the buffer again and print
// both recovered payloads.
int
main(int argc, char **argv)
{
    // the JSON payload deliberately has no trailing NUL; the codec stores
    // an explicit length instead
    const char json_payload[] = { '{', '"', 'a', '"', ':', '1', '}' };
    const uint8_t binary_payload[] = { 0x01, 0x02, 0x03, 0x04 };
    uint8_t *encoded;
    tsk_size_t encoded_length;
    uint8_t *json_out, *binary_out;
    tsk_size_t json_out_length, binary_out_length;
    tsk_size_t pos;

    // encode both payloads into a freshly allocated buffer
    json_struct_codec_create_buffer((const uint8_t *) json_payload, sizeof(json_payload),
        binary_payload, sizeof(binary_payload), &encoded, &encoded_length);

    // decode; the output pointers refer to memory inside ``encoded``
    json_struct_codec_get_components(encoded, encoded_length, &json_out,
        &json_out_length, &binary_out, &binary_out_length);

    // the JSON is printed with an explicit precision because it is not
    // NUL-terminated
    printf("JSON: %.*s\n", (int) json_out_length, json_out);
    printf("Binary data:");
    pos = 0;
    while (pos < binary_out_length) {
        printf(" %#04x", binary_out[pos]);
        pos++;
    }
    printf("\n");

    free(encoded);
    return EXIT_SUCCESS;
}
================================================
FILE: c/examples/multichrom_wright_fisher.c
================================================
#include
#include
#include
#include
#include
#include
#include
/*
 * Terminate the program through errx() if ``val`` is negative, i.e. a
 * tskit error code; the message is the source line number followed by the
 * text returned by tsk_strerror().
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used safely in an unbraced if/else.
 * The argument is parenthesised so that expressions can be passed.
 */
#define check_tsk_error(val)                                                            \
    do {                                                                                \
        if ((val) < 0) {                                                                \
            errx(EXIT_FAILURE, "line %d: %s\n", __LINE__, tsk_strerror(val));           \
        }                                                                               \
    } while (0)
/*
 * Initialise one table collection per chromosome. Only tcs[0] keeps the
 * node table created by the initialiser; the node tables of all other
 * collections are freed straight away.
 */
static void
init_tables(tsk_table_collection_t *tcs, int num_chroms)
{
    tsk_table_collection_t *tc;
    int chrom, ret;

    for (chrom = 0; chrom < num_chroms; chrom++) {
        tc = &tcs[chrom];
        ret = tsk_table_collection_init(tc, 0);
        check_tsk_error(ret);
        if (chrom != 0) {
            tsk_node_table_free(&tc->nodes);
        }
    }
}
/*
 * Free every per-chromosome table collection. For all collections except
 * the first, the node table struct is zeroed before the collection is
 * freed, so that node table columns are not freed more than once.
 */
static void
free_tables(tsk_table_collection_t *tcs, int num_chroms)
{
    tsk_table_collection_t *tc;
    int chrom;

    for (chrom = 0; chrom < num_chroms; chrom++) {
        tc = &tcs[chrom];
        if (chrom != 0) {
            /* Must not double free node table columns. */
            memset(&tc->nodes, 0, sizeof(tc->nodes));
        }
        tsk_table_collection_free(tc);
    }
}
/*
 * Merge the per-chromosome edge tables into tcs[0]: append all edges from
 * the other collections, sort, squash adjacent edges, sort again and build
 * the index.
 */
static void
join_tables(tsk_table_collection_t *tcs, int num_chroms)
{
    tsk_table_collection_t *combined = &tcs[0];
    int chrom, ret;

    chrom = 1;
    while (chrom < num_chroms) {
        ret = tsk_edge_table_extend(
            &combined->edges, &tcs[chrom].edges, tcs[chrom].edges.num_rows, NULL, 0);
        check_tsk_error(ret);
        chrom++;
    }

    /* Get all the squashable edges next to each other */
    ret = tsk_table_collection_sort(combined, NULL, 0);
    check_tsk_error(ret);
    ret = tsk_edge_table_squash(&combined->edges);
    check_tsk_error(ret);

    /* We need to sort again after squash */
    ret = tsk_table_collection_sort(combined, NULL, 0);
    check_tsk_error(ret);
    ret = tsk_table_collection_build_index(combined, 0);
    check_tsk_error(ret);
}
/* Argument bundle handed to simplify_chunk, one per worker thread. */
struct chunk_work {
    /* Index of the chunk (chromosome); used only in the progress message. */
    int chunk;
    /* Table collection to sort and simplify. */
    tsk_table_collection_t *tc;
    /* Node IDs passed to simplify as the samples; the same array is given
     * to every worker. */
    int *samples;
    /* Number of entries in ``samples``. */
    int N;
};
void *
simplify_chunk(void *arg)
{
int ret;
struct chunk_work *work = (struct chunk_work *) arg;
tsk_size_t edges_before = work->tc->edges.num_rows;
ret = tsk_table_collection_sort(work->tc, NULL, 0);
check_tsk_error(ret);
ret = tsk_table_collection_simplify(work->tc, work->samples, work->N,
TSK_SIMPLIFY_NO_FILTER_NODES | TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS, NULL);
check_tsk_error(ret);
/* NOTE: this printf makes helgrind complain */
printf("\tchunk %d: %lld -> %lld\n", work->chunk, (long long) edges_before,
(long long) work->tc->edges.num_rows);
return NULL;
}
/*
 * Sort and simplify every chromosome's table collection, one pthread per
 * chromosome, and wait for all of them to finish.
 *
 * Before any thread is started, the node table struct of tcs[0] is copied
 * into every other collection, so that all collections refer to the same
 * node data.
 */
void
sort_and_simplify_all(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N)
{
    struct chunk_work work[num_chroms];
    pthread_t threads[num_chroms];
    struct chunk_work *job;
    int chrom, ret;

    /* Share the node table; must be complete before the first thread runs. */
    for (chrom = 1; chrom < num_chroms; chrom++) {
        tcs[chrom].nodes = tcs[0].nodes;
    }

    /* Launch one worker per chromosome. */
    for (chrom = 0; chrom < num_chroms; chrom++) {
        job = &work[chrom];
        job->chunk = chrom;
        job->tc = &tcs[chrom];
        job->samples = samples;
        job->N = N;
        ret = pthread_create(&threads[chrom], NULL, simplify_chunk, (void *) job);
        if (ret != 0) {
            errx(EXIT_FAILURE, "Pthread create failed");
        }
    }

    /* Wait for every worker to complete. */
    for (chrom = 0; chrom < num_chroms; chrom++) {
        ret = pthread_join(threads[chrom], NULL);
        if (ret != 0) {
            errx(EXIT_FAILURE, "Pthread join failed");
        }
    }
}
/*
 * Simplify all per-chromosome table collections against the same sample
 * set and then compact the shared node table.
 *
 * Steps:
 *   1. sort and simplify every collection (in parallel);
 *   2. mark the samples and every node referenced by a remaining edge as
 *      "keep", and reset the sample flags so that only ``samples`` carry
 *      TSK_NODE_IS_SAMPLE;
 *   3. drop all other rows from the node table of tcs[0];
 *   4. rewrite the node IDs in every edge table and in ``samples`` using
 *      the resulting ID map, and check the integrity of each collection.
 *
 * Parameters:
 *   tcs        - the per-chromosome table collections; tcs[0] holds the
 *                node table.
 *   num_chroms - number of entries in ``tcs``.
 *   samples    - N node IDs; updated in place to the new IDs.
 *   N          - number of samples.
 *
 * Exits the program on allocation failure or any tskit error.
 */
void
simplify_tables(tsk_table_collection_t *tcs, int num_chroms, int *samples, int N)
{
    int j, ret;
    /* Row counters use tsk_size_t to match the table sizes they index. */
    tsk_size_t u, k, num_edges;
    const tsk_size_t num_nodes = tcs[0].nodes.num_rows;
    tsk_bool_t *keep_nodes = malloc(num_nodes * sizeof(*keep_nodes));
    tsk_id_t *node_id_map = malloc(num_nodes * sizeof(*node_id_map));
    tsk_id_t *edge_child, *edge_parent;

    if (keep_nodes == NULL || node_id_map == NULL) {
        errx(EXIT_FAILURE, "Out of memory");
    }

    printf("Simplify %lld nodes\n", (long long) tcs[0].nodes.num_rows);
    sort_and_simplify_all(tcs, num_chroms, samples, N);

    for (u = 0; u < num_nodes; u++) {
        keep_nodes[u] = false;
        tcs[0].nodes.flags[u] &= (~TSK_NODE_IS_SAMPLE);
    }
    for (j = 0; j < N; j++) {
        keep_nodes[samples[j]] = true;
        tcs[0].nodes.flags[samples[j]] |= TSK_NODE_IS_SAMPLE;
    }

    for (j = 0; j < num_chroms; j++) {
        edge_child = tcs[j].edges.child;
        edge_parent = tcs[j].edges.parent;
        num_edges = tcs[j].edges.num_rows;
        for (k = 0; k < num_edges; k++) {
            keep_nodes[edge_child[k]] = true;
            keep_nodes[edge_parent[k]] = true;
        }
    }

    /* A failure here would leave node_id_map unusable for the remapping
     * below, so the return code must not be ignored. */
    ret = tsk_node_table_keep_rows(&tcs[0].nodes, keep_nodes, 0, node_id_map);
    check_tsk_error(ret);
    printf("\tdone: %lld nodes\n", (long long) tcs[0].nodes.num_rows);

    /* Remap node references */
    for (j = 0; j < num_chroms; j++) {
        edge_child = tcs[j].edges.child;
        edge_parent = tcs[j].edges.parent;
        num_edges = tcs[j].edges.num_rows;
        for (k = 0; k < num_edges; k++) {
            edge_child[k] = node_id_map[edge_child[k]];
            edge_parent[k] = node_id_map[edge_parent[k]];
        }
        ret = tsk_table_collection_check_integrity(&tcs[j], 0);
        check_tsk_error(ret);
    }
    for (j = 0; j < N; j++) {
        samples[j] = node_id_map[samples[j]];
    }

    free(keep_nodes);
    free(node_id_map);
}
/*
 * Forward-time Wright-Fisher simulation with ``num_chroms`` chromosomes,
 * each recorded in its own table collection. All nodes are added to the
 * node table of tcs[0]; chromosome k occupies the interval [k, k + 1) and
 * its edges go into tcs[k].
 *
 * N nodes are created at time T, then for each time t = T-1 .. 0 a new
 * generation of N nodes is added. Every child picks two parents uniformly
 * from the previous generation and, with probability 1/2, which of them
 * contributes the left-hand part of every chromosome; each chromosome gets
 * its own breakpoint. Whenever t is a multiple of ``simplify_interval`` all
 * collections are simplified. Finally the last generation is flagged as
 * samples.
 *
 * Parameters:
 *   tcs               - initialised table collections, one per chromosome.
 *   num_chroms        - number of entries in ``tcs``.
 *   N                 - number of nodes per generation.
 *   T                 - number of generations to simulate.
 *   simplify_interval - simplify every this many generations; must be nonzero.
 *
 * Exits the program on allocation failure, on a zero simplify_interval, or
 * on any tskit error.
 */
void
simulate(
    tsk_table_collection_t *tcs, int num_chroms, int N, int T, int simplify_interval)
{
    tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;
    bool left_is_first;
    double chunk_left, chunk_right;
    int ret, j, t, b, k;

    /* Checked at run time rather than with assert(): "t % 0" is undefined
     * behaviour and assert() is compiled out when NDEBUG is defined. */
    if (simplify_interval == 0) {
        errx(EXIT_FAILURE, "simplify-interval must be nonzero");
    }

    /* Do the size arithmetic in size_t so that 2 * N cannot overflow int. */
    buffer = malloc(2 * (size_t) N * sizeof(*buffer));
    if (buffer == NULL) {
        errx(EXIT_FAILURE, "Out of memory");
    }
    for (k = 0; k < num_chroms; k++) {
        tcs[k].sequence_length = num_chroms;
    }

    parents = buffer;
    /* If no generation is simulated (T <= 0) the founders are the final
     * generation; this keeps ``children`` defined for the flag loop below. */
    children = buffer;
    for (j = 0; j < N; j++) {
        parents[j]
            = tsk_node_table_add_row(&tcs[0].nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);
        check_tsk_error(parents[j]);
    }

    b = 0;
    for (t = T - 1; t >= 0; t--) {
        /* Alternate between using the first and last N values in the buffer */
        parents = buffer + (b * N);
        b = (b + 1) % 2;
        children = buffer + (b * N);
        for (j = 0; j < N; j++) {
            child = tsk_node_table_add_row(
                &tcs[0].nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);
            check_tsk_error(child);
            /* NOTE: the use of rand() is discouraged for
             * research code and proper random number generator
             * libraries should be preferred.
             */
            left_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];
            right_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];
            /* rand() returns an integer in [0, RAND_MAX]; it has to be
             * scaled to [0, 1) before comparing with 0.5, otherwise the
             * test is true only when rand() returns exactly 0. */
            left_is_first = rand() / (1. + RAND_MAX) < 0.5;
            chunk_left = 0.0;
            for (k = 0; k < num_chroms; k++) {
                chunk_right = chunk_left + rand() / (1. + RAND_MAX);
                /* a very tiny chance that right and left are equal */
                if (chunk_right > chunk_left) {
                    ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_left, chunk_right,
                        left_is_first ? left_parent : right_parent, child, NULL, 0);
                    check_tsk_error(ret);
                }
                /* From here on chunk_left is the right end of chromosome k. */
                chunk_left += 1.0;
                if (chunk_right < chunk_left) {
                    ret = tsk_edge_table_add_row(&tcs[k].edges, chunk_right, chunk_left,
                        left_is_first ? right_parent : left_parent, child, NULL, 0);
                    check_tsk_error(ret);
                }
            }
            children[j] = child;
        }
        if (t % simplify_interval == 0) {
            simplify_tables(tcs, num_chroms, children, N);
        }
    }

    /* Set the sample flags for final generation */
    for (j = 0; j < N; j++) {
        tcs[0].nodes.flags[children[j]] = TSK_NODE_IS_SAMPLE;
    }
    free(buffer);
}
/*
 * Entry point. Arguments: N T simplify-interval output seed num-chroms.
 *
 * Creates one table collection per chromosome, runs the simulation, joins
 * the per-chromosome tables and dumps tcs[0] to the output path.
 */
int
main(int argc, char **argv)
{
    int ret;
    int num_chroms;

    if (argc != 7) {
        errx(EXIT_FAILURE, "usage: N T simplify-interval output seed num-chroms");
    }
    num_chroms = atoi(argv[6]);
    /* The value sizes a variable length array below, for which a
     * non-positive length is undefined behaviour; tcs[0] is also always used. */
    if (num_chroms < 1) {
        errx(EXIT_FAILURE, "num-chroms must be a positive integer");
    }
    tsk_table_collection_t tcs[num_chroms];

    srand((unsigned) atoi(argv[5]));
    init_tables(tcs, num_chroms);
    simulate(tcs, num_chroms, atoi(argv[1]), atoi(argv[2]), atoi(argv[3]));
    join_tables(tcs, num_chroms);
    ret = tsk_table_collection_dump(&tcs[0], argv[4], 0);
    check_tsk_error(ret);
    free_tables(tcs, num_chroms);
    return 0;
}
================================================
FILE: c/examples/multichrom_wright_fisher_singlethreaded.c
================================================
#include
#include
#include
#include
#include
#include
/* Terminate the program with the tskit error message if ``val`` is a negative
 * error code. Wrapped in do/while(0) so that the macro behaves as a single
 * statement (safe inside an unbraced if/else). errx() appends the newline
 * itself, so the format string must not end with one. */
#define check_tsk_error(val)                                                        \
    do {                                                                            \
        if ((val) < 0) {                                                            \
            errx(EXIT_FAILURE, "line %d: %s", __LINE__, tsk_strerror(val));         \
        }                                                                           \
    } while (0)
/*
 * Run a forwards-time Wright-Fisher style simulation of N individuals for T
 * generations over ``num_chroms`` chromosomes, all recorded in the single
 * table collection ``tables``.
 *
 * Each chromosome k occupies the interval [k, k + 1). For every child a
 * breakpoint is drawn inside each chromosome; one parent contributes the
 * segment to the left of the breakpoint and the other the segment to the
 * right. Whenever ``t % simplify_interval == 0`` the tables are sorted and
 * simplified with the current generation as samples.
 *
 * ``simplify_interval`` must be non-zero. Errors terminate the program.
 */
void
simulate(
    tsk_table_collection_t *tables, int num_chroms, int N, int T, int simplify_interval)
{
    tsk_id_t *buffer, *parents, *children, child, left_parent, right_parent;
    bool left_is_first;
    double chunk_left, chunk_right;
    int ret, j, t, b, k;

    assert(simplify_interval != 0); // leads to division by zero

    buffer = malloc(2 * (size_t) N * sizeof(*buffer));
    if (buffer == NULL) {
        errx(EXIT_FAILURE, "Out of memory");
    }
    tables->sequence_length = num_chroms;

    parents = buffer;
    /* If no generation is simulated (T <= 0) the founders are the final
     * generation, so make sure ``children`` always points at valid IDs. */
    children = buffer;
    for (j = 0; j < N; j++) {
        parents[j]
            = tsk_node_table_add_row(&tables->nodes, 0, T, TSK_NULL, TSK_NULL, NULL, 0);
        check_tsk_error(parents[j]);
    }

    b = 0;
    for (t = T - 1; t >= 0; t--) {
        /* Alternate between using the first and last N values in the buffer */
        parents = buffer + (b * N);
        b = (b + 1) % 2;
        children = buffer + (b * N);
        for (j = 0; j < N; j++) {
            child = tsk_node_table_add_row(
                &tables->nodes, 0, t, TSK_NULL, TSK_NULL, NULL, 0);
            check_tsk_error(child);
            /* NOTE: the use of rand() is discouraged for
             * research code and proper random number generator
             * libraries should be preferred.
             */
            left_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];
            right_parent = parents[(size_t) ((rand() / (1. + RAND_MAX)) * N)];
            /* rand() yields an integer in [0, RAND_MAX]; scale it to [0, 1)
             * before comparing with 0.5 so that this is a fair coin flip. */
            left_is_first = (rand() / (1. + RAND_MAX)) < 0.5;
            chunk_left = 0.0;
            for (k = 0; k < num_chroms; k++) {
                chunk_right = chunk_left + rand() / (1. + RAND_MAX);
                /* a very tiny chance that right and left are equal */
                if (chunk_right > chunk_left) {
                    ret = tsk_edge_table_add_row(&tables->edges, chunk_left, chunk_right,
                        left_is_first ? left_parent : right_parent, child, NULL, 0);
                    check_tsk_error(ret);
                }
                /* Advance to the start of the next chromosome, which is also
                 * the right end of the remaining segment of this one. */
                chunk_left += 1.0;
                if (chunk_right < chunk_left) {
                    ret = tsk_edge_table_add_row(&tables->edges, chunk_right, chunk_left,
                        left_is_first ? right_parent : left_parent, child, NULL, 0);
                    check_tsk_error(ret);
                }
            }
            children[j] = child;
        }
        if (t % simplify_interval == 0) {
            printf("Simplify at generation %lld: (%lld nodes %lld edges)", (long long) t,
                (long long) tables->nodes.num_rows, (long long) tables->edges.num_rows);
            /* Note: Edges must be sorted for simplify to work, and we use a brute force
             * approach of sorting each time here for simplicity. This is inefficient. */
            ret = tsk_table_collection_sort(tables, NULL, 0);
            check_tsk_error(ret);
            ret = tsk_table_collection_simplify(tables, children, N, 0, NULL);
            check_tsk_error(ret);
            printf(" -> (%lld nodes %lld edges)\n", (long long) tables->nodes.num_rows,
                (long long) tables->edges.num_rows);
            /* The current generation is renumbered 0..N-1 here, matching the
             * order in which the samples were passed to simplify. */
            for (j = 0; j < N; j++) {
                children[j] = j;
            }
        }
    }

    /* Set the sample flags for final generation */
    for (j = 0; j < N; j++) {
        tables->nodes.flags[children[j]] = TSK_NODE_IS_SAMPLE;
    }
    free(buffer);
}
/*
 * Entry point. Arguments: N T simplify-interval output seed num-chroms.
 *
 * Simulates into a single table collection, then sorts, indexes and dumps it
 * to the output path.
 */
int
main(int argc, char **argv)
{
    tsk_table_collection_t tables;
    int ret;
    int pop_size, generations, interval, chroms;
    const char *out_path;

    if (argc != 7) {
        errx(EXIT_FAILURE, "usage: N T simplify-interval output seed num-chroms");
    }

    ret = tsk_table_collection_init(&tables, 0);
    check_tsk_error(ret);

    srand((unsigned) atoi(argv[5]));
    pop_size = atoi(argv[1]);
    generations = atoi(argv[2]);
    interval = atoi(argv[3]);
    out_path = argv[4];
    chroms = atoi(argv[6]);
    simulate(&tables, chroms, pop_size, generations, interval);

    /* Sort and index so that the result can be opened as a tree sequence */
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    check_tsk_error(ret);
    ret = tsk_table_collection_build_index(&tables, 0);
    check_tsk_error(ret);
    ret = tsk_table_collection_dump(&tables, out_path, 0);
    check_tsk_error(ret);

    tsk_table_collection_free(&tables);
    return 0;
}
================================================
FILE: c/examples/streaming.c
================================================
#include
#include
#include
/* Print the tskit error message and exit if ``val`` is a negative error code.
 * Wrapped in do/while(0) so that the macro behaves as a single statement
 * (safe inside an unbraced if/else). */
#define check_tsk_error(val)                                                        \
    do {                                                                            \
        if ((val) < 0) {                                                            \
            fprintf(stderr, "Error: line %d: %s\n", __LINE__, tsk_strerror(val));   \
            exit(EXIT_FAILURE);                                                     \
        }                                                                           \
    } while (0)
/*
 * Read table collections from stdin one after another until end of file,
 * report the number of mutations in each on stderr, drop all mutations and
 * write the result to stdout.
 */
int
main(int argc, char **argv)
{
    tsk_table_collection_t tables;
    int status;
    int index;

    status = tsk_table_collection_init(&tables, 0);
    check_tsk_error(status);

    for (index = 0;; index++) {
        /* TSK_NO_INIT: the collection was initialised once, above. */
        status = tsk_table_collection_loadf(&tables, stdin, TSK_NO_INIT);
        if (status == TSK_ERR_EOF) {
            break;
        }
        check_tsk_error(status);

        fprintf(stderr, "Tree sequence %d had %lld mutations\n", index,
            (long long) tables.mutations.num_rows);

        status = tsk_mutation_table_truncate(&tables.mutations, 0);
        check_tsk_error(status);
        status = tsk_table_collection_dumpf(&tables, stdout, 0);
        check_tsk_error(status);
    }

    tsk_table_collection_free(&tables);
    return EXIT_SUCCESS;
}
================================================
FILE: c/examples/take_ownership.c
================================================
#include
#include
#include
#include
/* Terminate the program with the tskit error message if ``val`` is a negative
 * error code. Wrapped in do/while(0) so that the macro behaves as a single
 * statement (safe inside an unbraced if/else). */
#define check_tsk_error(val)                                                        \
    do {                                                                            \
        if ((val) < 0) {                                                            \
            errx(EXIT_FAILURE, "line %d: %s", __LINE__, tsk_strerror(val));         \
        }                                                                           \
    } while (0)
/*
 * Demonstrates handing a heap-allocated table collection over to a tree
 * sequence with TSK_TAKE_OWNERSHIP, after which only the tree sequence
 * needs to be freed.
 */
int
main(int argc, char **argv)
{
    tsk_table_collection_t *tables;
    tsk_treeseq_t treeseq;
    int rv;

    tables = malloc(sizeof(*tables));
    if (tables == NULL) {
        errx(EXIT_FAILURE, "Out of memory");
    }
    rv = tsk_table_collection_init(tables, 0);
    check_tsk_error(rv);

    /* NOTE: you must set sequence length AFTER initialization */
    tables->sequence_length = 1.0;

    /* Do your regular table operations */
    rv = tsk_node_table_add_row(&tables->nodes, 0, 0.0, -1, -1, NULL, 0);
    check_tsk_error(rv);

    /* Initialize the tree sequence, transferring all responsibility
     * for the table collection's memory management
     */
    rv = tsk_treeseq_init(
        &treeseq, tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TAKE_OWNERSHIP);
    check_tsk_error(rv);

    /* WARNING: calling tsk_table_collection_free is now a memory error! */
    tsk_treeseq_free(&treeseq);
    return 0;
}
================================================
FILE: c/examples/tree_iteration.c
================================================
#include
#include
#include
#include
/* Terminate the program with the tskit error message if ``val`` is a negative
 * error code. Wrapped in do/while(0) so that the macro behaves as a single
 * statement (safe inside an unbraced if/else). */
#define check_tsk_error(val)                                                        \
    do {                                                                            \
        if ((val) < 0) {                                                            \
            errx(EXIT_FAILURE, "line %d: %s", __LINE__, tsk_strerror(val));         \
        }                                                                           \
    } while (0)
/*
 * Load the tree sequence named by the single command line argument and walk
 * its trees first forwards and then backwards, printing the number of roots
 * of each tree.
 */
int
main(int argc, char **argv)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_tree_t tree;

    if (argc != 2) {
        /* Tell the user which argument is expected. */
        errx(EXIT_FAILURE, "usage: <tree sequence file>");
    }
    ret = tsk_treeseq_load(&ts, argv[1], 0);
    check_tsk_error(ret);
    ret = tsk_tree_init(&tree, &ts, 0);
    check_tsk_error(ret);

    printf("Iterate forwards\n");
    for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) {
        printf("\ttree %lld has %lld roots\n", (long long) tree.index,
            (long long) tsk_tree_get_num_roots(&tree));
    }
    /* The loop ends on any value other than TSK_TREE_OK; negative values are
     * errors. */
    check_tsk_error(ret);

    printf("Iterate backwards\n");
    for (ret = tsk_tree_last(&tree); ret == TSK_TREE_OK; ret = tsk_tree_prev(&tree)) {
        printf("\ttree %lld has %lld roots\n", (long long) tree.index,
            (long long) tsk_tree_get_num_roots(&tree));
    }
    check_tsk_error(ret);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
    return 0;
}
================================================
FILE: c/examples/tree_traversal.c
================================================
#include
#include
#include
#include
/* Terminate the program with the tskit error message if ``val`` is a negative
 * error code. Wrapped in do/while(0) so that the macro behaves as a single
 * statement (safe inside an unbraced if/else). */
#define check_tsk_error(val)                                                        \
    do {                                                                            \
        if ((val) < 0) {                                                            \
            errx(EXIT_FAILURE, "line %d: %s", __LINE__, tsk_strerror(val));         \
        }                                                                           \
    } while (0)
/* Print the nodes of ``tree`` in preorder and then in postorder, using the
 * library traversal functions and a scratch array sized by
 * tsk_tree_get_size_bound(). */
static void
traverse_standard(const tsk_tree_t *tree)
{
    tsk_id_t *order = malloc(tsk_tree_get_size_bound(tree) * sizeof(*order));
    tsk_size_t count, i;
    int err;

    if (order == NULL) {
        errx(EXIT_FAILURE, "Out of memory");
    }

    err = tsk_tree_preorder(tree, order, &count);
    check_tsk_error(err);
    i = 0;
    while (i < count) {
        printf("Visit preorder %lld\n", (long long) order[i]);
        i++;
    }

    /* The same scratch array is reused for the second traversal. */
    err = tsk_tree_postorder(tree, order, &count);
    check_tsk_error(err);
    i = 0;
    while (i < count) {
        printf("Visit postorder %lld\n", (long long) order[i]);
        i++;
    }

    free(order);
}
/* Recursively print node ``u`` and its descendants, children visited from
 * left to right; one indentation unit is printed per level of ``depth``. */
static void
_traverse(const tsk_tree_t *tree, tsk_id_t u, int depth)
{
    int level = 0;
    tsk_id_t kid;

    while (level < depth) {
        printf(" ");
        level++;
    }
    printf("Visit recursive %lld\n", (long long) u);

    kid = tree->left_child[u];
    while (kid != TSK_NULL) {
        _traverse(tree, kid, depth + 1);
        kid = tree->right_sib[kid];
    }
}
static void
traverse_recursive(const tsk_tree_t *tree)
{
_traverse(tree, tree->virtual_root, -1);
}
/* Iterative traversal from the virtual root using an explicit stack sized by
 * tsk_tree_get_size_bound(). */
static void
traverse_stack(const tsk_tree_t *tree)
{
    tsk_id_t *pending = malloc(tsk_tree_get_size_bound(tree) * sizeof(*pending));
    tsk_id_t node, kid;
    int depth = 0; /* number of entries currently on the stack */

    if (pending == NULL) {
        errx(EXIT_FAILURE, "Out of memory");
    }

    pending[depth++] = tree->virtual_root;
    while (depth > 0) {
        node = pending[--depth];
        printf("Visit stack %lld\n", (long long) node);
        /* Put nodes on the stack right-to-left, so we visit in left-to-right */
        kid = tree->right_child[node];
        while (kid != TSK_NULL) {
            pending[depth++] = kid;
            kid = tree->left_sib[kid];
        }
    }

    free(pending);
}
static void
traverse_upwards(const tsk_tree_t *tree)
{
const tsk_id_t *samples = tsk_treeseq_get_samples(tree->tree_sequence);
tsk_size_t num_samples = tsk_treeseq_get_num_samples(tree->tree_sequence);
tsk_size_t j;
tsk_id_t u;
for (j = 0; j < num_samples; j++) {
u = samples[j];
while (u != TSK_NULL) {
printf("Visit upwards: %lld\n", (long long) u);
u = tree->parent[u];
}
}
}
/*
 * Load the tree sequence named by the single command line argument, seek to
 * its first tree and run each of the traversal examples on it.
 */
int
main(int argc, char **argv)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_tree_t tree;

    if (argc != 2) {
        /* Tell the user which argument is expected. */
        errx(EXIT_FAILURE, "usage: <tree sequence file>");
    }
    ret = tsk_treeseq_load(&ts, argv[1], 0);
    check_tsk_error(ret);
    ret = tsk_tree_init(&tree, &ts, 0);
    check_tsk_error(ret);
    ret = tsk_tree_first(&tree);
    check_tsk_error(ret);

    traverse_standard(&tree);
    traverse_recursive(&tree);
    traverse_stack(&tree);
    traverse_upwards(&tree);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
    return 0;
}
================================================
FILE: c/meson.build
================================================
# Build definition for the tskit C library. The project version is read
# from VERSION.txt.
project('tskit', ['c', 'cpp'],
version: files('VERSION.txt'),
default_options: ['c_std=c99', 'cpp_std=c++11']
)
# Debug build types additionally define TSK_TRACE_ERRORS.
debug_c_args = []
if get_option('buildtype').startswith('debug')
debug_c_args = ['-DTSK_TRACE_ERRORS']
endif
# kastore is pulled in from the bundled subproject.
kastore_proj = subproject('kastore')
kastore_dep = kastore_proj.get_variable('kastore_dep')
kastore_inc = kastore_proj.get_variable('kastore_inc')
# The maths library is linked when it can be found (required: false).
cc = meson.get_compiler('c')
m_dep = cc.find_library('m', required: false)
lib_deps = [m_dep, kastore_dep]
# Strict warning flags; applied to the shared library and the C test
# executables declared below, not to the static library.
extra_c_args = [
'-Wall', '-Wextra', '-Werror', '-Wpedantic', '-W',
'-Wmissing-prototypes', '-Wstrict-prototypes',
'-Wconversion', '-Wshadow', '-Wpointer-arith', '-Wcast-align',
'-Wcast-qual', '-Wwrite-strings', '-Wnested-externs',
'-fshort-enums', '-fno-common'] + debug_c_args
lib_sources = [
'tskit/core.c', 'tskit/tables.c', 'tskit/trees.c',
'tskit/genotypes.c', 'tskit/stats.c', 'tskit/convert.c', 'tskit/haplotype_matching.c']
lib_headers = [
'tskit/core.h', 'tskit/tables.h', 'tskit/trees.h',
'tskit/genotypes.h', 'tskit/stats.h', 'tskit/convert.h', 'tskit/haplotype_matching.h']
# Subprojects use the static library for simplicity.
tskit_inc = [kastore_inc, include_directories(['.'])]
tskit_lib = static_library('tskit',
sources: lib_sources, dependencies: lib_deps)
# Dependency object: include directories plus the static library to link with.
tskit_dep = declare_dependency(include_directories:tskit_inc, link_with: tskit_lib)
# Everything below (install targets, tests, examples) is only defined when
# tskit is the top-level project, not when it is used as a subproject.
if not meson.is_subproject()
# Shared library install target.
shared_library('tskit',
sources: lib_sources, dependencies: lib_deps, c_args: extra_c_args, install: true)
install_headers('tskit.h')
install_headers(lib_headers, subdir: 'tskit')
# The test suites depend on CUnit.
cunit_dep = dependency('cunit')
# We don't specify extra C args here as CUnit won't pass the checks.
test_lib = static_library('testlib',
sources: ['tests/testlib.c'], dependencies: [cunit_dep, kastore_dep, tskit_dep])
# test_core additionally receives the project version as the
# MESON_PROJECT_VERSION preprocessor definition.
test_core = executable('test_core',
sources: ['tests/test_core.c'],
link_with: [tskit_lib, test_lib],
c_args: extra_c_args+['-DMESON_PROJECT_VERSION="@0@"'.format(meson.project_version())],
dependencies: kastore_dep,
)
test('core', test_core)
test_tables = executable('test_tables',
sources: ['tests/test_tables.c'],
link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)
test('tables', test_tables)
test_trees = executable('test_trees',
sources: ['tests/test_trees.c'],
link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)
test('trees', test_trees)
test_genotypes = executable('test_genotypes',
sources: ['tests/test_genotypes.c'],
link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)
test('genotypes', test_genotypes)
test_convert = executable('test_convert',
sources: ['tests/test_convert.c'],
link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)
test('convert', test_convert)
test_stats = executable('test_stats',
sources: ['tests/test_stats.c'],
link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)
test('stats', test_stats)
test_haplotype_matching = executable('test_haplotype_matching',
sources: ['tests/test_haplotype_matching.c'],
link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)
test('haplotype_matching', test_haplotype_matching)
test_file_format = executable('test_file_format',
sources: ['tests/test_file_format.c'],
link_with: [tskit_lib, test_lib], c_args: extra_c_args, dependencies: kastore_dep)
test('file_format', test_file_format)
# The C++ test links only against the tskit static library and is built
# without the extra C warning flags.
test_minimal_cpp = executable('test_minimal_cpp',
sources: ['tests/test_minimal_cpp.cpp'], link_with: [tskit_lib],
dependencies: kastore_dep)
test('minimal_cpp', test_minimal_cpp)
if get_option('build_examples')
# These example programs use less portable features,
# and we don't want to always compile them. To skip them use, e.g.,
# meson build -Dbuild_examples=false
executable('api_structure',
sources: ['examples/api_structure.c'],
link_with: [tskit_lib], dependencies: lib_deps)
executable('error_handling',
sources: ['examples/error_handling.c'],
link_with: [tskit_lib], dependencies: lib_deps)
executable('tree_iteration',
sources: ['examples/tree_iteration.c'],
link_with: [tskit_lib], dependencies: lib_deps)
executable('tree_traversal',
sources: ['examples/tree_traversal.c'],
link_with: [tskit_lib], dependencies: lib_deps)
executable('streaming',
sources: ['examples/streaming.c'],
link_with: [tskit_lib], dependencies: lib_deps)
executable('cpp_sorting_example',
sources: ['examples/cpp_sorting_example.cpp'],
link_with: [tskit_lib], dependencies: lib_deps)
executable('haploid_wright_fisher',
sources: ['examples/haploid_wright_fisher.c'],
link_with: [tskit_lib], dependencies: lib_deps)
executable('multichrom_wright_fisher_singlethreaded',
sources: ['examples/multichrom_wright_fisher_singlethreaded.c'],
link_with: [tskit_lib], dependencies: lib_deps)
executable('json_struct_metadata',
sources: ['examples/json_struct_metadata.c'],
link_with: [tskit_lib], dependencies: lib_deps)
# The multi-chromosome example is the only one that needs the threads
# dependency.
thread_dep = dependency('threads')
executable('multichrom_wright_fisher',
sources: ['examples/multichrom_wright_fisher.c'],
link_with: [tskit_lib], dependencies: [m_dep, kastore_dep, thread_dep])
endif
endif
================================================
FILE: c/meson_options.txt
================================================
# Whether to compile the example programs; enabled by default. Disable with
# -Dbuild_examples=false.
option('build_examples', type : 'boolean', value : true)
================================================
FILE: c/subprojects/kastore/README.md
================================================
This directory is an abbreviated version of the kastore distribution source.
All files should be updated when we are updating to a new kastore version.
================================================
FILE: c/subprojects/kastore/VERSION.txt
================================================
2.1.2
================================================
FILE: c/subprojects/kastore/kastore.c
================================================
#include
#include
#include
#include
#include
#include
#include "kastore.h"
/* Private flag recording that kastore_open() opened the file itself, so that
 * kastore_close() is responsible for closing it.
 *
 * Note: we use 1<<14 to keep this flag at the end of the flag space,
 * and this is the highest bit that can be guaranteed to fit into
 * an int. */
#define OWN_FILE (1 << 14)
/*
 * Return a human readable description of the kastore error code ``err``.
 *
 * For KAS_ERR_IO the message comes from strerror(errno) when errno is set.
 * Codes that are not recognised yield "Unknown error".
 */
const char *
kas_strerror(int err)
{
    const char *ret = "Unknown error";

    switch (err) {
        case KAS_ERR_GENERIC:
            ret = "Generic error; please file a bug report";
            break;
        case KAS_ERR_IO:
            if (errno != 0) {
                ret = strerror(errno);
            } else {
                ret = "I/O error with errno unset. Please file a bug report";
            }
            break;
        case KAS_ERR_BAD_MODE:
            ret = "Bad open mode; must be \"r\", \"w\", or \"a\"";
            break;
        case KAS_ERR_BAD_FLAGS:
            /* Adjacent literals are concatenated verbatim: each piece must
             * carry its own separating space. */
            ret = "Unknown flags specified. Only (KAS_GET_TAKES_OWNERSHIP and/or "
                  "KAS_READ_ALL) or 0 can be specified "
                  "for open, and KAS_BORROWS_ARRAY or 0 for put";
            break;
        case KAS_ERR_NO_MEMORY:
            ret = "Out of memory";
            break;
        case KAS_ERR_BAD_FILE_FORMAT:
            ret = "File not in KAS format";
            break;
        case KAS_ERR_VERSION_TOO_OLD:
            ret = "File format version is too old. Please upgrade using "
                  "'kas upgrade <filename>'";
            break;
        case KAS_ERR_VERSION_TOO_NEW:
            ret = "File format version is too new. Please upgrade your "
                  "kastore library version";
            break;
        case KAS_ERR_BAD_TYPE:
            ret = "Unknown data type";
            break;
        case KAS_ERR_DUPLICATE_KEY:
            ret = "Duplicate key provided";
            break;
        case KAS_ERR_KEY_NOT_FOUND:
            ret = "Key not found";
            break;
        case KAS_ERR_EMPTY_KEY:
            ret = "Keys cannot be empty";
            break;
        case KAS_ERR_ILLEGAL_OPERATION:
            ret = "Cannot perform the requested operation in the current mode";
            break;
        case KAS_ERR_TYPE_MISMATCH:
            ret = "Mismatch between requested and stored types for array";
            break;
        case KAS_ERR_EOF:
            ret = "End of file";
            break;
    }
    return ret;
}
/* Return the library version as a (major, minor, patch) triple built from the
 * KAS_VERSION_* constants. */
kas_version_t
kas_version(void)
{
    kas_version_t current;

    current.patch = KAS_VERSION_PATCH;
    current.minor = KAS_VERSION_MINOR;
    current.major = KAS_VERSION_MAJOR;
    return current;
}
/* Return the size in bytes of one element of the array type code ``type``.
 * ``type`` must lie in [0, KAS_NUM_TYPES). */
static size_t
type_size(int type)
{
    /* Built once rather than on every call. */
    static const size_t type_size_map[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };

    /* A negative code would index before the start of the table. */
    assert(type >= 0 && type < KAS_NUM_TYPES);
    return type_size_map[type];
}
/* Ordering function for qsort()/bsearch(): compares the keys of two items
 * byte-wise over their common prefix and, if that is equal, orders the
 * shorter key first. */
static int
compare_items(const void *a, const void *b)
{
    const kaitem_t *lhs = (const kaitem_t *) a;
    const kaitem_t *rhs = (const kaitem_t *) b;
    size_t common = lhs->key_len;
    int order;

    if (rhs->key_len < common) {
        common = rhs->key_len;
    }
    order = memcmp(lhs->key, rhs->key, common);
    if (order != 0) {
        return order;
    }
    if (lhs->key_len > rhs->key_len) {
        return 1;
    }
    if (lhs->key_len < rhs->key_len) {
        return -1;
    }
    return 0;
}
/* Classify a failed read. A read can fail because the file ended
 * unexpectedly or because of a genuine I/O error; hitting end of file (or
 * having errno unset) is reported as a file format error, anything else as
 * an I/O error.
 */
static int KAS_WARN_UNUSED
kastore_get_read_io_error(kastore_t *self)
{
    if (feof(self->file) || errno == 0) {
        return KAS_ERR_BAD_FILE_FORMAT;
    }
    return KAS_ERR_IO;
}
/* Write the fixed-size file header: magic bytes at offset 0, major and minor
 * file version at 8 and 10, item count at 12 and total file size at 16. All
 * remaining header bytes are written as zero. Returns 0 or KAS_ERR_IO. */
static int KAS_WARN_UNUSED
kastore_write_header(kastore_t *self)
{
    char header[KAS_HEADER_SIZE];
    const uint16_t major = KAS_FILE_VERSION_MAJOR;
    const uint16_t minor = KAS_FILE_VERSION_MINOR;
    const uint32_t item_count = (uint32_t) self->num_items;
    const uint64_t total_size = (uint64_t) self->file_size;

    /* Zero first so that the reserved tail of the header is well defined. */
    memset(header, 0, sizeof(header));
    memcpy(header, KAS_MAGIC, 8);
    memcpy(header + 8, &major, 2);
    memcpy(header + 10, &minor, 2);
    memcpy(header + 12, &item_count, 4);
    memcpy(header + 16, &total_size, 8);

    if (fwrite(header, KAS_HEADER_SIZE, 1, self->file) != 1) {
        return KAS_ERR_IO;
    }
    return 0;
}
/* Read and validate the file header, filling in self->file_version,
 * self->num_items and self->file_size.
 *
 * Returns KAS_ERR_EOF when nothing at all could be read because the stream is
 * at end of file, a read error for a short header, KAS_ERR_BAD_FILE_FORMAT
 * for a wrong magic or a declared file size smaller than the header, and a
 * version error when the major file version differs from the supported one. */
static int KAS_WARN_UNUSED
kastore_read_header(kastore_t *self)
{
    char header[KAS_HEADER_SIZE];
    uint16_t major, minor;
    uint32_t item_count;
    uint64_t total_size;
    size_t nread;

    nread = fread(header, 1, KAS_HEADER_SIZE, self->file);
    if (nread == 0 && feof(self->file)) {
        return KAS_ERR_EOF;
    }
    if (nread != KAS_HEADER_SIZE) {
        return kastore_get_read_io_error(self);
    }
    if (strncmp(header, KAS_MAGIC, 8) != 0) {
        return KAS_ERR_BAD_FILE_FORMAT;
    }

    memcpy(&major, header + 8, 2);
    memcpy(&minor, header + 10, 2);
    memcpy(&item_count, header + 12, 4);
    memcpy(&total_size, header + 16, 8);

    /* The version is recorded even when it turns out to be unsupported. */
    self->file_version[0] = (int) major;
    self->file_version[1] = (int) minor;
    if (self->file_version[0] < KAS_FILE_VERSION_MAJOR) {
        return KAS_ERR_VERSION_TOO_OLD;
    }
    if (self->file_version[0] > KAS_FILE_VERSION_MAJOR) {
        return KAS_ERR_VERSION_TOO_NEW;
    }

    self->num_items = item_count;
    self->file_size = (size_t) total_size;
    if (self->file_size < KAS_HEADER_SIZE) {
        return KAS_ERR_BAD_FILE_FORMAT;
    }
    return 0;
}
/* Assign file positions to every key and array and record the resulting
 * total in self->file_size. Keys are laid out back to back directly after
 * the descriptors; each array then starts at the next multiple of
 * KAS_ARRAY_ALIGN. */
static void
kastore_pack_items(kastore_t *self)
{
    size_t i, misalign;
    size_t pos = KAS_HEADER_SIZE + self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;
    kaitem_t *item;

    /* Keys first ... */
    for (i = 0; i < self->num_items; i++) {
        item = &self->items[i];
        item->key_start = pos;
        pos += item->key_len;
    }
    /* ... then the arrays, each rounded up to the alignment boundary. */
    for (i = 0; i < self->num_items; i++) {
        item = &self->items[i];
        misalign = pos % KAS_ARRAY_ALIGN;
        if (misalign != 0) {
            pos += KAS_ARRAY_ALIGN - misalign;
        }
        item->array_start = pos;
        pos += item->array_len * type_size(item->type);
    }
    self->file_size = pos;
}
/* Write one fixed-size descriptor per item: the type code in byte 0 and the
 * key start, key length, array start and array length as 64 bit values at
 * offsets 8, 16, 24 and 32. All other descriptor bytes are written as zero.
 * Returns 0 or KAS_ERR_IO. */
static int KAS_WARN_UNUSED
kastore_write_descriptors(kastore_t *self)
{
    char descriptor[KAS_ITEM_DESCRIPTOR_SIZE];
    const kaitem_t *item;
    uint8_t type_code;
    uint64_t fields[4];
    size_t i;

    for (i = 0; i < self->num_items; i++) {
        item = &self->items[i];
        type_code = (uint8_t) item->type;
        fields[0] = (uint64_t) item->key_start;
        fields[1] = (uint64_t) item->key_len;
        fields[2] = (uint64_t) item->array_start;
        fields[3] = (uint64_t) item->array_len;

        memset(descriptor, 0, KAS_ITEM_DESCRIPTOR_SIZE);
        memcpy(descriptor, &type_code, 1);
        /* Bytes 1-7 and everything after the four fields stay reserved. */
        memcpy(descriptor + 8, &fields[0], 8);
        memcpy(descriptor + 16, &fields[1], 8);
        memcpy(descriptor + 24, &fields[2], 8);
        memcpy(descriptor + 32, &fields[3], 8);

        if (fwrite(descriptor, sizeof(descriptor), 1, self->file) != 1) {
            return KAS_ERR_IO;
        }
    }
    return 0;
}
/* Read the item descriptors that follow the header, fill in self->items and
 * verify that keys and arrays are packed exactly as kastore_pack_items()
 * would lay them out.
 *
 * All offsets and lengths come from the file and are therefore untrusted;
 * every bounds check is written so that it cannot be defeated by unsigned
 * wrap-around.
 *
 * Returns 0 on success, KAS_ERR_BAD_TYPE for an unknown type code,
 * KAS_ERR_NO_MEMORY, a read error, or KAS_ERR_BAD_FILE_FORMAT for any
 * inconsistency. */
static int KAS_WARN_UNUSED
kastore_read_descriptors(kastore_t *self)
{
    int ret = KAS_ERR_BAD_FILE_FORMAT;
    size_t j;
    uint8_t type;
    uint64_t key_start, key_len, array_start, array_len;
    const uint64_t file_size = (uint64_t) self->file_size;
    char *descriptor;
    size_t descriptor_offset, offset, remainder, padding, size, count;
    char *read_buffer = NULL;

    /* Equivalent to "header + descriptors fit in the file", but without
     * computing a product that could overflow. */
    if (self->file_size < KAS_HEADER_SIZE
        || self->num_items
               > (self->file_size - KAS_HEADER_SIZE) / KAS_ITEM_DESCRIPTOR_SIZE) {
        goto out;
    }
    size = self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;
    read_buffer = (char *) malloc(size);
    if (read_buffer == NULL) {
        ret = KAS_ERR_NO_MEMORY;
        goto out;
    }
    count = fread(read_buffer, size, 1, self->file);
    if (count == 0) {
        ret = kastore_get_read_io_error(self);
        goto out;
    }

    descriptor_offset = 0;
    for (j = 0; j < self->num_items; j++) {
        descriptor = read_buffer + descriptor_offset;
        descriptor_offset += KAS_ITEM_DESCRIPTOR_SIZE;
        memcpy(&type, descriptor, 1);
        memcpy(&key_start, descriptor + 8, 8);
        memcpy(&key_len, descriptor + 16, 8);
        memcpy(&array_start, descriptor + 24, 8);
        memcpy(&array_len, descriptor + 32, 8);

        if (type >= KAS_NUM_TYPES) {
            ret = KAS_ERR_BAD_TYPE;
            goto out;
        }
        self->items[j].type = (int) type;
        /* key_start + key_len <= file_size, without wrapping. */
        if (key_start > file_size || key_len > file_size - key_start) {
            goto out;
        }
        self->items[j].key_start = (size_t) key_start;
        self->items[j].key_len = (size_t) key_len;
        /* array_start + array_len * element size <= file_size, without
         * wrapping. */
        if (array_start > file_size
            || array_len > (file_size - array_start) / (uint64_t) type_size(type)) {
            goto out;
        }
        self->items[j].array_start = (size_t) array_start;
        self->items[j].array_len = (size_t) array_len;
    }

    /* Check the integrity of the key and array packing. Keys must
     * be packed sequentially starting immediately after the descriptors.
     * Together with the per-item checks above, requiring each start to equal
     * the running offset keeps ``offset`` at or below the file size. */
    offset = KAS_HEADER_SIZE + self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;
    for (j = 0; j < self->num_items; j++) {
        if (self->items[j].key_start != offset) {
            ret = KAS_ERR_BAD_FILE_FORMAT;
            goto out;
        }
        offset += self->items[j].key_len;
    }
    for (j = 0; j < self->num_items; j++) {
        /* Arrays are 8 byte aligned and adjacent */
        remainder = offset % KAS_ARRAY_ALIGN;
        if (remainder != 0) {
            padding = KAS_ARRAY_ALIGN - remainder;
            /* Padding that runs past the end of the file can never match a
             * valid array start, and adding it could wrap the offset. */
            if (padding > self->file_size - offset) {
                ret = KAS_ERR_BAD_FILE_FORMAT;
                goto out;
            }
            offset += padding;
        }
        if (self->items[j].array_start != offset) {
            ret = KAS_ERR_BAD_FILE_FORMAT;
            goto out;
        }
        offset += self->items[j].array_len * type_size(self->items[j].type);
    }
    if (offset != self->file_size) {
        ret = KAS_ERR_BAD_FILE_FORMAT;
        goto out;
    }
    ret = 0;
out:
    kas_safe_free(read_buffer);
    return ret;
}
/* Write all keys followed by all arrays, inserting zero padding before an
 * array where its recorded start lies beyond the current position. For each
 * item the borrowed array is written when one is set, otherwise the item's
 * own array. Returns 0 or KAS_ERR_IO. */
static int KAS_WARN_UNUSED
kastore_write_data(kastore_t *self)
{
    const char zeros[KAS_ARRAY_ALIGN] = { 0 };
    const kaitem_t *item;
    const void *data;
    size_t i, nbytes, gap;
    size_t pos = KAS_HEADER_SIZE + self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;

    /* Write the keys. */
    for (i = 0; i < self->num_items; i++) {
        item = &self->items[i];
        assert(pos == item->key_start);
        if (fwrite(item->key, item->key_len, 1, self->file) != 1) {
            return KAS_ERR_IO;
        }
        pos += item->key_len;
    }

    /* Write the arrays. */
    for (i = 0; i < self->num_items; i++) {
        item = &self->items[i];
        gap = item->array_start - pos;
        assert(gap < KAS_ARRAY_ALIGN);
        if (gap > 0 && fwrite(zeros, gap, 1, self->file) != 1) {
            return KAS_ERR_IO;
        }

        nbytes = item->array_len * type_size(item->type);
        data = item->array;
        if (item->borrowed_array != NULL) {
            data = item->borrowed_array;
        }
        assert(data != NULL);
        /* Empty arrays occupy no bytes, so there is nothing to write. */
        if (nbytes > 0 && fwrite(data, nbytes, 1, self->file) != 1) {
            return KAS_ERR_IO;
        }
        pos = item->array_start + nbytes;
    }
    return 0;
}
/* Read the key section of the file into self->key_read_buffer and point each
 * item's key into it. When KAS_READ_ALL is set, every array is additionally
 * read into its own allocation.
 *
 * Must only be called with at least one item (items[0] is read
 * unconditionally). Returns 0, KAS_ERR_NO_MEMORY, KAS_ERR_BAD_FILE_FORMAT or
 * a read error. */
static int KAS_WARN_UNUSED
kastore_read_file(kastore_t *self)
{
    int ret = 0;
    size_t count, size, offset, j;
    bool read_all = !!(self->flags & KAS_READ_ALL);

    offset = KAS_HEADER_SIZE + self->num_items * KAS_ITEM_DESCRIPTOR_SIZE;
    /* Read in up to the start of first array. This will contain all the keys. */
    size = self->items[0].array_start;
    /* This value comes from the file: report an empty or inverted key
     * section as a format error instead of asserting, which would abort on
     * bad input (or underflow ``size`` when assertions are compiled out). */
    if (size <= offset) {
        ret = KAS_ERR_BAD_FILE_FORMAT;
        goto out;
    }
    size -= offset;
    self->key_read_buffer = (char *) malloc(size);
    if (self->key_read_buffer == NULL) {
        ret = KAS_ERR_NO_MEMORY;
        goto out;
    }
    count = fread(self->key_read_buffer, size, 1, self->file);
    if (count == 0) {
        ret = kastore_get_read_io_error(self);
        goto out;
    }

    /* Assign the pointers for the keys and arrays */
    for (j = 0; j < self->num_items; j++) {
        /* keys are already loaded in the read buffer */
        self->items[j].key = self->key_read_buffer + self->items[j].key_start - offset;
        if (read_all) {
            /* Read up to the start of the next array (or the end of the
             * file), which also consumes any alignment padding. */
            if (j == self->num_items - 1) {
                size = self->file_size - self->items[j].array_start;
            } else {
                size = self->items[j + 1].array_start - self->items[j].array_start;
            }
            /* Never request zero bytes: malloc(0) may return NULL. */
            self->items[j].array = (char *) malloc(size == 0 ? 1 : size);
            if (self->items[j].array == NULL) {
                ret = KAS_ERR_NO_MEMORY;
                goto out;
            }
            if (size > 0) {
                count = fread(self->items[j].array, size, 1, self->file);
                if (count == 0) {
                    ret = kastore_get_read_io_error(self);
                    goto out;
                }
            }
        }
    }
out:
    return ret;
}
/* Allocate item->array and, for a non-empty array, fill it by seeking to the
 * array's position relative to self->file_offset and reading it. Returns 0,
 * KAS_ERR_NO_MEMORY, KAS_ERR_IO or a read error. */
static int KAS_WARN_UNUSED
kastore_read_item(kastore_t *self, kaitem_t *item)
{
    const size_t nbytes = item->array_len * type_size(item->type);

    /* Never request zero bytes: malloc(0) may return NULL. */
    item->array = malloc(nbytes == 0 ? 1 : nbytes);
    if (item->array == NULL) {
        return KAS_ERR_NO_MEMORY;
    }
    if (nbytes == 0) {
        return 0;
    }
    if (fseek(self->file, self->file_offset + (long) item->array_start, SEEK_SET) != 0) {
        return KAS_ERR_IO;
    }
    if (fread(item->array, nbytes, 1, self->file) == 0) {
        return kastore_get_read_io_error(self);
    }
    return 0;
}
/* Serialise the store: sort the items by key, compute the file layout, then
 * write header, descriptors and data in that order, stopping at the first
 * failure. Returns 0 or the error of the failing step. */
static int KAS_WARN_UNUSED
kastore_write_file(kastore_t *self)
{
    int status;

    qsort(self->items, self->num_items, sizeof(kaitem_t), compare_items);
    kastore_pack_items(self);

    status = kastore_write_header(self);
    if (status == 0) {
        status = kastore_write_descriptors(self);
    }
    if (status == 0) {
        status = kastore_write_data(self);
    }
    return status;
}
/* Read a store from self->file: the header, then (when there are items) the
 * descriptors and the key/array data. A store without items must consist of
 * the header only. Returns 0 or the error of the failing step. */
static int KAS_WARN_UNUSED
kastore_read(kastore_t *self)
{
    int status;

    if (!(self->flags & KAS_READ_ALL)) {
        /* Record the current file offset, in case this is a multi-store file,
         * so that we can seek to the correct location in kastore_read_item().
         */
        self->file_offset = ftell(self->file);
        if (self->file_offset == -1) {
            return KAS_ERR_IO;
        }
    }

    status = kastore_read_header(self);
    if (status != 0) {
        return status;
    }

    if (self->num_items == 0) {
        if (self->file_size != KAS_HEADER_SIZE) {
            return KAS_ERR_BAD_FILE_FORMAT;
        }
        return 0;
    }

    self->items = (kaitem_t *) calloc(self->num_items, sizeof(*self->items));
    if (self->items == NULL) {
        return KAS_ERR_NO_MEMORY;
    }
    status = kastore_read_descriptors(self);
    if (status != 0) {
        return status;
    }
    return kastore_read_file(self);
}
/* Insert every item of ``other`` into ``self`` with kastore_put() (flags 0),
 * stopping at and returning the first error. */
static int KAS_WARN_UNUSED
kastore_insert_all(kastore_t *self, kastore_t *other)
{
    int status = 0;
    size_t i;
    const kaitem_t *src;

    for (i = 0; status == 0 && i < other->num_items; i++) {
        src = &other->items[i];
        status = kastore_put(
            self, src->key, src->key_len, src->array, src->array_len, src->type, 0);
    }
    return status;
}
/* Open the store at ``filename``. ``mode`` must be exactly one character:
 * "r" (read), "w" (write) or "a" (append); anything else yields
 * KAS_ERR_BAD_MODE.
 *
 * Append is implemented by reading the whole existing file into a temporary
 * in-memory store, reopening the file for writing and re-inserting every
 * item into the new store.
 *
 * On success the file is marked as owned (OWN_FILE) so that kastore_close()
 * closes it. Returns 0 or a KAS_ERR_* code. */
int KAS_WARN_UNUSED
kastore_open(kastore_t *self, const char *filename, const char *mode, int flags)
{
int ret = 0;
const char *file_mode;
bool appending = false;
kastore_t tmp;
FILE *file;
int err;
/* Zero both stores up front so that the cleanup at ``out`` is safe
 * whichever path is taken. */
memset(self, 0, sizeof(*self));
memset(&tmp, 0, sizeof(tmp));
if (strlen(mode) != 1) {
ret = KAS_ERR_BAD_MODE;
goto out;
}
if (strncmp(mode, "r", 1) == 0) {
file_mode = "rb";
} else if (strncmp(mode, "w", 1) == 0) {
file_mode = "wb";
} else if (strncmp(mode, "a", 1) == 0) {
/* From here on an append is an ordinary write of old plus new items. */
mode = "w";
file_mode = "wb";
appending = true;
} else {
ret = KAS_ERR_BAD_MODE;
goto out;
}
if (appending) {
ret = kastore_open(&tmp, filename, "r", KAS_READ_ALL);
if (ret != 0) {
goto out;
}
/* tmp will now have read all of the data into memory. We can now
 * close its file. We have to do this for Windows. */
err = fclose(tmp.file);
/* Cleared so that kastore_close(&tmp) below does not close it again. */
tmp.file = NULL;
if (err != 0) {
ret = KAS_ERR_IO;
goto out;
}
}
file = fopen(filename, file_mode);
if (file == NULL) {
ret = KAS_ERR_IO;
goto out;
}
ret = kastore_openf(self, file, mode, flags);
if (ret != 0) {
/* Ownership was never recorded in ``self``, so close the file here. */
(void) fclose(file);
} else {
self->flags |= OWN_FILE;
if (appending) {
ret = kastore_insert_all(self, &tmp);
}
}
out:
if (appending) {
kastore_close(&tmp);
}
return ret;
}
/* Initialise ``self`` on top of the already open stream ``file``. ``mode``
 * must be exactly "r" or "w", otherwise KAS_ERR_BAD_MODE is returned; flags
 * outside the range 0 .. (KAS_READ_ALL | KAS_GET_TAKES_OWNERSHIP) yield
 * KAS_ERR_BAD_FLAGS. In read mode the store contents are read immediately.
 * Returns 0 or a KAS_ERR_* code. */
int KAS_WARN_UNUSED
kastore_openf(kastore_t *self, FILE *file, const char *mode, int flags)
{
    memset(self, 0, sizeof(*self));

    if (strlen(mode) != 1) {
        return KAS_ERR_BAD_MODE;
    }
    switch (mode[0]) {
        case 'r':
            self->mode = KAS_READ;
            break;
        case 'w':
            self->mode = KAS_WRITE;
            break;
        default:
            return KAS_ERR_BAD_MODE;
    }

    if (flags < 0 || flags > (KAS_READ_ALL | KAS_GET_TAKES_OWNERSHIP)) {
        return KAS_ERR_BAD_FLAGS;
    }
    self->flags = flags;
    self->file = file;

    if (self->mode == KAS_READ) {
        return kastore_read(self);
    }
    return 0;
}
/* Release everything held by ``self`` and zero the struct. In write mode the
 * pending items are first written out with kastore_write_file(). The FILE is
 * only closed when the store owns it (OWN_FILE). Returns 0 on success, the
 * write error if writing failed, or KAS_ERR_IO if the final fclose failed. */
int KAS_WARN_UNUSED
kastore_close(kastore_t *self)
{
    int ret = 0;
    size_t k;
    const int writing = (self->mode == KAS_WRITE);

    if (writing && self->file != NULL) {
        ret = kastore_write_file(self);
        if (ret != 0) {
            /* The write error is what we report; a failure of this fclose
             * is deliberately ignored. */
            if (self->flags & OWN_FILE) {
                fclose(self->file);
            }
            self->file = NULL;
        }
    }
    if (!writing) {
        kas_safe_free(self->key_read_buffer);
    }
    if (self->items != NULL) {
        for (k = 0; k < self->num_items; k++) {
            /* Per-item key buffers are only allocated in write mode. */
            if (writing) {
                kas_safe_free(self->items[k].key);
            }
            kas_safe_free(self->items[k].array);
        }
    }
    kas_safe_free(self->items);

    if ((self->flags & OWN_FILE) && self->file != NULL) {
        if (fclose(self->file) != 0) {
            ret = KAS_ERR_IO;
        }
    }
    memset(self, 0, sizeof(*self));
    return ret;
}
/* Look up ``key`` (``key_len`` bytes, not necessarily NUL terminated) in a
 * read-mode store using a binary search over ``self->items`` with
 * compare_items().
 *
 * On success 0 is returned and ``*item`` points at the matching entry.
 * Returns KAS_ERR_ILLEGAL_OPERATION if the store is not in read mode,
 * KAS_ERR_NO_MEMORY if the temporary search key cannot be allocated and
 * KAS_ERR_KEY_NOT_FOUND (with ``*item`` set to NULL) if there is no match. */
static int
kastore_find_item(kastore_t *self, const char *key, size_t key_len, kaitem_t **item)
{
    int ret = KAS_ERR_KEY_NOT_FOUND;
    kaitem_t search;

    memset(&search, 0, sizeof(search));
    /* Reject the wrong mode before allocating anything. */
    if (self->mode != KAS_READ) {
        ret = KAS_ERR_ILLEGAL_OPERATION;
        goto out;
    }
    /* malloc(0) is allowed to return NULL, which would be misreported as an
     * out-of-memory condition for an empty key; always request >= 1 byte. */
    search.key = (char *) malloc(key_len == 0 ? 1 : key_len);
    if (search.key == NULL) {
        ret = KAS_ERR_NO_MEMORY;
        goto out;
    }
    search.key_len = key_len;
    if (key_len > 0) {
        memcpy(search.key, key, key_len);
    }
    *item = NULL;
    /* An empty store has a NULL items array; bsearch requires valid pointers
     * even when the element count is zero, so skip the call in that case. */
    if (self->num_items > 0) {
        *item = bsearch(
            &search, self->items, self->num_items, sizeof(kaitem_t), compare_items);
    }
    if (*item == NULL) {
        goto out;
    }
    ret = 0;
out:
    kas_safe_free(search.key);
    return ret;
}
/* Report whether ``key`` is present: 1 if found, 0 if not found, or the
 * negative error code produced by the lookup for any other failure. */
int KAS_WARN_UNUSED
kastore_contains(kastore_t *self, const char *key, size_t key_len)
{
    kaitem_t *found;
    const int status = kastore_find_item(self, key, key_len, &found);

    switch (status) {
        case 0:
            return 1;
        case KAS_ERR_KEY_NOT_FOUND:
            return 0;
        default:
            return status;
    }
}
/* NUL-terminated-key convenience form of kastore_contains(). */
int KAS_WARN_UNUSED
kastore_containss(kastore_t *self, const char *key)
{
    const size_t key_len = strlen(key);

    return kastore_contains(self, key, key_len);
}
/* Fetch the array stored under ``key``. If the item's data has not been
 * loaded yet it is read with kastore_read_item(). On success the array
 * pointer, element count and type code are written to the output
 * parameters; when the store was opened with KAS_GET_TAKES_OWNERSHIP the
 * store then forgets the buffer so that it will not free it on close.
 * Returns 0 on success or a negative KAS_ERR_* code (outputs untouched). */
int KAS_WARN_UNUSED
kastore_get(kastore_t *self, const char *key, size_t key_len, void **array,
    size_t *array_len, int *type)
{
    kaitem_t *entry;
    int ret = kastore_find_item(self, key, key_len, &entry);

    if (ret == 0 && entry->array == NULL) {
        ret = kastore_read_item(self, entry);
    }
    if (ret == 0) {
        *array = entry->array;
        *array_len = entry->array_len;
        *type = entry->type;
        if ((self->flags & KAS_GET_TAKES_OWNERSHIP) != 0) {
            entry->array = NULL;
        }
    }
    return ret;
}
/* NUL-terminated-key convenience form of kastore_get(). */
int KAS_WARN_UNUSED
kastore_gets(
    kastore_t *self, const char *key, void **array, size_t *array_len, int *type)
{
    const size_t key_len = strlen(key);

    return kastore_get(self, key, key_len, array, array_len, type);
}
/* Shared implementation of the typed getters: fetch the array for the
 * NUL-terminated ``key`` and verify that its stored type code equals
 * ``type``. Returns KAS_ERR_TYPE_MISMATCH when it does not; note that the
 * output parameters have already been filled in by kastore_get() by then. */
static int KAS_WARN_UNUSED
kastore_gets_type(
    kastore_t *self, const char *key, void **array, size_t *array_len, int type)
{
    int stored_type = -1;
    int ret = kastore_get(self, key, strlen(key), array, array_len, &stored_type);

    if (ret == 0 && stored_type != type) {
        ret = KAS_ERR_TYPE_MISMATCH;
    }
    return ret;
}
/* Typed getters. Each one forwards to kastore_gets_type() with the type code
 * matching its element type, so a stored array of any other type results in
 * KAS_ERR_TYPE_MISMATCH. */

int KAS_WARN_UNUSED
kastore_gets_int8(kastore_t *self, const char *key, int8_t **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_INT8);
}

int KAS_WARN_UNUSED
kastore_gets_uint8(kastore_t *self, const char *key, uint8_t **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_UINT8);
}

int KAS_WARN_UNUSED
kastore_gets_int16(kastore_t *self, const char *key, int16_t **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_INT16);
}

int KAS_WARN_UNUSED
kastore_gets_uint16(
    kastore_t *self, const char *key, uint16_t **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_UINT16);
}

int KAS_WARN_UNUSED
kastore_gets_int32(kastore_t *self, const char *key, int32_t **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_INT32);
}

int KAS_WARN_UNUSED
kastore_gets_uint32(
    kastore_t *self, const char *key, uint32_t **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_UINT32);
}

int KAS_WARN_UNUSED
kastore_gets_int64(kastore_t *self, const char *key, int64_t **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_INT64);
}

int KAS_WARN_UNUSED
kastore_gets_uint64(
    kastore_t *self, const char *key, uint64_t **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_UINT64);
}

int KAS_WARN_UNUSED
kastore_gets_float32(kastore_t *self, const char *key, float **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_FLOAT32);
}

int KAS_WARN_UNUSED
kastore_gets_float64(kastore_t *self, const char *key, double **array, size_t *array_len)
{
    void **dest = (void **) array;

    return kastore_gets_type(self, key, dest, array_len, KAS_FLOAT64);
}
/* Append a new, empty item for ``key`` to a write-mode store and hand it back
 * through ``ret_item``; the caller is responsible for attaching the array.
 * The key bytes are copied into a freshly allocated buffer.
 *
 * Returns 0 on success, or KAS_ERR_ILLEGAL_OPERATION (store not in write
 * mode), KAS_ERR_BAD_TYPE, KAS_ERR_EMPTY_KEY, KAS_ERR_NO_MEMORY or
 * KAS_ERR_DUPLICATE_KEY. On failure the item count is left unchanged. */
static int KAS_WARN_UNUSED
kastore_put_item(kastore_t *self, kaitem_t **ret_item, const char *key, size_t key_len,
    int type, int KAS_UNUSED(flags))
{
    kaitem_t *grown;
    kaitem_t *slot;
    size_t k;
    const size_t num_existing = self->num_items;

    if (self->mode != KAS_WRITE) {
        return KAS_ERR_ILLEGAL_OPERATION;
    }
    if (type < 0 || type >= KAS_NUM_TYPES) {
        return KAS_ERR_BAD_TYPE;
    }
    if (key_len == 0) {
        return KAS_ERR_EMPTY_KEY;
    }

    /* Growing by one element per insert is simple rather than fast; only a
     * small number of items is expected. */
    grown = (kaitem_t *) realloc(self->items, (num_existing + 1) * sizeof(*self->items));
    if (grown == NULL) {
        return KAS_ERR_NO_MEMORY;
    }
    self->items = grown;

    slot = grown + num_existing;
    memset(slot, 0, sizeof(*slot));
    slot->type = type;
    slot->key_len = key_len;
    slot->key = (char *) malloc(key_len);
    if (slot->key == NULL) {
        return KAS_ERR_NO_MEMORY;
    }
    memcpy(slot->key, key, key_len);

    /* Linear duplicate scan over the items that were already present. This
     * makes insertion quadratic overall, which is acceptable for the small
     * item counts (< 100) this store is designed for. */
    for (k = 0; k < num_existing; k++) {
        if (compare_items(slot, grown + k) == 0) {
            /* Drop the key copy; the slot is not counted as an item. */
            kas_safe_free(slot->key);
            return KAS_ERR_DUPLICATE_KEY;
        }
    }
    self->num_items = num_existing + 1;
    *ret_item = slot;
    return 0;
}
/* Insert ``key`` with an array that is borrowed from the caller rather than
 * copied: the item only records the caller's pointer in ``borrowed_array``.
 *
 * When ``array`` is NULL a one-byte placeholder is allocated for
 * ``item->array`` so that the two array pointers are never both NULL. If that
 * allocation fails the freshly added item is removed again and
 * KAS_ERR_NO_MEMORY is returned.
 *
 * Returns 0 on success or a negative KAS_ERR_* code. */
static int KAS_WARN_UNUSED
kastore_bput(kastore_t *self, const char *key, size_t key_len, const void *array,
    size_t array_len, int type, int flags)
{
    int ret = 0;
    kaitem_t *item;

    ret = kastore_put_item(self, &item, key, key_len, type, flags);
    if (ret != 0) {
        goto out;
    }
    if (array == NULL) {
        /* Both can't be null, so assign a dummy array */
        item->array = malloc(1);
        if (item->array == NULL) {
            /* kastore_put_item appended the item as the last element, so
             * undoing the insert is just a matter of releasing its key and
             * shrinking the count again. */
            kas_safe_free(item->key);
            self->num_items--;
            ret = KAS_ERR_NO_MEMORY;
            goto out;
        }
    }
    item->borrowed_array = array;
    item->array_len = array_len;
out:
    return ret;
}
/* Insert a key/array pair into a write-mode store.
 *
 * With flags == KAS_BORROWS_ARRAY the caller's buffer is referenced directly
 * (see kastore_bput). With flags == 0 the array contents are copied into a
 * new buffer whose ownership is handed to the store through kastore_oput();
 * if that hand-over fails the copy is freed here. Any other flags value
 * yields KAS_ERR_BAD_FLAGS, and an out-of-range ``type`` KAS_ERR_BAD_TYPE.
 *
 * Returns 0 on success or a negative KAS_ERR_* code. */
int KAS_WARN_UNUSED
kastore_put(kastore_t *self, const char *key, size_t key_len, const void *array,
    size_t array_len, int type, int flags)
{
    int ret;
    size_t array_size;
    void *array_copy = NULL;

    if (flags != KAS_BORROWS_ARRAY && flags != 0) {
        ret = KAS_ERR_BAD_FLAGS;
        goto out;
    }
    if (type < 0 || type >= KAS_NUM_TYPES) {
        ret = KAS_ERR_BAD_TYPE;
        goto out;
    }
    if (flags & KAS_BORROWS_ARRAY) {
        ret = kastore_bput(self, key, key_len, array, array_len, type, flags);
    } else {
        array_size = type_size(type) * array_len;
        /* Always allocate at least one byte so that an empty array still
         * gets a non-NULL buffer. */
        array_copy = malloc(array_size == 0 ? 1 : array_size);
        if (array_copy == NULL) {
            ret = KAS_ERR_NO_MEMORY;
            goto out;
        }
        /* An empty array may be passed as NULL; memcpy must not be called
         * with a NULL source even when the length is zero. */
        if (array_size > 0) {
            memcpy(array_copy, array, array_size);
        }
        ret = kastore_oput(self, key, key_len, array_copy, array_len, type, flags);
        if (ret == 0) {
            /* Kastore has taken ownership of the array, so we don't need to free it */
            array_copy = NULL;
        }
    }
out:
    kas_safe_free(array_copy);
    return ret;
}
/* "Own-put": insert ``key`` and attach ``array`` to the new item without
 * copying it, so the store frees the buffer when it is closed. ``flags``
 * must be 0. The array is only attached when the insert succeeded. Returns 0
 * on success or a negative KAS_ERR_* code. */
int KAS_WARN_UNUSED
kastore_oput(kastore_t *self, const char *key, size_t key_len, void *array,
    size_t array_len, int type, int flags)
{
    kaitem_t *slot;
    int ret;

    if (flags != 0) {
        return KAS_ERR_BAD_FLAGS;
    }
    ret = kastore_put_item(self, &slot, key, key_len, type, flags);
    if (ret == 0) {
        slot->array = array;
        slot->array_len = array_len;
    }
    return ret;
}
/* NUL-terminated-key convenience form of kastore_put(). */
int KAS_WARN_UNUSED
kastore_puts(kastore_t *self, const char *key, const void *array, size_t array_len,
    int type, int flags)
{
    const size_t key_len = strlen(key);

    return kastore_put(self, key, key_len, array, array_len, type, flags);
}
/* Typed put functions. Each one forwards to kastore_puts() with the type
 * code matching its element type. */

int KAS_WARN_UNUSED
kastore_puts_int8(
    kastore_t *self, const char *key, const int8_t *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_INT8, flags);
}

int KAS_WARN_UNUSED
kastore_puts_uint8(
    kastore_t *self, const char *key, const uint8_t *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_UINT8, flags);
}

int KAS_WARN_UNUSED
kastore_puts_int16(
    kastore_t *self, const char *key, const int16_t *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_INT16, flags);
}

int KAS_WARN_UNUSED
kastore_puts_uint16(
    kastore_t *self, const char *key, const uint16_t *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_UINT16, flags);
}

int KAS_WARN_UNUSED
kastore_puts_int32(
    kastore_t *self, const char *key, const int32_t *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_INT32, flags);
}

int KAS_WARN_UNUSED
kastore_puts_uint32(
    kastore_t *self, const char *key, const uint32_t *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_UINT32, flags);
}

int KAS_WARN_UNUSED
kastore_puts_int64(
    kastore_t *self, const char *key, const int64_t *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_INT64, flags);
}

int KAS_WARN_UNUSED
kastore_puts_uint64(
    kastore_t *self, const char *key, const uint64_t *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_UINT64, flags);
}

int KAS_WARN_UNUSED
kastore_puts_float32(
    kastore_t *self, const char *key, const float *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_FLOAT32, flags);
}

int KAS_WARN_UNUSED
kastore_puts_float64(
    kastore_t *self, const char *key, const double *array, size_t array_len, int flags)
{
    const void *data = array;

    return kastore_puts(self, key, data, array_len, KAS_FLOAT64, flags);
}
/* NUL-terminated-key convenience form of kastore_oput(). */
int KAS_WARN_UNUSED
kastore_oputs(
    kastore_t *self, const char *key, void *array, size_t array_len, int type, int flags)
{
    const size_t key_len = strlen(key);

    return kastore_oput(self, key, key_len, array, array_len, type, flags);
}
/* Typed own-put functions. Each one forwards to kastore_oputs() with the
 * type code matching its element type. */

int KAS_WARN_UNUSED
kastore_oputs_int8(
    kastore_t *self, const char *key, int8_t *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_INT8, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_uint8(
    kastore_t *self, const char *key, uint8_t *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_UINT8, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_int16(
    kastore_t *self, const char *key, int16_t *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_INT16, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_uint16(
    kastore_t *self, const char *key, uint16_t *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_UINT16, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_int32(
    kastore_t *self, const char *key, int32_t *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_INT32, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_uint32(
    kastore_t *self, const char *key, uint32_t *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_UINT32, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_int64(
    kastore_t *self, const char *key, int64_t *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_INT64, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_uint64(
    kastore_t *self, const char *key, uint64_t *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_UINT64, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_float32(
    kastore_t *self, const char *key, float *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_FLOAT32, flags);
}

int KAS_WARN_UNUSED
kastore_oputs_float64(
    kastore_t *self, const char *key, double *array, size_t array_len, int flags)
{
    void *buffer = array;

    return kastore_oputs(self, key, buffer, array_len, KAS_FLOAT64, flags);
}
/* Debugging aid: write a human-readable dump of the store's header fields
 * followed by one line per item to ``out``. */
void
kastore_print_state(kastore_t *self, FILE *out)
{
    static const char rule[] = "============================\n";
    size_t k;
    kaitem_t *entry;
    const int owns_file = (self->flags & OWN_FILE) ? 1 : 0;

    fputs(rule, out);
    fputs("kastore state\n", out);
    fprintf(out, "file_version = %d.%d\n", self->file_version[0], self->file_version[1]);
    fprintf(out, "mode = %d\n", self->mode);
    fprintf(out, "flags = %d\n", self->flags);
    fprintf(out, "num_items = %zu\n", self->num_items);
    fprintf(out, "file_size = %zu\n", self->file_size);
    fprintf(out, "own_file = %d\n", owns_file);
    fprintf(out, "file = '%p'\n", (void *) self->file);
    fputs(rule, out);

    for (k = 0; k < self->num_items; k++) {
        entry = &self->items[k];
        /* Keys are not NUL terminated, so print them with an explicit length. */
        fprintf(out,
            "%.*s: type=%d, key_start=%zu, key_len=%zu, key=%p, "
            "array_start=%zu, array_len=%zu, array=%p\n",
            (int) entry->key_len, entry->key, entry->type, entry->key_start,
            entry->key_len, (void *) entry->key, entry->array_start, entry->array_len,
            (void *) entry->array);
    }
    fputs(rule, out);
}
================================================
FILE: c/subprojects/kastore/kastore.h
================================================
/**
* @file kastore.h
* @brief Public API for kastore.
*
* This is the API documentation for kastore.
*/
#ifndef KASTORE_H
#define KASTORE_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __GNUC__
#define KAS_WARN_UNUSED __attribute__((warn_unused_result))
#define KAS_UNUSED(x) KAS_UNUSED_##x __attribute__((__unused__))
#else
#define KAS_WARN_UNUSED
#define KAS_UNUSED(x) KAS_UNUSED_##x
#endif
#include <stdlib.h> /* abort, free */
#include <stdint.h> /* fixed-width integer types */
#include <stddef.h> /* size_t */
#include <stdio.h>  /* FILE, fprintf */
/**
@defgroup ERROR_GROUP Error return values.
@{
*/
// clang-format off
/**
Generic error thrown when no other message can be generated.
*/
#define KAS_ERR_GENERIC -1
/**
An error occurred during IO.
*/
#define KAS_ERR_IO -2
/**
An unrecognised mode string was passed to open().
*/
#define KAS_ERR_BAD_MODE -3
/**
Out-of-memory condition.
*/
#define KAS_ERR_NO_MEMORY -4
/**
Attempt to read an unknown file format.
*/
#define KAS_ERR_BAD_FILE_FORMAT -5
/**
The file is in kastore format, but the version is too old for this
version of the library to read.
*/
#define KAS_ERR_VERSION_TOO_OLD -6
/**
The file is in kastore format, but the version is too new for this
version of the library to read.
*/
#define KAS_ERR_VERSION_TOO_NEW -7
/**
An unknown type key was specified.
*/
#define KAS_ERR_BAD_TYPE -8
/**
A zero-length key was specified.
*/
#define KAS_ERR_EMPTY_KEY -9
/**
A duplicate key was specified.
*/
#define KAS_ERR_DUPLICATE_KEY -10
/**
The requested key does not exist in the store.
*/
#define KAS_ERR_KEY_NOT_FOUND -11
/**
The requested function cannot be called in the current mode.
*/
#define KAS_ERR_ILLEGAL_OPERATION -12
/**
The requested type does not match the type of the stored values.
*/
#define KAS_ERR_TYPE_MISMATCH -13
/**
End of file was reached while reading data.
*/
#define KAS_ERR_EOF -14
/**
Unknown flags were provided to open.
*/
#define KAS_ERR_BAD_FLAGS -15
/** @} */
/* Flags for open */
/* Read the entire file at open time instead of reading arrays on demand. */
#define KAS_READ_ALL (1 << 0)
/* Every get transfers ownership of the returned array to the caller; the
 * store will not free it. */
#define KAS_GET_TAKES_OWNERSHIP (1 << 1)
/* Flags for put */
/* Do not copy the array passed to put; the caller's buffer is referenced
 * directly and must persist until the store is closed. */
#define KAS_BORROWS_ARRAY (1 << 8)
/**
@defgroup TYPE_GROUP Data types.
@{
*/
#define KAS_INT8 0
#define KAS_UINT8 1
#define KAS_INT16 2
#define KAS_UINT16 3
#define KAS_INT32 4
#define KAS_UINT32 5
#define KAS_INT64 6
#define KAS_UINT64 7
#define KAS_FLOAT32 8
#define KAS_FLOAT64 9
/** @} */
#define KAS_NUM_TYPES 10
#define KAS_READ 1
#define KAS_WRITE 2
/**
@defgroup FILE_VERSION_GROUP File version macros.
@{
*/
/**
The file version major number. Incremented when any breaking changes are made
to the file format.
*/
#define KAS_FILE_VERSION_MAJOR 1
/**
The file version minor number. Incremented when non-breaking backward-compatible
changes are made to the file format.
*/
#define KAS_FILE_VERSION_MINOR 0
/** @} */
/**
@defgroup API_VERSION_GROUP API version macros.
@{
*/
/**
The library major version. Incremented when breaking changes to the API or ABI are
introduced. This includes any changes to the signatures of functions and the
sizes and types of externally visible structs.
*/
#define KAS_VERSION_MAJOR 2
/**
The library minor version. Incremented when non-breaking backward-compatible changes
to the API or ABI are introduced, i.e., the addition of a new function.
*/
#define KAS_VERSION_MINOR 1
/**
The library patch version. Incremented when any changes not relevant to
the API or ABI are introduced, e.g., internal refactors or bugfixes.
*/
#define KAS_VERSION_PATCH 2
/** @} */
#define KAS_HEADER_SIZE 64
#define KAS_ITEM_DESCRIPTOR_SIZE 64
#define KAS_MAGIC "\211KAS\r\n\032\n"
#define KAS_ARRAY_ALIGN 8
// clang-format on
#ifndef KAS_BUG_ASSERT_MESSAGE
#define KAS_BUG_ASSERT_MESSAGE \
"If you are using kastore directly please open an issue on" \
" GitHub, ideally with a reproducible example." \
" (https://github.com/tskit-dev/kastore/issues) If you are" \
" using software that uses kastore, please report an issue" \
" to that software's issue tracker, at least initially."
#endif
/**
Assert a condition that should never be false, independently of NDEBUG.

The standard `assert` is compiled out when NDEBUG is defined; this macro is
always active. When `condition` is false it prints the source file, line number
and KAS_BUG_ASSERT_MESSAGE to stderr and then calls abort().
*/
#define kas_bug_assert(condition) \
do { \
if (!(condition)) { \
fprintf(stderr, "Bug detected in %s at line %d. %s\n", __FILE__, __LINE__, \
KAS_BUG_ASSERT_MESSAGE); \
abort(); \
} \
} while (0)
/* A single key/array entry of a store. */
typedef struct {
/* Type code of the array elements (one of the KAS_INT8 ... KAS_FLOAT64 values). */
int type;
/* Length of the key in bytes; keys are not NUL terminated. */
size_t key_len;
/* Number of elements (not bytes) in the array. */
size_t array_len;
/* The key bytes. In write mode this is a per-item heap copy that is freed
 * on close; in read mode it is not freed per item (presumably it points
 * into the store's key_read_buffer -- confirm against the read code). */
char *key;
/* Used when KAS_BORROWS_ARRAY is set */
const void *borrowed_array;
/* Array buffer owned by the store and freed on close. May be NULL in read
 * mode until the item has been loaded, or after a get that transferred
 * ownership to the caller. */
void *array;
/* NOTE(review): looks like the position of the key within the file -- confirm. */
size_t key_start;
/* NOTE(review): looks like the position of the array within the file -- confirm. */
size_t array_start;
} kaitem_t;
/**
@brief A file-backed store of key-array values.

The struct is zeroed by the open functions and again by kastore_close().
*/
typedef struct {
/* Flags supplied at open time; the library additionally records an internal
 * "owns the FILE" bit here when it opened the file itself. */
int flags;
/* KAS_READ or KAS_WRITE. */
int mode;
/* File format version as {major, minor}. */
int file_version[2];
/* Number of entries in items. */
size_t num_items;
/* Array of num_items entries; NULL when the store is empty. */
kaitem_t *items;
/* The underlying stream; only closed by kastore_close() when the store owns it. */
FILE *file;
/* Size of the store on file in bytes (an empty store is exactly
 * KAS_HEADER_SIZE bytes). */
size_t file_size;
/* NOTE(review): presumably the stream position at which the store starts --
 * not used in the code visible here; confirm. */
long file_offset;
/* Buffer freed when a read-mode store is closed. */
char *key_read_buffer;
} kastore_t;
/**
@brief Library version information, as returned by kas_version().
*/
typedef struct {
/** @brief The major version number. */
int major;
/** @brief The minor version number. */
int minor;
/** @brief The patch version number. */
int patch;
} kas_version_t;
/**
@brief Open a store from a given file in read ("r"), write ("w") or
append ("a") mode.
@rst
In read mode, a store can be queried using the :ref:`get functions
` and any attempts to write to the store will return an error.
In write and append mode, the store can be written to using the :ref:`put
functions ` and any attempt to read will return an error.
After :c:func:`kastore_open` has been called on a particular store,
:c:func:`kastore_close` must be called to avoid leaking memory. This must also
be done when :c:func:`kastore_open` returns an error.
When opened in read-mode, the default is to read key/array values from file
on demand. This is useful when a subset of the data is required and we don't
wish to read the entire file. If the entire file is to be read, the
``KAS_READ_ALL`` flag may be specified to improve performance.
**Flags**
KAS_READ_ALL
If this option is specified, read the entire file at
open time. This will give slightly better performance as the file can
be read sequentially in a single pass.
KAS_GET_TAKES_OWNERSHIP
If this option is specified, all ``get`` operations will transfer
ownership of the array to the caller. ``kastore`` will not ``free``
the array memory and this is the responsibility of the caller.
If ``get`` is called on the same key multiple times, a new buffer will be
returned each time. Note that second and subsequent ``get`` calls
on a given key will result in ``seek`` operations even when the
KAS_READ_ALL flag is set, and will therefore fail on unseekable
streams.
@endrst
@param self A pointer to a kastore object.
@param filename The file path to open.
@param mode The open mode: can be read ("r"), write ("w") or append ("a").
@param flags The open flags.
@return Return 0 on success or a negative value on failure.
*/
int kastore_open(kastore_t *self, const char *filename, const char *mode, int flags);
/**
@brief Open a store from a given FILE pointer.
@rst
Behaviour, mode and flags follow that of :c:func:`kastore_open`,
except append mode is not supported.
The ``file`` argument must be opened in an appropriate mode (e.g. "r"
for a kastore in "r" mode). Files open with other modes will result
in KAS_ERR_IO being returned when read/write operations are attempted.
The FILE will not be closed when :c:func:`kastore_close` is called.
If the KAS_READ_ALL flag is supplied, no ``seek`` operations will be
performed on the FILE and so streams such as stdin, FIFOs etc are
supported. The FILE pointer will be positioned exactly at the end
of the kastore encoded bytes once reading is completed, and reading
multiple stores from the same FILE sequentially is fully supported.
@endrst
@param self A pointer to a kastore object.
@param file The FILE* to read/write the store from/to.
@param mode The open mode: can be read ("r") or write ("w").
@param flags The open flags.
@return Return 0 on success or a negative value on failure.
*/
int kastore_openf(kastore_t *self, FILE *file, const char *mode, int flags);
/**
@brief Close an opened store, freeing all resources.
Any store that has been opened must be closed to avoid memory leaks
(including cases in which errors have occurred). It is not an error to
call ``kastore_close`` multiple times on the same object, but
``kastore_open`` must be called before ``kastore_close``.
@param self A pointer to a kastore object.
@return Return 0 on success or a negative value on failure.
*/
int kastore_close(kastore_t *self);
/**
@brief Return 1 if the store contains the specified key and 0 if it does not.
@rst
Queries the store for the specified key and returns 1 if it exists. If the
key does not exist, 0 is returned. If an error occurs (for example, if querying
the store while it is in write-mode), a negative value is returned.
For keys that are standard NULL terminated strings, the :c:func:`kastore_containss`
function may be more convenient.
@endrst
@param self A pointer to a kastore object.
@param key The key.
@param key_len The length of the key.
@return Return 1 if the key is present and 0 if it is not. If an error occurs,
return a negative value.
*/
int kastore_contains(kastore_t *self, const char *key, size_t key_len);
/**
@brief Return 1 if the store contains the specified NULL terminated key
and 0 if it does not.
@rst
Queries the store for the specified key, which must be a NULL terminated string,
and returns 1 if it exists. If the key does not exist, 0 is returned. If an error
occurs (for example, if querying the store while it is in write-mode), a negative
value is returned.
@endrst
@param self A pointer to a kastore object.
@param key The key.
@return Return 1 if the key is present and 0 if it is not. If an error occurs,
return a negative value.
*/
int kastore_containss(kastore_t *self, const char *key);
/**
@brief Get the array for the specified key.
@rst
Queries the store for the specified key and stores pointers to the memory for
the corresponding array, the number of elements in this array and the type of
the array in the specified destination pointers. This is the most general form
of ``get`` query in kastore, as non NULL-terminated strings can be used as
keys and the resulting array is returned in a generic pointer. When standard C
strings are used as keys and the type of the array is known, it is more
convenient to use the :ref:`typed variants ` of this function.
The returned array points to memory that is internally managed by the store
and must not be freed or modified. The pointer is guaranteed to be valid
until :c:func:`kastore_close` is called.
@endrst
@param self A pointer to a kastore object.
@param key The key.
@param key_len The length of the key.
@param array The destination pointer for the array.
@param array_len The destination pointer for the number of elements
in the array.
@param type The destination pointer for the type code of the array.
@return Return 0 on success or a negative value on failure.
*/
int kastore_get(kastore_t *self, const char *key, size_t key_len, void **array,
size_t *array_len, int *type);
/**
@brief Get the array for the specified NULL-terminated key.
@rst
As for :c:func:`kastore_get()` except the key is a NULL-terminated string.
@endrst
@param self A pointer to a kastore object.
@param key The key.
@param array The destination pointer for the array.
@param array_len The destination pointer for the number of elements
in the array.
@param type The destination pointer for the type code of the array.
@return Return 0 on success or a negative value on failure.
*/
int kastore_gets(
kastore_t *self, const char *key, void **array, size_t *array_len, int *type);
/**
@defgroup TYPED_GETS_GROUP Typed get functions.
@{
*/
int kastore_gets_int8(
kastore_t *self, const char *key, int8_t **array, size_t *array_len);
int kastore_gets_uint8(
kastore_t *self, const char *key, uint8_t **array, size_t *array_len);
int kastore_gets_int16(
kastore_t *self, const char *key, int16_t **array, size_t *array_len);
int kastore_gets_uint16(
kastore_t *self, const char *key, uint16_t **array, size_t *array_len);
int kastore_gets_int32(
kastore_t *self, const char *key, int32_t **array, size_t *array_len);
int kastore_gets_uint32(
kastore_t *self, const char *key, uint32_t **array, size_t *array_len);
int kastore_gets_int64(
kastore_t *self, const char *key, int64_t **array, size_t *array_len);
int kastore_gets_uint64(
kastore_t *self, const char *key, uint64_t **array, size_t *array_len);
int kastore_gets_float32(
kastore_t *self, const char *key, float **array, size_t *array_len);
int kastore_gets_float64(
kastore_t *self, const char *key, double **array, size_t *array_len);
/** @} */
/**
@brief Insert the specified key-array pair into the store.
@rst
A key with the specified length is inserted into the store and associated with
an array of the specified type and number of elements. The contents of the
specified key and array are copied unless the KAS_BORROWS_ARRAY flag is specified.
If KAS_BORROWS_ARRAY is specified the array buffer must persist until the
kastore is closed.
Keys can be any sequence of bytes but must be at least one byte long and be
unique. There is no restriction on the contents of arrays. This is the most
general form of ``put`` operation in kastore; when the type of the array
is known and the keys are standard C strings, it is usually more convenient
to use the :ref:`typed variants ` of this function.
@endrst
@param self A pointer to a kastore object.
@param key The key.
@param key_len The length of the key.
@param array The array.
@param array_len The number of elements in the array.
@param type The type of the array.
@param flags The insertion flags; only KAS_BORROWS_ARRAY or 0 is valid.
@return Return 0 on success or a negative value on failure.
*/
int kastore_put(kastore_t *self, const char *key, size_t key_len, const void *array,
size_t array_len, int type, int flags);
/**
@brief Insert the specified NULL terminated key and array pair into the store.
@rst
As for :c:func:`kastore_put` except the key must be a NULL-terminated C string.
@endrst
@param self A pointer to a kastore object.
@param key The key.
@param array The array.
@param array_len The number of elements in the array.
@param type The type of the array.
@param flags The insertion flags; only KAS_BORROWS_ARRAY or 0 is valid.
@return Return 0 on success or a negative value on failure.
*/
int kastore_puts(kastore_t *self, const char *key, const void *array, size_t array_len,
int type, int flags);
/**
@defgroup TYPED_PUTS_GROUP Typed put functions.
@{
*/
int kastore_puts_int8(
kastore_t *self, const char *key, const int8_t *array, size_t array_len, int flags);
int kastore_puts_uint8(
kastore_t *self, const char *key, const uint8_t *array, size_t array_len, int flags);
int kastore_puts_int16(
kastore_t *self, const char *key, const int16_t *array, size_t array_len, int flags);
int kastore_puts_uint16(kastore_t *self, const char *key, const uint16_t *array,
size_t array_len, int flags);
int kastore_puts_int32(
kastore_t *self, const char *key, const int32_t *array, size_t array_len, int flags);
int kastore_puts_uint32(kastore_t *self, const char *key, const uint32_t *array,
size_t array_len, int flags);
int kastore_puts_int64(
kastore_t *self, const char *key, const int64_t *array, size_t array_len, int flags);
int kastore_puts_uint64(kastore_t *self, const char *key, const uint64_t *array,
size_t array_len, int flags);
int kastore_puts_float32(
kastore_t *self, const char *key, const float *array, size_t array_len, int flags);
int kastore_puts_float64(
kastore_t *self, const char *key, const double *array, size_t array_len, int flags);
/** @} */
/**
@brief Insert the specified key-array pair into the store, transferring ownership
of the malloced array buffer to the store (own-put).
@rst
A key with the specified length is inserted into the store and associated with
an array of the specified type and number of elements. The contents of the
specified key are copied, but the array buffer is taken directly and freed when
the store is closed. The array buffer must be a pointer returned by ``malloc``
or ``calloc``. Ownership of the buffer is not taken unless the function returns
successfully.
Apart from taking ownership of the array buffer, the semantics of this
function are identical to :c:func:`kastore_put`.
@endrst
@param self A pointer to a kastore object.
@param key The key.
@param key_len The length of the key.
@param array The array. Must be a pointer returned by malloc/calloc.
@param array_len The number of elements in the array.
@param type The type of the array.
@param flags The insertion flags. Currently unused.
@return Return 0 on success or a negative value on failure.
*/
int kastore_oput(kastore_t *self, const char *key, size_t key_len, void *array,
size_t array_len, int type, int flags);
/**
@brief Insert the specified NULL terminated key and array pair into the store,
transferring ownership of the malloced array buffer to the store (own-put).
@rst
As for :c:func:`kastore_oput` except the key must be a NULL-terminated C string.
@endrst
@param self A pointer to a kastore object.
@param key The key.
@param array The array. Must be a pointer returned by malloc/calloc.
@param array_len The number of elements in the array.
@param type The type of the array.
@param flags The insertion flags. Currently unused.
@return Return 0 on success or a negative value on failure.
*/
int kastore_oputs(kastore_t *self, const char *key, void *array, size_t array_len,
int type, int flags);
/**
@defgroup TYPED_OPUTS_GROUP Typed own-and-put functions.
@{
*/
/* One declaration per element type. Each takes a key without an explicit
 * length and a typed array pointer in place of the ``void *array`` and
 * ``int type`` arguments accepted by kastore_oputs.
 * NOTE(review): presumably each forwards to kastore_oputs with the matching
 * KAS_* type constant -- confirm against kastore.c. */
int kastore_oputs_int8(
kastore_t *self, const char *key, int8_t *array, size_t array_len, int flags);
int kastore_oputs_uint8(
kastore_t *self, const char *key, uint8_t *array, size_t array_len, int flags);
int kastore_oputs_int16(
kastore_t *self, const char *key, int16_t *array, size_t array_len, int flags);
int kastore_oputs_uint16(
kastore_t *self, const char *key, uint16_t *array, size_t array_len, int flags);
int kastore_oputs_int32(
kastore_t *self, const char *key, int32_t *array, size_t array_len, int flags);
int kastore_oputs_uint32(
kastore_t *self, const char *key, uint32_t *array, size_t array_len, int flags);
int kastore_oputs_int64(
kastore_t *self, const char *key, int64_t *array, size_t array_len, int flags);
int kastore_oputs_uint64(
kastore_t *self, const char *key, uint64_t *array, size_t array_len, int flags);
int kastore_oputs_float32(
kastore_t *self, const char *key, float *array, size_t array_len, int flags);
int kastore_oputs_float64(
kastore_t *self, const char *key, double *array, size_t array_len, int flags);
/** @} */
/* NOTE(review): undocumented in this header. Presumably writes a
 * human-readable description of the store's internal state to ``out`` for
 * debugging -- confirm against kastore.c. */
void kastore_print_state(kastore_t *self, FILE *out);
/**
@brief Returns a description of the specified error code.
@param err The error code, e.g. ``KAS_ERR_NO_MEMORY`` (which the subproject
example test expects to be described as "Out of memory").
@return String describing the error code.
*/
const char *kas_strerror(int err);
/**
@brief Returns the API version.
@rst
The API follows the `semver convention <https://semver.org/>`_, where the
major, minor and patch numbers have specific meanings. The versioning
scheme here also takes into account ABI compatibility.
@endrst
@return The API version.
*/
kas_version_t kas_version(void);
/* Free the allocation held in the lvalue ``pointer`` and reset it to NULL, so
 * that calling the macro again on the same variable is a harmless no-op.
 * free(NULL) is defined to do nothing, so no NULL guard is required. The
 * argument is evaluated more than once: do not pass an expression with side
 * effects. */
#define kas_safe_free(pointer) \
    do { \
        free(pointer); \
        (pointer) = NULL; \
    } while (0)
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: c/subprojects/kastore/meson.build
================================================
# Build definition for kastore. The project version is read from VERSION.txt;
# C is compiled as C99 and C++ as C++11, with warnings treated as errors.
project('kastore', ['c', 'cpp'],
version: files('VERSION.txt'),
default_options: [
'c_std=c99',
'cpp_std=c++11',
'warning_level=3',
'werror=true'])
# The extra C warning flags are *global* arguments, so they are only added when
# kastore is the top-level project and never when it is built as a subproject.
if not meson.is_subproject()
add_global_arguments([
'-W', '-Wmissing-prototypes', '-Wstrict-prototypes',
'-Wconversion', '-Wshadow', '-Wpointer-arith', '-Wcast-align',
'-Wcast-qual', '-Wwrite-strings', '-Wnested-externs',
'-fshort-enums', '-fno-common'], language : 'c')
endif
# Subprojects should compile in the static library for simplicity.
kastore_inc = include_directories('.')
kastore = static_library('kastore', 'kastore.c')
# kastore_dep bundles the static library with its include directory.
kastore_dep = declare_dependency(link_with : kastore, include_directories: kastore_inc)
# Everything below (installed header and shared library, the example program
# and the test executables) is only defined for a standalone build.
if not meson.is_subproject()
# The shared library can be installed into the system.
install_headers('kastore.h')
shared_library('kastore', 'kastore.c', install: true)
executable('example', ['example.c'], link_with: kastore)
cunit_dep = dependency('cunit')
src_root = meson.project_source_root()
# The C test binaries compile kastore.c directly instead of linking the library.
# MESON_VERSION exposes the project version to tests.c as a string literal.
tests_exe = executable('tests', ['tests.c', 'kastore.c'], dependencies: cunit_dep,
c_args: ['-DMESON_VERSION="@0@"'.format(meson.project_version())])
test('tests', tests_exe,
env: ['KAS_TEST_DATA_PREFIX=' + src_root + '/test-data/'])
cpp_tests_exe = executable('cpp_tests', ['cpp_tests.cpp'], link_with: kastore)
test('cpp_tests', cpp_tests_exe)
# -Wl,--wrap=SYMBOL asks the linker to route calls to SYMBOL through
# __wrap_SYMBOL. NOTE(review): presumably malloc_tests.c and io_tests.c supply
# those wrappers to simulate allocation and I/O failures -- confirm there.
malloc_tests_exe = executable('malloc_tests', ['malloc_tests.c', 'kastore.c'],
dependencies: cunit_dep,
link_args:['-Wl,--wrap=malloc', '-Wl,--wrap=realloc', '-Wl,--wrap=calloc'])
test('malloc_tests', malloc_tests_exe, workdir: src_root)
io_tests_exe = executable('io_tests', ['io_tests.c', 'kastore.c'],
dependencies: cunit_dep,
link_args:[
'-Wl,--wrap=fwrite',
'-Wl,--wrap=fread',
'-Wl,--wrap=fclose',
'-Wl,--wrap=ftell',
'-Wl,--wrap=fseek'])
test('io_tests', io_tests_exe, workdir: src_root)
endif
================================================
FILE: c/tests/meson-subproject/example.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Simple example testing that we compile and link in tskit and kastore
* when we use meson submodules.
*/
#include
#include
#include
#include
/* Check that kastore is compiled and linked in: its out-of-memory error code
 * must map to the expected description. Declared with (void) so that the
 * definition is a proper prototype. */
void
test_kas_strerror(void)
{
    printf("test_kas_strerror\n");
    const char *str = kas_strerror(KAS_ERR_NO_MEMORY);
    assert(strcmp(str, "Out of memory") == 0);
}
/* Check that tskit is compiled and linked in: its out-of-memory error code
 * must map to the expected description. Declared with (void) so that the
 * definition is a proper prototype. */
void
test_strerror(void)
{
    printf("test_strerror\n");
    const char *str = tsk_strerror(TSK_ERR_NO_MEMORY);
    assert(strcmp(str, "Out of memory. (TSK_ERR_NO_MEMORY)") == 0);
}
/* Loading a tree sequence from a path that does not exist must fail with
 * TSK_ERR_IO. The progress message now names this function (it previously
 * printed "test_open_error"). */
void
test_load_error(void)
{
    printf("test_load_error\n");
    tsk_treeseq_t ts;
    int ret = tsk_treeseq_load(&ts, "no such file", 0);
    assert(ret == TSK_ERR_IO);
    tsk_treeseq_free(&ts);
}
/* Initialise a table collection and add two rows to its node table. The
 * asserts expect tsk_node_table_add_row to return 0 and then 1 for the two
 * rows, and the table to hold two rows afterwards. Declared with (void) so
 * that the definition is a proper prototype. */
void
test_table_basics(void)
{
    printf("test_table_basics\n");
    tsk_table_collection_t tables;
    int ret = tsk_table_collection_init(&tables, 0);
    assert(ret == 0);

    ret = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);
    assert(ret == 0);
    ret = tsk_node_table_add_row(&tables.nodes, 0, 2.0, TSK_NULL, TSK_NULL, NULL, 0);
    assert(ret == 1);
    assert(tables.nodes.num_rows == 2);

    tsk_table_collection_free(&tables);
}
/* Run every check in turn; any failure aborts through assert(). Declared with
 * (void) so that the definition is a proper prototype. */
int
main(void)
{
    test_kas_strerror();
    test_strerror();
    test_load_error();
    test_table_basics();
    return 0;
}
================================================
FILE: c/tests/meson-subproject/meson.build
================================================
# Minimal project that consumes tskit as a meson subproject.
project('example', 'c')
# Pull in the tskit subproject and fetch the dependency object it exports.
tskit_proj = subproject('tskit')
tskit_dep = tskit_proj.get_variable('tskit_dep')
# Build (and install) the example program against that dependency.
executable('example',
'example.c',
dependencies : [tskit_dep],
install : true)
================================================
FILE: c/tests/test_convert.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
#include
#include
#include
/* Convert the single-tree example to newick starting from node 0, node 4 and
 * node 6, each time with legacy ms labels and with the default labels. */
static void
test_single_tree_newick(void)
{
    char newick[1024];
    const size_t buffer_size = sizeof(newick);
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    int ret;

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        NULL, NULL, NULL, NULL, 0);
    ret = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0)
    ret = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK)

    /* Node 0. Seems odd, but this is what a single node newick tree looks
     * like. Newick parsers seems to accept it in any case */
    ret = tsk_convert_newick(
        &tree, 0, 0, TSK_NEWICK_LEGACY_MS_LABELS, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_STRING_EQUAL(newick, "1;");
    ret = tsk_convert_newick(&tree, 0, 0, 0, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_STRING_EQUAL(newick, "n0;");

    /* Node 4. */
    ret = tsk_convert_newick(
        &tree, 4, 0, TSK_NEWICK_LEGACY_MS_LABELS, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_STRING_EQUAL(newick, "(1:1,2:1);");
    ret = tsk_convert_newick(&tree, 4, 0, 0, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_STRING_EQUAL(newick, "(n0:1,n1:1);");

    /* Node 6. */
    ret = tsk_convert_newick(
        &tree, 6, 0, TSK_NEWICK_LEGACY_MS_LABELS, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_STRING_EQUAL(newick, "((1:1,2:1):2,(3:2,4:2):1);");
    ret = tsk_convert_newick(&tree, 6, 0, 0, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_STRING_EQUAL(newick, "((n0:1,n1:1):2,(n2:2,n3:2):1);");

    tsk_tree_free(&tree);
    tsk_treeseq_free(&treeseq);
}
/* Exercise the error returns of tsk_convert_newick: out-of-range root nodes,
 * a NULL output buffer, and every buffer size below 1 + strlen of the
 * default-label output. */
static void
test_single_tree_newick_errors(void)
{
    char newick[1024];
    const size_t buffer_size = sizeof(newick);
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    size_t size, len;
    int ret;

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        NULL, NULL, NULL, NULL, 0);
    ret = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0)
    ret = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK)

    /* Root node IDs outside the node table. */
    ret = tsk_convert_newick(&tree, -1, 1, 0, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    ret = tsk_convert_newick(&tree, 7, 1, 0, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* No output buffer. */
    ret = tsk_convert_newick(&tree, 6, 0, 0, buffer_size, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Measure the successful output, then try every smaller buffer size. */
    ret = tsk_convert_newick(&tree, 6, 0, 0, buffer_size, newick);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    len = 1 + strlen(newick);
    size = 0;
    while (size < len) {
        ret = tsk_convert_newick(&tree, 6, 0, 0, size, newick);
        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BUFFER_OVERFLOW);
        size++;
    }

    /* The same size is enough for the legacy ms labelling. */
    ret = tsk_convert_newick(&tree, 6, 0, TSK_NEWICK_LEGACY_MS_LABELS, len, newick);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_STRING_EQUAL(newick, "((1:1,2:1):2,(3:2,4:2):1);");

    tsk_tree_free(&tree);
    tsk_treeseq_free(&treeseq);
}
/* Register the newick conversion tests and hand control to the shared test
 * driver. */
int
main(int argc, char **argv)
{
    /* Terminated by a { NULL, NULL } sentinel entry. */
    CU_TestInfo suite[] = {
        { "test_single_tree_newick", test_single_tree_newick },
        { "test_single_tree_newick_errors", test_single_tree_newick_errors },
        { NULL, NULL },
    };

    return test_main(suite, argc, argv);
}
================================================
FILE: c/tests/test_core.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
#include
#include
#include
#include
/* Every error code in (-8192, 0] must map to a non-NULL, non-empty message,
 * and code 0 must map to the "normal exit" message. */
static void
test_strerror(void)
{
    const int max_error_code = 8192; /* totally arbitrary */
    const char *msg;
    int err;

    for (err = 0; err > -max_error_code; err--) {
        msg = tsk_strerror(err);
        CU_ASSERT_FATAL(msg != NULL);
        CU_ASSERT(strlen(msg) > 0);
    }
    CU_ASSERT_STRING_EQUAL(
        tsk_strerror(0), "Normal exit condition. This is not an error!");
}
/* A kastore error wrapped by tsk_set_kas_error must be recognised as a kastore
 * error, unwrap to the original code, and share kastore's message. */
static void
test_strerror_kastore(void)
{
    int kastore_errors[]
        = { KAS_ERR_NO_MEMORY, KAS_ERR_KEY_NOT_FOUND, KAS_ERR_BAD_FILE_FORMAT };
    const size_t num_errors = sizeof(kastore_errors) / sizeof(kastore_errors[0]);
    size_t j = 0;
    int err;

    while (j < num_errors) {
        err = tsk_set_kas_error(kastore_errors[j]);
        CU_ASSERT_TRUE(tsk_is_kas_error(err));
        CU_ASSERT_EQUAL_FATAL(tsk_get_kas_error(err), kastore_errors[j]);
        CU_ASSERT_STRING_EQUAL(tsk_strerror(err), kas_strerror(kastore_errors[j]));
        j++;
    }
}
/* A generated UUID must be 36 characters long with dashes at offsets 8, 13,
 * 18 and 23, and two consecutive UUIDs must differ. */
static void
test_generate_uuid(void)
{
    char uuid[37];
    char other_uuid[sizeof(uuid)];
    const size_t uuid_size = sizeof(uuid) - 1;
    const size_t dash_offsets[] = { 8, 13, 18, 23 };
    size_t k;
    int ret;

    ret = tsk_generate_uuid(uuid, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(strlen(uuid), uuid_size);
    for (k = 0; k < sizeof(dash_offsets) / sizeof(dash_offsets[0]); k++) {
        CU_ASSERT_EQUAL(uuid[dash_offsets[k]], '-');
    }

    ret = tsk_generate_uuid(other_uuid, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(strlen(other_uuid), uuid_size);
    CU_ASSERT_STRING_NOT_EQUAL(uuid, other_uuid);
}
/* Table-driven check of tsk_round(source, num_digits): each row gives an
 * input, the number of decimal digits to round to, and the exact double the
 * call must return (compared with ==, not with a tolerance). A few digit
 * counts are deliberately absent from the table; see the inline comments. */
static void
test_double_round(void)
{
struct test_case {
/* Value passed to tsk_round. */
double source;
/* Number of decimal digits to round to. */
unsigned int num_digits;
/* Exact expected return value. */
double result;
};
struct test_case test_cases[] = {
{ 1.555, 3, 1.555 },
{ 1.5555, 2, 1.56 },
/* catch the halfway between integers case */
{ 1.5555, 3, 1.556 },
{ 1.5111, 3, 1.511 },
{ 1.5112, 3, 1.511 },
{ 3.141592653589793, 0, 3.0 },
{ 3.141592653589793, 1, 3.1 },
{ 3.141592653589793, 2, 3.14 },
{ 3.141592653589793, 3, 3.142 },
{ 3.141592653589793, 4, 3.1416 },
{ 3.141592653589793, 5, 3.14159 },
{ 3.141592653589793, 6, 3.141593 },
{ 3.141592653589793, 7, 3.1415927 },
{ 3.141592653589793, 8, 3.14159265 },
{ 3.141592653589793, 9, 3.141592654 },
{ 3.141592653589793, 10, 3.1415926536 },
{ 3.141592653589793, 11, 3.14159265359 },
{ 3.141592653589793, 12, 3.14159265359 },
{ 3.141592653589793, 13, 3.1415926535898 },
{ 3.141592653589793, 14, 3.14159265358979 },
{ 3.141592653589793, 15, 3.141592653589793 },
{ 3.141592653589793, 16, 3.141592653589793 },
{ 3.141592653589793, 17, 3.141592653589793 },
{ 3.141592653589793, 18, 3.141592653589793 },
{ 3.141592653589793, 19, 3.141592653589793 },
/* We have tiny differences in precision at k=20; not worth worrying about. */
{ 3.141592653589793, 21, 3.141592653589793 },
{ 3.141592653589793, 22, 3.141592653589793 },
{ 3.141592653589793, 23, 3.141592653589793 },
{ 0.3333333333333333, 0, 0.0 },
{ 0.3333333333333333, 1, 0.3 },
{ 0.3333333333333333, 2, 0.33 },
{ 0.3333333333333333, 3, 0.333 },
{ 0.3333333333333333, 4, 0.3333 },
{ 0.3333333333333333, 5, 0.33333 },
{ 0.3333333333333333, 6, 0.333333 },
{ 0.3333333333333333, 7, 0.3333333 },
{ 0.3333333333333333, 8, 0.33333333 },
{ 0.3333333333333333, 9, 0.333333333 },
{ 0.3333333333333333, 10, 0.3333333333 },
{ 0.3333333333333333, 11, 0.33333333333 },
{ 0.3333333333333333, 12, 0.333333333333 },
{ 0.3333333333333333, 13, 0.3333333333333 },
{ 0.3333333333333333, 14, 0.33333333333333 },
{ 0.3333333333333333, 15, 0.333333333333333 },
{ 0.3333333333333333, 16, 0.3333333333333333 },
{ 0.3333333333333333, 17, 0.3333333333333333 },
{ 0.3333333333333333, 18, 0.3333333333333333 },
{ 0.3333333333333333, 19, 0.3333333333333333 },
{ 0.3333333333333333, 20, 0.3333333333333333 },
{ 0.3333333333333333, 21, 0.3333333333333333 },
{ 0.3333333333333333, 22, 0.3333333333333333 },
{ 0.3333333333333333, 23, 0.3333333333333333 },
{ 0.6666666666666666, 0, 1.0 },
{ 0.6666666666666666, 1, 0.7 },
{ 0.6666666666666666, 2, 0.67 },
{ 0.6666666666666666, 3, 0.667 },
{ 0.6666666666666666, 4, 0.6667 },
{ 0.6666666666666666, 5, 0.66667 },
{ 0.6666666666666666, 6, 0.666667 },
{ 0.6666666666666666, 7, 0.6666667 },
{ 0.6666666666666666, 8, 0.66666667 },
{ 0.6666666666666666, 9, 0.666666667 },
{ 0.6666666666666666, 10, 0.6666666667 },
{ 0.6666666666666666, 11, 0.66666666667 },
{ 0.6666666666666666, 12, 0.666666666667 },
{ 0.6666666666666666, 13, 0.6666666666667 },
{ 0.6666666666666666, 14, 0.66666666666667 },
{ 0.6666666666666666, 15, 0.666666666666667 },
{ 0.6666666666666666, 16, 0.6666666666666666 },
{ 0.6666666666666666, 17, 0.6666666666666666 },
{ 0.6666666666666666, 18, 0.6666666666666666 },
{ 0.6666666666666666, 19, 0.6666666666666666 },
{ 0.6666666666666666, 20, 0.6666666666666666 },
{ 0.6666666666666666, 21, 0.6666666666666666 },
{ 0.6666666666666666, 22, 0.6666666666666666 },
{ 0.6666666666666666, 23, 0.6666666666666666 },
{ 0.07692307692307693, 0, 0.0 },
{ 0.07692307692307693, 1, 0.1 },
{ 0.07692307692307693, 2, 0.08 },
{ 0.07692307692307693, 3, 0.077 },
{ 0.07692307692307693, 4, 0.0769 },
{ 0.07692307692307693, 5, 0.07692 },
{ 0.07692307692307693, 6, 0.076923 },
{ 0.07692307692307693, 7, 0.0769231 },
{ 0.07692307692307693, 8, 0.07692308 },
{ 0.07692307692307693, 9, 0.076923077 },
{ 0.07692307692307693, 10, 0.0769230769 },
{ 0.07692307692307693, 11, 0.07692307692 },
{ 0.07692307692307693, 12, 0.076923076923 },
{ 0.07692307692307693, 13, 0.0769230769231 },
{ 0.07692307692307693, 14, 0.07692307692308 },
{ 0.07692307692307693, 15, 0.076923076923077 },
{ 0.07692307692307693, 16, 0.0769230769230769 },
{ 0.07692307692307693, 17, 0.07692307692307693 },
{ 0.07692307692307693, 18, 0.07692307692307693 },
{ 0.07692307692307693, 19, 0.07692307692307693 },
{ 0.07692307692307693, 20, 0.07692307692307693 },
/* Tiny difference in precision at k=21 */
{ 0.07692307692307693, 22, 0.07692307692307693 },
{ 0.07692307692307693, 23, 0.07692307692307693 },
{ 1e-21, 0, 0.0 },
{ 1e-21, 1, 0.0 },
{ 1e-21, 2, 0.0 },
{ 1e-21, 3, 0.0 },
{ 1e-21, 4, 0.0 },
{ 1e-21, 5, 0.0 },
{ 1e-21, 6, 0.0 },
{ 1e-21, 7, 0.0 },
{ 1e-21, 8, 0.0 },
{ 1e-21, 9, 0.0 },
{ 1e-21, 10, 0.0 },
{ 1e-21, 11, 0.0 },
{ 1e-21, 12, 0.0 },
{ 1e-21, 13, 0.0 },
{ 1e-21, 14, 0.0 },
{ 1e-21, 15, 0.0 },
{ 1e-21, 16, 0.0 },
{ 1e-21, 17, 0.0 },
{ 1e-21, 18, 0.0 },
{ 1e-21, 19, 0.0 },
{ 1e-21, 20, 0.0 },
{ 1e-21, 21, 1e-21 },
{ 1e-21, 22, 1e-21 },
{ 1e-21, 23, 1e-21 },
{ 1e-10, 0, 0.0 },
{ 1e-10, 1, 0.0 },
{ 1e-10, 2, 0.0 },
{ 1e-10, 3, 0.0 },
{ 1e-10, 4, 0.0 },
{ 1e-10, 5, 0.0 },
{ 1e-10, 6, 0.0 },
{ 1e-10, 7, 0.0 },
{ 1e-10, 8, 0.0 },
{ 1e-10, 9, 0.0 },
{ 1e-10, 10, 1e-10 },
{ 1e-10, 11, 1e-10 },
{ 1e-10, 12, 1e-10 },
{ 1e-10, 13, 1e-10 },
{ 1e-10, 14, 1e-10 },
{ 1e-10, 15, 1e-10 },
{ 1e-10, 16, 1e-10 },
{ 1e-10, 17, 1e-10 },
{ 1e-10, 18, 1e-10 },
{ 1e-10, 19, 1e-10 },
{ 1e-10, 20, 1e-10 },
{ 1e-10, 21, 1e-10 },
{ 1e-10, 22, 1e-10 },
{ 1e-10, 23, 1e-10 },
{ 3.141592653589793e-08, 0, 0.0 },
{ 3.141592653589793e-08, 1, 0.0 },
{ 3.141592653589793e-08, 2, 0.0 },
{ 3.141592653589793e-08, 3, 0.0 },
{ 3.141592653589793e-08, 4, 0.0 },
{ 3.141592653589793e-08, 5, 0.0 },
{ 3.141592653589793e-08, 6, 0.0 },
{ 3.141592653589793e-08, 7, 0.0 },
{ 3.141592653589793e-08, 8, 3e-08 },
{ 3.141592653589793e-08, 9, 3.1e-08 },
{ 3.141592653589793e-08, 10, 3.14e-08 },
{ 3.141592653589793e-08, 11, 3.142e-08 },
{ 3.141592653589793e-08, 12, 3.1416e-08 },
{ 3.141592653589793e-08, 13, 3.14159e-08 },
{ 3.141592653589793e-08, 14, 3.141593e-08 },
{ 3.141592653589793e-08, 15, 3.1415927e-08 },
{ 3.141592653589793e-08, 16, 3.14159265e-08 },
{ 3.141592653589793e-08, 17, 3.141592654e-08 },
{ 3.141592653589793e-08, 18, 3.1415926536e-08 },
{ 3.141592653589793e-08, 19, 3.14159265359e-08 },
{ 3.141592653589793e-08, 20, 3.14159265359e-08 },
{ 3.141592653589793e-08, 21, 3.1415926535898e-08 },
/* Tiny precision mismatch at k=22 */
{ 3.141592653589793e-08, 23, 3.141592653589793e-08 },
};
size_t num_test_cases = sizeof(test_cases) / sizeof(*test_cases);
size_t j;
/* The assertion is fatal, so the first mismatching row stops the test. */
for (j = 0; j < num_test_cases; j++) {
CU_ASSERT_EQUAL_FATAL(tsk_round(test_cases[j].source, test_cases[j].num_digits),
test_cases[j].result);
}
}
/* Exercise the block allocator: rejection of a zero chunk size, one chunk per
 * full-sized request, NULL for requests larger than the chunk size, and chunk
 * accounting when requests do not fill a chunk exactly. */
static void
test_blkalloc(void)
{
tsk_blkalloc_t alloc;
int ret;
size_t j, block_size;
void *mem;
/* A chunk size of zero is rejected; free is still called after the failed init. */
ret = tsk_blkalloc_init(&alloc, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
tsk_blkalloc_free(&alloc);
for (block_size = 1; block_size < 10; block_size++) {
ret = tsk_blkalloc_init(&alloc, block_size);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Each request for a whole chunk's worth of bytes is expected to bring the
 * chunk count to j + 1. The memory is written to so that memory checkers
 * can detect an undersized allocation. */
for (j = 0; j < 10; j++) {
mem = tsk_blkalloc_get(&alloc, block_size);
CU_ASSERT_TRUE(mem != NULL);
CU_ASSERT_EQUAL(alloc.num_chunks, j + 1);
tsk_memset(mem, 0, block_size);
}
/* Requests larger than the chunk size must return NULL. */
mem = tsk_blkalloc_get(&alloc, block_size + 1);
CU_ASSERT_EQUAL(mem, NULL);
mem = tsk_blkalloc_get(&alloc, block_size + 2);
CU_ASSERT_EQUAL(mem, NULL);
tsk_blkalloc_print_state(&alloc, _devnull);
tsk_blkalloc_free(&alloc);
}
/* Allocate awkward sized chunk */
ret = tsk_blkalloc_init(&alloc, 100);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* 90 + 10 bytes are expected to share the first 100-byte chunk. */
mem = tsk_blkalloc_get(&alloc, 90);
CU_ASSERT_FATAL(mem != NULL);
tsk_memset(mem, 0, 90);
mem = tsk_blkalloc_get(&alloc, 10);
CU_ASSERT_FATAL(mem != NULL);
tsk_memset(mem, 0, 10);
CU_ASSERT_EQUAL(alloc.num_chunks, 1);
/* The next 90 bytes are expected to go into a second chunk. */
mem = tsk_blkalloc_get(&alloc, 90);
CU_ASSERT_FATAL(mem != NULL);
tsk_memset(mem, 0, 90);
CU_ASSERT_EQUAL(alloc.num_chunks, 2);
/* 11 bytes is one more than the 10 left over, so a third chunk is expected. */
mem = tsk_blkalloc_get(&alloc, 11);
CU_ASSERT_FATAL(mem != NULL);
tsk_memset(mem, 0, 11);
CU_ASSERT_EQUAL(alloc.num_chunks, 3);
tsk_blkalloc_free(&alloc);
}
/* TSK_UNKNOWN_TIME must be a NaN that tsk_is_unknown_time recognises, while an
 * ordinary NaN, infinity and finite values must not be reported as unknown. */
static void
test_unknown_time(void)
{
    const double known_times[] = { NAN, 0, INFINITY, 1 };
    size_t j;

    CU_ASSERT_TRUE(tsk_isnan(TSK_UNKNOWN_TIME));
    CU_ASSERT_TRUE(tsk_is_unknown_time(TSK_UNKNOWN_TIME));
    for (j = 0; j < sizeof(known_times) / sizeof(known_times[0]); j++) {
        CU_ASSERT_FALSE(tsk_is_unknown_time(known_times[j]));
    }
}
/* Zero-sized requests to tsk_malloc and tsk_calloc must still return a
 * non-NULL pointer, which is then released with free(). */
static void
test_malloc_zero(void)
{
    void *p;

    p = tsk_malloc(0);
    CU_ASSERT_FATAL(p != NULL);
    free(p);

    p = tsk_calloc(0, 1);
    CU_ASSERT_FATAL(p != NULL);
    free(p);
}
/* Where tsk_size_t can hold values above SIZE_MAX, a request for SIZE_MAX + 1
 * bytes must fail cleanly. The body is compiled out on other platforms. */
static void
test_malloc_overflow(void)
{
#if TSK_MAX_SIZE > SIZE_MAX
    tsk_size_t size_max = SIZE_MAX;
    void *p;

    p = tsk_malloc(size_max + 1);
    CU_ASSERT_FATAL(p == NULL);

    p = tsk_calloc(size_max + 1, 1);
    CU_ASSERT_FATAL(p == NULL);
#endif
}
/* The debug stream defaults to stdout, can be redirected to a file, and can
 * be set back to stdout.
 *
 * The fopen result is checked before use. The mid-test check is non-fatal and
 * the file is closed before the last fatal check, so a failure can neither
 * leave the debug stream pointing at the temporary file nor leak the FILE. */
static void
test_debug_stream(void)
{
    FILE *f;

    CU_ASSERT_FATAL(tsk_get_debug_stream() == stdout);
    CU_ASSERT_FATAL(tsk_get_debug_stream() == stdout);

    f = fopen(_tmp_file_name, "w");
    CU_ASSERT_FATAL(f != NULL);

    tsk_set_debug_stream(f);
    CU_ASSERT(tsk_get_debug_stream() == f);
    tsk_set_debug_stream(stdout);
    fclose(f);
    CU_ASSERT_FATAL(tsk_get_debug_stream() == stdout);
}
/* Recursively check the balance factor stored in each node of the subtree
 * rooted at node, and return the subtree's height (0 for an empty subtree). */
static int
validate_avl_node(tsk_avl_node_int_t *node)
{
    int height, lheight, rheight;

    if (node == NULL) {
        return 0;
    }
    lheight = validate_avl_node(node->llink);
    rheight = validate_avl_node(node->rlink);
    height = 1 + TSK_MAX(lheight, rheight);

    if (lheight == 0 && rheight == 0) {
        /* Leaf. */
        CU_ASSERT_FATAL(height == 1);
        CU_ASSERT_FATAL(node->balance == 0);
    } else if (lheight == 0) {
        /* Only a right child. */
        CU_ASSERT_FATAL(height == 2);
        CU_ASSERT_FATAL(node->balance == 1);
    } else if (rheight == 0) {
        /* Only a left child. */
        CU_ASSERT_FATAL(height == 2);
        CU_ASSERT_FATAL(node->balance == -1);
    } else {
        /* Two children. */
        CU_ASSERT_FATAL(node->balance == rheight - lheight);
    }
    return height;
}
/* A freshly initialised AVL tree has size zero, a height matching the
 * validator's result, and finds no keys. */
static void
test_avl_empty(void)
{
    tsk_avl_tree_int_t tree;
    int64_t key;
    int height;

    tsk_avl_tree_int_init(&tree);
    height = validate_avl_node(tree.head.rlink);
    CU_ASSERT_EQUAL((tsk_size_t) height, tree.height);
    CU_ASSERT_EQUAL(0, tree.size);
    tsk_avl_tree_int_print_state(&tree, _devnull);
    for (key = -1; key <= 1; key++) {
        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, key), NULL);
    }
    tsk_avl_tree_int_free(&tree);
}
/* Insert keys one at a time into a new AVL tree, checking after every
 * insertion that the key can be found, a duplicate insert returns 1, the tree
 * validates, and the keys not yet inserted are absent. Finally check that the
 * ordered node list is strictly increasing by key. */
static void
validate_avl(size_t num_keys, int64_t *keys)
{
    tsk_avl_tree_int_t tree;
    tsk_avl_node_int_t *nodes = malloc(num_keys * sizeof(*nodes));
    tsk_avl_node_int_t **ordered_nodes = malloc(num_keys * sizeof(*ordered_nodes));
    tsk_avl_node_int_t *node;
    tsk_avl_node_int_t duplicate;
    size_t j, k;
    int ret, height;

    CU_ASSERT_FATAL(nodes != NULL);
    CU_ASSERT_FATAL(ordered_nodes != NULL);
    tsk_avl_tree_int_init(&tree);

    /* Assumes the keys are unique */
    for (j = 0; j < num_keys; j++) {
        node = &nodes[j];
        node->key = keys[j];
        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, keys[j]), NULL);
        ret = tsk_avl_tree_int_insert(&tree, node);
        CU_ASSERT_FATAL(ret == 0);
        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, keys[j]), node);

        /* Inserting a second node with the same key is expected to return 1. */
        duplicate.key = keys[j];
        ret = tsk_avl_tree_int_insert(&tree, &duplicate);
        CU_ASSERT_FATAL(ret == 1);

        height = validate_avl_node(tree.head.rlink);
        CU_ASSERT_EQUAL((tsk_size_t) height, tree.height);
        CU_ASSERT_EQUAL(j + 1, tree.size);
        tsk_avl_tree_int_print_state(&tree, _devnull);

        for (k = j + 1; k < num_keys; k++) {
            CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&tree, keys[k]), NULL);
        }
    }

    tsk_avl_tree_int_ordered_nodes(&tree, ordered_nodes);
    for (j = 1; j < num_keys; j++) {
        CU_ASSERT_FATAL(ordered_nodes[j - 1]->key < ordered_nodes[j]->key);
    }

    tsk_avl_tree_int_free(&tree);
    free(nodes);
    free(ordered_nodes);
}
/* Validate the AVL tree with keys inserted in increasing and then in
 * decreasing order. */
static void
test_avl_sequential(void)
{
    int64_t ascending[] = { 0, 1, 2, 3, 4, 5, 6, 7 };
    int64_t descending[] = { 7, 6, 5, 4, 3, 2, 1, 0 };

    validate_avl(sizeof(ascending) / sizeof(ascending[0]), ascending);
    validate_avl(sizeof(descending) / sizeof(descending[0]), descending);
}
/* Validate the AVL tree with 100 keys of growing magnitude and alternating
 * sign: 0, 1, -2, 3, -4, ... */
static void
test_avl_interleaved(void)
{
    const size_t num_keys = 100;
    int64_t *keys = malloc(num_keys * sizeof(*keys));
    int64_t magnitude;
    size_t j;

    CU_ASSERT_FATAL(keys != NULL);
    for (j = 0; j < num_keys; j++) {
        magnitude = (int64_t) j;
        /* Even indexes are negated, odd indexes stay positive. */
        keys[j] = (j % 2 == 0) ? -magnitude : magnitude;
    }
    validate_avl(num_keys, keys);
    free(keys);
}
/* Validate the AVL tree with a fixed, shuffled set of distinct keys. */
static void
test_avl_random(void)
{
    /* This example goes through all the code paths in the AVL insert algorithm */
    int64_t keys[] = { 2, 79, -8, -86, 6, -29, 88, -80, 21, -26, -13, 16, -1, 3, 51, 30,
        49, -48, -99, 57, -63, 29, 91, 87, 60, -43, -79, -12, -52, -42, 69, 89, 74, -50,
        7, -46, -37, 34, -28, 66, -83, 31, -41, -87, -92, -11, -17, -9, 10, 98, 71, -93,
        -66, -20, 63, -51, 33, -47, 5, -97, 90, 45, -57, 61, -6, -53, 99, -61, -19, -77,
        53, 23, -60, 56, -56, -36, -30, 28, 35, -38, 38, 62, -68, 22, -96, -73, -89,
        50 };
    const size_t num_keys = sizeof(keys) / sizeof(keys[0]);

    validate_avl(num_keys, keys);
}
/* Exercise tsk_bitset_t: setting bits, listing and counting the set bits of a
 * row, and the subtract / intersect / union operations between rows.
 *
 * The scratch arrays are now cleared with sizeof(), so that all 64 elements
 * are reset between phases; the previous byte count of 64 only covered part
 * of each array. */
static void
test_bit_arrays(void)
{
    // NB: This test is only valid for the 32 bit implementation of bit arrays. If we
    // were to change the chunk size of a bit array, we'd need to update these tests
    tsk_bitset_t arr;
    tsk_id_t items_truth[64] = { 0 }, items[64] = { 0 };
    tsk_size_t n_items = 0, n_items_truth = 0;

    // test item retrieval
    tsk_bitset_init(&arr, 90, 1);
    CU_ASSERT_EQUAL_FATAL(arr.len, 1);
    CU_ASSERT_EQUAL_FATAL(arr.row_len, 3);
    tsk_bitset_get_items(&arr, 0, items, &n_items);
    assert_arrays_equal(n_items_truth, items, items_truth);

    for (tsk_bitset_val_t i = 0; i < 20; i++) {
        tsk_bitset_set_bit(&arr, 0, i);
        items_truth[n_items_truth] = (tsk_id_t) i;
        n_items_truth++;
    }
    tsk_bitset_set_bit(&arr, 0, 63);
    tsk_bitset_set_bit(&arr, 0, 65);

    // these assertions are only valid for 32-bit values
    CU_ASSERT_EQUAL_FATAL(arr.data[0], 1048575);
    CU_ASSERT_EQUAL_FATAL(arr.data[1], 2147483648);
    CU_ASSERT_EQUAL_FATAL(arr.data[2], 2);

    // verify our assumptions about bit array counting
    CU_ASSERT_EQUAL_FATAL(tsk_bitset_count(&arr, 0), 22);

    // only the first n_items_truth (20) entries are compared here; bits 63
    // and 65 are not recorded in items_truth
    tsk_bitset_get_items(&arr, 0, items, &n_items);
    assert_arrays_equal(n_items_truth, items, items_truth);

    tsk_memset(items, 0, sizeof(items));
    tsk_memset(items_truth, 0, sizeof(items_truth));
    n_items = n_items_truth = 0;
    tsk_bitset_free(&arr);

    // create a length-2 array with 64 bit capacity (two chunks per row)
    tsk_bitset_init(&arr, 64, 2);
    CU_ASSERT_EQUAL_FATAL(arr.len, 2);
    CU_ASSERT_EQUAL_FATAL(arr.row_len, 2);

    // fill the first 50 bits of the first row
    for (tsk_bitset_val_t i = 0; i < 50; i++) {
        tsk_bitset_set_bit(&arr, 0, i);
        items_truth[n_items_truth] = (tsk_id_t) i;
        n_items_truth++;
    }
    tsk_bitset_get_items(&arr, 0, items, &n_items);
    assert_arrays_equal(n_items_truth, items, items_truth);

    tsk_memset(items, 0, sizeof(items));
    tsk_memset(items_truth, 0, sizeof(items_truth));
    n_items = n_items_truth = 0;

    // fill bits 20-40 of the second row
    for (tsk_bitset_val_t i = 20; i < 40; i++) {
        tsk_bitset_set_bit(&arr, 1, i);
        items_truth[n_items_truth] = (tsk_id_t) i;
        n_items_truth++;
    }
    tsk_bitset_get_items(&arr, 1, items, &n_items);
    assert_arrays_equal(n_items_truth, items, items_truth);

    tsk_memset(items, 0, sizeof(items));
    tsk_memset(items_truth, 0, sizeof(items_truth));
    n_items = n_items_truth = 0;

    // verify our assumptions about row selection
    CU_ASSERT_EQUAL_FATAL(arr.data[0], 4294967295); // row1 elem1
    CU_ASSERT_EQUAL_FATAL(arr.data[1], 262143);     // row1 elem2
    CU_ASSERT_EQUAL_FATAL(arr.data[2], 4293918720); // row2 elem1
    CU_ASSERT_EQUAL_FATAL(arr.data[3], 255);        // row2 elem2

    // subtract the second from the first row, store in first
    tsk_bitset_subtract(&arr, 0, &arr, 1);

    // verify our assumptions about subtraction
    CU_ASSERT_EQUAL_FATAL(arr.data[0], 1048575);
    CU_ASSERT_EQUAL_FATAL(arr.data[1], 261888);

    tsk_bitset_t int_result;
    tsk_bitset_init(&int_result, 64, 1);
    CU_ASSERT_EQUAL_FATAL(int_result.len, 1);
    CU_ASSERT_EQUAL_FATAL(int_result.row_len, 2);

    // their intersection should be zero
    tsk_bitset_intersect(&arr, 0, &arr, 1, &int_result);
    CU_ASSERT_EQUAL_FATAL(int_result.data[0], 0);
    CU_ASSERT_EQUAL_FATAL(int_result.data[1], 0);

    // now, add them back together, storing back in a
    tsk_bitset_union(&arr, 0, &arr, 1);

    // now, their intersection should be the subtracted chunk (20-40)
    tsk_bitset_intersect(&arr, 0, &arr, 1, &int_result);
    CU_ASSERT_EQUAL_FATAL(int_result.data[0], 4293918720);
    CU_ASSERT_EQUAL_FATAL(int_result.data[1], 255);

    tsk_bitset_free(&int_result);
    tsk_bitset_free(&arr);
}
/* The version assembled from the TSK_VERSION_* macros must equal the project
 * version that meson passes in at compile time. */
static void
test_meson_version(void)
{
    char version[100];

    /* snprintf bounds the write to the size of the buffer. */
    snprintf(version, sizeof(version), "%d.%d.%d", TSK_VERSION_MAJOR,
        TSK_VERSION_MINOR, TSK_VERSION_PATCH);
    /* the MESON_PROJECT_VERSION define is passed in by meson when compiling */
    CU_ASSERT_STRING_EQUAL(version, MESON_PROJECT_VERSION);
}
/* Register the core utility tests and hand control to the shared test
 * driver. */
int
main(int argc, char **argv)
{
    /* Terminated by a { NULL, NULL } sentinel entry. */
    CU_TestInfo suite[] = {
        { "test_strerror", test_strerror },
        { "test_strerror_kastore", test_strerror_kastore },
        { "test_generate_uuid", test_generate_uuid },
        { "test_double_round", test_double_round },
        { "test_blkalloc", test_blkalloc },
        { "test_unknown_time", test_unknown_time },
        { "test_malloc_zero", test_malloc_zero },
        { "test_malloc_overflow", test_malloc_overflow },
        { "test_debug_stream", test_debug_stream },
        { "test_avl_empty", test_avl_empty },
        { "test_avl_sequential", test_avl_sequential },
        { "test_avl_interleaved", test_avl_interleaved },
        { "test_avl_random", test_avl_random },
        { "test_bit_arrays", test_bit_arrays },
        { "test_meson_version", test_meson_version },
        { NULL, NULL },
    };

    return test_main(suite, argc, argv);
}
================================================
FILE: c/tests/test_file_format.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
#include
/* Describes one array to be written into a kastore file by write_table_cols. */
typedef struct {
/* Key the array is stored under; passed to kastore_puts as a C string. */
const char *name;
/* Pointer to the array data. */
void *array;
/* Number of elements in array (cast to size_t when written). */
tsk_size_t len;
/* Element type passed to kastore_puts, e.g. KAS_INT8. */
int type;
} write_table_col_t;
/* Write each of the num_cols column descriptions in write_cols into store,
 * failing the current test fatally on the first kastore error. */
static void
write_table_cols(kastore_t *store, write_table_col_t *write_cols, size_t num_cols)
{
    size_t j = 0;
    int ret;

    while (j < num_cols) {
        const write_table_col_t *col = &write_cols[j];

        ret = kastore_puts(
            store, col->name, col->array, (size_t) col->len, col->type, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        j++;
    }
}
/* Dump ts to a temporary file, then copy every kastore item whose key is not
 * listed in drop_cols into a new store written to outfile. */
static void
copy_store_drop_columns(
    tsk_treeseq_t *ts, size_t num_drop_cols, const char **drop_cols, const char *outfile)
{
    char tmpfile[] = "/tmp/tsk_c_test_copy_XXXXXX";
    kastore_t read_store, write_store;
    kaitem_t *item;
    size_t j, k;
    bool keep;
    int fd;
    int ret = 0;

    fd = mkstemp(tmpfile);
    CU_ASSERT_FATAL(fd != -1);
    close(fd);

    ret = tsk_treeseq_dump(ts, tmpfile, 0);
    if (ret != 0) {
        /* Remove the temporary file before the fatal assertion fires. */
        unlink(tmpfile);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
    }

    ret = kastore_open(&read_store, tmpfile, "r", KAS_READ_ALL);
    /* We can now unlink the file as either kastore has read it all, or failed */
    unlink(tmpfile);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = kastore_open(&write_store, outfile, "w", 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Note: this API is not a documented part of kastore, so may be subject to
     * change. */
    for (j = 0; j < read_store.num_items; j++) {
        item = read_store.items + j;
        /* Keep the item until a drop column with the same length and bytes
         * as its key is found. */
        keep = true;
        for (k = 0; keep && k < num_drop_cols; k++) {
            keep = strlen(drop_cols[k]) != item->key_len
                   || strncmp(drop_cols[k], item->key, item->key_len) != 0;
        }
        if (!keep) {
            continue;
        }
        ret = kastore_put(&write_store, item->key, item->key_len, item->array,
            item->array_len, item->type, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
    }

    kastore_close(&read_store);
    ret = kastore_close(&write_store);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
}
static void
test_format_data_load_errors(void)
{
size_t uuid_size = 36;
char uuid[uuid_size];
char format_name[TSK_FILE_FORMAT_NAME_LENGTH];
double L[2];
uint32_t version[2]
= { TSK_FILE_FORMAT_VERSION_MAJOR, TSK_FILE_FORMAT_VERSION_MINOR };
write_table_col_t write_cols[] = {
{ "format/name", (void *) format_name, sizeof(format_name), KAS_INT8 },
{ "format/version", (void *) version, 2, KAS_UINT32 },
{ "sequence_length", (void *) L, 1, KAS_FLOAT64 },
{ "uuid", (void *) uuid, (tsk_size_t) uuid_size, KAS_INT8 },
};
tsk_table_collection_t tables;
kastore_t store;
size_t j;
int ret;
L[0] = 1;
L[1] = 0;
tsk_memcpy(format_name, TSK_FILE_FORMAT_NAME, sizeof(format_name));
/* Note: this will fail if we ever start parsing the form of the UUID */
tsk_memset(uuid, 0, uuid_size);
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
/* We've only defined the format headers, so we should fail immediately
* after with required columns not found */
CU_ASSERT_FALSE(tsk_is_kas_error(ret));
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Version too old */
version[0] = TSK_FILE_FORMAT_VERSION_MAJOR - 1;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_VERSION_TOO_OLD);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Version too new */
version[0] = TSK_FILE_FORMAT_VERSION_MAJOR + 1;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_VERSION_TOO_NEW);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
version[0] = TSK_FILE_FORMAT_VERSION_MAJOR;
/* Bad version length */
write_cols[1].len = 0;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_cols[1].len = 2;
/* Bad format name length */
write_cols[0].len = 0;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_cols[0].len = TSK_FILE_FORMAT_NAME_LENGTH;
/* Bad format name */
format_name[0] = 'X';
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
format_name[0] = 't';
/* Bad type for sequence length. */
write_cols[2].type = KAS_FLOAT32;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_TRUE(tsk_is_kas_error(ret));
CU_ASSERT_EQUAL_FATAL(ret ^ (1 << TSK_KAS_ERR_BIT), KAS_ERR_TYPE_MISMATCH);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_cols[2].type = KAS_FLOAT64;
/* Bad length for sequence length. */
write_cols[2].len = 2;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_cols[2].len = 1;
/* Bad value for sequence length. */
L[0] = -1;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SEQUENCE_LENGTH);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
L[0] = 1;
/* Wrong length for uuid */
write_cols[3].len = 1;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_cols[3].len = (tsk_size_t) uuid_size;
/* Missing keys */
for (j = 0; j < sizeof(write_cols) / sizeof(*write_cols) - 1; j++) {
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, j);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
}
}
/* For each optional (data, offset) column pair: a file missing only one
 * member of the pair must fail to load with TSK_ERR_BOTH_COLUMNS_REQUIRED,
 * while a file missing both members loads successfully but no longer
 * compares equal to the original tables. */
static void
test_missing_optional_column_pairs(void)
{
    const char *optional_pairs[][2] = {
        { "edges/metadata", "edges/metadata_offset" },
        { "migrations/metadata", "migrations/metadata_offset" },
        { "individuals/parents", "individuals/parents_offset" },
    };
    const size_t num_pairs = sizeof(optional_pairs) / sizeof(optional_pairs[0]);
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t original, reloaded;
    size_t pair, member;
    int err;

    err = tsk_treeseq_copy_tables(ts, &original, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    for (pair = 0; pair < num_pairs; pair++) {
        /* Drop the data column alone, then the offset column alone. */
        for (member = 0; member < 2; member++) {
            copy_store_drop_columns(
                ts, 1, &optional_pairs[pair][member], _tmp_file_name);
            err = tsk_table_collection_load(&reloaded, _tmp_file_name, 0);
            CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_BOTH_COLUMNS_REQUIRED);
            tsk_table_collection_free(&reloaded);
        }

        /* Drop both members together. */
        copy_store_drop_columns(ts, 2, optional_pairs[pair], _tmp_file_name);
        err = tsk_table_collection_load(&reloaded, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(err, 0);
        CU_ASSERT_FALSE(tsk_table_collection_equals(&original, &reloaded, 0));
        tsk_table_collection_free(&reloaded);
    }

    tsk_table_collection_free(&original);
    tsk_treeseq_free(ts);
    free(ts);
}
/* For each required (data, offset) column pair, checks the load error when
 * the data column, the offset column, or both are absent from the file. */
static void
test_missing_required_column_pairs(void)
{
    const char *required_cols[][2] = {
        { "individuals/location", "individuals/location_offset" },
        { "individuals/metadata", "individuals/metadata_offset" },
        { "mutations/derived_state", "mutations/derived_state_offset" },
        { "mutations/metadata", "mutations/metadata_offset" },
        { "nodes/metadata", "nodes/metadata_offset" },
        { "populations/metadata", "populations/metadata_offset" },
        { "provenances/record", "provenances/record_offset" },
        { "provenances/timestamp", "provenances/timestamp_offset" },
        { "sites/ancestral_state", "sites/ancestral_state_offset" },
        { "sites/metadata", "sites/metadata_offset" },
    };
    const size_t num_pairs = sizeof(required_cols) / sizeof(required_cols[0]);
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t loaded;
    const char **pair;
    size_t idx;
    int err;

    for (idx = 0; idx < num_pairs; idx++) {
        pair = required_cols[idx];

        /* Data column missing, offset column present. */
        copy_store_drop_columns(ts, 1, pair, _tmp_file_name);
        err = tsk_table_collection_load(&loaded, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_REQUIRED_COL_NOT_FOUND);
        tsk_table_collection_free(&loaded);

        /* Offset column missing, data column present. */
        copy_store_drop_columns(ts, 1, pair + 1, _tmp_file_name);
        err = tsk_table_collection_load(&loaded, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_BOTH_COLUMNS_REQUIRED);
        tsk_table_collection_free(&loaded);

        /* Both columns missing. */
        copy_store_drop_columns(ts, 2, pair, _tmp_file_name);
        err = tsk_table_collection_load(&loaded, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_REQUIRED_COL_NOT_FOUND);
        tsk_table_collection_free(&loaded);
    }

    tsk_treeseq_free(ts);
    free(ts);
}
static void
verify_bad_offset_columns(tsk_treeseq_t *ts, const char *offset_col)
{
int ret = 0;
kastore_t store;
tsk_table_collection_t tables;
uint32_t *offset_array, *offset_copy;
size_t offset_len;
int type;
uint32_t data_len;
ret = tsk_treeseq_dump(ts, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_open(&store, _tmp_file_name, "r", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_gets(&store, offset_col, (void **) &offset_array, &offset_len, &type);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(type, KAS_UINT32);
offset_copy = malloc(offset_len * sizeof(*offset_array));
CU_ASSERT_FATAL(offset_copy != NULL);
tsk_memcpy(offset_copy, offset_array, offset_len * sizeof(*offset_array));
data_len = offset_array[offset_len - 1];
CU_ASSERT_TRUE(data_len > 0);
kastore_close(&store);
offset_copy[0] = UINT32_MAX;
copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(&store, offset_col, offset_copy, offset_len, KAS_UINT32, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_OFFSET);
tsk_table_collection_free(&tables);
offset_copy[0] = 0;
offset_copy[offset_len - 1] = 0;
copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(&store, offset_col, offset_copy, offset_len, KAS_UINT32, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_OFFSET);
tsk_table_collection_free(&tables);
offset_copy[offset_len - 1] = data_len + 1;
copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(&store, offset_col, offset_copy, offset_len, KAS_UINT32, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_OFFSET);
tsk_table_collection_free(&tables);
copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(&store, offset_col, NULL, 0, KAS_UINT32, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
tsk_table_collection_free(&tables);
copy_store_drop_columns(ts, 1, &offset_col, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(&store, offset_col, offset_copy, offset_len, KAS_FLOAT32, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);
tsk_table_collection_free(&tables);
free(offset_copy);
}
/* Runs verify_bad_offset_columns() against each listed offset column of a
 * caterpillar tree sequence. */
static void
test_bad_offset_columns(void)
{
    /* We exclude "provenances/timestamp_offset" here because there are no
     * non-ragged columns in the provenances table, so this doesn't quite
     * fit into the same pattern as the other tables */
    const char *offset_cols[] = {
        "edges/metadata_offset",
        "migrations/metadata_offset",
        "individuals/location_offset",
        "individuals/parents_offset",
        "individuals/metadata_offset",
        "mutations/derived_state_offset",
        "mutations/metadata_offset",
        "nodes/metadata_offset",
        "populations/metadata_offset",
        "provenances/record_offset",
        "sites/ancestral_state_offset",
        "sites/metadata_offset",
    };
    const size_t num_offset_cols = sizeof(offset_cols) / sizeof(offset_cols[0]);
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    size_t idx = 0;

    while (idx < num_offset_cols) {
        verify_bad_offset_columns(ts, offset_cols[idx]);
        idx++;
    }

    tsk_treeseq_free(ts);
    free(ts);
}
/* Dumps with TSK_DUMP_FORCE_OFFSET_64 and checks that every key ending in
 * "_offset" is stored as KAS_UINT64, that at least one such key exists, and
 * that the file still loads to tables equal to the source tree sequence. */
static void
test_force_offset_64(void)
{
    const char *offset_str = "_offset";
    const size_t suffix_len = strlen(offset_str);
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t loaded, expected;
    kastore_t store;
    kaitem_t *entry;
    size_t idx;
    int num_offset_cols = 0;
    int err;

    err = tsk_treeseq_dump(ts, _tmp_file_name, TSK_DUMP_FORCE_OFFSET_64);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    err = kastore_open(&store, _tmp_file_name, "r", 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    for (idx = 0; idx < store.num_items; idx++) {
        entry = &store.items[idx];
        /* Only keys strictly longer than the suffix can qualify. */
        if (entry->key_len <= suffix_len) {
            continue;
        }
        /* Compare the tail of the key against "_offset". */
        if (strncmp(entry->key + (entry->key_len - suffix_len), offset_str, suffix_len)
            != 0) {
            continue;
        }
        CU_ASSERT_EQUAL(entry->type, KAS_UINT64);
        num_offset_cols++;
    }
    CU_ASSERT_TRUE(num_offset_cols > 0);
    kastore_close(&store);

    err = tsk_table_collection_load(&loaded, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_treeseq_copy_tables(ts, &expected, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&loaded, &expected, 0));

    tsk_table_collection_free(&loaded);
    tsk_table_collection_free(&expected);
    tsk_treeseq_free(ts);
    free(ts);
}
/* A file with only one of the two edge index columns must fail to load with
 * TSK_ERR_BOTH_COLUMNS_REQUIRED; a file with neither loads successfully,
 * compares equal to the original tables, and reports no index. */
static void
test_missing_indexes(void)
{
    const char *index_cols[]
        = { "indexes/edge_insertion_order", "indexes/edge_removal_order" };
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t original, reloaded;
    size_t which;
    int err;

    err = tsk_treeseq_copy_tables(ts, &original, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Drop each index column on its own. */
    for (which = 0; which < 2; which++) {
        copy_store_drop_columns(ts, 1, &index_cols[which], _tmp_file_name);
        err = tsk_table_collection_load(&reloaded, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_BOTH_COLUMNS_REQUIRED);
        tsk_table_collection_free(&reloaded);
    }

    /* Drop both index columns together. */
    copy_store_drop_columns(ts, 2, index_cols, _tmp_file_name);
    err = tsk_table_collection_load(&reloaded, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&original, &reloaded, 0));
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&reloaded, 0));
    tsk_table_collection_free(&reloaded);

    tsk_table_collection_free(&original);
    tsk_treeseq_free(ts);
    free(ts);
}
/* Checks the load errors produced when the two edge index columns are
 * present in the file but malformed: wrong length, an out-of-range entry in
 * either column, or the wrong storage type. */
static void
test_malformed_indexes(void)
{
int ret;
tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
tsk_table_collection_t tables;
tsk_treeseq_t ts2;
tsk_size_t num_edges = tsk_treeseq_get_num_edges(ts);
/* Both index arrays start zero-filled; bad_index gets a -1 entry below. */
tsk_id_t *bad_index = tsk_calloc(num_edges, sizeof(tsk_id_t));
tsk_id_t *good_index = tsk_calloc(num_edges, sizeof(tsk_id_t));
kastore_t store;
const char *cols[]
= { "indexes/edge_insertion_order", "indexes/edge_removal_order" };
CU_ASSERT_FATAL(bad_index != NULL);
CU_ASSERT_FATAL(good_index != NULL);
/* If both columns are not the same length as the number of edges we
* should raise an error */
/* Here both index columns are re-added with zero length. */
copy_store_drop_columns(ts, 2, cols, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(&store, cols[0], NULL, 0, TSK_ID_STORAGE_TYPE, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(&store, cols[1], NULL, 0, TSK_ID_STORAGE_TYPE, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
tsk_table_collection_free(&tables);
/* An entry of -1 in the removal order column: loading a tree sequence
 * must report an out-of-bounds edge. */
bad_index[0] = -1;
copy_store_drop_columns(ts, 2, cols, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(
&store, cols[0], good_index, (size_t) num_edges, TSK_ID_STORAGE_TYPE, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(
&store, cols[1], bad_index, (size_t) num_edges, TSK_ID_STORAGE_TYPE, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts2);
/* Same again with the -1 entry in the insertion order column instead. */
copy_store_drop_columns(ts, 2, cols, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(
&store, cols[0], bad_index, (size_t) num_edges, TSK_ID_STORAGE_TYPE, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(
&store, cols[1], good_index, (size_t) num_edges, TSK_ID_STORAGE_TYPE, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts2);
/* Only the insertion order column (cols[0]) is dropped and re-added here,
 * this time stored as KAS_FLOAT32 rather than TSK_ID_STORAGE_TYPE. */
copy_store_drop_columns(ts, 1, cols, _tmp_file_name);
ret = kastore_open(&store, _tmp_file_name, "a", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_puts(&store, cols[0], bad_index, (size_t) num_edges, KAS_FLOAT32, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);
tsk_treeseq_free(&ts2);
free(good_index);
free(bad_index);
tsk_treeseq_free(ts);
free(ts);
}
/* Checks that the loaded tables still report a reference sequence while at
 * least one of the reference_sequence/ columns remains in the file, and
 * report none once all four columns have been dropped. Every load is
 * required to succeed. */
static void
test_missing_reference_sequence(void)
{
    int ret;
    size_t num_dropped;
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t t1, t2;
    const char *cols[] = { "reference_sequence/data", "reference_sequence/url",
        "reference_sequence/metadata_schema", "reference_sequence/metadata" };

    CU_ASSERT_TRUE(tsk_treeseq_has_reference_sequence(ts));

    ret = tsk_treeseq_copy_tables(ts, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Dropping the first one, two or three columns leaves at least one
     * reference sequence column behind. */
    for (num_dropped = 1; num_dropped <= 3; num_dropped++) {
        copy_store_drop_columns(ts, num_dropped, cols, _tmp_file_name);
        ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);
        /* Check the load result before inspecting t2. */
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_table_collection_has_reference_sequence(&t2));
        tsk_table_collection_free(&t2);
    }

    /* Dropping all the columns gives us a NULL reference_sequence, though */
    copy_store_drop_columns(ts, 4, cols, _tmp_file_name);
    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_has_reference_sequence(&t2));
    tsk_table_collection_free(&t2);

    tsk_table_collection_free(&t1);
    tsk_treeseq_free(ts);
    free(ts);
}
/* For each listed column, rewrites the file with that column stored as
 * KAS_FLOAT32 and checks that loading fails with TSK_ERR_BAD_COLUMN_TYPE. */
static void
test_bad_column_types(void)
{
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_size_t num_edges = tsk_treeseq_get_num_edges(ts);
    /* make sure we have enough memory in all cases */
    tsk_id_t *col_memory = tsk_calloc(num_edges + 1, sizeof(double));
    /* Column to replace, plus the array and length it is replaced with. */
    struct {
        const char *name;
        const void *array;
        size_t len;
    } bad_cols[] = {
        { "edges/left", col_memory, (size_t) num_edges },
        { "edges/metadata_offset", col_memory, (size_t) num_edges + 1 },
        { "edges/metadata", NULL, 0 },
        { "edges/metadata_schema", NULL, 0 },
        { "reference_sequence/metadata", NULL, 0 },
    };
    const size_t num_bad_cols = sizeof(bad_cols) / sizeof(bad_cols[0]);
    tsk_table_collection_t tables;
    kastore_t store;
    size_t idx;
    int err;

    CU_ASSERT_FATAL(col_memory != NULL);

    for (idx = 0; idx < num_bad_cols; idx++) {
        copy_store_drop_columns(ts, 1, &bad_cols[idx].name, _tmp_file_name);
        err = kastore_open(&store, _tmp_file_name, "a", 0);
        CU_ASSERT_EQUAL_FATAL(err, 0);
        err = kastore_puts(&store, bad_cols[idx].name, bad_cols[idx].array,
            bad_cols[idx].len, KAS_FLOAT32, 0);
        CU_ASSERT_EQUAL_FATAL(err, 0);
        err = kastore_close(&store);
        CU_ASSERT_EQUAL_FATAL(err, 0);

        err = tsk_table_collection_load(&tables, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_BAD_COLUMN_TYPE);
        tsk_table_collection_free(&tables);
    }

    free(col_memory);
    tsk_treeseq_free(ts);
    free(ts);
}
/* Dropping any single one of the listed columns from the file must make
 * loading fail with TSK_ERR_REQUIRED_COL_NOT_FOUND. */
static void
test_missing_required_columns(void)
{
    const char *required_cols[] = {
        "edges/child",
        "edges/left",
        "edges/parent",
        "edges/right",
        "format/name",
        "format/version",
        "individuals/flags",
        "migrations/dest",
        "migrations/left",
        "migrations/node",
        "migrations/right",
        "migrations/source",
        "migrations/time",
        "mutations/node",
        "mutations/parent",
        "mutations/site",
        "nodes/flags",
        "nodes/individual",
        "nodes/population",
        "nodes/time",
        "sequence_length",
        "sites/position",
        "uuid",
    };
    const size_t num_required = sizeof(required_cols) / sizeof(required_cols[0]);
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t loaded;
    size_t idx;
    int err;

    for (idx = 0; idx < num_required; idx++) {
        /* A one-element view of the list selects the column to drop. */
        copy_store_drop_columns(ts, 1, &required_cols[idx], _tmp_file_name);
        err = tsk_table_collection_load(&loaded, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_REQUIRED_COL_NOT_FOUND);
        tsk_table_collection_free(&loaded);
    }

    tsk_treeseq_free(ts);
    free(ts);
}
/* Dropping any single one of the listed metadata / metadata schema columns
 * must still allow the file to load, but the loaded tables must differ from
 * the original ones. */
static void
test_metadata_schemas_optional(void)
{
    const char *optional_cols[] = {
        "metadata",
        "metadata_schema",
        "reference_sequence/metadata",
        "reference_sequence/metadata_schema",
        "individuals/metadata_schema",
        "populations/metadata_schema",
        "nodes/metadata_schema",
        "edges/metadata_schema",
        "sites/metadata_schema",
        "mutations/metadata_schema",
        "migrations/metadata_schema",
    };
    const size_t num_optional = sizeof(optional_cols) / sizeof(optional_cols[0]);
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t original, reloaded;
    size_t idx;
    int err;

    err = tsk_treeseq_copy_tables(ts, &original, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    for (idx = 0; idx < num_optional; idx++) {
        copy_store_drop_columns(ts, 1, &optional_cols[idx], _tmp_file_name);
        err = tsk_table_collection_load(&reloaded, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(err, 0);
        /* metadata schemas are included in data comparisons */
        CU_ASSERT_FALSE(tsk_table_collection_equals(&original, &reloaded, 0));
        tsk_table_collection_free(&reloaded);
    }

    tsk_table_collection_free(&original);
    tsk_treeseq_free(ts);
    free(ts);
}
/* This test is problematic on windows because of the different off_t
 * types. Doesn't seem worth the trouble of getting it working.
 *
 * Checks that loading from an empty stream reports TSK_ERR_EOF, and that
 * loading a dumped file truncated to any length from 1 to 99 bytes reports
 * KAS_ERR_BAD_FILE_FORMAT.
 */
static void
test_load_bad_file_formats(void)
{
#if !defined(_WIN32)
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int ret, ret2;
    off_t offset;
    FILE *f;

    /* A zero byte file is TSK_ERR_EOF */
    f = fopen(_tmp_file_name, "w+");
    /* Never hand a NULL stream to the loaders or to fclose. */
    CU_ASSERT_FATAL(f != NULL);
    ret = tsk_table_collection_loadf(&tables, f, 0);
    ret2 = tsk_treeseq_loadf(&ts, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, ret2);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
    fclose(f);

    for (offset = 1; offset < 100; offset++) {
        ret = tsk_table_collection_init(&tables, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        tables.sequence_length = 1.0;
        ret = tsk_table_collection_dump(&tables, _tmp_file_name, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* Cut the freshly dumped file down to its first `offset` bytes. */
        ret2 = truncate(_tmp_file_name, offset);
        CU_ASSERT_EQUAL_FATAL(ret2, 0);
        /* tables is already initialised, hence TSK_NO_INIT. */
        ret = tsk_table_collection_load(&tables, _tmp_file_name, TSK_NO_INIT);
        CU_ASSERT_EQUAL_FATAL(ret ^ (1 << TSK_KAS_ERR_BIT), KAS_ERR_BAD_FILE_FORMAT);
        tsk_table_collection_free(&tables);
    }
#endif
}
/* Checks the errors reported by the table collection and tree sequence
 * loaders for a directory, a nonexistent path, a file that is not a kastore,
 * and a stream opened write-only. Both loaders must agree on the error in
 * every case. */
static void
test_load_errors(void)
{
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int ret, ret2;
    const char *str;
    FILE *f;

    /* A directory */
    ret = tsk_table_collection_load(&tables, "/", 0);
    ret2 = tsk_treeseq_load(&ts, "/", 0);
    CU_ASSERT_EQUAL_FATAL(ret, ret2);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);
    str = tsk_strerror(ret);
    CU_ASSERT_TRUE(strlen(str) > 0);
    CU_ASSERT_STRING_EQUAL(str, strerror(EISDIR));
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);

    /* A path that does not exist */
    ret = tsk_table_collection_load(&tables, "/bin/theres_no_way_this_file_exists", 0);
    ret2 = tsk_treeseq_load(&ts, "/bin/theres_no_way_this_file_exists", 0);
    CU_ASSERT_EQUAL_FATAL(ret, ret2);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);
    str = tsk_strerror(ret);
    CU_ASSERT_TRUE(strlen(str) > 0);
    CU_ASSERT_STRING_EQUAL(str, strerror(ENOENT));
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);

    /* An existing file that is not in kastore format */
    ret = tsk_table_collection_load(&tables, "/bin/sh", 0);
    ret2 = tsk_treeseq_load(&ts, "/bin/sh", 0);
    CU_ASSERT_EQUAL_FATAL(ret, ret2);
    CU_ASSERT_TRUE(tsk_is_kas_error(ret));
    CU_ASSERT_EQUAL_FATAL(ret ^ (1 << TSK_KAS_ERR_BIT), KAS_ERR_BAD_FILE_FORMAT);
    str = tsk_strerror(ret);
    CU_ASSERT_TRUE(strlen(str) > 0);
    tsk_table_collection_free(&tables);
    /* Free ts here too, as every other case in this test does. */
    tsk_treeseq_free(&ts);

    /* open a file in the wrong mode */
    f = fopen(_tmp_file_name, "w");
    /* Never hand a NULL stream to the loaders or to fclose. */
    CU_ASSERT_FATAL(f != NULL);
    ret = tsk_table_collection_loadf(&tables, f, 0);
    ret2 = tsk_treeseq_loadf(&ts, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, ret2);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);
    str = tsk_strerror(ret);
    CU_ASSERT_TRUE(strlen(str) > 0);
    CU_ASSERT_STRING_EQUAL(str, strerror(EBADF));
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
    fclose(f);
}
/* Checks that loading a table collection reports TSK_ERR_EOF for an empty
 * file and whenever the stream is positioned at its end, and succeeds when
 * the stream is rewound to the start of a dumped tree sequence. */
static void
test_load_eof(void)
{
    tsk_treeseq_t *ts = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t tables;
    int ret;
    FILE *f;

    f = fopen(_tmp_file_name, "w+");
    /* Fatal: the stream is used unconditionally below. */
    CU_ASSERT_FATAL(f != NULL);
    ret = tsk_table_collection_loadf(&tables, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);
    fclose(f);
    tsk_table_collection_free(&tables);

    /* Reading an empty file also returns EOF */
    ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);
    tsk_table_collection_free(&tables);

    f = fopen(_tmp_file_name, "w+");
    CU_ASSERT_FATAL(f != NULL);
    ret = tsk_treeseq_dumpf(ts, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Reading from the end of the stream gives EOF */
    ret = tsk_table_collection_loadf(&tables, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);
    tsk_table_collection_free(&tables);

    /* Reading the start of the stream is fine */
    ret = fseek(f, 0, SEEK_SET);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_loadf(&tables, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_table_collection_free(&tables);

    /* And we should be back to the end of the stream */
    ret = tsk_table_collection_loadf(&tables, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);
    tsk_table_collection_free(&tables);

    /* Trying to read the same end stream should give the same
     * result. */
    ret = tsk_table_collection_loadf(&tables, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);
    tsk_table_collection_free(&tables);

    /* A previously init'd tables should be good too */
    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_loadf(&tables, f, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EOF);
    tsk_table_collection_free(&tables);

    fclose(f);
    tsk_treeseq_free(ts);
    free(ts);
}
/* Checks the errors reported when dumping a table collection to a
 * directory, to a location we cannot write to, and to a stream opened
 * read-only. */
static void
test_dump_errors(void)
{
    tsk_table_collection_t tables;
    int ret;
    FILE *f;
    const char *str;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1.0;

    ret = tsk_table_collection_dump(&tables, "/", 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);
    str = tsk_strerror(ret);
    CU_ASSERT_TRUE(strlen(str) > 0);
    CU_ASSERT_STRING_EQUAL(str, strerror(EISDIR));

    /* We're assuming that we don't have write access to /bin, so don't run this
     * as root! */
    ret = tsk_table_collection_dump(&tables, "/bin/theres_no_way_this_file_exists", 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);
    str = tsk_strerror(ret);
    CU_ASSERT_TRUE(strlen(str) > 0);
    CU_ASSERT_TRUE(
        (strcmp(str, strerror(EACCES)) == 0) || (strcmp(str, strerror(EPERM)) == 0));

    /* open a file in the wrong mode */
    f = fopen(_tmp_file_name, "r");
    /* Never hand a NULL stream to dumpf or to fclose. */
    CU_ASSERT_FATAL(f != NULL);
    ret = tsk_table_collection_dumpf(&tables, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IO);
    str = tsk_strerror(ret);
    CU_ASSERT_TRUE(strlen(str) > 0);
    CU_ASSERT_STRING_EQUAL(str, strerror(EBADF));
    fclose(f);

    /* We'd like to catch close errors also, but it's hard to provoke them
     * without intercepting calls to fclose() */
    tsk_table_collection_free(&tables);
}
/* FIXME these are good tests, but we want to make them more general so that
* they can be applied to other tables.*/
static void
test_load_node_table_errors(void)
{
char format_name[TSK_FILE_FORMAT_NAME_LENGTH];
size_t uuid_size = 36;
char uuid[uuid_size];
double L = 1;
double time = 0;
double flags = 0;
tsk_id_t population = 0;
tsk_id_t individual = 0;
int8_t metadata = 0;
uint32_t metadata_offset[] = { 0, 1 };
uint32_t version[2]
= { TSK_FILE_FORMAT_VERSION_MAJOR, TSK_FILE_FORMAT_VERSION_MINOR };
write_table_col_t write_cols[] = {
{ "nodes/time", (void *) &time, 1, KAS_FLOAT64 },
{ "nodes/flags", (void *) &flags, 1, TSK_FLAGS_STORAGE_TYPE },
{ "nodes/population", (void *) &population, 1, TSK_ID_STORAGE_TYPE },
{ "nodes/individual", (void *) &individual, 1, TSK_ID_STORAGE_TYPE },
{ "nodes/metadata", (void *) &metadata, 1, KAS_UINT8 },
{ "nodes/metadata_offset", (void *) metadata_offset, 2, KAS_UINT32 },
{ "format/name", (void *) format_name, sizeof(format_name), KAS_INT8 },
{ "format/version", (void *) version, 2, KAS_UINT32 },
{ "uuid", (void *) uuid, uuid_size, KAS_INT8 },
{ "sequence_length", (void *) &L, 1, KAS_FLOAT64 },
};
tsk_table_collection_t tables;
kastore_t store;
int ret;
tsk_memcpy(format_name, TSK_FILE_FORMAT_NAME, sizeof(format_name));
/* Note: this will fail if we ever start parsing the form of the UUID */
tsk_memset(uuid, 0, uuid_size);
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
/* We've only defined the format headers and nodes, so we should fail immediately
* after with key not found */
CU_ASSERT_FALSE(tsk_is_kas_error(ret));
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_REQUIRED_COL_NOT_FOUND);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Wrong type for time */
write_cols[0].type = KAS_INT64;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COLUMN_TYPE);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_cols[0].type = KAS_FLOAT64;
/* Wrong length for flags */
write_cols[1].len = 0;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_cols[1].len = 1;
/* Wrong length for metadata offset */
write_cols[5].len = 1;
ret = kastore_open(&store, _tmp_file_name, "w", 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_table_cols(&store, write_cols, sizeof(write_cols) / sizeof(*write_cols));
ret = kastore_close(&store);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tables, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_FILE_FORMAT);
ret = tsk_table_collection_free(&tables);
CU_ASSERT_EQUAL_FATAL(ret, 0);
write_cols[5].len = 2;
}
/* Round-trips an example tree sequence through each dump/load entry point
 * (path based and FILE based, table collection and tree sequence APIs) and
 * checks that the tables read back compare equal to the originals. */
static void
test_example_round_trip(void)
{
    int ret;
    tsk_treeseq_t *ts1 = caterpillar_tree(5, 3, 3);
    tsk_treeseq_t ts2;
    tsk_table_collection_t t1, t2;
    FILE *f;

    ret = tsk_treeseq_copy_tables(ts1, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_dump(&t1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));
    /* Reading multiple times into the same tables with TSK_NO_INIT is supported. */
    ret = tsk_table_collection_load(&t2, _tmp_file_name, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));
    tsk_table_collection_free(&t2);

    /* Do the same thing with treeseq API */
    remove(_tmp_file_name);
    ret = tsk_treeseq_dump(ts1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_load(&ts2, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));
    tsk_treeseq_free(&ts2);

    /* Use loadf form */
    f = fopen(_tmp_file_name, "w+");
    /* Stop here rather than hand a NULL stream to dumpf/fseek/loadf. */
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_table_collection_dumpf(&t1, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    fseek(f, 0, SEEK_SET);
    ret = tsk_table_collection_loadf(&t2, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));
    tsk_table_collection_free(&t2);
    fclose(f);

    /* Do the same thing with treeseq API */
    f = fopen(_tmp_file_name, "w+");
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_treeseq_dumpf(ts1, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    fseek(f, 0, SEEK_SET);
    ret = tsk_treeseq_loadf(&ts2, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));
    tsk_treeseq_free(&ts2);
    fclose(f);

    tsk_table_collection_free(&t1);
    tsk_treeseq_free(ts1);
    free(ts1);
}
/* Dumps several tree sequences back to back into a single stream and checks
 * they can be read back one after another, using freshly initialised tables,
 * reused tables (TSK_NO_INIT), and a read-until-TSK_ERR_EOF loop. */
static void
test_multiple_round_trip(void)
{
    int ret;
    tsk_size_t j;
    tsk_size_t num_examples = 10;
    tsk_treeseq_t *ts;
    tsk_table_collection_t in_tables[num_examples];
    tsk_table_collection_t out_tables;
    FILE *f = fopen(_tmp_file_name, "w+");

    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);

    for (j = 0; j < num_examples; j++) {
        ts = caterpillar_tree(5 + j, 3 + j, 3 + j);
        ret = tsk_treeseq_copy_tables(ts, &in_tables[j], 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_treeseq_dumpf(ts, f, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        tsk_treeseq_free(ts);
        free(ts);
    }

    fseek(f, 0, SEEK_SET);
    for (j = 0; j < num_examples; j++) {
        ret = tsk_table_collection_loadf(&out_tables, f, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0));
        tsk_table_collection_free(&out_tables);
    }

    /* Can do the same with the same set of previously init'd tables. */
    ret = tsk_table_collection_init(&out_tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    fseek(f, 0, SEEK_SET);
    for (j = 0; j < num_examples; j++) {
        ret = tsk_table_collection_loadf(&out_tables, f, TSK_NO_INIT);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0));
    }
    tsk_table_collection_free(&out_tables);

    /* Can also read until EOF to do the same thing */
    ret = tsk_table_collection_init(&out_tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    fseek(f, 0, SEEK_SET);
    j = 0;
    while (true) {
        ret = tsk_table_collection_loadf(&out_tables, f, TSK_NO_INIT);
        if (ret == TSK_ERR_EOF) {
            break;
        }
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* Never index past in_tables if more stores come back than were written. */
        CU_ASSERT_FATAL(j < num_examples);
        CU_ASSERT_TRUE(tsk_table_collection_equals(&in_tables[j], &out_tables, 0));
        j++;
    }
    tsk_table_collection_free(&out_tables);
    CU_ASSERT_EQUAL_FATAL(j, num_examples);

    for (j = 0; j < num_examples; j++) {
        tsk_table_collection_free(&in_tables[j]);
    }
    fclose(f);
}
/* Copies the example tree sequence's store with an empty drop list and checks
 * the file that comes out loads to tables equal to the originals. */
static void
test_copy_store_drop_columns(void)
{
    tsk_treeseq_t *example = caterpillar_tree(5, 3, 3);
    tsk_table_collection_t expected;
    tsk_table_collection_t loaded;
    int err;

    err = tsk_treeseq_copy_tables(example, &expected, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Dropping no columns should have no effect on the data */
    copy_store_drop_columns(example, 0, NULL, _tmp_file_name);
    err = tsk_table_collection_load(&loaded, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&expected, &loaded, 0));

    tsk_table_collection_free(&expected);
    tsk_table_collection_free(&loaded);
    tsk_treeseq_free(example);
    free(example);
}
/* Checks that loading with TSK_LOAD_SKIP_TABLES yields empty tables, stops
 * reading before the end of the file, and still produces a collection from
 * which a tree sequence can be initialised, via both the path and FILE APIs. */
static void
test_skip_tables(void)
{
    int ret;
    tsk_treeseq_t *ts1 = caterpillar_tree(5, 3, 3);
    tsk_treeseq_t ts2;
    tsk_table_collection_t t1, t2;
    FILE *f;

    ret = tsk_treeseq_dump(ts1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_load(&t1, _tmp_file_name, TSK_LOAD_SKIP_TABLES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts1->tables, TSK_CMP_IGNORE_TABLES));
    CU_ASSERT_EQUAL(t1.individuals.num_rows, 0);
    CU_ASSERT_EQUAL(t1.nodes.num_rows, 0);
    CU_ASSERT_EQUAL(t1.edges.num_rows, 0);
    CU_ASSERT_EQUAL(t1.migrations.num_rows, 0);
    CU_ASSERT_EQUAL(t1.sites.num_rows, 0);
    CU_ASSERT_EQUAL(t1.mutations.num_rows, 0);
    CU_ASSERT_EQUAL(t1.provenances.num_rows, 0);

    /* Test _loadf code path as well */
    f = fopen(_tmp_file_name, "r+");
    /* Stop here rather than hand a NULL stream to loadf/fgetc. */
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_table_collection_loadf(&t2, f, TSK_LOAD_SKIP_TABLES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));
    fclose(f);
    tsk_table_collection_free(&t2);

    /* Without TSK_LOAD_SKIP_TABLES we reach end of file */
    f = fopen(_tmp_file_name, "r+");
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_table_collection_loadf(&t2, f, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(fgetc(f), EOF);
    fclose(f);
    tsk_table_collection_free(&t2);

    /* Setting TSK_LOAD_SKIP_TABLES only reads part of the file */
    f = fopen(_tmp_file_name, "r+");
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_table_collection_loadf(&t2, f, TSK_LOAD_SKIP_TABLES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_NOT_EQUAL(fgetc(f), EOF);
    fclose(f);
    tsk_table_collection_free(&t2);

    /* We should be able to make a tree sequence */
    ret = tsk_treeseq_init(&ts2, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts2);

    /* Do the same thing with treeseq API */
    ret = tsk_treeseq_load(&ts2, _tmp_file_name, TSK_LOAD_SKIP_TABLES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));
    tsk_treeseq_free(&ts2);

    f = fopen(_tmp_file_name, "r+");
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_treeseq_loadf(&ts2, f, TSK_LOAD_SKIP_TABLES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));
    fclose(f);
    tsk_treeseq_free(&ts2);

    tsk_table_collection_free(&t1);
    tsk_treeseq_free(ts1);
    free(ts1);
}
/* Checks that loading with TSK_LOAD_SKIP_REFERENCE_SEQUENCE drops only the
 * reference sequence, stops reading before the end of the file, and still
 * produces a collection from which a tree sequence can be initialised, via
 * both the path and FILE APIs. */
static void
test_skip_reference_sequence(void)
{
    int ret;
    tsk_treeseq_t *ts1 = caterpillar_tree(5, 3, 3);
    tsk_treeseq_t ts2;
    tsk_table_collection_t t1, t2;
    FILE *f;

    CU_ASSERT_TRUE(tsk_treeseq_has_reference_sequence(ts1));

    ret = tsk_treeseq_dump(ts1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_load(
        &t1, _tmp_file_name, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&t1, ts1->tables, 0));
    CU_ASSERT_TRUE(tsk_table_collection_equals(
        &t1, ts1->tables, TSK_CMP_IGNORE_REFERENCE_SEQUENCE));
    CU_ASSERT_FALSE(tsk_table_collection_has_reference_sequence(&t1));

    /* Test _loadf code path as well */
    f = fopen(_tmp_file_name, "r+");
    /* Stop here rather than hand a NULL stream to loadf/fgetc. */
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_table_collection_loadf(&t2, f, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));
    fclose(f);
    tsk_table_collection_free(&t2);

    /* Setting TSK_LOAD_SKIP_REFERENCE_SEQUENCE only reads part of the file */
    f = fopen(_tmp_file_name, "r+");
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_table_collection_loadf(&t2, f, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_NOT_EQUAL(fgetc(f), EOF);
    fclose(f);
    tsk_table_collection_free(&t2);

    /* We should be able to make a tree sequence */
    ret = tsk_treeseq_init(&ts2, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts2);

    /* Do the same thing with treeseq API */
    ret = tsk_treeseq_load(&ts2, _tmp_file_name, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));
    tsk_treeseq_free(&ts2);

    f = fopen(_tmp_file_name, "r+");
    CU_ASSERT_NOT_EQUAL_FATAL(f, NULL);
    ret = tsk_treeseq_loadf(&ts2, f, TSK_LOAD_SKIP_REFERENCE_SEQUENCE);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, ts2.tables, 0));
    fclose(f);
    tsk_treeseq_free(&ts2);

    tsk_table_collection_free(&t1);
    tsk_treeseq_free(ts1);
    free(ts1);
}
/* Entry point: registers the file format tests, in the order they are listed,
 * and passes them with the command line arguments to test_main. */
int
main(int argc, char **argv)
{
    /* NULL-terminated registry of test name / test function pairs. */
    CU_TestInfo file_format_tests[] = {
        { "test_format_data_load_errors", test_format_data_load_errors },
        { "test_missing_indexes", test_missing_indexes },
        { "test_malformed_indexes", test_malformed_indexes },
        { "test_missing_reference_sequence", test_missing_reference_sequence },
        { "test_bad_column_types", test_bad_column_types },
        { "test_missing_required_columns", test_missing_required_columns },
        { "test_missing_optional_column_pairs", test_missing_optional_column_pairs },
        { "test_missing_required_column_pairs", test_missing_required_column_pairs },
        { "test_bad_offset_columns", test_bad_offset_columns },
        { "test_force_offset_64", test_force_offset_64 },
        { "test_metadata_schemas_optional", test_metadata_schemas_optional },
        { "test_load_node_table_errors", test_load_node_table_errors },
        { "test_load_bad_file_formats", test_load_bad_file_formats },
        { "test_load_errors", test_load_errors },
        { "test_load_eof", test_load_eof },
        { "test_dump_errors", test_dump_errors },
        { "test_example_round_trip", test_example_round_trip },
        { "test_multiple_round_trip", test_multiple_round_trip },
        { "test_copy_store_drop_columns", test_copy_store_drop_columns },
        { "test_skip_tables", test_skip_tables },
        { "test_skip_reference_sequence", test_skip_reference_sequence },
        { NULL, NULL },
    };

    return test_main(file_format_tests, argc, argv);
}
================================================
FILE: c/tests/test_genotypes.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
#include
#include
#include
#include
/* Two sample nodes with no edges and a single site: with default options both
 * genotypes are reported as missing, and with TSK_ISOLATED_NOT_MISSING both
 * are reported as allele 0. */
static void
test_simplest_missing_data(void)
{
    const char *node_rows = "1 0 0\n"
                            "1 0 0\n";
    const char *site_rows = "0.0 A\n";
    tsk_treeseq_t treeseq;
    tsk_vargen_t gen;
    tsk_variant_t *variant;
    int status;
    int k;

    tsk_treeseq_from_text(
        &treeseq, 1, node_rows, "", NULL, site_rows, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&treeseq), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&treeseq), 1);

    /* Default options */
    status = tsk_vargen_init(&gen, &treeseq, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    CU_ASSERT_EQUAL(variant->site.position, 0.0);
    CU_ASSERT_TRUE(variant->has_missing_data);
    for (k = 0; k < 2; k++) {
        CU_ASSERT_EQUAL(variant->genotypes[k], TSK_MISSING_DATA);
    }
    /* Only one site, so the generator is now exhausted. */
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    /* TSK_ISOLATED_NOT_MISSING */
    status = tsk_vargen_init(&gen, &treeseq, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    CU_ASSERT_EQUAL(variant->site.position, 0.0);
    CU_ASSERT_FALSE(variant->has_missing_data);
    for (k = 0; k < 2; k++) {
        CU_ASSERT_EQUAL(variant->genotypes[k], 0);
    }
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    tsk_treeseq_free(&treeseq);
}
/* Same two-sample, no-edge setup as the simplest missing data test, but with a
 * user supplied allele list: once for all samples, once for an explicit
 * single-sample list, and finally with TSK_ISOLATED_NOT_MISSING. */
static void
test_simplest_missing_data_user_alleles(void)
{
    const char *node_rows = "1 0 0\n"
                            "1 0 0\n";
    const char *site_rows = "0.0 A\n";
    const char *user_alleles[] = { "A", NULL };
    tsk_id_t sample_subset[] = { 0 };
    tsk_treeseq_t treeseq;
    tsk_vargen_t gen;
    tsk_variant_t *variant;
    int status;

    tsk_treeseq_from_text(
        &treeseq, 1, node_rows, "", NULL, site_rows, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&treeseq), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&treeseq), 1);

    /* All samples, user alleles */
    status = tsk_vargen_init(&gen, &treeseq, NULL, 0, user_alleles, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    CU_ASSERT_EQUAL(variant->site.position, 0.0);
    CU_ASSERT_TRUE(variant->has_missing_data);
    CU_ASSERT_EQUAL(variant->genotypes[0], TSK_MISSING_DATA);
    CU_ASSERT_EQUAL(variant->genotypes[1], TSK_MISSING_DATA);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    /* Explicit one-element sample list, user alleles */
    status = tsk_vargen_init(&gen, &treeseq, sample_subset, 1, user_alleles, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    CU_ASSERT_EQUAL(variant->site.position, 0.0);
    CU_ASSERT_TRUE(variant->has_missing_data);
    CU_ASSERT_EQUAL(variant->genotypes[0], TSK_MISSING_DATA);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    /* All samples, no user alleles, TSK_ISOLATED_NOT_MISSING */
    status = tsk_vargen_init(&gen, &treeseq, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    CU_ASSERT_EQUAL(variant->site.position, 0.0);
    CU_ASSERT_FALSE(variant->has_missing_data);
    CU_ASSERT_EQUAL(variant->genotypes[0], 0);
    CU_ASSERT_EQUAL(variant->genotypes[1], 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    tsk_treeseq_free(&treeseq);
}
/* Two sample nodes with no edges, one site, and one mutation on node 0.
 * Checks the genotypes for all samples, for the single-sample list { 0 }, and
 * with TSK_ISOLATED_NOT_MISSING. */
static void
test_simplest_missing_data_mutations(void)
{
    const char *node_rows = "1 0 0\n"
                            "1 0 0\n";
    const char *site_rows = "0.0 A\n";
    const char *mutation_rows = "0 0 T -1\n";
    const char *user_alleles[] = { "A", "T", NULL };
    tsk_id_t sample_subset[] = { 0 };
    tsk_treeseq_t treeseq;
    tsk_vargen_t gen;
    tsk_variant_t *variant;
    int status;

    tsk_treeseq_from_text(&treeseq, 1, node_rows, "", NULL, site_rows, mutation_rows,
        NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&treeseq), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&treeseq), 1);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&treeseq), 1);

    /* All samples: sample 0 carries allele 1, sample 1 is missing */
    status = tsk_vargen_init(&gen, &treeseq, NULL, 0, user_alleles, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    CU_ASSERT_EQUAL(variant->site.position, 0.0);
    CU_ASSERT_TRUE(variant->has_missing_data);
    CU_ASSERT_EQUAL(variant->genotypes[0], 1);
    CU_ASSERT_EQUAL(variant->genotypes[1], TSK_MISSING_DATA);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    /* Only sample 0: nothing is missing */
    status = tsk_vargen_init(&gen, &treeseq, sample_subset, 1, user_alleles, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_print_state(&gen, _devnull);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    CU_ASSERT_EQUAL(variant->site.position, 0.0);
    CU_ASSERT_FALSE(variant->has_missing_data);
    CU_ASSERT_EQUAL(variant->genotypes[0], 1);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    /* TSK_ISOLATED_NOT_MISSING: sample 1 is reported as allele 0 */
    status = tsk_vargen_init(&gen, &treeseq, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    CU_ASSERT_EQUAL(variant->site.position, 0.0);
    CU_ASSERT_FALSE(variant->has_missing_data);
    CU_ASSERT_EQUAL(variant->genotypes[0], 1);
    CU_ASSERT_EQUAL(variant->genotypes[1], 0);
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    tsk_treeseq_free(&treeseq);
}
/* Two sample nodes with no edges, one site, and a mutation on each of the two
 * nodes. All three variant generator configurations are expected to report
 * allele 1 for both samples and no missing data. */
static void
test_simplest_missing_data_mutations_all_samples(void)
{
    const char *node_rows = "1 0 0\n"
                            "1 0 0\n";
    const char *site_rows = "0.0 A\n";
    const char *mutation_rows = "0 0 T -1\n"
                                "0 1 T -1\n";
    const char *user_alleles[] = { "A", "T", NULL };
    tsk_id_t sample_list[] = { 0, 1 };
    tsk_treeseq_t treeseq;
    tsk_vargen_t gen;
    tsk_variant_t *variant;
    int status;
    int pass;

    tsk_treeseq_from_text(&treeseq, 1, node_rows, "", NULL, site_rows, mutation_rows,
        NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&treeseq), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&treeseq), 1);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&treeseq), 2);

    /* pass 0: default samples with user alleles;
     * pass 1: explicit sample list with user alleles;
     * pass 2: default samples, TSK_ISOLATED_NOT_MISSING. */
    for (pass = 0; pass < 3; pass++) {
        if (pass == 0) {
            status = tsk_vargen_init(&gen, &treeseq, NULL, 0, user_alleles, 0);
        } else if (pass == 1) {
            status = tsk_vargen_init(&gen, &treeseq, sample_list, 2, user_alleles, 0);
        } else {
            status = tsk_vargen_init(
                &gen, &treeseq, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
        }
        CU_ASSERT_EQUAL_FATAL(status, 0);
        if (pass == 1) {
            tsk_vargen_print_state(&gen, _devnull);
        }

        status = tsk_vargen_next(&gen, &variant);
        CU_ASSERT_EQUAL_FATAL(status, 1);
        CU_ASSERT_EQUAL(variant->site.position, 0.0);
        CU_ASSERT_FALSE(variant->has_missing_data);
        CU_ASSERT_EQUAL(variant->genotypes[0], 1);
        CU_ASSERT_EQUAL(variant->genotypes[1], 1);

        status = tsk_vargen_next(&gen, &variant);
        CU_ASSERT_EQUAL_FATAL(status, 0);
        tsk_vargen_free(&gen);
    }

    tsk_treeseq_free(&treeseq);
}
/* Decodes four sites on the single-tree example using the user supplied
 * allele list A, C, G, T and checks, site by site, the reported alleles and
 * the genotype index assigned to each of the four samples. */
static void
test_single_tree_user_alleles(void)
{
    int ret = 0;
    int s, k;
    const char *sites = "0.0 G\n"
                        "0.125 A\n"
                        "0.25 C\n"
                        "0.5 A\n";
    const char *mutations
        = "0 0 T -1\n"
          "1 1 C -1\n"
          "2 0 G -1\n"
          "2 1 A -1\n"
          "2 2 T -1\n" // A bunch of different sample mutations
          "3 4 T -1\n"
          "3 0 A 5\n"; // A back mutation from T -> A
    const char *alleles[] = { "A", "C", "G", "T", NULL };
    /* Expected results, one row per site in the order they are generated. */
    const double expected_position[4] = { 0.0, 0.125, 0.25, 0.5 };
    const int32_t expected_genotypes[4][4] = {
        { 3, 2, 2, 2 },
        { 0, 1, 0, 0 },
        { 2, 0, 3, 1 },
        { 0, 3, 0, 0 },
    };
    tsk_treeseq_t ts;
    tsk_vargen_t vargen;
    tsk_variant_t *var;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        sites, mutations, NULL, NULL, 0);
    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, alleles, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_vargen_print_state(&vargen, _devnull);

    for (s = 0; s < 4; s++) {
        ret = tsk_vargen_next(&vargen, &var);
        CU_ASSERT_EQUAL_FATAL(ret, 1);
        CU_ASSERT_EQUAL(var->site.position, expected_position[s]);
        /* The allele count is a fatal check for the first site only. */
        if (s == 0) {
            CU_ASSERT_EQUAL_FATAL(var->num_alleles, 4);
        } else {
            CU_ASSERT_EQUAL(var->num_alleles, 4);
        }
        for (k = 0; k < 4; k++) {
            CU_ASSERT_EQUAL(var->allele_lengths[k], 1);
        }
        for (k = 0; k < 4; k++) {
            CU_ASSERT_NSTRING_EQUAL(var->alleles[k], alleles[k], 1);
        }
        CU_ASSERT_FALSE(var->has_missing_data);
        for (k = 0; k < 4; k++) {
            CU_ASSERT_EQUAL(var->genotypes[k], expected_genotypes[s][k]);
        }
    }

    /* All four sites consumed. */
    ret = tsk_vargen_next(&vargen, &var);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_vargen_free(&vargen);
    tsk_treeseq_free(&ts);
}
static void
test_single_tree_char_alphabet(void)
{
int ret = 0;
const char *sites = "0.0 A\n"
"0.125 A\n"
"0.25 C\n"
"0.5 A\n";
const char *mutations
= "0 0 T -1\n"
"1 1 TTTAAGGG -1\n"
"2 0 G -1\n"
"2 1 AT -1\n"
"2 2 T -1\n" // A bunch of different sample mutations
"3 4 T -1\n"
"3 0 A 5\n"; // A back mutation from T -> A
tsk_treeseq_t ts;
tsk_vargen_t vargen;
tsk_variant_t *var;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
sites, mutations, NULL, NULL, 0);
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->site.position, 0.0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_EQUAL(var->allele_lengths[0], 1);
CU_ASSERT_EQUAL(var->allele_lengths[1], 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "A", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "T", 1);
CU_ASSERT_FALSE(var->has_missing_data);
CU_ASSERT_EQUAL(var->genotypes[0], 1);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->genotypes[2], 0);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->site.position, 0.125);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_EQUAL(var->allele_lengths[0], 1);
CU_ASSERT_EQUAL(var->allele_lengths[1], 8);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "A", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "TTTAAGGG", 8);
CU_ASSERT_FALSE(var->has_missing_data);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 1);
CU_ASSERT_EQUAL(var->genotypes[2], 0);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->site.position, 0.25);
CU_ASSERT_EQUAL(var->num_alleles, 4);
CU_ASSERT_EQUAL(var->allele_lengths[0], 1);
CU_ASSERT_EQUAL(var->allele_lengths[1], 1);
CU_ASSERT_EQUAL(var->allele_lengths[2], 2);
CU_ASSERT_EQUAL(var->allele_lengths[3], 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "C", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "G", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[2], "AT", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[3], "T", 1);
CU_ASSERT_FALSE(var->has_missing_data);
CU_ASSERT_EQUAL(var->genotypes[0], 1);
CU_ASSERT_EQUAL(var->genotypes[1], 2);
CU_ASSERT_EQUAL(var->genotypes[2], 3);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->site.position, 0.5);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_EQUAL(var->allele_lengths[0], 1);
CU_ASSERT_EQUAL(var->allele_lengths[1], 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "A", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "T", 1);
CU_ASSERT_FALSE(var->has_missing_data);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 1);
CU_ASSERT_EQUAL(var->genotypes[2], 0);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_free(&vargen);
tsk_treeseq_free(&ts);
}
static void
test_single_tree_binary_alphabet(void)
{
int ret = 0;
tsk_treeseq_t ts;
tsk_vargen_t vargen;
tsk_variant_t *var;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_print_state(&vargen, _devnull);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->genotypes[2], 1);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 0);
CU_ASSERT_EQUAL(var->site.mutations_length, 1);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 1);
CU_ASSERT_EQUAL(var->genotypes[2], 0);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 1);
CU_ASSERT_EQUAL(var->site.mutations_length, 2);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 1);
CU_ASSERT_EQUAL(var->genotypes[1], 1);
CU_ASSERT_EQUAL(var->genotypes[2], 1);
CU_ASSERT_EQUAL(var->genotypes[3], 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 2);
CU_ASSERT_EQUAL(var->site.mutations_length, 4);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_free(&vargen);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
}
static void
test_single_tree_non_samples(void)
{
int ret = 0;
tsk_treeseq_t ts;
tsk_vargen_t vargen;
tsk_variant_t *var;
/* Non sample internal nodes we want to generate genotypes for */
tsk_id_t samples[] = { 4, 5 };
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_print_state(&vargen, _devnull);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 0);
CU_ASSERT_EQUAL(var->site.mutations_length, 1);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->genotypes[0], 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 1);
CU_ASSERT_EQUAL(var->site.mutations_length, 2);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 2);
CU_ASSERT_EQUAL(var->site.mutations_length, 4);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_free(&vargen);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, TSK_ISOLATED_NOT_MISSING);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_print_state(&vargen, _devnull);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 0);
CU_ASSERT_EQUAL(var->site.mutations_length, 1);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->genotypes[0], 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 1);
CU_ASSERT_EQUAL(var->site.mutations_length, 2);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 2);
CU_ASSERT_EQUAL(var->site.mutations_length, 4);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_free(&vargen);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
}
/* Generates variants for a single non-sample node that has no edges: by
 * default its genotype is missing at both sites, and with
 * TSK_ISOLATED_NOT_MISSING it is reported as allele 0 at both sites. */
static void
test_isolated_internal_node(void)
{
    /* Two sample nodes (0,1), plus an internal non-sample node u=2 with no edges */
    const char *node_rows = "1 0 -1 -1\n"
                            "1 0 -1 -1\n"
                            "0 1 -1 -1\n";
    const char *site_rows = "2.0 A\n"
                            "9.0 T\n";
    tsk_id_t focal_nodes[] = { 2 };
    tsk_treeseq_t treeseq;
    tsk_vargen_t gen;
    tsk_variant_t *variant;
    int status = 0;
    int site;

    tsk_treeseq_from_text(
        &treeseq, 10, node_rows, "", NULL, site_rows, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_nodes(&treeseq), 3);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_samples(&treeseq), 2);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_get_num_sites(&treeseq), 2);

    /* Default options (isolated_as_missing=True): internal node is isolated everywhere
     */
    status = tsk_vargen_init(&gen, &treeseq, focal_nodes, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    for (site = 0; site < 2; site++) {
        status = tsk_vargen_next(&gen, &variant);
        CU_ASSERT_EQUAL_FATAL(status, 1);
        CU_ASSERT_TRUE(variant->has_missing_data);
        CU_ASSERT_EQUAL(variant->genotypes[0], TSK_MISSING_DATA);
    }
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    /* Impute missing (isolated_as_missing=False): genotypes should be ancestral (0) */
    status = tsk_vargen_init(
        &gen, &treeseq, focal_nodes, 1, NULL, TSK_ISOLATED_NOT_MISSING);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    for (site = 0; site < 2; site++) {
        status = tsk_vargen_next(&gen, &variant);
        CU_ASSERT_EQUAL_FATAL(status, 1);
        CU_ASSERT_FALSE(variant->has_missing_data);
        CU_ASSERT_EQUAL(variant->genotypes[0], 0);
    }
    status = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tsk_vargen_free(&gen);

    tsk_treeseq_free(&treeseq);
}
/* tsk_vargen_init must reject sample lists that contain an out-of-range node
 * ID or the same node twice. */
static void
test_single_tree_errors(void)
{
    /* Values substituted into samples[0], each paired with the error it must
     * produce. samples[1] stays at 3, so substituting 3 creates a duplicate. */
    const tsk_id_t bad_first_sample[] = { -1, 7, 3 };
    const int expected_error[] = { TSK_ERR_NODE_OUT_OF_BOUNDS,
        TSK_ERR_NODE_OUT_OF_BOUNDS, TSK_ERR_DUPLICATE_SAMPLE };
    tsk_id_t samples[] = { 0, 3 };
    tsk_treeseq_t ts;
    tsk_vargen_t vargen;
    size_t j;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);

    /* The untouched sample list is accepted. */
    ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_vargen_free(&vargen);

    for (j = 0; j < sizeof(bad_first_sample) / sizeof(*bad_first_sample); j++) {
        samples[0] = bad_first_sample[j];
        ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);
        CU_ASSERT_EQUAL_FATAL(ret, expected_error[j]);
        /* Free is called even though init reported an error. */
        tsk_vargen_free(&vargen);
    }

    tsk_treeseq_free(&ts);
}
static void
test_single_tree_user_alleles_errors(void)
{
int ret;
tsk_treeseq_t ts;
tsk_vargen_t vargen;
tsk_variant_t *var;
/* The maximium number of alleles is 127. We need space for one more plus the
* sentinel */
const char *acct_alleles[] = { "A", "C", "G", "T", NULL };
const char *zero_allele[] = { "0", NULL };
const char *no_alleles[] = { NULL };
tsk_id_t samples[] = { 0, 3 };
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
/* these are 0/1 alleles */
ret = tsk_vargen_init(&vargen, &ts, samples, 2, acct_alleles, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ALLELE_NOT_FOUND);
tsk_vargen_free(&vargen);
/* pass just the 0 allele alleles at all */
ret = tsk_vargen_init(&vargen, &ts, samples, 2, zero_allele, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ALLELE_NOT_FOUND);
tsk_vargen_free(&vargen);
/* Empty allele list is an error */
ret = tsk_vargen_init(&vargen, &ts, samples, 2, no_alleles, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ZERO_ALLELES);
tsk_vargen_free(&vargen);
// for (j = 0; j < max_alleles; j++) {
// many_alleles[j] = "0";
// }
// many_alleles[128] = NULL;
// ret = tsk_vargen_init(&vargen, &ts, samples, 2, many_alleles, 0);
// CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TOO_MANY_ALLELES);
// tsk_vargen_free(&vargen);
tsk_treeseq_free(&ts);
}
static void
test_single_tree_subsample(void)
{
int ret = 0;
tsk_treeseq_t ts;
tsk_vargen_t vargen;
tsk_variant_t *var;
tsk_id_t samples[] = { 0, 3 };
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
ret = tsk_vargen_init(&vargen, &ts, samples, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_print_state(&vargen, _devnull);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 0);
CU_ASSERT_EQUAL(var->site.mutations_length, 1);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 1);
CU_ASSERT_EQUAL(var->site.mutations_length, 2);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 1);
CU_ASSERT_EQUAL(var->genotypes[1], 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 2);
CU_ASSERT_EQUAL(var->site.mutations_length, 4);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_free(&vargen);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Zero samples */
ret = tsk_vargen_init(&vargen, &ts, samples, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_print_state(&vargen, _devnull);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 0);
CU_ASSERT_EQUAL(var->site.mutations_length, 1);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 1);
CU_ASSERT_EQUAL(var->site.mutations_length, 2);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 2);
CU_ASSERT_EQUAL(var->site.mutations_length, 4);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_free(&vargen);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
}
static void
test_single_tree_many_alleles(void)
{
int ret = 0;
tsk_id_t ret_id;
tsk_treeseq_t ts;
tsk_vargen_t vargen;
tsk_variant_t *var;
tsk_size_t num_alleles = 257;
tsk_id_t j, k;
char alleles[num_alleles];
tsk_table_collection_t tables;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_FATAL(ret == 0);
tsk_treeseq_free(&ts);
tsk_memset(alleles, 'X', (size_t) num_alleles);
ret_id = tsk_site_table_add_row(&tables.sites, 0, "Y", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
/* Add j mutations over a single node. */
for (j = 0; j < (tsk_id_t) num_alleles; j++) {
/* When j = 0 we get a parent of -1, which is the NULL_NODE */
ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, j - 1,
TSK_UNKNOWN_TIME, alleles, (tsk_size_t) j, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_print_state(&vargen, _devnull);
ret = tsk_vargen_next(&vargen, &var);
/* We have j + 2 alleles. So, if j >= 126, we should fail with 8bit
* genotypes */
// if (j >= 126) {
// CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TOO_MANY_ALLELES);
// } else {
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "Y", 1);
for (k = 1; k < (tsk_id_t) var->num_alleles; k++) {
CU_ASSERT_EQUAL(k - 1, (tsk_id_t) var->allele_lengths[k]);
CU_ASSERT_NSTRING_EQUAL(var->alleles[k], alleles, var->allele_lengths[k]);
}
CU_ASSERT_EQUAL(var->num_alleles, (tsk_size_t) j + 2);
// }
ret = tsk_vargen_free(&vargen);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
}
tsk_table_collection_free(&tables);
}
static void
test_single_tree_silent_mutations(void)
{
int ret = 0;
tsk_treeseq_t ts;
tsk_vargen_t vargen;
tsk_variant_t *var;
/* Add some silent mutations */
const char *silent_ex_sites = "0.125 0\n"
"0.25 0\n"
"0.5 0\n"
"0.75 0\n";
/* site, node, derived_state, [parent, time] */
const char *silent_ex_mutations
= "0 5 0 -1\n" /* Silent mutation over mutation 1 */
"0 2 1 0\n"
"1 4 1 -1\n"
"1 0 0 2\n" /* Back mutation over 0 */
"1 0 0 3\n" /* Silent mutation under back mutation */
"2 0 1 -1\n" /* recurrent mutations over samples */
"2 1 1 -1\n"
"2 2 1 -1\n"
"2 3 1 -1\n"
"3 0 0 -1\n" /* Single silent mutation at a site */
;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
silent_ex_sites, silent_ex_mutations, NULL, NULL, 0);
ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_vargen_print_state(&vargen, _devnull);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->genotypes[2], 1);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 0);
CU_ASSERT_EQUAL(var->site.mutations_length, 2);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 1);
CU_ASSERT_EQUAL(var->genotypes[2], 0);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 1);
CU_ASSERT_EQUAL(var->site.mutations_length, 3);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 1);
CU_ASSERT_EQUAL(var->genotypes[1], 1);
CU_ASSERT_EQUAL(var->genotypes[2], 1);
CU_ASSERT_EQUAL(var->genotypes[3], 1);
CU_ASSERT_EQUAL(var->num_alleles, 2);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 2);
CU_ASSERT_EQUAL(var->site.mutations_length, 4);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 1);
CU_ASSERT_EQUAL(var->genotypes[0], 0);
CU_ASSERT_EQUAL(var->genotypes[1], 0);
CU_ASSERT_EQUAL(var->genotypes[2], 0);
CU_ASSERT_EQUAL(var->genotypes[3], 0);
CU_ASSERT_EQUAL(var->num_alleles, 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
CU_ASSERT_EQUAL(var->site.id, 3);
CU_ASSERT_EQUAL(var->site.mutations_length, 1);
ret = tsk_vargen_next(&vargen, &var);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_vargen_free(&vargen);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
}
static void
test_multiple_variant_decode(void)
{
int ret = 0;
tsk_size_t k;
tsk_id_t s;
tsk_treeseq_t ts;
tsk_variant_t var;
tsk_variant_t var_subset;
tsk_id_t samples[] = { 0, 1, 3 };
int32_t genos[12];
int32_t genos_expected[] = { 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1 };
int32_t genos_subset[9];
int32_t genos_expected_subset[] = { 0, 0, 0, 1, 0, 0, 0, 1, 1 };
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
/* Sample subset, no sample lists */
ret = tsk_variant_init(&var_subset, &ts, samples, 3, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (s = 0; (tsk_size_t) s < tsk_treeseq_get_num_sites(&ts); s++) {
ret = tsk_variant_decode(&var_subset, s, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (k = 0; k < 3; ++k) {
genos_subset[k + ((tsk_size_t) s * 3)] = var_subset.genotypes[k];
}
}
CU_ASSERT_EQUAL(
0, memcmp(genos_subset, genos_expected_subset, sizeof(genos_expected_subset)));
memset(genos_subset, 0, sizeof(genos_subset));
/* All samples with TSK_SAMPLE_LISTS, at the same time as a subset */
s = 0;
ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (s = 0; (tsk_size_t) s < tsk_treeseq_get_num_sites(&ts); s++) {
ret = tsk_variant_decode(&var, s, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (k = 0; k < 4; ++k) {
genos[k + ((tsk_size_t) s * 4)] = var.genotypes[k];
}
ret = tsk_variant_decode(&var_subset, s, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (k = 0; k < 3; ++k) {
genos_subset[k + ((tsk_size_t) s * 3)] = var_subset.genotypes[k];
}
}
CU_ASSERT_EQUAL(
0, memcmp(genos_subset, genos_expected_subset, sizeof(genos_expected_subset)));
CU_ASSERT_EQUAL(0, memcmp(genos, genos_expected, sizeof(genos_expected)));
tsk_variant_free(&var);
tsk_variant_free(&var_subset);
tsk_treeseq_free(&ts);
}
static void
test_variant_decode_errors(void)
{
int ret = 0;
tsk_treeseq_t ts;
tsk_variant_t var;
tsk_id_t bad_samples[] = { 0, 1, 32 };
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
/* Bad samples */
ret = tsk_variant_init(&var, &ts, bad_samples, 3, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_variant_free(&var);
/* Site out of bounds */
ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_decode(&var, 42, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tsk_variant_free(&var);
tsk_treeseq_free(&ts);
}
/* Checks that the data represented by the specified pair of variants exposed
 * by the public API is equal.
 *
 * Counts and lengths that bound a later loop or memcmp are checked with the
 * FATAL assertion variants: if they differ, continuing would read past the
 * end of the shorter variant's arrays. */
static void
assert_variants_equal(const tsk_variant_t *v1, const tsk_variant_t *v2)
{
    tsk_size_t j;

    CU_ASSERT_EQUAL_FATAL(v1->num_alleles, v2->num_alleles);
    for (j = 0; j < v1->num_alleles; j++) {
        CU_ASSERT_EQUAL_FATAL(v1->allele_lengths[j], v2->allele_lengths[j]);
        CU_ASSERT_EQUAL(
            0, memcmp(v1->alleles[j], v2->alleles[j], (size_t) v1->allele_lengths[j]));
    }
    CU_ASSERT_EQUAL(v1->has_missing_data, v2->has_missing_data);

    CU_ASSERT_EQUAL_FATAL(v1->num_samples, v2->num_samples);
    for (j = 0; j < v1->num_samples; j++) {
        CU_ASSERT_EQUAL(v1->samples[j], v2->samples[j]);
        CU_ASSERT_EQUAL(v1->genotypes[j], v2->genotypes[j]);
    }

    CU_ASSERT_EQUAL(v1->site.id, v2->site.id);
    CU_ASSERT_EQUAL(v1->site.position, v2->site.position);
    CU_ASSERT_EQUAL_FATAL(
        v1->site.ancestral_state_length, v2->site.ancestral_state_length);
    CU_ASSERT_EQUAL(0, memcmp(v1->site.ancestral_state, v2->site.ancestral_state,
                           (size_t) v1->site.ancestral_state_length));
    CU_ASSERT_EQUAL(v1->site.mutations_length, v2->site.mutations_length);
    /* We're pointing back to the same memory for embedded pointers */
    CU_ASSERT_EQUAL(v1->site.mutations, v2->site.mutations);
    CU_ASSERT_EQUAL(v1->site.metadata, v2->site.metadata);
}
static void
test_variant_copy(void)
{
int ret = 0;
tsk_size_t j;
tsk_treeseq_t ts;
tsk_variant_t var, var_copy;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {
ret = tsk_variant_decode(&var, (tsk_id_t) j, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_restricted_copy(&var, &var_copy);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_variant_decode(&var_copy, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);
assert_variants_equal(&var, &var_copy);
CU_ASSERT_EQUAL(
0, memcmp(var.tree_sequence, var.tree_sequence, sizeof(*var.tree_sequence)));
CU_ASSERT_EQUAL(0, memcmp(&var.tree, &var_copy.tree, sizeof(tsk_tree_t)));
CU_ASSERT_EQUAL(0, memcmp(&var.site, &var_copy.site, sizeof(tsk_site_t)));
CU_ASSERT_EQUAL(var_copy.traversal_stack, NULL);
CU_ASSERT_EQUAL(var_copy.sample_index_map, NULL);
CU_ASSERT_EQUAL(var_copy.alt_samples, NULL);
CU_ASSERT_EQUAL(var_copy.alt_sample_index_map, NULL);
tsk_variant_free(&var_copy);
}
tsk_variant_free(&var);
tsk_treeseq_free(&ts);
}
/* Restricted copies (and copies of copies) must reproduce variants whose
 * alleles are multi-character strings of differing lengths. */
static void
test_variant_copy_long_alleles(void)
{
    int ret = 0;
    const char *sites = "0.0    GGGG\n"
                        "0.125  AAAAA\n"
                        "0.25   CCCCCC\n"
                        "0.5    AAAAAAA\n";
    const char *mutations = "0    0     TTT      -1\n"
                            "1    1     CCCCCCC  -1\n"
                            "2    0     GGGGGGG  -1\n"
                            "2    1     AG       -1\n"
                            "2    2     TTTTTTT  -1\n"
                            "3    4     TGGGGGG  -1\n"
                            "3    0     AAA      5\n";
    tsk_treeseq_t ts;
    tsk_variant_t var, copy, copy_of_copy;
    tsk_size_t j;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        sites, mutations, NULL, NULL, 0);
    ret = tsk_variant_init(&var, &ts, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {
        ret = tsk_variant_decode(&var, (tsk_id_t) j, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);

        /* First-generation copy. */
        ret = tsk_variant_restricted_copy(&var, &copy);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        assert_variants_equal(&var, &copy);

        /* Copying a copy must still match the original variant. */
        ret = tsk_variant_restricted_copy(&copy, &copy_of_copy);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        assert_variants_equal(&var, &copy_of_copy);

        tsk_variant_free(&copy_of_copy);
        tsk_variant_free(&copy);
    }

    tsk_variant_free(&var);
    tsk_treeseq_free(&ts);
}
/* A restricted copy must remain usable (and copyable) after the variant it
 * was taken from has been freed, and neither generation of copy may be used
 * for decoding. */
static void
test_variant_copy_memory_management(void)
{
    int ret = 0;
    tsk_size_t j;
    tsk_treeseq_t ts;
    tsk_variant_t *var;
    tsk_variant_t copy, copy_of_copy;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);

    for (j = 0; j < tsk_treeseq_get_num_sites(&ts); j++) {
        /* The source variant lives on the heap so that releasing it really
         * returns its storage before the copy is used again. */
        var = tsk_malloc(sizeof(*var));
        CU_ASSERT_FATAL(var != NULL);
        ret = tsk_variant_init(var, &ts, NULL, 0, NULL, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_variant_decode(var, (tsk_id_t) j, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_variant_restricted_copy(var, &copy);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        assert_variants_equal(var, &copy);

        /* Free var to make sure we're not pointing to any of the original memory. */
        tsk_variant_free(var);
        free(var);

        ret = tsk_variant_restricted_copy(&copy, &copy_of_copy);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        assert_variants_equal(&copy, &copy_of_copy);

        ret = tsk_variant_decode(&copy, 0, 0);
        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);
        ret = tsk_variant_decode(&copy_of_copy, 0, 0);
        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_VARIANT_CANT_DECODE_COPY);

        tsk_variant_free(&copy);
        tsk_variant_free(&copy_of_copy);
    }
    tsk_treeseq_free(&ts);
}
/* Initialises *ts with the small fixture used by the alignment tests: a
 * sequence of length 10 with three sample nodes (0-2), two non-sample nodes
 * (3, 4), one tree spanning [0, 10), and two sites at positions 2 and 9. */
static void
build_balanced_three_example_align(tsk_treeseq_t *ts)
{
    static const char node_text[] = "1 0 0 -1\n"
                                    "1 0 0 -1\n"
                                    "1 0 0 -1\n"
                                    "0 1 0 -1\n"
                                    "0 2 0 -1\n";
    /* Node 3 is the parent of 1 and 2; node 4 is the parent of 0 and 3. */
    static const char edge_text[] = "0 10 3 1,2\n"
                                    "0 10 4 0,3\n";
    /* Ancestral states: A at position 2, T at position 9. */
    static const char site_text[] = "2 A\n"
                                    "9 T\n";
    /* Site 0 mutates to G over node 0; site 1 mutates to C over node 3. */
    static const char mutation_text[] = "0 0 G\n"
                                        "1 3 C\n";

    tsk_treeseq_from_text(
        ts, 10, node_text, edge_text, NULL, site_text, mutation_text, NULL, NULL, 0);
}
static void
test_alignments_basic_default(void)
{
int ret = 0;
tsk_treeseq_t ts;
const char *ref = "NNNNNNNNNN";
const tsk_id_t *samples;
tsk_size_t n, L;
char *buf;
build_balanced_three_example_align(&ts);
samples = tsk_treeseq_get_samples(&ts);
n = tsk_treeseq_get_num_samples(&ts);
L = 10;
buf = tsk_malloc(n * L);
CU_ASSERT_PTR_NOT_NULL_FATAL(buf);
ret = tsk_treeseq_decode_alignments(
&ts, ref, (tsk_size_t) strlen(ref), samples, n, 0, 10, 'N', buf, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_NSTRING_EQUAL(buf + 0 * L, "NNGNNNNNNT", L);
CU_ASSERT_NSTRING_EQUAL(buf + 1 * L, "NNANNNNNNC", L);
CU_ASSERT_NSTRING_EQUAL(buf + 2 * L, "NNANNNNNNC", L);
tsk_safe_free(buf);
tsk_treeseq_free(&ts);
}
static void
test_alignments_reference_sequence(void)
{
int ret = 0;
tsk_treeseq_t ts;
const char *ref = "0123456789";
const tsk_id_t *samples;
tsk_size_t n, L = 10;
char *buf = NULL;
build_balanced_three_example_align(&ts);
samples = tsk_treeseq_get_samples(&ts);
n = tsk_treeseq_get_num_samples(&ts);
buf = tsk_malloc(n * L);
CU_ASSERT_PTR_NOT_NULL_FATAL(buf);
ret = tsk_treeseq_decode_alignments(
&ts, ref, (tsk_size_t) strlen(ref), samples, n, 0, 10, 'N', buf, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_NSTRING_EQUAL(buf + 0 * L, "01G345678T", L);
CU_ASSERT_NSTRING_EQUAL(buf + 1 * L, "01A345678C", L);
CU_ASSERT_NSTRING_EQUAL(buf + 2 * L, "01A345678C", L);
tsk_safe_free(buf);
tsk_treeseq_free(&ts);
}
/* A sample attached to its parent only over [3, 7) must show the missing
 * character outside that interval, both for the full sequence and for a
 * sub-interval that straddles the attached region. */
static void
test_alignments_partial_isolation(void)
{
    const char *node_text = "0 1 0 -1\n"  /* parent */
                            "1 0 0 -1\n"; /* child sample */
    const char *edge_text = "3 7 0 1\n";
    const char *site_text = "5 A\n";
    const char *mutation_text = "0 1 G\n";
    const char *reference = "0123456789";
    tsk_id_t sample = 1;
    tsk_treeseq_t ts;
    char out[10];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 10, node_text, edge_text, NULL, site_text, mutation_text, NULL, NULL, 0);

    /* Whole sequence [0, 10). */
    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, &sample, 1, 0, 10, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_NSTRING_EQUAL(out, "NNN34G6NNN", 10);

    /* Sub-interval [2, 8): six output columns. */
    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, &sample, 1, 2, 8, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_NSTRING_EQUAL(out, "N34G6N", 6);

    tsk_treeseq_free(&ts);
}
/* Decoding only [0, 5) of a two-tree sequence must return 0 and fill the
 * five requested columns for both samples. */
static void
test_alignments_return_code_truncated_interval(void)
{
    const char *node_text = "1 0 0 -1\n"
                            "1 0 0 -1\n"
                            "0 1 0 -1\n";
    /* Tree over [0,5): samples 0 and 1 under root 2.
     * Tree over [5,10): only sample 1 under root 2 (sample 0 isolated). */
    const char *edge_text = "0 5  2 0\n"
                            "0 10 2 1\n";
    const char *reference = "NNNNNNNNNN";
    const tsk_size_t width = 5;
    const tsk_id_t *sample_nodes;
    tsk_size_t num_samples, row;
    tsk_treeseq_t ts;
    char out[10];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 10, node_text, edge_text, NULL, NULL, NULL, NULL, NULL, 0);
    sample_nodes = tsk_treeseq_get_samples(&ts);
    num_samples = tsk_treeseq_get_num_samples(&ts);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, sample_nodes, num_samples, 0, 5, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (row = 0; row < 2; row++) {
        CU_ASSERT_NSTRING_EQUAL(out + row * width, "NNNNN", 5);
    }

    tsk_treeseq_free(&ts);
}
/* A site whose ancestral state is two characters long ("AC") must make
 * alignment decoding fail with TSK_ERR_BAD_ALLELE_LENGTH. */
static void
test_alignments_invalid_allele_length(void)
{
    const char *node_text = "1 0 0 -1\n";
    const char *edge_text = "";
    const char *site_text = "2 AC\n";
    const char *reference = "NNNNN";
    tsk_id_t sample = 0;
    tsk_treeseq_t ts;
    char out[5];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 5, node_text, edge_text, NULL, site_text, NULL, NULL, NULL, 0);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 5, &sample, 1, 0, 5, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_ALLELE_LENGTH);

    tsk_treeseq_free(&ts);
}
/* Passing a reference length of 4 for a request over [0, 5) must be rejected
 * with TSK_ERR_BAD_PARAM_VALUE. */
static void
test_alignments_bad_reference_length(void)
{
    const char *node_text = "1 0 0 -1\n";
    const char *edge_text = "";
    const char *reference = "NNNNN";
    tsk_id_t sample = 0;
    tsk_treeseq_t ts;
    char out[5];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 5, node_text, edge_text, NULL, NULL, NULL, NULL, NULL, 0);

    /* Deliberately under-report the reference length. */
    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 4, &sample, 1, 0, 5, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    tsk_treeseq_free(&ts);
}
/* A fractional left bound (0.5) must be rejected with
 * TSK_ERR_BAD_PARAM_VALUE. */
static void
test_alignments_non_integer_bounds(void)
{
    const char *node_text = "1 0 0 -1\n";
    const char *edge_text = "";
    const char *reference = "NNNNN";
    tsk_id_t sample = 0;
    tsk_treeseq_t ts;
    char out[5];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 5, node_text, edge_text, NULL, NULL, NULL, NULL, NULL, 0);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 5, &sample, 1, 0.5, 5, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    tsk_treeseq_free(&ts);
}
/* A tree sequence with a site at the non-integer position 0.5 must make
 * alignment decoding fail with TSK_ERR_BAD_PARAM_VALUE. */
static void
test_alignments_discrete_genome_required(void)
{
    const char *node_text = "1 0 0 -1\n";
    const char *edge_text = "";
    const char *site_text = "0.5 A\n";
    const char *reference = "NNNNN";
    tsk_id_t sample = 0;
    tsk_treeseq_t ts;
    char out[5];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 5, node_text, edge_text, NULL, site_text, NULL, NULL, NULL, 0);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 5, &sample, 1, 0, 5, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    tsk_treeseq_free(&ts);
}
/* A NULL reference sequence must be rejected with TSK_ERR_BAD_PARAM_VALUE. */
static void
test_alignments_null_reference(void)
{
    int ret = 0;
    tsk_treeseq_t ts;
    const tsk_id_t *samples;
    tsk_size_t n;
    /* Sized for the full request (3 samples x 10 columns) so that a regression
     * in the function under test cannot overrun the stack buffer. */
    char buf[30];

    build_balanced_three_example_align(&ts);
    samples = tsk_treeseq_get_samples(&ts);
    n = tsk_treeseq_get_num_samples(&ts);

    ret = tsk_treeseq_decode_alignments(&ts, NULL, 10, samples, n, 0, 10, 'N', buf, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    tsk_treeseq_free(&ts);
}
/* With a non-zero node count, a NULL node list or a NULL output buffer must
 * each be rejected with TSK_ERR_BAD_PARAM_VALUE. */
static void
test_alignments_null_nodes_or_buf(void)
{
    const char *reference = "NNNNNNNNNN";
    const tsk_id_t *sample_nodes;
    tsk_size_t num_samples;
    tsk_treeseq_t ts;
    char out[30];
    int ret = 0;

    build_balanced_three_example_align(&ts);
    sample_nodes = tsk_treeseq_get_samples(&ts);
    num_samples = tsk_treeseq_get_num_samples(&ts);

    /* NULL node list. */
    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, NULL, num_samples, 0, 10, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* NULL output buffer. */
    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, sample_nodes, num_samples, 0, 10, 'N', NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    tsk_treeseq_free(&ts);
}
/* Requesting the node ID equal to the number of nodes (one past the last
 * valid ID) must fail with TSK_ERR_NODE_OUT_OF_BOUNDS. */
static void
test_alignments_node_out_of_bounds(void)
{
    const char *reference = "NNNNNNNNNN";
    tsk_id_t one_past_last;
    tsk_treeseq_t ts;
    char out[10];
    int ret = 0;

    build_balanced_three_example_align(&ts);
    one_past_last = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, &one_past_last, 1, 0, 10, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    tsk_treeseq_free(&ts);
}
/* With TSK_ISOLATED_NOT_MISSING, regions where the sample is not attached to
 * its parent show the reference characters rather than the missing character. */
static void
test_alignments_isolated_as_not_missing(void)
{
    const char *node_text = "0 1 0 -1\n"  /* parent */
                            "1 0 0 -1\n"; /* child sample */
    const char *edge_text = "3 7 0 1\n";
    const char *site_text = "5 A\n";
    const char *mutation_text = "0 1 G\n";
    const char *reference = "0123456789";
    tsk_id_t sample = 1;
    tsk_treeseq_t ts;
    char out[10];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 10, node_text, edge_text, NULL, site_text, mutation_text, NULL, NULL, 0);

    /* Whole sequence [0, 10). */
    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, &sample, 1, 0, 10, 'N', out, TSK_ISOLATED_NOT_MISSING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_NSTRING_EQUAL(out, "01234G6789", 10);

    /* Sub-interval [2, 8). */
    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, &sample, 1, 2, 8, 'N', out, TSK_ISOLATED_NOT_MISSING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_NSTRING_EQUAL(out, "234G67", 6);

    tsk_treeseq_free(&ts);
}
/* Alignments can be requested for a non-sample internal node: node 3 of the
 * shared fixture carries the ancestral A at position 2 and the derived C at
 * position 9. */
static void
test_alignments_internal_node_non_sample(void)
{
    const char *reference = "NNNNNNNNNN";
    tsk_id_t internal = 3;
    tsk_treeseq_t ts;
    char out[10];
    int ret = 0;

    build_balanced_three_example_align(&ts);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, &internal, 1, 0, 10, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_NSTRING_EQUAL(out, "NNANNNNNNC", 10);

    tsk_treeseq_free(&ts);
}
/* Choosing a missing-data character ('Q') that also occurs as an allele must
 * fail with TSK_ERR_MISSING_CHAR_COLLISION. */
static void
test_alignments_missing_char_collision(void)
{
    const char *node_text = "1 0 0 -1\n";
    const char *edge_text = "";
    const char *site_text = "2 A\n";
    const char *mutation_text = "0 0 Q\n"; /* allele equals missing char */
    const char *reference = "NNNNN";
    tsk_id_t sample = 0;
    tsk_treeseq_t ts;
    char out[5];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 5, node_text, edge_text, NULL, site_text, mutation_text, NULL, NULL, 0);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 5, &sample, 1, 0, 5, 'Q', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MISSING_CHAR_COLLISION);

    tsk_treeseq_free(&ts);
}
/* Requesting zero nodes succeeds even when both the node list and the output
 * buffer are NULL. */
static void
test_alignments_zero_nodes_ok(void)
{
    const char *reference = "NNNNNNNNNN";
    tsk_treeseq_t ts;
    int ret = 0;

    build_balanced_three_example_align(&ts);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, NULL, 0, 0, 10, 'N', NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_treeseq_free(&ts);
}
/* Invalid [left, right) intervals must be rejected with
 * TSK_ERR_BAD_PARAM_VALUE. */
static void
test_alignments_bad_bounds_cases(void)
{
    int ret = 0;
    tsk_treeseq_t ts;
    const char *ref = "NNNNNNNNNN";
    tsk_id_t node = 0;
    /* Sized to the full sequence length so that a regression which accepts one
     * of these intervals cannot write past the end of the buffer. */
    char buf[10];

    build_balanced_three_example_align(&ts);

    /* left == right invalid */
    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &node, 1, 5, 5, 'N', buf, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* left negative */
    ret = tsk_treeseq_decode_alignments(&ts, ref, 10, &node, 1, -1, 5, 'N', buf, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    tsk_treeseq_free(&ts);
}
/* Output rows must follow the order of the requested node list (2, 0, 1),
 * not node-ID order. */
static void
test_alignments_order_preserved(void)
{
    const char *reference = "NNNNNNNNNN";
    /* Expected rows for nodes 2, 0 and 1 respectively. */
    const char *expected[] = { "NNANNNNNNC", "NNGNNNNNNT", "NNANNNNNNC" };
    tsk_id_t requested[3] = { 2, 0, 1 };
    const tsk_size_t width = 10;
    tsk_size_t row;
    tsk_treeseq_t ts;
    char out[30];
    int ret = 0;

    build_balanced_three_example_align(&ts);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, requested, 3, 0, 10, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (row = 0; row < 3; row++) {
        CU_ASSERT_NSTRING_EQUAL(out + row * width, expected[row], width);
    }

    tsk_treeseq_free(&ts);
}
/* The caller-chosen missing character ('Q' here) is what appears in the
 * regions where the sample is not attached to its parent. */
static void
test_alignments_missing_char_custom(void)
{
    const char *node_text = "0 1 0 -1\n"  /* parent */
                            "1 0 0 -1\n"; /* child sample */
    const char *edge_text = "3 7 0 1\n";
    const char *site_text = "5 A\n";
    const char *mutation_text = "0 1 G\n";
    const char *reference = "0123456789";
    tsk_id_t sample = 1;
    tsk_treeseq_t ts;
    char out[10];
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 10, node_text, edge_text, NULL, site_text, mutation_text, NULL, NULL, 0);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 10, &sample, 1, 0, 10, 'Q', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_NSTRING_EQUAL(out, "QQQ34G6QQQ", 10);

    tsk_treeseq_free(&ts);
}
static void
test_alignments_embedded_null_reference(void)
{
int ret = 0;
tsk_treeseq_t ts;
char ref[10] = { '0', '1', '2', '3', '\0', '5', '6', '7', '8', '9' };
const tsk_id_t *samples;
tsk_size_t n, L = 10;
char *buf = NULL;
char exp0[10] = { '0', '1', 'G', '3', '\0', '5', '6', '7', '8', 'T' };
char exp1[10] = { '0', '1', 'A', '3', '\0', '5', '6', '7', '8', 'C' };
char exp2[10] = { '0', '1', 'A', '3', '\0', '5', '6', '7', '8', 'C' };
build_balanced_three_example_align(&ts);
samples = tsk_treeseq_get_samples(&ts);
n = tsk_treeseq_get_num_samples(&ts);
buf = tsk_malloc(n * L);
CU_ASSERT_PTR_NOT_NULL_FATAL(buf);
ret = tsk_treeseq_decode_alignments(&ts, ref, 10, samples, n, 0, 10, 'N', buf, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(0, memcmp(buf + 0 * L, exp0, (size_t) L));
CU_ASSERT_EQUAL(0, memcmp(buf + 1 * L, exp1, (size_t) L));
CU_ASSERT_EQUAL(0, memcmp(buf + 2 * L, exp2, (size_t) L));
tsk_safe_free(buf);
tsk_treeseq_free(&ts);
}
/* Decodes two consecutive sites where the second has more alleles (3) than
 * the first (2), and checks the resulting alignments. */
static void
test_alignments_growing_allele_buffer(void)
{
    /* Two samples (0,1) with root 2 over [0,3). */
    const char *node_text = "1 0 0 -1\n"
                            "1 0 0 -1\n"
                            "0 1 0 -1\n";
    const char *edge_text = "0 3 2 0\n"
                            "0 3 2 1\n";
    /* Both sites (positions 1 and 2) have ancestral state A. */
    const char *site_text = "1 A\n"
                            "2 A\n";
    /* Site 0: node 0 -> G (2 alleles in total).
     * Site 1: node 0 -> C and node 1 -> T (3 alleles in total). */
    const char *mutation_text = "0 0 G\n"
                                "1 0 C\n"
                                "1 1 T\n";
    const char *reference = "NNN";
    /* Expected rows: sample 0, then sample 1. */
    const char *expected[] = { "NGC", "NAT" };
    const tsk_size_t width = 3;
    const tsk_id_t *sample_nodes;
    tsk_size_t num_samples, row;
    tsk_treeseq_t ts;
    char *out = NULL;
    int ret = 0;

    tsk_treeseq_from_text(
        &ts, 3, node_text, edge_text, NULL, site_text, mutation_text, NULL, NULL, 0);
    sample_nodes = tsk_treeseq_get_samples(&ts);
    num_samples = tsk_treeseq_get_num_samples(&ts);

    out = tsk_malloc(num_samples * width);
    CU_ASSERT_PTR_NOT_NULL_FATAL(out);

    ret = tsk_treeseq_decode_alignments(
        &ts, reference, 3, sample_nodes, num_samples, 0, 3, 'N', out, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (row = 0; row < 2; row++) {
        CU_ASSERT_NSTRING_EQUAL(out + row * width, expected[row], width);
    }

    tsk_safe_free(out);
    tsk_treeseq_free(&ts);
}
int
main(int argc, char **argv)
{
/* Each test is registered under its own function name; stringifying the
 * identifier keeps the label and the function pointer in step. */
#define GENOTYPES_TEST_ENTRY(fn) { #fn, fn }
    /* Registration order is the order in which the tests are listed to the
     * runner. The table ends with a NULL sentinel entry. */
    CU_TestInfo tests[] = {
        GENOTYPES_TEST_ENTRY(test_simplest_missing_data),
        GENOTYPES_TEST_ENTRY(test_simplest_missing_data_user_alleles),
        GENOTYPES_TEST_ENTRY(test_simplest_missing_data_mutations),
        GENOTYPES_TEST_ENTRY(test_simplest_missing_data_mutations_all_samples),
        GENOTYPES_TEST_ENTRY(test_single_tree_user_alleles),
        GENOTYPES_TEST_ENTRY(test_single_tree_char_alphabet),
        GENOTYPES_TEST_ENTRY(test_single_tree_binary_alphabet),
        GENOTYPES_TEST_ENTRY(test_single_tree_non_samples),
        GENOTYPES_TEST_ENTRY(test_isolated_internal_node),
        GENOTYPES_TEST_ENTRY(test_single_tree_errors),
        GENOTYPES_TEST_ENTRY(test_single_tree_user_alleles_errors),
        GENOTYPES_TEST_ENTRY(test_single_tree_subsample),
        GENOTYPES_TEST_ENTRY(test_single_tree_many_alleles),
        GENOTYPES_TEST_ENTRY(test_single_tree_silent_mutations),
        GENOTYPES_TEST_ENTRY(test_multiple_variant_decode),
        GENOTYPES_TEST_ENTRY(test_variant_decode_errors),
        GENOTYPES_TEST_ENTRY(test_variant_copy),
        GENOTYPES_TEST_ENTRY(test_variant_copy_long_alleles),
        GENOTYPES_TEST_ENTRY(test_variant_copy_memory_management),
        GENOTYPES_TEST_ENTRY(test_alignments_basic_default),
        GENOTYPES_TEST_ENTRY(test_alignments_reference_sequence),
        GENOTYPES_TEST_ENTRY(test_alignments_partial_isolation),
        GENOTYPES_TEST_ENTRY(test_alignments_return_code_truncated_interval),
        GENOTYPES_TEST_ENTRY(test_alignments_isolated_as_not_missing),
        GENOTYPES_TEST_ENTRY(test_alignments_internal_node_non_sample),
        GENOTYPES_TEST_ENTRY(test_alignments_invalid_allele_length),
        GENOTYPES_TEST_ENTRY(test_alignments_bad_reference_length),
        GENOTYPES_TEST_ENTRY(test_alignments_non_integer_bounds),
        GENOTYPES_TEST_ENTRY(test_alignments_discrete_genome_required),
        GENOTYPES_TEST_ENTRY(test_alignments_null_reference),
        GENOTYPES_TEST_ENTRY(test_alignments_null_nodes_or_buf),
        GENOTYPES_TEST_ENTRY(test_alignments_node_out_of_bounds),
        GENOTYPES_TEST_ENTRY(test_alignments_missing_char_collision),
        GENOTYPES_TEST_ENTRY(test_alignments_zero_nodes_ok),
        GENOTYPES_TEST_ENTRY(test_alignments_bad_bounds_cases),
        GENOTYPES_TEST_ENTRY(test_alignments_order_preserved),
        GENOTYPES_TEST_ENTRY(test_alignments_missing_char_custom),
        GENOTYPES_TEST_ENTRY(test_alignments_embedded_null_reference),
        GENOTYPES_TEST_ENTRY(test_alignments_growing_allele_buffer),
        { NULL, NULL },
    };
#undef GENOTYPES_TEST_ENTRY

    return test_main(tests, argc, argv);
}
================================================
FILE: c/tests/test_haplotype_matching.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2023 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
#include
#include
#include
static void
test_single_tree_missing_alleles(void)
{
    /* With the HMM initialised using TSK_ALLELES_ACGT, both the forward and the
     * Viterbi pass over the single-tree example are expected to report
     * TSK_ERR_ALLELE_NOT_FOUND. */
    double recomb_rate[] = { 0, 0.25, 0.25 };
    double mutation_rate[] = { 0.125, 0.125, 0.125 };
    int32_t query[] = { 0, 0, 0, 0 };
    tsk_treeseq_t ts;
    tsk_ls_hmm_t ls_hmm;
    tsk_compressed_matrix_t forward;
    tsk_viterbi_matrix_t viterbi;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    ret = tsk_ls_hmm_init(&ls_hmm, &ts, recomb_rate, mutation_rate, TSK_ALLELES_ACGT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_ls_hmm_forward(&ls_hmm, query, &forward, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ALLELE_NOT_FOUND);

    ret = tsk_ls_hmm_viterbi(&ls_hmm, query, &viterbi, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ALLELE_NOT_FOUND);

    /* The output matrices are released even though both passes failed. */
    tsk_ls_hmm_free(&ls_hmm);
    tsk_compressed_matrix_free(&forward);
    tsk_viterbi_matrix_free(&viterbi);
    tsk_treeseq_free(&ts);
}
static void
test_single_tree_exact_match(void)
{
    /* Runs the forward and Viterbi passes for the query { 1, 1, 1 } with all
     * mutation rates set to zero, then checks that the Viterbi traceback is
     * the same at the default precision and at every precision in [1, 24). */
    const tsk_id_t expected_path[] = { 2, 1, 1 };
    double recomb_rate[] = { 0.0, 0.25, 0.25 };
    double mutation_rate[] = { 0, 0, 0 };
    int32_t query[] = { 1, 1, 1 };
    double decoded_compressed_matrix[12];
    tsk_id_t path[3];
    tsk_treeseq_t ts;
    tsk_ls_hmm_t ls_hmm;
    tsk_compressed_matrix_t forward;
    tsk_viterbi_matrix_t viterbi;
    unsigned int precision;
    int site;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    ret = tsk_ls_hmm_init(&ls_hmm, &ts, recomb_rate, mutation_rate, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Forward pass, then decode its compressed output. */
    ret = tsk_ls_hmm_forward(&ls_hmm, query, &forward, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_compressed_matrix_print_state(&forward, _devnull);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);
    ret = tsk_compressed_matrix_decode(&forward, decoded_compressed_matrix);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Viterbi pass at the default precision. */
    ret = tsk_ls_hmm_viterbi(&ls_hmm, query, &viterbi, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_viterbi_matrix_print_state(&viterbi, _devnull);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);
    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (site = 0; site < 3; site++) {
        CU_ASSERT_EQUAL(path[site], expected_path[site]);
    }

    /* Lowering the precision must not change the answer. The Viterbi matrix
     * is reused across iterations, hence TSK_NO_INIT. */
    for (precision = 1; precision < 24; precision++) {
        ret = tsk_ls_hmm_set_precision(&ls_hmm, precision);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_ls_hmm_viterbi(&ls_hmm, query, &viterbi, TSK_NO_INIT);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        tsk_viterbi_matrix_print_state(&viterbi, _devnull);
        tsk_ls_hmm_print_state(&ls_hmm, _devnull);
        ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        for (site = 0; site < 3; site++) {
            CU_ASSERT_EQUAL(path[site], expected_path[site]);
        }
    }

    tsk_ls_hmm_free(&ls_hmm);
    tsk_compressed_matrix_free(&forward);
    tsk_viterbi_matrix_free(&viterbi);
    tsk_treeseq_free(&ts);
}
static void
test_single_tree_missing_haplotype_data(void)
{
    /* Same setup as the exact-match case, except that the middle site of the
     * query is TSK_MISSING_DATA. Both passes must succeed and the traceback is
     * expected to be node 2 at every site. */
    const tsk_id_t expected_path[] = { 2, 2, 2 };
    double recomb_rate[] = { 0.0, 0.25, 0.25 };
    double mutation_rate[] = { 0, 0, 0 };
    int32_t query[] = { 1, TSK_MISSING_DATA, 1 };
    double decoded_compressed_matrix[12];
    tsk_id_t path[3];
    tsk_treeseq_t ts;
    tsk_ls_hmm_t ls_hmm;
    tsk_compressed_matrix_t forward;
    tsk_viterbi_matrix_t viterbi;
    int site;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    ret = tsk_ls_hmm_init(&ls_hmm, &ts, recomb_rate, mutation_rate, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Forward pass, then decode its compressed output. */
    ret = tsk_ls_hmm_forward(&ls_hmm, query, &forward, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_compressed_matrix_print_state(&forward, _devnull);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);
    ret = tsk_compressed_matrix_decode(&forward, decoded_compressed_matrix);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Viterbi pass and traceback. */
    ret = tsk_ls_hmm_viterbi(&ls_hmm, query, &viterbi, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_viterbi_matrix_print_state(&viterbi, _devnull);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);
    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (site = 0; site < 3; site++) {
        CU_ASSERT_EQUAL(path[site], expected_path[site]);
    }

    tsk_ls_hmm_free(&ls_hmm);
    tsk_compressed_matrix_free(&forward);
    tsk_viterbi_matrix_free(&viterbi);
    tsk_treeseq_free(&ts);
}
static void
test_single_tree_match_impossible(void)
{
    /* With every mutation rate at zero, the all-zero query below cannot be
     * produced, so the forward, Viterbi and backward passes must each return
     * TSK_ERR_MATCH_IMPOSSIBLE. */
    double recomb_rate[] = { 0.0, 0.25, 0.25 };
    double mutation_rate[] = { 0, 0, 0 };
    int32_t query[] = { 0, 0, 0 };
    tsk_treeseq_t ts;
    tsk_ls_hmm_t ls_hmm;
    tsk_compressed_matrix_t forward;
    tsk_compressed_matrix_t backward;
    tsk_viterbi_matrix_t viterbi;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    ret = tsk_ls_hmm_init(&ls_hmm, &ts, recomb_rate, mutation_rate, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* State printing is exercised after each failed pass as well. */
    ret = tsk_ls_hmm_forward(&ls_hmm, query, &forward, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MATCH_IMPOSSIBLE);
    tsk_compressed_matrix_print_state(&forward, _devnull);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);

    ret = tsk_ls_hmm_viterbi(&ls_hmm, query, &viterbi, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MATCH_IMPOSSIBLE);
    tsk_viterbi_matrix_print_state(&viterbi, _devnull);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);

    /* The backward pass takes the normalisation factors from the forward
     * matrix as input. */
    ret = tsk_ls_hmm_backward(
        &ls_hmm, query, forward.normalisation_factor, &backward, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MATCH_IMPOSSIBLE);
    tsk_compressed_matrix_print_state(&backward, _devnull);
    /* For debugging, print `forward` and `backward` to stdout here instead. */
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);

    tsk_ls_hmm_free(&ls_hmm);
    tsk_compressed_matrix_free(&forward);
    tsk_compressed_matrix_free(&backward);
    tsk_viterbi_matrix_free(&viterbi);
    tsk_treeseq_free(&ts);
}
static void
test_single_tree_errors(void)
{
int ret = 0;
tsk_treeseq_t ts;
tsk_ls_hmm_t ls_hmm;
tsk_compressed_matrix_t forward;
tsk_viterbi_matrix_t viterbi;
tsk_value_transition_t T[1];
double decoded[3][4];
double rho[] = { 0.0, 0.25, 0.25 };
double mu[] = { 0, 0, 0 };
int32_t h[] = { 0, 0, 0 };
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
ret = tsk_viterbi_matrix_init(&viterbi, &ts, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_compressed_matrix_init(&forward, &ts, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
viterbi.matrix.tree_sequence = NULL;
ret = tsk_ls_hmm_viterbi(&ls_hmm, h, &viterbi, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
viterbi.matrix.tree_sequence = &ts;
forward.tree_sequence = NULL;
ret = tsk_ls_hmm_forward(&ls_hmm, h, &forward, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
forward.tree_sequence = &ts;
ret = tsk_compressed_matrix_store_site(&forward, 3, 0, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
ret = tsk_compressed_matrix_store_site(&forward, 4, 0, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
/* FIXME disabling this tests for now because we filter out negative
* nodes when storing now, to accomodate some oddness in the initial
* conditions of the backward matrix. */
/* T[0].tree_node = -1; */
/* T[0].value = 0; */
/* ret = tsk_compressed_matrix_store_site(&forward, 0, 1, 1, T); */
/* CU_ASSERT_EQUAL_FATAL(ret, 0); */
/* ret = tsk_compressed_matrix_decode(&forward, (double *) decoded); */
/* CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS); */
T[0].tree_node = 7;
T[0].value = 0;
ret = tsk_compressed_matrix_store_site(&forward, 0, 1, 1, T);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_compressed_matrix_decode(&forward, (double *) decoded);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_ls_hmm_free(&ls_hmm);
tsk_compressed_matrix_free(&forward);
tsk_viterbi_matrix_free(&viterbi);
tsk_treeseq_free(&ts);
}
static void
test_single_tree_compressed_matrix(void)
{
    /* Checks the store/decode/clear round trip of a compressed matrix on the
     * single-tree example, then reuses the same matrix as the output of a
     * forward pass.
     *
     * Only two transitions are ever stored per site, so a fixed two-element
     * array is used. (A variable-length array is avoided: VLAs are an optional
     * feature in C11 and are not supported by every compiler.) */
    tsk_value_transition_t T[2];
    double decoded[3][4];
    double rho[] = { 0.0, 0.25, 0.25 };
    double mu[] = { 0.1, 0.1, 0.1 };
    int32_t h[] = { 0, 0, 0 };
    tsk_treeseq_t ts;
    tsk_compressed_matrix_t matrix;
    tsk_ls_hmm_t ls_hmm;
    double expected;
    int j, k;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    ret = tsk_compressed_matrix_init(&matrix, &ts, 0, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_compressed_matrix_print_state(&matrix, _devnull);

    /* For site j: value 0 at node 6 and value 1 at node j. */
    T[0].tree_node = 6;
    T[0].value = 0;
    for (j = 0; j < 3; j++) {
        T[1].tree_node = j;
        T[1].value = 1;
        ret = tsk_compressed_matrix_store_site(&matrix, j, 1.0, 2, T);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
    }
    tsk_compressed_matrix_print_state(&matrix, _devnull);

    /* The first three columns of the decoded matrix form an identity. */
    ret = tsk_compressed_matrix_decode(&matrix, (double *) decoded);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < 3; j++) {
        for (k = 0; k < 3; k++) {
            expected = (j == k) ? 1.0 : 0.0;
            CU_ASSERT_EQUAL(decoded[j][k], expected);
        }
    }

    /* Cleared matrix should be zero everywhere */
    tsk_compressed_matrix_clear(&matrix);
    ret = tsk_compressed_matrix_decode(&matrix, (double *) decoded);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < 3; j++) {
        for (k = 0; k < 3; k++) {
            CU_ASSERT_EQUAL(decoded[j][k], 0.0);
        }
    }

    /* The matrix is already initialised, so the forward pass is run with
     * TSK_NO_INIT and its output must decode cleanly. */
    ret = tsk_ls_hmm_init(&ls_hmm, &ts, rho, mu, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_ls_hmm_forward(&ls_hmm, h, &matrix, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_compressed_matrix_print_state(&matrix, _devnull);
    ret = tsk_compressed_matrix_decode(&matrix, (double *) decoded);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_compressed_matrix_free(&matrix);
    tsk_ls_hmm_free(&ls_hmm);
    tsk_treeseq_free(&ts);
}
static void
test_single_tree_viterbi_matrix(void)
{
    /* Drives a Viterbi matrix by hand (store sites, add recombination records,
     * trace back), then reuses it as the output of a real Viterbi pass, and
     * finally checks that clearing it makes traceback fail again. */
    double recomb_rate[] = { 0.0, 0.25, 0.25 };
    double mutation_rate[] = { 0, 0, 0 };
    int32_t query[] = { 1, 1, 1 };
    tsk_value_transition_t T[2];
    tsk_id_t path[3];
    tsk_treeseq_t ts;
    tsk_viterbi_matrix_t viterbi;
    tsk_ls_hmm_t ls_hmm;
    int j;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    ret = tsk_viterbi_matrix_init(&viterbi, &ts, 0, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_viterbi_matrix_print_state(&viterbi, _devnull);

    /* Nothing has been stored yet, so there is nothing to trace back. */
    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_VITERBI_MATRIX);

    /* Same two transitions at every site: value 0 at node 6, value 1 at node 1. */
    T[0].tree_node = 6;
    T[0].value = 0;
    T[1].tree_node = 1;
    T[1].value = 1;
    for (j = 0; j < 3; j++) {
        ret = tsk_compressed_matrix_store_site(&viterbi.matrix, j, 1.0, 2, T);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* Every site needs at least one record, so add one at the root saying
         * that no recombination is required. */
        ret = tsk_viterbi_matrix_add_recombination_required(&viterbi, j, 6, false);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
    }
    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < 3; j++) {
        CU_ASSERT_EQUAL_FATAL(path[j], 1);
    }

    /* Reuse the hand-filled matrix as the output of a real Viterbi pass. */
    ret = tsk_ls_hmm_init(&ls_hmm, &ts, recomb_rate, mutation_rate, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_ls_hmm_viterbi(&ls_hmm, query, &viterbi, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_viterbi_matrix_print_state(&viterbi, _devnull);
    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* After clearing, traceback fails in the same way as on a fresh matrix. */
    ret = tsk_viterbi_matrix_clear(&viterbi);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_VITERBI_MATRIX);
    tsk_viterbi_matrix_free(&viterbi);

    /* Re-initialise with a block size of 1 and add many records, so that the
     * realloc path for recombination records is taken. */
    ret = tsk_viterbi_matrix_init(&viterbi, &ts, 1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < 100; j++) {
        ret = tsk_viterbi_matrix_add_recombination_required(&viterbi, 0, 6, false);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
    }
    tsk_viterbi_matrix_print_state(&viterbi, _devnull);

    tsk_viterbi_matrix_free(&viterbi);
    tsk_ls_hmm_free(&ls_hmm);
    tsk_treeseq_free(&ts);
}
static void
test_multi_tree_exact_match(void)
{
    /* Forward, backward and Viterbi passes for the query { 1, 1, 1 } on the
     * multi-tree "paper" example with zero mutation rates. The Viterbi
     * traceback must be the same at the default precision and at every
     * precision in [4, 24). */
    const tsk_id_t expected_path[] = { 2, 0, 1 };
    double recomb_rate[] = { 0.0, 0.25, 0.25 };
    double mutation_rate[] = { 0, 0, 0 };
    int32_t query[] = { 1, 1, 1 };
    double decoded_compressed_matrix[12];
    tsk_id_t path[3];
    tsk_treeseq_t ts;
    tsk_ls_hmm_t ls_hmm;
    tsk_compressed_matrix_t forward, backward;
    tsk_viterbi_matrix_t viterbi;
    unsigned int precision;
    int site;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    ret = tsk_ls_hmm_init(&ls_hmm, &ts, recomb_rate, mutation_rate, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Forward pass, then decode its compressed output. */
    ret = tsk_ls_hmm_forward(&ls_hmm, query, &forward, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);
    tsk_compressed_matrix_print_state(&forward, _devnull);
    ret = tsk_compressed_matrix_decode(&forward, decoded_compressed_matrix);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Backward pass, fed with the forward normalisation factors. */
    ret = tsk_ls_hmm_backward(
        &ls_hmm, query, forward.normalisation_factor, &backward, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);
    tsk_compressed_matrix_print_state(&backward, _devnull);
    ret = tsk_compressed_matrix_decode(&backward, decoded_compressed_matrix);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Viterbi pass at the default precision. */
    ret = tsk_ls_hmm_viterbi(&ls_hmm, query, &viterbi, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_viterbi_matrix_print_state(&viterbi, _devnull);
    tsk_ls_hmm_print_state(&ls_hmm, _devnull);
    ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (site = 0; site < 3; site++) {
        CU_ASSERT_EQUAL(path[site], expected_path[site]);
    }

    /* Lowering the precision must not change the answer. The Viterbi matrix
     * is reused across iterations, hence TSK_NO_INIT. */
    for (precision = 4; precision < 24; precision++) {
        ret = tsk_ls_hmm_set_precision(&ls_hmm, precision);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_ls_hmm_viterbi(&ls_hmm, query, &viterbi, TSK_NO_INIT);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        tsk_viterbi_matrix_print_state(&viterbi, _devnull);
        tsk_ls_hmm_print_state(&ls_hmm, _devnull);
        ret = tsk_viterbi_matrix_traceback(&viterbi, path, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        for (site = 0; site < 3; site++) {
            CU_ASSERT_EQUAL(path[site], expected_path[site]);
        }
    }

    tsk_ls_hmm_free(&ls_hmm);
    tsk_compressed_matrix_free(&forward);
    tsk_compressed_matrix_free(&backward);
    tsk_viterbi_matrix_free(&viterbi);
    tsk_treeseq_free(&ts);
}
static void
test_multi_tree_errors(void)
{
    /* Storing a transition for a node that is absent from the first tree is
     * accepted, but decoding the matrix afterwards must fail with
     * TSK_ERR_BAD_COMPRESSED_MATRIX_NODE. */
    tsk_value_transition_t T[1];
    double decoded[3][4];
    tsk_treeseq_t ts;
    tsk_compressed_matrix_t forward;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    ret = tsk_compressed_matrix_init(&forward, &ts, 0, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Node 7 is chosen because it is not part of the first tree. */
    T[0].tree_node = 7;
    T[0].value = 0;
    ret = tsk_compressed_matrix_store_site(&forward, 0, 1, 1, T);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_compressed_matrix_decode(&forward, (double *) decoded);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_COMPRESSED_MATRIX_NODE);

    tsk_compressed_matrix_free(&forward);
    tsk_treeseq_free(&ts);
}
static void
test_caterpillar_tree_many_values(void)
{
int ret = 0;
tsk_ls_hmm_t ls_hmm;
tsk_compressed_matrix_t matrix;
double rho[] = { 0.1, 0.1, 0.1, 0.1, 0.1 };
double mu[] = { 0.0, 0.0, 0.0, 0.0, 0.0 };
int32_t h[] = { 0, 0, 0, 0, 0 };
tsk_size_t n[] = {
8,
16,
32,
64,
};
tsk_treeseq_t *ts;
tsk_size_t j;
for (j = 0; j < sizeof(n) / sizeof(*n); j++) {
ts = caterpillar_tree(n[j], 5, n[j] - 2);
ret = tsk_ls_hmm_init(&ls_hmm, ts, rho, mu, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_compressed_matrix_init(&matrix, ts, 1 << 10, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_ls_hmm_forward(&ls_hmm, h, &matrix, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_compressed_matrix_print_state(&matrix, _devnull);
tsk_ls_hmm_print_state(&ls_hmm, _devnull);
tsk_ls_hmm_free(&ls_hmm);
tsk_compressed_matrix_free(&matrix);
tsk_treeseq_free(ts);
free(ts);
}
j = 40;
ts = caterpillar_tree(j, 5, j - 2);
ret = tsk_ls_hmm_init(&ls_hmm, ts, rho, mu, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_compressed_matrix_init(&matrix, ts, 1 << 20, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Short circuit this value so we can run the test */
ls_hmm.max_parsimony_words = 0;
ret = tsk_ls_hmm_forward(&ls_hmm, h, &matrix, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TOO_MANY_VALUES);
tsk_ls_hmm_free(&ls_hmm);
tsk_compressed_matrix_free(&matrix);
tsk_treeseq_free(ts);
free(ts);
}
int
main(int argc, char **argv)
{
/* Each test is registered under its own function name; stringifying the
 * identifier keeps the label and the function pointer in step. */
#define HAPLOTYPE_TEST_ENTRY(fn) { #fn, fn }
    /* The table ends with a NULL sentinel entry. */
    CU_TestInfo tests[] = {
        HAPLOTYPE_TEST_ENTRY(test_single_tree_missing_alleles),
        HAPLOTYPE_TEST_ENTRY(test_single_tree_exact_match),
        HAPLOTYPE_TEST_ENTRY(test_single_tree_missing_haplotype_data),
        HAPLOTYPE_TEST_ENTRY(test_single_tree_match_impossible),
        HAPLOTYPE_TEST_ENTRY(test_single_tree_errors),
        HAPLOTYPE_TEST_ENTRY(test_single_tree_compressed_matrix),
        HAPLOTYPE_TEST_ENTRY(test_single_tree_viterbi_matrix),
        HAPLOTYPE_TEST_ENTRY(test_multi_tree_exact_match),
        HAPLOTYPE_TEST_ENTRY(test_multi_tree_errors),
        HAPLOTYPE_TEST_ENTRY(test_caterpillar_tree_many_values),
        { NULL, NULL },
    };
#undef HAPLOTYPE_TEST_ENTRY

    return test_main(tests, argc, argv);
}
================================================
FILE: c/tests/test_minimal_cpp.cpp
================================================
/* * MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/* Minimal tests to make sure that tskit at least compiles and links
* in a simple C++ program */
#include
#include
#include
#include
#include
#include
#include
using namespace std;
void
test_kas_strerror()
{
    /* The kastore error message must be usable from C++: stream it and compare
     * the resulting std::string against the expected text. */
    std::cout << "test_kas_strerror" << std::endl;
    std::ostringstream message;
    message << kas_strerror(KAS_ERR_NO_MEMORY);
    assert(message.str() == "Out of memory");
}
void
test_strerror()
{
    /* The tskit error message must be usable from C++: stream it and compare
     * the resulting std::string against the expected text. */
    std::cout << "test_strerror" << std::endl;
    std::ostringstream message;
    message << tsk_strerror(TSK_ERR_NO_MEMORY);
    assert(message.str() == "Out of memory. (TSK_ERR_NO_MEMORY)");
}
void
test_load_error()
{
    /* Loading a path that does not exist must fail with TSK_ERR_IO. The tree
     * sequence is still passed to tsk_treeseq_free afterwards.
     *
     * The progress label now matches the function name, like every other test
     * in this file (it previously printed "test_open_error"). */
    std::cout << "test_load_error" << endl;
    tsk_treeseq_t ts;
    int ret = tsk_treeseq_load(&ts, "no such file", 0);
    assert(ret == TSK_ERR_IO);
    tsk_treeseq_free(&ts);
}
void
test_table_basics()
{
    /* Adds two node rows to an empty table collection. Each add_row call is
     * expected to return the index of the row it created (0, then 1). */
    std::cout << "test_table_basics" << std::endl;
    tsk_table_collection_t tables;
    int ret = tsk_table_collection_init(&tables, 0);
    assert(ret == 0);

    for (int row = 0; row < 2; row++) {
        /* Row 0 gets time 1.0 and row 1 gets time 2.0. */
        ret = tsk_node_table_add_row(
            &tables.nodes, 0, row + 1.0, TSK_NULL, TSK_NULL, NULL, 0);
        assert(ret == row);
    }
    assert(tables.nodes.num_rows == 2);

    tsk_table_collection_free(&tables);
}
/* Replacement for the sorter's sort_edges callback built on std::sort. Because
 * the comparison is a lambda that the compiler can inline, this is intended to
 * be considerably faster than tskit's built-in edge sort.
 *
 * Only sorting from row 0 of an edge table without metadata is supported; both
 * conditions are asserted. Always returns 0.
 */
int
cpp_sort_edges(tsk_table_sorter_t *sorter, tsk_size_t start)
{
    /* Plain value copy of one edge row, so rows can be moved around as units. */
    struct edge_row {
        double left, right;
        tsk_id_t parent, child;
        edge_row(double l, double r, tsk_id_t p, tsk_id_t c)
            : left{ l }, right{ r }, parent{ p }, child{ c }
        {
        }
    };

    tsk_edge_table_t *edges = &sorter->tables->edges;
    const double *node_time = sorter->tables->nodes.time;
    const size_t num_edges = edges->num_rows;
    std::vector<edge_row> rows;

    /* Ordering: parent time, then parent ID, then child ID, then left
     * coordinate. An operator< on edge_row would not work because the node
     * times have to be bound in, hence the lambda. This mirrors the comparison
     * used in fwdpp, except for the time comparison (fwdpp table times go
     * forwards). */
    const auto cmp = [&node_time](const edge_row &lhs, const edge_row &rhs) {
        const auto lhs_time = node_time[lhs.parent];
        const auto rhs_time = node_time[rhs.parent];
        if (!(lhs_time == rhs_time)) {
            return lhs_time < rhs_time;
        }
        if (lhs.parent != rhs.parent) {
            return lhs.parent < rhs.parent;
        }
        if (lhs.child != rhs.child) {
            return lhs.child < rhs.child;
        }
        return lhs.left < rhs.left;
    };

    assert(start == 0);
    /* Edge metadata is not carried along, so require that there is none. */
    assert(edges->metadata_length == 0);

    /* Copy the columns into rows, sort the rows, and write the columns back. */
    rows.reserve(num_edges);
    for (size_t j = 0; j < num_edges; j++) {
        rows.emplace_back(
            edges->left[j], edges->right[j], edges->parent[j], edges->child[j]);
    }
    std::sort(begin(rows), end(rows), cmp);

    size_t out = 0;
    for (const edge_row &row : rows) {
        edges->left[out] = row.left;
        edges->right[out] = row.right;
        edges->parent[out] = row.parent;
        edges->child[out] = row.child;
        out++;
    }
    return 0;
}
void
test_edge_sorting()
{
std::cout << "test_edge_sorting" << endl;
tsk_table_collection_t tables;
tsk_id_t n = 10;
tsk_id_t j;
int ret = tsk_table_collection_init(&tables, 0);
assert(ret == 0);
tables.sequence_length = 1.0;
/* Make a stick tree */
/* Add nodes and edges */
for (j = 0; j < n; j++) {
ret = tsk_node_table_add_row(
&tables.nodes, TSK_NODE_IS_SAMPLE, j + 1, TSK_NULL, TSK_NULL, NULL, 0);
assert(ret == j);
}
for (j = n - 1; j > 0; j--) {
tsk_edge_table_add_row(&tables.edges, 0, 1, j, j - 1, NULL, 0);
}
assert(tables.nodes.num_rows == (tsk_size_t) n);
assert(tables.edges.num_rows == (tsk_size_t) n - 1);
/* Make sure the edges are unsorted */
/* Not calling TSK_CHECK_TREES so casting is safe */
ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_EDGE_ORDERING);
assert(ret == TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);
/* Sort the tables */
tsk_table_sorter_t sorter;
ret = tsk_table_sorter_init(&sorter, &tables, 0);
assert(ret == 0);
/* Set the sort_edges to our local C++ version. We could also set some
* persistent state in sorter.params if we wanted to. */
sorter.sort_edges = cpp_sort_edges;
ret = tsk_table_sorter_run(&sorter, NULL);
assert(ret == 0);
tsk_table_sorter_free(&sorter);
/* Make sure the edges are now sorted */
ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_EDGE_ORDERING);
assert(ret == 0);
tsk_table_collection_free(&tables);
}
/* Edge-sort callback that always fails by throwing std::exception. It never
 * returns normally, so there is no return statement after the throw. */
int
sort_edges_raises_exception(tsk_table_sorter_t *sorter, tsk_size_t start)
{
    /* The arguments exist only to match the callback signature. */
    (void) sorter;
    (void) start;
    throw std::exception();
}
/* Edge-sort callback that always fails by throwing a plain int, i.e. something
 * that is not derived from std::exception. It never returns normally, so there
 * is no return statement after the throw. */
int
sort_edges_raises_non_exception(tsk_table_sorter_t *sorter, tsk_size_t start)
{
    /* The arguments exist only to match the callback signature. */
    (void) sorter;
    (void) start;
    throw 42;
}
/* Edge-sort callback that turns any C++ exception into an error code so that
 * nothing propagates through the C library.
 *
 * sorter->user_data selects which throwing sorter is invoked: NULL runs the
 * std::exception variant (reported as -12345), anything else runs the
 * non-std::exception variant (reported as -12346). */
int
safe_sort_edges(tsk_table_sorter_t *sorter, tsk_size_t start)
{
    const bool use_std_exception = (sorter->user_data == NULL);

    try {
        if (use_std_exception) {
            return sort_edges_raises_exception(sorter, start);
        }
        return sort_edges_raises_non_exception(sorter, start);
    } catch (...) {
        return use_std_exception ? -12345 : -12346;
    }
}
void
test_edge_sorting_errors()
{
    /* Checks that an error code produced inside a user-supplied sort_edges
     * callback is what tsk_table_sorter_run returns.
     *
     * The body is compiled out whenever _WIN32 is defined. On 32 bit Windows
     * the exceptions were, inexplicably, not being caught as expected; that
     * looked far more like a platform quirk than a real bug in this code, so
     * the test is simply disabled there.
     *
     * https://github.com/tskit-dev/tskit/issues/1790
     * https://github.com/tskit-dev/tskit/pull/1791
     */
#if !defined(_WIN32)
    std::cout << "test_edge_sorting_errors" << std::endl;
    tsk_table_collection_t tables;
    tsk_table_sorter_t sorter;
    tsk_id_t ret = tsk_table_collection_init(&tables, 0);
    assert(ret == 0);
    tables.sequence_length = 1.0;
    ret = tsk_table_sorter_init(&sorter, &tables, 0);
    assert(ret == 0);
    sorter.sort_edges = safe_sort_edges;

    /* First run: user_data is not set by this test, and the std::exception
     * error code is expected. */
    ret = tsk_table_sorter_run(&sorter, NULL);
    assert(ret == -12345);

    /* Second run: user_data doubles as a switch for the callback. Any non-NULL
     * value selects the other kind of thrown object. */
    sorter.user_data = &tables;
    ret = tsk_table_sorter_run(&sorter, NULL);
    assert(ret == -12346);

    tsk_table_sorter_free(&sorter);
    tsk_table_collection_free(&tables);
#endif
}
int
main()
{
    /* Run every smoke test in order. A failing test aborts through assert(),
     * so reaching the end means all of them passed. */
    void (*const all_tests[])() = {
        test_kas_strerror,
        test_strerror,
        test_load_error,
        test_table_basics,
        test_edge_sorting,
        test_edge_sorting_errors,
    };

    for (auto run_test : all_tests) {
        run_test();
    }
    return 0;
}
================================================
FILE: c/tests/test_stats.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
#include
#include
#include
#include
#include
/* Returns true if any site with ID in [start, end), clipped to the number of
 * sites in the tree sequence, carries more than one mutation. */
static bool
multi_mutations_exist(tsk_treeseq_t *ts, tsk_id_t start, tsk_id_t end)
{
    const tsk_id_t stop = TSK_MIN((tsk_id_t) tsk_treeseq_get_num_sites(ts), end);
    tsk_site_t site;
    tsk_id_t site_id = start;
    bool found = false;
    int ret;

    /* Stop scanning at the first site that has multiple mutations. */
    while (!found && site_id < stop) {
        ret = tsk_treeseq_get_site(ts, site_id, &site);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        found = site.mutations_length > 1;
        site_id++;
    }
    return found;
}
/* Exercises the tsk_ld_calc_t API on `ts`.
 *
 * Checks, in order: the r2 of every site with itself; forward r2 arrays
 * starting at site 0 and at site num_sites - 2; reverse r2 arrays starting at
 * the last site and at site 1; agreement between array values, a repeated
 * array call, and pairwise tsk_ld_calc_get_r2 values; and finally the errors
 * returned for a bad direction and for out-of-bounds site IDs. Whenever a
 * queried range contains a site with more than one mutation, the call is
 * expected to return TSK_ERR_ONLY_INFINITE_SITES instead of succeeding.
 */
static void
verify_ld(tsk_treeseq_t *ts)
{
int ret;
tsk_size_t num_sites = tsk_treeseq_get_num_sites(ts);
tsk_site_t *sites = tsk_malloc(num_sites * sizeof(tsk_site_t));
int *num_site_mutations = tsk_malloc(num_sites * sizeof(int));
tsk_ld_calc_t ld_calc;
double *r2, *r2_prime, x;
tsk_id_t j;
tsk_size_t num_r2_values;
/* Tolerance for comparing r2 values obtained through different calls. */
double eps = 1e-6;
r2 = tsk_calloc(num_sites, sizeof(double));
r2_prime = tsk_calloc(num_sites, sizeof(double));
CU_ASSERT_FATAL(r2 != NULL);
CU_ASSERT_FATAL(r2_prime != NULL);
CU_ASSERT_FATAL(sites != NULL);
CU_ASSERT_FATAL(num_site_mutations != NULL);
ret = tsk_ld_calc_init(&ld_calc, ts);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* State dumps go to _devnull: they only exercise the printing code path. */
tsk_ld_calc_print_state(&ld_calc, _devnull);
/* r2 of each site with itself: 1.0 for sites with at most one mutation,
 * an error for sites with several mutations. */
for (j = 0; j < (tsk_id_t) num_sites; j++) {
ret = tsk_treeseq_get_site(ts, j, sites + j);
CU_ASSERT_EQUAL_FATAL(ret, 0);
num_site_mutations[j] = (int) sites[j].mutations_length;
ret = tsk_ld_calc_get_r2(&ld_calc, j, j, &x);
if (num_site_mutations[j] <= 1) {
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_DOUBLE_EQUAL_FATAL(x, 1.0, eps);
} else {
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);
}
}
if (num_sites > 0) {
/* Some checks in the forward direction */
/* From site 0 with no limit on count or distance: one value per other site. */
ret = tsk_ld_calc_get_r2_array(
&ld_calc, 0, TSK_DIR_FORWARD, num_sites, DBL_MAX, r2, &num_r2_values);
if (multi_mutations_exist(ts, 0, (tsk_id_t) num_sites)) {
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);
} else {
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(num_r2_values, num_sites - 1);
}
tsk_ld_calc_print_state(&ld_calc, _devnull);
/* From the second-to-last site only one site lies ahead.
 * NOTE(review): with a single site the start ID below is -1; presumably the
 * test inputs always have at least two sites -- confirm. */
ret = tsk_ld_calc_get_r2_array(&ld_calc, (tsk_id_t) num_sites - 2,
TSK_DIR_FORWARD, num_sites, DBL_MAX, r2_prime, &num_r2_values);
if (multi_mutations_exist(ts, (tsk_id_t) num_sites - 2, (tsk_id_t) num_sites)) {
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);
} else {
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(num_r2_values, 1);
}
tsk_ld_calc_print_state(&ld_calc, _devnull);
/* Repeat the full forward query into r2_prime: it must reproduce r2 exactly
 * and agree with the pairwise API to within eps. */
ret = tsk_ld_calc_get_r2_array(
&ld_calc, 0, TSK_DIR_FORWARD, num_sites, DBL_MAX, r2_prime, &num_r2_values);
if (multi_mutations_exist(ts, 0, (tsk_id_t) num_sites)) {
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);
} else {
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(num_r2_values, num_sites - 1);
for (j = 0; j < (tsk_id_t) num_r2_values; j++) {
CU_ASSERT_EQUAL_FATAL(r2[j], r2_prime[j]);
ret = tsk_ld_calc_get_r2(&ld_calc, 0, j + 1, &x);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_DOUBLE_EQUAL_FATAL(r2[j], x, eps);
}
}
/* Some checks in the reverse direction */
/* From the last site backwards: one value per other site. */
ret = tsk_ld_calc_get_r2_array(&ld_calc, (tsk_id_t) num_sites - 1,
TSK_DIR_REVERSE, num_sites, DBL_MAX, r2, &num_r2_values);
if (multi_mutations_exist(ts, 0, (tsk_id_t) num_sites)) {
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);
} else {
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(num_r2_values, num_sites - 1);
}
tsk_ld_calc_print_state(&ld_calc, _devnull);
/* From site 1 backwards only site 0 remains. */
ret = tsk_ld_calc_get_r2_array(
&ld_calc, 1, TSK_DIR_REVERSE, num_sites, DBL_MAX, r2_prime, &num_r2_values);
if (multi_mutations_exist(ts, 0, 2)) {
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);
} else {
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(num_r2_values, 1);
}
/* Repeat the full reverse query into r2_prime and cross-check it against r2
 * and the pairwise API; entry j pairs the last site with site
 * num_sites - j - 2. */
ret = tsk_ld_calc_get_r2_array(&ld_calc, (tsk_id_t) num_sites - 1,
TSK_DIR_REVERSE, num_sites, DBL_MAX, r2_prime, &num_r2_values);
if (multi_mutations_exist(ts, 0, (tsk_id_t) num_sites)) {
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);
} else {
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(num_r2_values, num_sites - 1);
tsk_ld_calc_print_state(&ld_calc, _devnull);
for (j = 0; j < (tsk_id_t) num_r2_values; j++) {
CU_ASSERT_EQUAL_FATAL(r2[j], r2_prime[j]);
ret = tsk_ld_calc_get_r2(&ld_calc, (tsk_id_t) num_sites - 1,
(tsk_id_t) num_sites - j - 2, &x);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_DOUBLE_EQUAL_FATAL(r2[j], x, eps);
}
}
/* Check some error conditions */
/* A direction of 0 (neither TSK_DIR_FORWARD nor TSK_DIR_REVERSE) is rejected. */
ret = tsk_ld_calc_get_r2_array(
&ld_calc, 0, 0, num_sites, DBL_MAX, r2, &num_r2_values);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
}
/* Check some error conditions */
/* Site IDs num_sites and num_sites + 1 are out of bounds in every position. */
for (j = (tsk_id_t) num_sites; j < (tsk_id_t) num_sites + 2; j++) {
ret = tsk_ld_calc_get_r2_array(
&ld_calc, j, TSK_DIR_FORWARD, num_sites, DBL_MAX, r2, &num_r2_values);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
ret = tsk_ld_calc_get_r2(&ld_calc, j, 0, r2);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
ret = tsk_ld_calc_get_r2(&ld_calc, 0, j, r2);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
}
tsk_ld_calc_free(&ld_calc);
free(r2);
free(r2_prime);
free(sites);
free(num_site_mutations);
}
/* FIXME: this is only a smoke test: it checks that the calls succeed but never
 * inspects the values written to the output array. Simplest-case and
 * single-tree tests, plus separate tests for the error conditions, should be
 * added as part of the general stats framework.
 */
static void
verify_genealogical_nearest_neighbours(tsk_treeseq_t *ts)
{
    int ret;
    const tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    const tsk_size_t first_half = num_samples / 2;
    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);
    const tsk_id_t *sample_sets[2];
    tsk_size_t sample_set_size[2];
    /* Output buffer: two values (one per set) for each sample passed in. */
    double *A = tsk_malloc(2 * num_samples * sizeof(double));

    CU_ASSERT_FATAL(A != NULL);

    /* First call: the sample list split into two contiguous halves. */
    sample_sets[0] = samples;
    sample_sets[1] = samples + first_half;
    sample_set_size[0] = first_half;
    sample_set_size[1] = num_samples - first_half;
    ret = tsk_treeseq_genealogical_nearest_neighbours(
        ts, samples, num_samples, sample_sets, sample_set_size, 2, 0, A);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Second call: two singleton sets holding the first and second sample. */
    sample_sets[0] = samples;
    sample_sets[1] = samples + 1;
    sample_set_size[0] = 1;
    sample_set_size[1] = 1;
    ret = tsk_treeseq_genealogical_nearest_neighbours(
        ts, samples, num_samples, sample_sets, sample_set_size, 2, 0, A);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    free(A);
}
/* FIXME: this test is weak and should check the return value somehow.
 * We should also have simplest and single tree tests along with separate
 * tests for the error conditions. This should be done as part of the general
 * stats framework.
 */
/* Calls tsk_treeseq_mean_descendants with the samples split into two halves and
 * checks that it succeeds, then checks the errors returned for zero sample
 * sets and for out-of-range node IDs.
 */
static void
verify_mean_descendants(tsk_treeseq_t *ts)
{
    int ret;
    tsk_id_t *samples;
    const tsk_id_t *sample_sets[2];
    tsk_size_t sample_set_size[2];
    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    /* Output buffer: one value per (node, sample set) pair. */
    double *C = tsk_malloc(2 * tsk_treeseq_get_num_nodes(ts) * sizeof(double));

    CU_ASSERT_FATAL(C != NULL);
    /* Work on a private copy of the sample list so entries can be corrupted
     * below without touching the tree sequence's own array. */
    samples = tsk_malloc(num_samples * sizeof(*samples));
    /* Fail here rather than inside tsk_memcpy if the allocation failed. */
    CU_ASSERT_FATAL(samples != NULL);
    tsk_memcpy(samples, tsk_treeseq_get_samples(ts), num_samples * sizeof(*samples));

    sample_sets[0] = samples;
    sample_set_size[0] = num_samples / 2;
    sample_sets[1] = samples + sample_set_size[0];
    sample_set_size[1] = num_samples - sample_set_size[0];
    ret = tsk_treeseq_mean_descendants(ts, sample_sets, sample_set_size, 2, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Check some error conditions */
    /* Zero sample sets. */
    ret = tsk_treeseq_mean_descendants(ts, sample_sets, sample_set_size, 0, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Negative node ID. */
    samples[0] = -1;
    ret = tsk_treeseq_mean_descendants(ts, sample_sets, sample_set_size, 2, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* Node ID past the end of the node table. */
    samples[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(ts) + 1;
    ret = tsk_treeseq_mean_descendants(ts, sample_sets, sample_set_size, 2, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    free(samples);
    free(C);
}
/* Cross-checks tsk_treeseq_divergence_matrix against tsk_treeseq_divergence.
 * Every sample forms its own singleton sample set and the general divergence
 * statistic is evaluated for all n * n ordered pairs of sets. Off-diagonal
 * entries of the two results must agree to within 1E-6; the diagonal of the
 * matrix result must be exactly zero.
 */
static void
verify_divergence_matrix(tsk_treeseq_t *ts, tsk_flags_t options)
{
    int ret;
    const tsk_size_t n = tsk_treeseq_get_num_samples(ts);
    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);
    tsk_size_t sample_set_sizes[n];
    tsk_id_t index_tuples[2 * n * n];
    double D1[n * n], D2[n * n];
    tsk_id_t *pair = index_tuples;
    tsk_size_t row, col, i;

    /* Lay out the (row, col) index pairs in row-major order. */
    for (row = 0; row < n; row++) {
        sample_set_sizes[row] = 1;
        for (col = 0; col < n; col++) {
            *pair++ = (tsk_id_t) row;
            *pair++ = (tsk_id_t) col;
        }
    }

    ret = tsk_treeseq_divergence(
        ts, n, sample_set_sizes, samples, n * n, index_tuples, 0, NULL, options, D1);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_divergence_matrix(
        ts, n, sample_set_sizes, samples, 0, NULL, options, D2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Walk the flattened n x n results; entry i is on the diagonal when its
     * row (i / n) equals its column (i % n). */
    for (i = 0; i < n * n; i++) {
        if (i / n == i % n) {
            CU_ASSERT_EQUAL(D2[i], 0);
        } else {
            CU_ASSERT_DOUBLE_EQUAL(D1[i], D2[i], 1E-6);
        }
    }
}
/* Check coalescence counts: runs tsk_treeseq_pair_coalescence_counts with three
 * different node-to-bin assignments (one window per tree) and then walks
 * through the argument errors the function reports.
 */
static void
verify_pair_coalescence_counts(tsk_treeseq_t *ts, tsk_flags_t options)
{
    int ret;
    const tsk_size_t n = tsk_treeseq_get_num_samples(ts);
    const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);
    const tsk_size_t T = tsk_treeseq_get_num_trees(ts);
    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);
    const double *breakpoints = tsk_treeseq_get_breakpoints(ts);
    const tsk_size_t P = 2;               /* number of sample sets */
    const tsk_size_t I = P * (P + 1) / 2; /* unordered pairs of sets, incl. self */
    const tsk_size_t B = 8;               /* bins for the modulo assignment */
    tsk_id_t sample_sets[n];
    tsk_size_t sample_set_sizes[P];
    tsk_id_t index_tuples[2 * I];
    tsk_id_t node_bin_map[N];
    tsk_size_t dim = T * N * I;
    double C[dim];
    double C_B[T * B * I];
    double C_Nh[T * (N / 2) * I];
    /* Samples per set, rounded up so every sample index maps below P. */
    const tsk_size_t per_set = (n + P - 1) / P;
    tsk_size_t a, b, u, pos, saved_size;

    for (u = 0; u < n; u++) {
        sample_sets[u] = samples[u];
    }
    for (a = 0; a < P; a++) {
        sample_set_sizes[a] = 0;
    }
    for (u = 0; u < n; u++) {
        sample_set_sizes[u / per_set]++;
    }
    /* All index pairs (a, b) with a <= b. */
    pos = 0;
    for (a = 0; a < P; a++) {
        for (b = a; b < P; b++) {
            index_tuples[pos] = (tsk_id_t) a;
            index_tuples[pos + 1] = (tsk_id_t) b;
            pos += 2;
        }
    }

    /* test various bin assignments */
    /* Nodes spread over B bins by ID modulo B. */
    for (u = 0; u < N; u++) {
        node_bin_map[u] = ((tsk_id_t) (u % B));
    }
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, options, C_B);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* First half of the nodes get their own bin; the rest are unassigned. */
    for (u = 0; u < N; u++) {
        if (u < N / 2) {
            node_bin_map[u] = (tsk_id_t) u;
        } else {
            node_bin_map[u] = TSK_NULL;
        }
    }
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, N / 2, node_bin_map, options, C_Nh);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Every node in its own bin. */
    for (u = 0; u < N; u++) {
        node_bin_map[u] = (tsk_id_t) u;
    }
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, N, node_bin_map, options, C);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* cover errors */
    /* A single window built from the second breakpoint followed by 0.0. */
    double bad_breakpoints[2] = { breakpoints[1], 0.0 };
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, 1, bad_breakpoints, N, node_bin_map, options, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* Sample set index equal to the number of sets. */
    index_tuples[0] = (tsk_id_t) P;
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, N, node_bin_map, options, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);
    index_tuples[0] = 0;

    /* Empty first sample set. */
    saved_size = sample_set_sizes[0];
    sample_set_sizes[0] = 0;
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, N, node_bin_map, options, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);
    sample_set_sizes[0] = saved_size;

    /* Second entry overwritten with node 0. */
    sample_sets[1] = 0;
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, N, node_bin_map, options, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
    sample_sets[1] = 1;

    /* Declared number of bins smaller than the largest bin index, then zero. */
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, N - 1, node_bin_map, options, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP_DIM);
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, 0, node_bin_map, options, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP_DIM);

    /* Bin index of -2. */
    node_bin_map[0] = -2;
    ret = tsk_treeseq_pair_coalescence_counts(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, N, node_bin_map, options, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP);
    node_bin_map[0] = 0;
}
/* Check coalescence quantiles: runs tsk_treeseq_pair_coalescence_quantiles with
 * a time-based node binning, a shortened quantile range and an all-unassigned
 * binning, then walks through the argument errors the function reports.
 */
static void
verify_pair_coalescence_quantiles(tsk_treeseq_t *ts)
{
    int ret;
    const tsk_size_t n = tsk_treeseq_get_num_samples(ts);
    const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);
    const tsk_size_t T = tsk_treeseq_get_num_trees(ts);
    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);
    const double *breakpoints = tsk_treeseq_get_breakpoints(ts);
    const double *nodes_time = ts->tables->nodes.time;
    const double max_time = ts->max_time;
    const tsk_size_t P = 2; /* number of sample sets */
    const tsk_size_t Q = 5; /* number of quantiles */
    const tsk_size_t B = 4; /* number of time bins */
    const tsk_size_t I = P * (P + 1) / 2;
    double quantiles[] = { 0.0, 0.25, 0.5, 0.75, 1.0 };
    /* B + 1 bin edges; bin j covers [epochs[j], epochs[j + 1]). */
    double epochs[] = { 0.0, max_time / 4, max_time / 2, max_time, INFINITY };
    tsk_id_t sample_sets[n];
    tsk_size_t sample_set_sizes[P];
    tsk_id_t index_tuples[2 * I];
    tsk_id_t node_bin_map[N];
    tsk_id_t node_bin_map_empty[N];
    tsk_id_t node_bin_map_shuff[N];
    tsk_size_t dim = T * Q * I;
    double C[dim];
    double saved_time;
    tsk_size_t i, j, k;

    for (i = 0; i < N; i++) {
        /* Default to "unassigned" so the entry is never read uninitialised if
         * the node's time falls outside every epoch. */
        node_bin_map[i] = TSK_NULL;
        node_bin_map_empty[i] = TSK_NULL;
        node_bin_map_shuff[i] = (tsk_id_t) (i % B);
        for (j = 0; j < B; j++) {
            if (nodes_time[i] >= epochs[j] && nodes_time[i] < epochs[j + 1]) {
                node_bin_map[i] = (tsk_id_t) j;
            }
        }
    }
    for (i = 0; i < n; i++) {
        sample_sets[i] = samples[i];
    }
    for (i = 0; i < P; i++) {
        sample_set_sizes[i] = 0;
    }
    /* Split the samples into P contiguous sets. The divisor is rounded up so
     * the set index stays below P when n is not a multiple of P, and is never
     * zero when n < P (same partition as the coalescence counts test). */
    for (j = 0; j < n; j++) {
        i = j / ((n + P - 1) / P);
        sample_set_sizes[i]++;
    }
    /* All index pairs (j, k) with j <= k. */
    for (j = 0, i = 0; j < P; j++) {
        for (k = j; k < P; k++) {
            index_tuples[i++] = (tsk_id_t) j;
            index_tuples[i++] = (tsk_id_t) k;
        }
    }

    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Last quantile below 1.0. */
    quantiles[Q - 1] = 0.9;
    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    quantiles[Q - 1] = 1.0;

    /* No node assigned to any bin. */
    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map_empty, Q, quantiles, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* cover errors */
    /* Quantile below 0. */
    quantiles[0] = -1.0;
    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_QUANTILES);
    quantiles[0] = 0.0;

    /* Quantile above 1. */
    quantiles[Q - 1] = 2.0;
    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_QUANTILES);
    quantiles[Q - 1] = 1.0;

    /* Quantiles out of order. */
    quantiles[1] = 0.0;
    quantiles[0] = 0.25;
    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_QUANTILES);
    quantiles[0] = 0.0;
    quantiles[1] = 0.25;

    /* Temporarily give the last node a negative time, using the modulo bin
     * map. The node table is shared with the caller, so put back exactly the
     * value that was there rather than an assumed one. */
    saved_time = ts->tables->nodes.time[N - 1];
    ts->tables->nodes.time[N - 1] = -1.0;
    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map_shuff, Q, quantiles, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_TIMES);
    ts->tables->nodes.time[N - 1] = saved_time;

    /* Bin index equal to the number of bins. */
    node_bin_map[0] = (tsk_id_t) B;
    ret = tsk_treeseq_pair_coalescence_quantiles(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, Q, quantiles, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP_DIM);
    node_bin_map[0] = 0;
}
/* Check coalescence rates: runs tsk_treeseq_pair_coalescence_rates with a
 * time-based node binning, with one node unassigned and with no node assigned,
 * then walks through the argument errors the function reports.
 */
static void
verify_pair_coalescence_rates(tsk_treeseq_t *ts)
{
    int ret;
    const tsk_size_t n = tsk_treeseq_get_num_samples(ts);
    const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);
    const tsk_size_t T = tsk_treeseq_get_num_trees(ts);
    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);
    const double *breakpoints = tsk_treeseq_get_breakpoints(ts);
    const double *nodes_time = ts->tables->nodes.time;
    const double max_time = ts->max_time;
    const tsk_size_t P = 2; /* number of sample sets */
    const tsk_size_t B = 5; /* number of time windows */
    const tsk_size_t I = P * (P + 1) / 2;
    /* B + 1 window edges; window j covers [epochs[j], epochs[j + 1]). */
    double epochs[]
        = { 0.0, max_time / 4, max_time / 2, max_time, max_time * 2, INFINITY };
    tsk_id_t sample_sets[n];
    tsk_size_t sample_set_sizes[P];
    tsk_id_t index_tuples[2 * I];
    tsk_id_t node_bin_map[N];
    tsk_id_t empty_node_bin_map[N];
    tsk_size_t dim = T * B * I;
    double C[dim];
    tsk_id_t saved_bin;
    tsk_size_t saved_size;
    tsk_size_t i, j, k;

    for (i = 0; i < N; i++) {
        node_bin_map[i] = TSK_NULL;
        for (j = 0; j < B; j++) {
            if (nodes_time[i] >= epochs[j] && nodes_time[i] < epochs[j + 1]) {
                node_bin_map[i] = (tsk_id_t) j;
            }
        }
        empty_node_bin_map[i] = TSK_NULL;
    }
    for (i = 0; i < n; i++) {
        sample_sets[i] = samples[i];
    }
    for (i = 0; i < P; i++) {
        sample_set_sizes[i] = 0;
    }
    /* Split the samples into P contiguous sets. The divisor is rounded up so
     * the set index stays below P when n is not a multiple of P, and is never
     * zero when n < P (same partition as the coalescence counts test). */
    for (j = 0; j < n; j++) {
        i = j / ((n + P - 1) / P);
        sample_set_sizes[i]++;
    }
    /* All index pairs (j, k) with j <= k. */
    for (j = 0, i = 0; j < P; j++) {
        for (k = j; k < P; k++) {
            index_tuples[i++] = (tsk_id_t) j;
            index_tuples[i++] = (tsk_id_t) k;
        }
    }

    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Node 0 unassigned. */
    node_bin_map[0] = TSK_NULL;
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    node_bin_map[0] = 0;

    /* No node assigned to any window. */
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, empty_node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* cover errors */
    /* Zero time windows. */
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, 0, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS_DIM);

    /* First window edge moved above zero. */
    epochs[0] = epochs[1] / 2;
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_PAIR_TIMES);
    epochs[0] = 0.0;

    /* Two equal consecutive window edges. */
    epochs[2] = epochs[1];
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);
    epochs[2] = max_time / 2;

    /* Finite last window edge. */
    epochs[B] = DBL_MAX;
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS_END);
    epochs[B] = INFINITY;

    /* Window index equal to the number of windows. */
    node_bin_map[0] = (tsk_id_t) B;
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_BIN_MAP_DIM);

    /* Node 0 placed in the last window. */
    node_bin_map[0] = (tsk_id_t) (B - 1);
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_WINDOW);
    node_bin_map[0] = 0;

    /* Last node placed in the first window; restore the window computed above
     * rather than a hard-coded index. */
    saved_bin = node_bin_map[N - 1];
    node_bin_map[N - 1] = 0;
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_WINDOW);
    node_bin_map[N - 1] = saved_bin;

    /* Empty first sample set. */
    saved_size = sample_set_sizes[0];
    sample_set_sizes[0] = 0;
    ret = tsk_treeseq_pair_coalescence_rates(ts, P, sample_set_sizes, sample_sets, I,
        index_tuples, T, breakpoints, B, node_bin_map, epochs, 0, C);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);
    sample_set_sizes[0] = saved_size;
}
/* State shared with general_stat_error through its `params` pointer; it
 * selects which invocation of the summary function reports a failure. */
typedef struct {
/* Number of invocations so far; general_stat_error increments it on every call. */
int call_count;
/* Value of call_count at which general_stat_error returns error_code. */
int error_on;
/* Value returned by general_stat_error on the selected invocation. */
int error_code;
} general_stat_error_params_t;
/* Summary function that writes 0 to its single output and fails on demand.
 * `params` points to a general_stat_error_params_t: the call whose index
 * (call_count before incrementing) equals error_on returns error_code, every
 * other call returns 0. The call counter is advanced on every invocation.
 */
static int
general_stat_error(tsk_size_t TSK_UNUSED(K), const double *TSK_UNUSED(X), tsk_size_t M,
    double *Y, void *params)
{
    general_stat_error_params_t *state = (general_stat_error_params_t *) params;
    int status;

    CU_ASSERT_FATAL(M == 1);
    Y[0] = 0;
    status = (state->call_count == state->error_on) ? state->error_code : 0;
    state->call_count++;
    return status;
}
/* Checks that tsk_treeseq_general_stat rejects malformed window specifications
 * in the given statistic `mode`. The summary function is passed NULL params;
 * every call here is expected to fail on the windows argument.
 */
static void
verify_window_errors(tsk_treeseq_t *ts, tsk_flags_t mode)
{
    int ret;
    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    double *W = tsk_calloc(num_samples, sizeof(double));
    /* node mode requires this much space at least */
    double *sigma = tsk_calloc(tsk_treeseq_get_num_nodes(ts), sizeof(double));
    double windows[] = { 0, 0, 0 };
    tsk_flags_t options = mode;

    /* Stop immediately if either buffer could not be allocated. */
    CU_ASSERT_FATAL(W != NULL);
    CU_ASSERT_FATAL(sigma != NULL);

    /* Window errors */
    /* A windows array with zero windows. */
    ret = tsk_treeseq_general_stat(
        ts, 1, W, 1, general_stat_error, NULL, 0, windows, options, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);

    /* windows = { 0, 0, 0 } */
    ret = tsk_treeseq_general_stat(
        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* windows = { -1, 0, 0 } */
    windows[0] = -1;
    ret = tsk_treeseq_general_stat(
        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* windows = { -1, -1, 0 }, interpreted as a single window */
    windows[1] = -1;
    ret = tsk_treeseq_general_stat(
        ts, 1, W, 1, general_stat_error, NULL, 1, windows, options, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* windows = { 10, -1, 0 } */
    windows[0] = 10;
    ret = tsk_treeseq_general_stat(
        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* windows = { 0, -1, sequence_length + 1 } */
    windows[0] = 0;
    windows[2] = tsk_treeseq_get_sequence_length(ts) + 1;
    ret = tsk_treeseq_general_stat(
        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* windows = { 0, -1, sequence_length } */
    windows[0] = 0;
    windows[1] = -1;
    windows[2] = tsk_treeseq_get_sequence_length(ts);
    ret = tsk_treeseq_general_stat(
        ts, 1, W, 1, general_stat_error, NULL, 2, windows, options, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    free(W);
    free(sigma);
}
/* Checks that an error returned by the summary function is propagated by
 * tsk_treeseq_general_stat in the given statistic `mode`, both with and without
 * TSK_STAT_POLARISED.
 *
 * For j = 1, 2, ... the summary function is told to fail on its call number j
 * (counting from 0) with code -j, and that code must come back from the
 * statistic. The scan ends at the first j for which the whole computation
 * succeeds, i.e. no call with that index returned the error.
 */
static void
verify_summary_func_errors(tsk_treeseq_t *ts, tsk_flags_t mode)
{
    int ret;
    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    double *W = tsk_calloc(num_samples, sizeof(double));
    /* We need this much space for NODE mode */
    double *sigma = tsk_calloc(tsk_treeseq_get_num_nodes(ts), sizeof(double));
    const tsk_flags_t option_sets[] = { TSK_STAT_POLARISED | mode, mode };
    general_stat_error_params_t params;
    tsk_size_t k;
    int j;

    CU_ASSERT_FATAL(W != NULL);
    CU_ASSERT_FATAL(sigma != NULL);

    /* Errors in the summary function */
    for (k = 0; k < sizeof(option_sets) / sizeof(*option_sets); k++) {
        j = 1;
        while (true) {
            params.call_count = 0;
            params.error_on = j;
            params.error_code = -j;
            ret = tsk_treeseq_general_stat(ts, 1, W, 1, general_stat_error, &params, 0,
                NULL, option_sets[k], sigma);
            if (ret == 0) {
                break;
            }
            CU_ASSERT_EQUAL_FATAL(ret, params.error_code);
            j++;
        }
        /* At least one injected error must have been observed. */
        CU_ASSERT_FATAL(j > 1);
    }

    free(W);
    free(sigma);
}
/* Runs the summary-function and window error checks in branch mode. */
static void
verify_branch_general_stat_errors(tsk_treeseq_t *ts)
{
    const tsk_flags_t mode = TSK_STAT_BRANCH;

    verify_summary_func_errors(ts, mode);
    verify_window_errors(ts, mode);
}
/* Runs the window and summary-function error checks in site mode. */
static void
verify_site_general_stat_errors(tsk_treeseq_t *ts)
{
    const tsk_flags_t mode = TSK_STAT_SITE;

    verify_window_errors(ts, mode);
    verify_summary_func_errors(ts, mode);
}
/* Runs the window and summary-function error checks in node mode. */
static void
verify_node_general_stat_errors(tsk_treeseq_t *ts)
{
    const tsk_flags_t mode = TSK_STAT_NODE;

    verify_window_errors(ts, mode);
    verify_summary_func_errors(ts, mode);
}
/* Checks the argument errors of a one-way weighted statistic `method`: zero
 * weight columns and a window list that is not increasing.
 */
static void
verify_one_way_weighted_func_errors(tsk_treeseq_t *ts, one_way_weighted_method *method)
{
    int ret;
    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    double *weights = tsk_malloc(num_samples * sizeof(double));
    double bad_windows[] = { 0, -1 };
    double result;
    tsk_size_t j;

    /* Stop here rather than write through a NULL pointer below. */
    CU_ASSERT_FATAL(weights != NULL);
    for (j = 0; j < num_samples; j++) {
        weights[j] = 1.0;
    }

    ret = method(ts, 0, weights, 0, NULL, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_WEIGHTS);
    ret = method(ts, 1, weights, 1, bad_windows, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    free(weights);
}
/* Checks the argument errors of a one-way weighted statistic with covariates:
 * zero weight columns and a window list that is not increasing. No covariates
 * are supplied (a NULL array with a count of zero).
 */
static void
verify_one_way_weighted_covariate_func_errors(
    tsk_treeseq_t *ts, one_way_covariates_method *method)
{
    int ret;
    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    double *weights = tsk_malloc(num_samples * sizeof(double));
    double *covariates = NULL;
    double bad_windows[] = { 0, -1 };
    double result;
    tsk_size_t j;

    /* Stop here rather than write through a NULL pointer below. */
    CU_ASSERT_FATAL(weights != NULL);
    for (j = 0; j < num_samples; j++) {
        weights[j] = 1.0;
    }

    ret = method(ts, 0, weights, 0, covariates, 0, NULL, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_WEIGHTS);
    ret = method(ts, 1, weights, 0, covariates, 1, bad_windows, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    free(weights);
}
/* Drives a one-way sample-set statistic `method` through its argument checks:
 * no sample sets, invalid or duplicated node IDs in the sample set, an empty
 * sample set, and malformed windows. A single sample set of four nodes
 * { 0, 1, 2, 3 } is used, with only its first entry perturbed.
 */
static void
verify_one_way_stat_func_errors(tsk_treeseq_t *ts, one_way_sample_stat_method *method)
{
    const tsk_id_t num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(ts);
    tsk_id_t nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_size = 4;
    double zero_windows[] = { 0, 0, 0 };
    double stat;
    int ret;

    /* Zero sample sets. */
    ret = method(ts, 0, &set_size, nodes, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);

    /* Node IDs outside [0, num_nodes). */
    nodes[0] = TSK_NULL;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    nodes[0] = -10;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    nodes[0] = num_nodes;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    nodes[0] = num_nodes + 1;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* The last node ID is in range but is rejected as a sample. */
    nodes[0] = num_nodes - 1;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLES);

    /* Node 1 listed twice. */
    nodes[0] = 1;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);

    /* Valid nodes again, but a sample set of size zero. */
    nodes[0] = 0;
    set_size = 0;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);
    set_size = 4;

    /* Window errors */
    ret = method(ts, 1, &set_size, nodes, 0, zero_windows, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);
    ret = method(ts, 1, &set_size, nodes, 2, zero_windows, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);
}
/* Temporary definition for time_windows in tsk_treeseq_allele_frequency_spectrum:
 * the signature of a one-way sample-set statistic that takes a time-window
 * list (num_time_windows, time_windows) in addition to the genomic windows. */
typedef int one_way_sample_stat_method_tw(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,
tsk_size_t num_time_windows, const double *time_windows, tsk_flags_t options,
double *result);
/* Temporary duplicate of verify_one_way_stat_func_errors for methods that also
 * take time windows. Performs the same sample-set and window checks with no
 * time windows supplied, then checks the errors reported for malformed time
 * windows.
 */
static void
verify_one_way_stat_func_errors_tw(
    tsk_treeseq_t *ts, one_way_sample_stat_method_tw *method)
{
    const tsk_id_t num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(ts);
    tsk_id_t nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_size = 4;
    double zero_windows[] = { 0, 0, 0 };
    double time_windows[] = { -1, 0.5, INFINITY };
    double stat;
    int ret;

    /* Zero sample sets. */
    ret = method(ts, 0, &set_size, nodes, 0, NULL, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);

    /* Node IDs outside [0, num_nodes). */
    nodes[0] = TSK_NULL;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    nodes[0] = -10;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    nodes[0] = num_nodes;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    nodes[0] = num_nodes + 1;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* The last node ID is in range but is rejected as a sample. */
    nodes[0] = num_nodes - 1;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLES);

    /* Node 1 listed twice. */
    nodes[0] = 1;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);

    /* Valid nodes again, but a sample set of size zero. */
    nodes[0] = 0;
    set_size = 0;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);
    set_size = 4;

    /* Window errors */
    ret = method(ts, 1, &set_size, nodes, 0, zero_windows, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);
    ret = method(ts, 1, &set_size, nodes, 2, zero_windows, 0, NULL, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* Time window errors */
    /* A time-window array with a count of zero. */
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 0, time_windows, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS_DIM);

    /* time_windows = { -1, 0.5, INFINITY } */
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 2, time_windows, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);

    /* time_windows = { 0.1, 0.5, INFINITY } */
    time_windows[0] = 0.1;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 2, time_windows, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);

    /* time_windows = { 0, 0, INFINITY } */
    time_windows[0] = 0;
    time_windows[1] = 0;
    ret = method(ts, 1, &set_size, nodes, 0, NULL, 2, time_windows, 0, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TIME_WINDOWS);
}
/* Checks the argument errors of a two-way sample-set statistic `method` in site
 * mode (combined with the caller's `options`): no sample sets, no index
 * tuples, and set indexes below zero or beyond the two available sets.
 */
static void
verify_two_way_stat_func_errors(
    tsk_treeseq_t *ts, general_sample_stat_method *method, tsk_flags_t options)
{
    const tsk_flags_t site_options = options | TSK_STAT_SITE;
    tsk_id_t nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_sizes[] = { 2, 2 };
    tsk_id_t pair[] = { 0, 1 };
    double stat;
    int ret;

    /* Zero sample sets. */
    ret = method(ts, 0, set_sizes, nodes, 1, pair, 0, NULL, site_options, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);

    /* Zero index tuples. */
    ret = method(ts, 2, set_sizes, nodes, 0, pair, 0, NULL, site_options, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);

    /* Negative set index. */
    pair[0] = -1;
    ret = method(ts, 2, set_sizes, nodes, 1, pair, 0, NULL, site_options, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);

    /* Set index equal to the number of sets. */
    pair[0] = 0;
    pair[1] = 2;
    ret = method(ts, 2, set_sizes, nodes, 1, pair, 0, NULL, site_options, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);
}
/* Checks a two-way weighted statistic `method`: one successful call with
 * all-zero weights, then the errors for conflicting statistic modes, zero
 * weight columns and malformed windows.
 */
static void
verify_two_way_weighted_stat_func_errors(
    tsk_treeseq_t *ts, two_way_weighted_method *method, tsk_flags_t options)
{
    tsk_id_t pairs[] = { 0, 0, 0, 1 };
    double negative_windows[] = { -1, -1 };
    double weights[10];
    double result[10];
    int ret;

    memset(weights, 0, sizeof(weights));

    /* Baseline: two weight columns, two index pairs, no windows. */
    ret = method(ts, 2, weights, 2, pairs, 0, NULL, result, options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Site and node mode requested together. */
    ret = method(ts, 2, weights, 2, pairs, 0, NULL, result,
        options | TSK_STAT_SITE | TSK_STAT_NODE);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);

    /* Zero weight columns. */
    ret = method(ts, 0, weights, 2, pairs, 0, NULL, result, options);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_WEIGHTS);

    /* A single window of { -1, -1 }. */
    ret = method(ts, 2, weights, 2, pairs, 1, negative_windows, result, options);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);
}
/* Checks the argument errors of a three-way sample-set statistic `method` in
 * site mode: no sample sets, no index tuples, and set indexes below zero or
 * beyond the three available sets.
 */
static void
verify_three_way_stat_func_errors(tsk_treeseq_t *ts, general_sample_stat_method *method)
{
    tsk_id_t nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_sizes[] = { 1, 1, 2 };
    tsk_id_t triple[] = { 0, 1, 2 };
    double stat;
    int ret;

    /* Zero sample sets. */
    ret = method(ts, 0, set_sizes, nodes, 1, triple, 0, NULL, TSK_STAT_SITE, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);

    /* Zero index tuples. */
    ret = method(ts, 3, set_sizes, nodes, 0, triple, 0, NULL, TSK_STAT_SITE, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);

    /* Negative set index. */
    triple[0] = -1;
    ret = method(ts, 3, set_sizes, nodes, 1, triple, 0, NULL, TSK_STAT_SITE, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);

    /* Set index equal to the number of sets. */
    triple[0] = 0;
    triple[1] = 3;
    ret = method(ts, 3, set_sizes, nodes, 1, triple, 0, NULL, TSK_STAT_SITE, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);
}
/* Checks the argument errors of a four-way sample-set statistic `method` in
 * site mode: no sample sets, no index tuples, and set indexes below zero or
 * beyond the four available sets.
 */
static void
verify_four_way_stat_func_errors(tsk_treeseq_t *ts, general_sample_stat_method *method)
{
    tsk_id_t nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_sizes[] = { 1, 1, 1, 1 };
    tsk_id_t quad[] = { 0, 1, 2, 3 };
    double stat;
    int ret;

    /* Zero sample sets. */
    ret = method(ts, 0, set_sizes, nodes, 1, quad, 0, NULL, TSK_STAT_SITE, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);

    /* Zero index tuples. */
    ret = method(ts, 4, set_sizes, nodes, 0, quad, 0, NULL, TSK_STAT_SITE, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);

    /* Negative set index. */
    quad[0] = -1;
    ret = method(ts, 4, set_sizes, nodes, 1, quad, 0, NULL, TSK_STAT_SITE, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);

    /* Set index equal to the number of sets. */
    quad[0] = 0;
    quad[1] = 4;
    ret = method(ts, 4, set_sizes, nodes, 1, quad, 0, NULL, TSK_STAT_SITE, &stat);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);
}
/* Summary function that copies the K input values in X straight into Y.
 * Asserts that the output dimension M equals K and that no params were supplied.
 * Always returns 0. */
static int
general_stat_identity(
    tsk_size_t K, const double *restrict X, tsk_size_t M, double *Y, void *params)
{
    tsk_size_t idx = 0;

    CU_ASSERT_FATAL(M == K);
    CU_ASSERT_FATAL(params == NULL);

    while (idx < K) {
        Y[idx] = X[idx];
        idx++;
    }
    return 0;
}
/* Cross-checks the branch-mode general statistic against a direct computation.
 *
 * tsk_treeseq_general_stat is run with a single weight of 1 per sample, the identity
 * summary function, one window per tree (the tree sequence breakpoints) and the
 * BRANCH | POLARISED | SPAN_NORMALISE options. For every tree the value is then
 * compared with the sum over the tree's preorder nodes of
 * branch_length * num_samples. */
static void
verify_branch_general_stat_identity(tsk_treeseq_t *ts)
{
    CU_ASSERT_FATAL(ts != NULL);

    int ret;
    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    double *W = tsk_malloc(num_samples * sizeof(double));
    tsk_id_t *nodes = tsk_malloc(tsk_treeseq_get_num_nodes(ts) * sizeof(*nodes));
    tsk_id_t u;
    tsk_size_t num_nodes;
    double s, branch_length;
    double *sigma = tsk_malloc(tsk_treeseq_get_num_trees(ts) * sizeof(*sigma));
    tsk_tree_t tree;
    tsk_size_t j;

    /* Every buffer is dereferenced below, so all three allocations must succeed. */
    CU_ASSERT_FATAL(W != NULL);
    CU_ASSERT_FATAL(nodes != NULL);
    CU_ASSERT_FATAL(sigma != NULL);

    for (j = 0; j < num_samples; j++) {
        W[j] = 1;
    }

    ret = tsk_treeseq_general_stat(ts, 1, W, 1, general_stat_identity, NULL,
        tsk_treeseq_get_num_trees(ts), tsk_treeseq_get_breakpoints(ts),
        TSK_STAT_BRANCH | TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Fatal: iterating a tree that failed to initialise would be meaningless. */
    ret = tsk_tree_init(&tree, ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) {
        ret = tsk_tree_preorder(&tree, nodes, &num_nodes);
        CU_ASSERT_EQUAL_FATAL(ret, 0);

        s = 0;
        for (j = 0; j < num_nodes; j++) {
            u = nodes[j];
            ret = tsk_tree_get_branch_length(&tree, u, &branch_length);
            CU_ASSERT_EQUAL_FATAL(ret, 0);
            s += branch_length * (double) tree.num_samples[u];
        }
        CU_ASSERT_DOUBLE_EQUAL_FATAL(sigma[tree.index], s, 1e-6);
    }
    /* The loop must have ended by running out of trees, not on an error code. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    free(nodes);
    tsk_tree_free(&tree);
    free(W);
    free(sigma);
}
/* Summary function that writes the sum of the K input values in X into each of the
 * M output slots of Y. Asserts that no params were supplied. Always returns 0. */
static int
general_stat_sum(
    tsk_size_t K, const double *restrict X, tsk_size_t M, double *Y, void *params)
{
    double total = 0;
    tsk_size_t in_idx, out_idx;

    CU_ASSERT_FATAL(params == NULL);

    for (in_idx = 0; in_idx < K; in_idx++) {
        total += X[in_idx];
    }
    for (out_idx = 0; out_idx < M; out_idx++) {
        Y[out_idx] = total;
    }
    return 0;
}
/* Runs tsk_treeseq_general_stat with K weights per sample (all set to 1), an
 * M-dimensional result, the general_stat_sum summary function and no windows, and
 * checks that the call succeeds for the given options. */
static void
verify_general_stat_dims(
    tsk_treeseq_t *ts, tsk_size_t K, tsk_size_t M, tsk_flags_t options)
{
    int ret;
    tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    double *W = tsk_malloc(K * num_samples * sizeof(double));
    /* We need this much space for NODE mode; no harm for other modes. */
    double *sigma = tsk_calloc(tsk_treeseq_get_num_nodes(ts) * M, sizeof(double));
    tsk_size_t j, k;

    /* Both buffers are handed to the library, so neither may be NULL. */
    CU_ASSERT_FATAL(W != NULL);
    CU_ASSERT_FATAL(sigma != NULL);

    for (j = 0; j < num_samples; j++) {
        for (k = 0; k < K; k++) {
            W[j * K + k] = 1;
        }
    }

    ret = tsk_treeseq_general_stat(
        ts, K, W, M, general_stat_sum, NULL, 0, NULL, options, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    free(W);
    free(sigma);
}
/* Runs tsk_treeseq_general_stat over num_windows equal-width windows spanning
 * [0, sequence_length], with one unit weight per sample and a 5-dimensional result,
 * and checks that the call succeeds for the given options. */
static void
verify_general_stat_windows(
    tsk_treeseq_t *ts, tsk_size_t num_windows, tsk_flags_t options)
{
    int err;
    tsk_size_t sample_count = tsk_treeseq_get_num_samples(ts);
    double *unit_weights = tsk_malloc(sample_count * sizeof(double));
    tsk_size_t result_dim = 5;
    /* Sized for NODE mode, which needs the most space; harmless for other modes. */
    double *out = tsk_calloc(
        result_dim * tsk_treeseq_get_num_nodes(ts) * num_windows, sizeof(double));
    double *breaks = tsk_malloc((num_windows + 1) * sizeof(*breaks));
    double seq_len = tsk_treeseq_get_sequence_length(ts);
    tsk_size_t i;

    CU_ASSERT_FATAL(unit_weights != NULL);
    CU_ASSERT_FATAL(out != NULL);
    CU_ASSERT_FATAL(breaks != NULL);

    for (i = 0; i < sample_count; i++) {
        unit_weights[i] = 1;
    }

    /* Pin both ends first, then fill the interior break points. */
    breaks[0] = 0;
    breaks[num_windows] = seq_len;
    for (i = 1; i < num_windows; i++) {
        breaks[i] = ((double) i) * seq_len / (double) num_windows;
    }

    err = tsk_treeseq_general_stat(ts, 1, unit_weights, result_dim, general_stat_sum,
        NULL, num_windows, breaks, options, out);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    free(unit_weights);
    free(out);
    free(breaks);
}
/* Checks that calling tsk_treeseq_general_stat with options 0 produces exactly the
 * same value as calling it with TSK_STAT_SITE. */
static void
verify_default_general_stat(tsk_treeseq_t *ts)
{
    int err;
    tsk_size_t state_dim = 2;
    tsk_size_t result_dim = 1;
    tsk_size_t sample_count = tsk_treeseq_get_num_samples(ts);
    tsk_size_t total_weights = state_dim * sample_count;
    double *unit_weights = tsk_malloc(state_dim * sample_count * sizeof(double));
    double with_site_flag, with_no_flags;
    tsk_size_t i;

    CU_ASSERT_FATAL(unit_weights != NULL);

    /* Every entry of the weight matrix is 1, so a flat fill is enough. */
    for (i = 0; i < total_weights; i++) {
        unit_weights[i] = 1;
    }

    err = tsk_treeseq_general_stat(ts, state_dim, unit_weights, result_dim,
        general_stat_sum, NULL, 0, NULL, TSK_STAT_SITE, &with_site_flag);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    err = tsk_treeseq_general_stat(ts, state_dim, unit_weights, result_dim,
        general_stat_sum, NULL, 0, NULL, 0, &with_no_flags);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    CU_ASSERT_EQUAL_FATAL(with_site_flag, with_no_flags);
    free(unit_weights);
}
/* Exercises the general statistic in the given mode across a fixed set of
 * (state, result) dimensions, each with and without TSK_STAT_POLARISED, and across a
 * fixed set of window counts, each with and without TSK_STAT_SPAN_NORMALISE. */
static void
verify_general_stat(tsk_treeseq_t *ts, tsk_flags_t mode)
{
    /* { K, M } pairs handed to verify_general_stat_dims. */
    const tsk_size_t dims[][2] = { { 4, 2 }, { 1, 20 }, { 100, 1 }, { 10, 12 } };
    const tsk_size_t window_counts[] = { 1, 2, 3, 10, 100 };
    tsk_size_t j;

    CU_ASSERT_FATAL(ts != NULL);

    for (j = 0; j < sizeof(dims) / sizeof(*dims); j++) {
        verify_general_stat_dims(ts, dims[j][0], dims[j][1], mode);
        verify_general_stat_dims(ts, dims[j][0], dims[j][1], mode | TSK_STAT_POLARISED);
    }

    for (j = 0; j < sizeof(window_counts) / sizeof(*window_counts); j++) {
        verify_general_stat_windows(ts, window_counts[j], mode);
        verify_general_stat_windows(
            ts, window_counts[j], mode | TSK_STAT_SPAN_NORMALISE);
    }
}
/* Smoke test for tsk_treeseq_allele_frequency_spectrum.
 *
 * The samples of ts are split into two sample sets of sizes n - 2 and 2, and the AFS
 * is computed under several option combinations (default, polarised, span
 * normalised, branch mode) plus one branch-mode call that passes time_windows with a
 * count of 1. Only the return code of each call is checked.
 *
 * NOTE(review): n - 2 is unsigned, so this assumes ts has at least two samples. */
static void
verify_afs(tsk_treeseq_t *ts)
{
    int ret;
    tsk_size_t n = tsk_treeseq_get_num_samples(ts);
    tsk_size_t sample_set_sizes[2];
    double time_windows[] = { 0, 1 };
    const tsk_id_t *samples = tsk_treeseq_get_samples(ts);
    double *result = tsk_malloc(n * n * sizeof(*result));

    /* The heap buffer is what can fail to allocate; sample_set_sizes is a local
     * array and can never be NULL. */
    CU_ASSERT_FATAL(result != NULL);

    sample_set_sizes[0] = n - 2;
    sample_set_sizes[1] = 2;

    ret = tsk_treeseq_allele_frequency_spectrum(
        ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_treeseq_allele_frequency_spectrum(
        ts, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,
        NULL, 0, NULL, TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,
        NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_POLARISED | TSK_STAT_SPAN_NORMALISE,
        result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,
        NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_treeseq_allele_frequency_spectrum(ts, 2, sample_set_sizes, samples, 0,
        NULL, 1, time_windows, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    free(result);
}
static void
test_general_stat_input_errors(void)
{
tsk_treeseq_t ts;
double result;
double W;
int ret;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
/* Bad input dimensions */
ret = tsk_treeseq_general_stat(
&ts, 0, &W, 1, general_stat_sum, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_STATE_DIMS);
ret = tsk_treeseq_general_stat(
&ts, 1, &W, 0, general_stat_sum, NULL, 0, NULL, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_RESULT_DIMS);
/* Multiple stats*/
ret = tsk_treeseq_general_stat(&ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL,
TSK_STAT_SITE | TSK_STAT_BRANCH, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);
ret = tsk_treeseq_general_stat(&ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL,
TSK_STAT_SITE | TSK_STAT_NODE, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);
ret = tsk_treeseq_general_stat(&ts, 1, &W, 1, general_stat_sum, NULL, 0, NULL,
TSK_STAT_BRANCH | TSK_STAT_NODE, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);
tsk_treeseq_free(&ts);
}
/* Runs the LD checks on a tree sequence built from the single-tree example nodes
 * with an empty edge string and no sites or mutations. */
static void
test_empty_ts_ld(void)
{
    tsk_treeseq_t empty_ts;

    tsk_treeseq_from_text(
        &empty_ts, 1, single_tree_ex_nodes, "", NULL, NULL, NULL, NULL, NULL, 0);
    verify_ld(&empty_ts);
    tsk_treeseq_free(&empty_ts);
}
/* Runs the mean-descendants checks on a tree sequence built from the single-tree
 * example nodes with an empty edge string and no sites or mutations. */
static void
test_empty_ts_mean_descendants(void)
{
    tsk_treeseq_t empty_ts;

    tsk_treeseq_from_text(
        &empty_ts, 1, single_tree_ex_nodes, "", NULL, NULL, NULL, NULL, NULL, 0);
    verify_mean_descendants(&empty_ts);
    tsk_treeseq_free(&empty_ts);
}
/* Runs the genealogical-nearest-neighbours checks on a tree sequence built from the
 * single-tree example nodes with an empty edge string and no sites or mutations. */
static void
test_empty_ts_genealogical_nearest_neighbours(void)
{
    tsk_treeseq_t empty_ts;

    tsk_treeseq_from_text(
        &empty_ts, 1, single_tree_ex_nodes, "", NULL, NULL, NULL, NULL, NULL, 0);
    verify_genealogical_nearest_neighbours(&empty_ts);
    tsk_treeseq_free(&empty_ts);
}
static void
test_empty_ts_general_stat(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(
&ts, 1, single_tree_ex_nodes, "", NULL, NULL, NULL, NULL, NULL, 0);
verify_branch_general_stat_identity(&ts);
verify_default_general_stat(&ts);
verify_general_stat(&ts, TSK_STAT_BRANCH);
verify_general_stat(&ts, TSK_STAT_SITE);
verify_general_stat(&ts, TSK_STAT_NODE);
tsk_treeseq_free(&ts);
}
/* Runs the allele-frequency-spectrum checks on a tree sequence built from the
 * single-tree example nodes with an empty edge string and no sites or mutations. */
static void
test_empty_ts_afs(void)
{
    tsk_treeseq_t empty_ts;

    tsk_treeseq_from_text(
        &empty_ts, 1, single_tree_ex_nodes, "", NULL, NULL, NULL, NULL, NULL, 0);
    verify_afs(&empty_ts);
    tsk_treeseq_free(&empty_ts);
}
/* Runs the LD checks on the single-tree example (nodes, edges, sites, mutations). */
static void
test_single_tree_ld(void)
{
    tsk_treeseq_t single_tree;

    tsk_treeseq_from_text(&single_tree, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    verify_ld(&single_tree);
    tsk_treeseq_free(&single_tree);
}
/* Runs the mean-descendants checks on the single-tree example. */
static void
test_single_tree_mean_descendants(void)
{
    tsk_treeseq_t single_tree;

    tsk_treeseq_from_text(&single_tree, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    verify_mean_descendants(&single_tree);
    tsk_treeseq_free(&single_tree);
}
/* Runs the genealogical-nearest-neighbours checks on the single-tree example. */
static void
test_single_tree_genealogical_nearest_neighbours(void)
{
    tsk_treeseq_t single_tree;

    tsk_treeseq_from_text(&single_tree, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    verify_genealogical_nearest_neighbours(&single_tree);
    tsk_treeseq_free(&single_tree);
}
static void
test_single_tree_general_stat(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
verify_branch_general_stat_identity(&ts);
verify_default_general_stat(&ts);
verify_general_stat(&ts, TSK_STAT_BRANCH);
verify_general_stat(&ts, TSK_STAT_SITE);
verify_general_stat(&ts, TSK_STAT_NODE);
tsk_treeseq_free(&ts);
}
/* Runs the branch, site and node general-stat error checks on the single-tree
 * example. */
static void
test_single_tree_general_stat_errors(void)
{
    tsk_treeseq_t single_tree;

    tsk_treeseq_from_text(&single_tree, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);

    verify_branch_general_stat_errors(&single_tree);
    verify_site_general_stat_errors(&single_tree);
    verify_node_general_stat_errors(&single_tree);

    tsk_treeseq_free(&single_tree);
}
static void
test_single_tree_divergence_matrix(void)
{
tsk_treeseq_t ts;
int ret;
double result[16];
double D_branch[16] = { 0, 2, 6, 6, 2, 0, 6, 6, 6, 6, 0, 4, 6, 6, 4, 0 };
double D_site[16] = { 0, 1, 1, 0, 1, 0, 2, 1, 1, 2, 0, 1, 0, 1, 1, 0 };
tsk_size_t sample_set_sizes[] = { 2, 2 };
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D_branch);
ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D_site);
ret = tsk_treeseq_divergence_matrix(
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_divergence_matrix(
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
sample_set_sizes[0] = 3;
sample_set_sizes[1] = 1;
ret = tsk_treeseq_divergence_matrix(
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_divergence_matrix(
&ts, 2, sample_set_sizes, NULL, 0, NULL, TSK_STAT_SITE, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* assert_arrays_almost_equal(4, result, D_site); */
verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_SITE);
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
tsk_treeseq_free(&ts);
}
/* Divergence-matrix test on a single tree in which an internal node (5) is a
 * sample and a leaf (3) is not.
 *
 * The same expected 4x4 matrix D must be produced in branch and site mode (there is
 * one mutation on every branch) whether the samples are taken by default, given as
 * four singleton sample sets with explicit sizes, or given with sizes set to NULL. */
static void
test_single_tree_divergence_matrix_internal_samples(void)
{
    tsk_treeseq_t ts;
    int ret;
    double *result = malloc(16 * sizeof(double));
    double D[16] = { 0, 2, 4, 3, 2, 0, 4, 3, 4, 4, 0, 1, 3, 3, 1, 0 };

    /* clang-format off */
    const char *nodes = "1 0 -1 -1\n"  /* 2.00┊    6    ┊ */
                        "1 0 -1 -1\n"  /*     ┊  ┏━┻━┓  ┊ */
                        "1 0 -1 -1\n"  /* 1.00┊  4   5* ┊ */
                        "0 0 -1 -1\n"  /*     ┊ ┏┻┓ ┏┻┓ ┊ */
                        "0 1 -1 -1\n"  /* 0.00┊ 0 1 2 3 ┊ */
                        "1 1 -1 -1\n"  /*     0 * * *   1 */
                        "0 2 -1 -1\n";
    /* clang-format on */
    const char *edges = "0 1 4 0,1\n"
                        "0 1 5 2,3\n"
                        "0 1 6 4,5\n";
    /* One mutation per branch so we get the same as the branch length value */
    const char *sites = "0.1 A\n"
                        "0.2 A\n"
                        "0.3 A\n"
                        "0.4 A\n"
                        "0.5 A\n"
                        "0.6 A\n";
    const char *mutations = "0 0 T -1\n"
                            "1 1 T -1\n"
                            "2 2 T -1\n"
                            "3 3 T -1\n"
                            "4 4 T -1\n"
                            "5 5 T -1\n";
    tsk_id_t samples[] = { 0, 1, 2, 5 };
    tsk_size_t sizes[] = { 1, 1, 1, 1 };

    /* The result buffer is written by every call below. */
    CU_ASSERT_FATAL(result != NULL);

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

    /* Default samples. */
    ret = tsk_treeseq_divergence_matrix(
        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(16, result, D);
    ret = tsk_treeseq_divergence_matrix(
        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(16, result, D);

    /* Four singleton sample sets with explicit sizes. */
    ret = tsk_treeseq_divergence_matrix(
        &ts, 4, sizes, samples, 0, NULL, TSK_STAT_BRANCH, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(16, result, D);
    ret = tsk_treeseq_divergence_matrix(
        &ts, 4, sizes, samples, 0, NULL, TSK_STAT_SITE, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(16, result, D);

    /* Same samples with the sizes array omitted. */
    ret = tsk_treeseq_divergence_matrix(
        &ts, 4, NULL, samples, 0, NULL, TSK_STAT_BRANCH, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(16, result, D);
    ret = tsk_treeseq_divergence_matrix(
        &ts, 4, NULL, samples, 0, NULL, TSK_STAT_SITE, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(16, result, D);

    verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
    verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
    verify_divergence_matrix(&ts, TSK_STAT_SITE);
    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);

    tsk_treeseq_free(&ts);
    free(result);
}
static void
test_single_tree_divergence_matrix_multi_root(void)
{
tsk_treeseq_t ts;
int ret;
double result[16];
double D_branch[16] = { 0, 2, 3, 3, 2, 0, 3, 3, 3, 3, 0, 4, 3, 3, 4, 0 };
const char *nodes = "1 0 -1 -1\n"
"1 0 -1 -1\n" /* 2.00┊ 5 ┊ */
"1 0 -1 -1\n" /* 1.00┊ 4 ┊ */
"1 0 -1 -1\n" /* ┊ ┏┻┓ ┏┻┓ ┊ */
"0 1 -1 -1\n" /* 0.00┊ 0 1 2 3 ┊ */
"0 2 -1 -1\n"; /* 0 * * * * 1 */
const char *edges = "0 1 4 0,1\n"
"0 1 5 2,3\n";
/* Two mutations per branch */
const char *sites = "0.1 A\n"
"0.2 A\n"
"0.3 A\n"
"0.4 A\n";
const char *mutations = "0 0 B -1\n"
"0 0 C 0\n"
"1 1 B -1\n"
"1 1 C 2\n"
"2 2 B -1\n"
"2 2 C 4\n"
"2 2 D 5\n"
"2 2 E 6\n"
"3 3 B -1\n"
"3 3 C 8\n"
"3 3 D 9\n"
"3 3 E 10\n";
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
ret = tsk_treeseq_divergence_matrix(
&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(16, result, D_branch);
verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);
verify_divergence_matrix(&ts, TSK_STAT_SITE);
verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);
tsk_treeseq_free(&ts);
}
/* Runs the generic LD checks on the paper example, then checks two corner cases of
 * tsk_ld_calc_get_r2_array: requesting at most one value forwards from site 0 and
 * backwards from site 2 must each succeed and report exactly one r2 value. */
static void
test_paper_ex_ld(void)
{
    tsk_treeseq_t paper_ts;
    tsk_ld_calc_t calc;
    tsk_size_t values_written;
    double r2_buffer[3];
    int err;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_ld(&paper_ts);

    /* Check early exit corner cases */
    err = tsk_ld_calc_init(&calc, &paper_ts);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    err = tsk_ld_calc_get_r2_array(
        &calc, 0, TSK_DIR_FORWARD, 1, DBL_MAX, r2_buffer, &values_written);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(values_written, 1);

    err = tsk_ld_calc_get_r2_array(
        &calc, 2, TSK_DIR_REVERSE, 1, DBL_MAX, r2_buffer, &values_written);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(values_written, 1);

    tsk_ld_calc_free(&calc);
    tsk_treeseq_free(&paper_ts);
}
/* Runs the mean-descendants checks on the paper example. */
static void
test_paper_ex_mean_descendants(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_mean_descendants(&paper_ts);
    tsk_treeseq_free(&paper_ts);
}
/* Runs the genealogical-nearest-neighbours checks on the paper example. */
static void
test_paper_ex_genealogical_nearest_neighbours(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_genealogical_nearest_neighbours(&paper_ts);
    tsk_treeseq_free(&paper_ts);
}
static void
test_paper_ex_general_stat(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_branch_general_stat_identity(&ts);
verify_default_general_stat(&ts);
verify_general_stat(&ts, TSK_STAT_BRANCH);
verify_general_stat(&ts, TSK_STAT_SITE);
verify_general_stat(&ts, TSK_STAT_NODE);
tsk_treeseq_free(&ts);
}
/* Runs the branch, site and node general-stat error checks on the paper example. */
static void
test_paper_ex_general_stat_errors(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    verify_branch_general_stat_errors(&paper_ts);
    verify_site_general_stat_errors(&paper_ts);
    verify_node_general_stat_errors(&paper_ts);

    tsk_treeseq_free(&paper_ts);
}
/* Runs the one-way statistic error checks against tsk_treeseq_diversity on the paper
 * example. */
static void
test_paper_ex_diversity_errors(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_one_way_stat_func_errors(&paper_ts, tsk_treeseq_diversity);
    tsk_treeseq_free(&paper_ts);
}
/* Checks site-mode diversity on the paper example: a single sample set holding
 * nodes 0..3 gives 1.5, and shrinking that set to one sample gives NaN. */
static void
test_paper_ex_diversity(void)
{
    tsk_treeseq_t paper_ts;
    tsk_id_t sample_nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_size = 4;
    double diversity;
    int err;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    err = tsk_treeseq_diversity(
        &paper_ts, 1, &set_size, sample_nodes, 0, NULL, TSK_STAT_SITE, &diversity);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(diversity, 1.5, 1e-6);

    /* A sample set size of 1 leads to NaN */
    set_size = 1;
    err = tsk_treeseq_diversity(
        &paper_ts, 1, &set_size, sample_nodes, 0, NULL, TSK_STAT_SITE, &diversity);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT(tsk_isnan(diversity));

    tsk_treeseq_free(&paper_ts);
}
/* Runs the one-way weighted statistic error checks against
 * tsk_treeseq_trait_covariance on the paper example. */
static void
test_paper_ex_trait_covariance_errors(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_one_way_weighted_func_errors(&paper_ts, tsk_treeseq_trait_covariance);
    tsk_treeseq_free(&paper_ts);
}
/* Checks site-mode trait covariance on the paper example: weights (0, 0, 1, 1)
 * give 1/12, and all-zero weights give 0. */
static void
test_paper_ex_trait_covariance(void)
{
    tsk_treeseq_t ts;
    double result;
    double *weights;
    tsk_size_t j;
    int ret;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);

    weights = tsk_malloc(4 * sizeof(*weights));
    /* The buffer is written immediately below, so the allocation must succeed. */
    CU_ASSERT_FATAL(weights != NULL);
    weights[0] = weights[1] = 0.0;
    weights[2] = weights[3] = 1.0;

    ret = tsk_treeseq_trait_covariance(&ts, 1, weights, 0, NULL, TSK_STAT_SITE, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 1.0 / 12.0, 1e-6);

    /* weights of 0 leads to 0 */
    for (j = 0; j < 4; j++) {
        weights[j] = 0.0;
    }
    ret = tsk_treeseq_trait_covariance(&ts, 1, weights, 0, NULL, TSK_STAT_SITE, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 0.0, 1e-6);

    tsk_treeseq_free(&ts);
    free(weights);
}
/* Runs the one-way weighted statistic error checks against
 * tsk_treeseq_trait_correlation on the paper example. */
static void
test_paper_ex_trait_correlation_errors(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_one_way_weighted_func_errors(&paper_ts, tsk_treeseq_trait_correlation);
    tsk_treeseq_free(&paper_ts);
}
/* Checks site-mode trait correlation on the paper example: weights (0, 0, 1, 1)
 * give 1.0. */
static void
test_paper_ex_trait_correlation(void)
{
    tsk_treeseq_t ts;
    double result;
    double *weights;
    int ret;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);

    weights = tsk_malloc(4 * sizeof(*weights));
    /* The buffer is written immediately below, so the allocation must succeed. */
    CU_ASSERT_FATAL(weights != NULL);
    weights[0] = weights[1] = 0.0;
    weights[2] = weights[3] = 1.0;

    ret = tsk_treeseq_trait_correlation(
        &ts, 1, weights, 0, NULL, TSK_STAT_SITE, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 1.0, 1e-6);

    tsk_treeseq_free(&ts);
    free(weights);
}
/* Runs the one-way weighted-with-covariates statistic error checks against
 * tsk_treeseq_trait_linear_model on the paper example. */
static void
test_paper_ex_trait_linear_model_errors(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_one_way_weighted_covariate_func_errors(
        &paper_ts, tsk_treeseq_trait_linear_model);
    tsk_treeseq_free(&paper_ts);
}
/* Checks the site-mode trait linear model on the paper example: with weights
 * (0, 0, 1, 1) and the two covariate columns filled in below, the result is 0. */
static void
test_paper_ex_trait_linear_model(void)
{
    tsk_treeseq_t ts;
    double result;
    double *weights;
    double *covariates;
    int ret;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);

    weights = tsk_malloc(4 * sizeof(*weights));
    covariates = tsk_malloc(8 * sizeof(*covariates));
    /* Both buffers are written immediately below, so both allocations must
     * succeed. */
    CU_ASSERT_FATAL(weights != NULL);
    CU_ASSERT_FATAL(covariates != NULL);

    weights[0] = weights[1] = 0.0;
    weights[2] = weights[3] = 1.0;
    covariates[0] = covariates[1] = 0.0;
    covariates[2] = covariates[3] = 1.0;
    covariates[4] = covariates[6] = 0.0;
    covariates[5] = covariates[7] = 1.0;

    ret = tsk_treeseq_trait_linear_model(
        &ts, 1, weights, 2, covariates, 0, NULL, TSK_STAT_SITE, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 0.0, 1e-6);

    tsk_treeseq_free(&ts);
    free(weights);
    free(covariates);
}
/* Runs the one-way statistic error checks against tsk_treeseq_segregating_sites on
 * the paper example. */
static void
test_paper_ex_segregating_sites_errors(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_one_way_stat_func_errors(&paper_ts, tsk_treeseq_segregating_sites);
    tsk_treeseq_free(&paper_ts);
}
/* Checks the site-mode segregating-sites statistic on the paper example: a single
 * sample set holding nodes 0..3 gives 3, and shrinking that set to one sample
 * gives 0. */
static void
test_paper_ex_segregating_sites(void)
{
    tsk_treeseq_t paper_ts;
    tsk_id_t sample_nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_size = 4;
    double seg_sites;
    int err;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    err = tsk_treeseq_segregating_sites(
        &paper_ts, 1, &set_size, sample_nodes, 0, NULL, TSK_STAT_SITE, &seg_sites);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(seg_sites, 3.0, 1e-6);

    /* A sample set size of 1 leads to 0 */
    set_size = 1;
    err = tsk_treeseq_segregating_sites(
        &paper_ts, 1, &set_size, sample_nodes, 0, NULL, TSK_STAT_SITE, &seg_sites);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(seg_sites, 0.0, 1e-6);

    tsk_treeseq_free(&paper_ts);
}
/* Runs the one-way statistic error checks against tsk_treeseq_Y1 on the paper
 * example. */
static void
test_paper_ex_Y1_errors(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_one_way_stat_func_errors(&paper_ts, tsk_treeseq_Y1);
    tsk_treeseq_free(&paper_ts);
}
/* Checks tsk_treeseq_Y1 on the paper example with options 0: the call succeeds for a
 * sample set holding nodes 0..3, and yields NaN once the set is shrunk to one
 * sample. */
static void
test_paper_ex_Y1(void)
{
    tsk_treeseq_t paper_ts;
    tsk_id_t sample_nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_size = 4;
    double y1;
    int err;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    err = tsk_treeseq_Y1(&paper_ts, 1, &set_size, sample_nodes, 0, NULL, 0, &y1);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* A sample set size of < 2 leads to NaN */
    set_size = 1;
    err = tsk_treeseq_Y1(&paper_ts, 1, &set_size, sample_nodes, 0, NULL, 0, &y1);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT(tsk_isnan(y1));

    tsk_treeseq_free(&paper_ts);
}
/* Runs the two-way statistic error checks against tsk_treeseq_divergence (options 0)
 * on the paper example. */
static void
test_paper_ex_divergence_errors(void)
{
    tsk_treeseq_t paper_ts;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_two_way_stat_func_errors(&paper_ts, tsk_treeseq_divergence, 0);
    tsk_treeseq_free(&paper_ts);
}
/* Checks site-mode divergence on the paper example: the call succeeds between two
 * sample sets of size 2, and comparing a size-1 sample set with itself yields NaN. */
static void
test_paper_ex_divergence(void)
{
    tsk_treeseq_t paper_ts;
    tsk_id_t sample_nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_sizes[] = { 2, 2 };
    tsk_id_t pair[] = { 0, 1 };
    double divergence;
    int err;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    err = tsk_treeseq_divergence(&paper_ts, 2, set_sizes, sample_nodes, 1, pair, 0,
        NULL, TSK_STAT_SITE, &divergence);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* sample_set[0] size = 1 with indexes = (0, 0) leads to NaN */
    set_sizes[0] = 1;
    pair[1] = 0;
    err = tsk_treeseq_divergence(&paper_ts, 2, set_sizes, sample_nodes, 1, pair, 0,
        NULL, TSK_STAT_SITE, &divergence);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT(tsk_isnan(divergence));

    tsk_treeseq_free(&paper_ts);
}
/* Checks that site-mode genetic relatedness of sample set 0 with itself succeeds on
 * the paper example, both with and without TSK_STAT_NONCENTRED. Only the return
 * codes are checked. */
static void
test_paper_ex_genetic_relatedness(void)
{
    tsk_treeseq_t paper_ts;
    tsk_id_t sample_nodes[] = { 0, 1, 2, 3 };
    tsk_size_t set_sizes[] = { 2, 2 };
    tsk_id_t pair[] = { 0, 0 };
    double relatedness;
    int err;

    tsk_treeseq_from_text(&paper_ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    err = tsk_treeseq_genetic_relatedness(&paper_ts, 2, set_sizes, sample_nodes, 1,
        pair, 0, NULL, TSK_STAT_SITE, &relatedness);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    err = tsk_treeseq_genetic_relatedness(&paper_ts, 2, set_sizes, sample_nodes, 1,
        pair, 0, NULL, TSK_STAT_SITE | TSK_STAT_NONCENTRED, &relatedness);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    tsk_treeseq_free(&paper_ts);
}
static void
test_paper_ex_genetic_relatedness_errors(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_two_way_stat_func_errors(&ts, tsk_treeseq_genetic_relatedness, 0);
verify_two_way_stat_func_errors(
&ts, tsk_treeseq_genetic_relatedness, TSK_STAT_NONCENTRED);
verify_two_way_stat_func_errors(
&ts, tsk_treeseq_genetic_relatedness, TSK_STAT_POLARISED);
tsk_treeseq_free(&ts);
}
static void
test_paper_ex_genetic_relatedness_weighted(void)
{
tsk_treeseq_t ts;
double weights[] = { 1.2, 0.1, 0.0, 0.0, 3.4, 5.0, 1.0, -1.0 };
tsk_id_t indexes[] = { 0, 0, 0, 1 };
double result[100];
tsk_size_t num_weights;
int ret;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
for (num_weights = 1; num_weights < 3; num_weights++) {
ret = tsk_treeseq_genetic_relatedness_weighted(
&ts, num_weights, weights, 2, indexes, 0, NULL, result, TSK_STAT_SITE);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_genetic_relatedness_weighted(
&ts, num_weights, weights, 2, indexes, 0, NULL, result, TSK_STAT_BRANCH);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_genetic_relatedness_weighted(
&ts, num_weights, weights, 2, indexes, 0, NULL, result, TSK_STAT_NODE);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_genetic_relatedness_weighted(&ts, num_weights, weights, 2,
indexes, 0, NULL, result, TSK_STAT_SITE | TSK_STAT_NONCENTRED);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_genetic_relatedness_weighted(&ts, num_weights, weights, 2,
indexes, 0, NULL, result, TSK_STAT_BRANCH | TSK_STAT_NONCENTRED);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_genetic_relatedness_weighted(&ts, num_weights, weights, 2,
indexes, 0, NULL, result, TSK_STAT_NODE | TSK_STAT_NONCENTRED);
CU_ASSERT_EQUAL_FATAL(ret, 0);
}
tsk_treeseq_free(&ts);
}
static void
test_paper_ex_genetic_relatedness_weighted_errors(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_two_way_weighted_stat_func_errors(
&ts, tsk_treeseq_genetic_relatedness_weighted, 0);
verify_two_way_weighted_stat_func_errors(
&ts, tsk_treeseq_genetic_relatedness_weighted, TSK_STAT_NONCENTRED);
verify_two_way_weighted_stat_func_errors(
&ts, tsk_treeseq_genetic_relatedness_weighted, TSK_STAT_POLARISED);
tsk_treeseq_free(&ts);
}
/* Exercises tsk_treeseq_genetic_relatedness_vector on a tree sequence built from the
 * single-tree example nodes with an empty edge string.
 *
 * Two weight columns are used: all ones, and the sample position 0..n-1. The call
 * must succeed over the whole sequence (centred and non-centred) and over the
 * sub-window [0.5 L, 0.75 L]. For that sub-window the span-normalised output must
 * equal the unnormalised output divided by the window width. */
static void
test_empty_genetic_relatedness_vector(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_size_t num_samples;
    double *weights, *result, *result2;
    tsk_size_t j;
    tsk_size_t num_weights = 2;
    double windows[] = { 0, 0 };

    tsk_treeseq_from_text(
        &ts, 1, single_tree_ex_nodes, "", NULL, NULL, NULL, NULL, NULL, 0);
    num_samples = tsk_treeseq_get_num_samples(&ts);
    windows[1] = tsk_treeseq_get_sequence_length(&ts);

    weights = tsk_malloc(num_weights * num_samples * sizeof(*weights));
    result = tsk_malloc(num_weights * num_samples * sizeof(*result));
    result2 = tsk_malloc(num_weights * num_samples * sizeof(*result2));
    /* All three buffers are read or written below, so every allocation must
     * succeed. */
    CU_ASSERT_FATAL(weights != NULL);
    CU_ASSERT_FATAL(result != NULL);
    CU_ASSERT_FATAL(result2 != NULL);

    for (j = 0; j < num_samples; j++) {
        weights[j] = 1.0;
    }
    for (j = 0; j < num_samples; j++) {
        weights[j + num_samples] = (double) j;
    }

    /* Whole sequence, centred then non-centred. */
    ret = tsk_treeseq_genetic_relatedness_vector(
        &ts, num_weights, weights, 1, windows, num_samples, ts.samples, result, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 1, windows,
        num_samples, ts.samples, result, TSK_STAT_NONCENTRED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Sub-window, without and with span normalisation. */
    windows[0] = 0.5 * tsk_treeseq_get_sequence_length(&ts);
    windows[1] = 0.75 * tsk_treeseq_get_sequence_length(&ts);
    ret = tsk_treeseq_genetic_relatedness_vector(
        &ts, num_weights, weights, 1, windows, num_samples, ts.samples, result, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 1, windows,
        num_samples, ts.samples, result2, TSK_STAT_SPAN_NORMALISE);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < num_samples * num_weights; j++) {
        CU_ASSERT_EQUAL_FATAL(result[j] / (windows[1] - windows[0]), result2[j]);
    }

    tsk_treeseq_free(&ts);
    free(weights);
    free(result);
    free(result2);
}
/* Checks that tsk_treeseq_genetic_relatedness_vector succeeds on ts for num_weights
 * weight columns and num_windows windows, using all samples of ts as focal nodes.
 *
 * The first call uses equal-width windows covering [0, L]. The windows are then
 * pulled in from both ends (the first break is moved to half of the second, and for
 * more than one window the second-to-last break is repositioned) and the call is
 * repeated with options 0, TSK_STAT_NONCENTRED, and TSK_DEBUG (with the debug stream
 * pointed at _devnull for the duration of that call).
 *
 * num_windows must be at least 1, because windows[1] is read unconditionally. */
static void
verify_genetic_relatedness_vector(
    tsk_treeseq_t *ts, tsk_size_t num_weights, tsk_size_t num_windows)
{
    int ret;
    tsk_size_t num_samples;
    double *weights, *result;
    tsk_size_t j, k;
    double *windows = tsk_malloc((num_windows + 1) * sizeof(*windows));
    double L = tsk_treeseq_get_sequence_length(ts);

    CU_ASSERT_FATAL(windows != NULL);
    /* Guards the windows[1] read below against a one-element buffer. */
    CU_ASSERT_FATAL(num_windows > 0);

    windows[0] = 0;
    windows[num_windows] = L;
    for (j = 1; j < num_windows; j++) {
        windows[j] = ((double) j) * L / (double) num_windows;
    }
    num_samples = tsk_treeseq_get_num_samples(ts);

    weights = tsk_malloc(num_weights * num_samples * sizeof(*weights));
    result = tsk_malloc(num_windows * num_weights * num_samples * sizeof(*result));
    CU_ASSERT_FATAL(weights != NULL);
    CU_ASSERT_FATAL(result != NULL);

    for (j = 0; j < num_samples; j++) {
        for (k = 0; k < num_weights; k++) {
            weights[j + k * num_samples] = 1.0 + (double) k;
        }
    }

    ret = tsk_treeseq_genetic_relatedness_vector(ts, num_weights, weights, num_windows,
        windows, num_samples, ts->samples, result, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Windows that no longer start at 0 or tile the whole sequence. */
    windows[0] = windows[1] / 2;
    if (num_windows > 1) {
        windows[num_windows - 1]
            = windows[num_windows - 2] + (L / (double) (2 * num_windows));
    }
    ret = tsk_treeseq_genetic_relatedness_vector(ts, num_weights, weights, num_windows,
        windows, num_samples, ts->samples, result, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_treeseq_genetic_relatedness_vector(ts, num_weights, weights, num_windows,
        windows, num_samples, ts->samples, result, TSK_STAT_NONCENTRED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_set_debug_stream(_devnull);
    ret = tsk_treeseq_genetic_relatedness_vector(ts, num_weights, weights, num_windows,
        windows, num_samples, ts->samples, result, TSK_DEBUG);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_set_debug_stream(stdout);

    free(windows);
    free(weights);
    free(result);
}
/* Runs verify_genetic_relatedness_vector on the paper example for every combination
 * of 1 or 2 weight columns and 1 or 2 windows, once with sequence length 10 and once
 * with sequence length 11. */
static void
test_paper_ex_genetic_relatedness_vector(void)
{
    tsk_treeseq_t paper_ts;
    tsk_size_t num_weights, num_windows;
    int extra_length;

    for (extra_length = 0; extra_length < 2; extra_length++) {
        tsk_treeseq_from_text(&paper_ts, 10.0 + (double) extra_length, paper_ex_nodes,
            paper_ex_edges, NULL, paper_ex_sites, paper_ex_mutations,
            paper_ex_individuals, NULL, 0);

        for (num_weights = 1; num_weights < 3; num_weights++) {
            for (num_windows = 1; num_windows < 3; num_windows++) {
                verify_genetic_relatedness_vector(&paper_ts, num_weights, num_windows);
            }
        }

        tsk_treeseq_free(&paper_ts);
    }
}
/*
 * Checks the error codes returned by tsk_treeseq_genetic_relatedness_vector
 * on the paper example for invalid window specifications and for stat modes
 * that the function rejects.
 */
static void
test_paper_ex_genetic_relatedness_vector_errors(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_size_t j, num_samples;
    tsk_size_t num_windows = 2;
    tsk_size_t num_weights = 2;
    double windows[] = { 0, 0, 0 };
    double *weights, *result;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    num_samples = tsk_treeseq_get_num_samples(&ts);
    weights = tsk_malloc(num_weights * num_samples * sizeof(*weights));
    result = tsk_malloc(num_windows * num_weights * num_samples * sizeof(*result));

    /* First weight set is all ones, second is the sample's position. */
    for (j = 0; j < num_samples; j++) {
        weights[j] = 1.0;
        weights[j + num_samples] = (float) j;
    }

    /* Window errors: a window count of zero is rejected whether or not a
     * windows array is supplied. */
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, 1, weights, 0, windows,
        num_samples, ts.samples, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);

    ret = tsk_treeseq_genetic_relatedness_vector(&ts, 1, weights, 0, NULL,
        num_samples, ts.samples, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);

    /* Breakpoints { 0, 0, 0 } */
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, 1, weights, 2, windows,
        num_samples, ts.samples, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* Breakpoints { -1, 0, 0 } */
    windows[0] = -1;
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, 1, weights, 2, windows,
        num_samples, ts.samples, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* Breakpoints { 12, 0, 0 } */
    windows[0] = 12;
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, 1, weights, 2, windows,
        num_samples, ts.samples, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* Breakpoints { 0, 0, 12 } */
    windows[0] = 0;
    windows[2] = 12;
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, 1, weights, 2, windows,
        num_samples, ts.samples, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* Unsupported mode errors, using the breakpoints { 0, 5, 10 }. */
    windows[0] = 0.0;
    windows[1] = 5.0;
    windows[2] = 10.0;
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2,
        windows, num_samples, ts.samples, result, TSK_STAT_SITE);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2,
        windows, num_samples, ts.samples, result, TSK_STAT_NODE);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

    tsk_treeseq_free(&ts);
    free(weights);
    free(result);
}
/*
 * Checks how tsk_treeseq_genetic_relatedness_vector treats the list of focal
 * nodes: a list of valid IDs succeeds, while a list containing -1 or 100 is
 * rejected with TSK_ERR_NODE_OUT_OF_BOUNDS.
 */
static void
test_paper_ex_genetic_relatedness_vector_node_errors(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_size_t j, num_samples;
    double *weights, *result;
    tsk_size_t num_weights = 2;
    tsk_size_t num_windows = 2;
    tsk_size_t num_nodes = 3;
    double windows[] = { 1, 1.5, 2 };
    const tsk_id_t good_nodes[] = { 1, 0, 2 };
    const tsk_id_t negative_node[] = { 1, -1, 2 };
    const tsk_id_t too_large_node[] = { 1, 100, 2 };

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);
    num_samples = tsk_treeseq_get_num_samples(&ts);
    weights = tsk_malloc(num_weights * num_samples * sizeof(*weights));
    result = tsk_malloc(num_windows * num_weights * num_nodes * sizeof(*result));

    /* First weight set is all ones, second is the sample's position. */
    for (j = 0; j < num_samples; j++) {
        weights[j] = 1.0;
        weights[j + num_samples] = (float) j;
    }

    /* node errors */
    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2,
        windows, num_nodes, good_nodes, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2,
        windows, num_nodes, negative_node, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    ret = tsk_treeseq_genetic_relatedness_vector(&ts, num_weights, weights, 2,
        windows, num_nodes, too_large_node, result, TSK_STAT_BRANCH);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    tsk_treeseq_free(&ts);
    free(weights);
    free(result);
}
static void
test_paper_ex_Y2_errors(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_two_way_stat_func_errors(&ts, tsk_treeseq_Y2, 0);
tsk_treeseq_free(&ts);
}
/*
 * Computes the site-mode Y2 statistic on the paper example for one pair of
 * sample sets, then repeats with the second set shrunk to a single sample
 * and checks that the result is NaN.
 */
static void
test_paper_ex_Y2(void)
{
    int ret;
    double y2;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t sample_set_sizes[] = { 2, 2 };
    tsk_id_t index_pair[] = { 0, 1 };

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    ret = tsk_treeseq_Y2(&ts, 2, sample_set_sizes, samples, 1, index_pair, 0,
        NULL, TSK_STAT_SITE, &y2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* sample_set_size of 1 leads to NaN */
    sample_set_sizes[1] = 1;
    ret = tsk_treeseq_Y2(&ts, 2, sample_set_sizes, samples, 1, index_pair, 0,
        NULL, TSK_STAT_SITE, &y2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT(tsk_isnan(y2));

    tsk_treeseq_free(&ts);
}
static void
test_paper_ex_f2_errors(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_two_way_stat_func_errors(&ts, tsk_treeseq_f2, 0);
tsk_treeseq_free(&ts);
}
/*
 * Computes the site-mode f2 statistic on the paper example for one pair of
 * sample sets, then checks that shrinking either set to a single sample
 * yields NaN.
 */
static void
test_paper_ex_f2(void)
{
    int ret;
    double f2;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t sample_set_sizes[] = { 2, 2 };
    tsk_id_t index_pair[] = { 0, 1 };

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    ret = tsk_treeseq_f2(&ts, 2, sample_set_sizes, samples, 1, index_pair, 0,
        NULL, TSK_STAT_SITE, &f2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* First sample set of size 1 leads to NaN */
    sample_set_sizes[0] = 1;
    ret = tsk_treeseq_f2(&ts, 2, sample_set_sizes, samples, 1, index_pair, 0,
        NULL, TSK_STAT_SITE, &f2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT(tsk_isnan(f2));

    /* Second sample set of size 1 leads to NaN */
    sample_set_sizes[0] = 2;
    sample_set_sizes[1] = 1;
    ret = tsk_treeseq_f2(&ts, 2, sample_set_sizes, samples, 1, index_pair, 0,
        NULL, TSK_STAT_SITE, &f2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT(tsk_isnan(f2));

    tsk_treeseq_free(&ts);
}
static void
test_paper_ex_Y3_errors(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_three_way_stat_func_errors(&ts, tsk_treeseq_Y3);
tsk_treeseq_free(&ts);
}
/* Checks that the site-mode Y3 statistic can be computed on the paper example
 * for one triple of sample sets with sizes 2, 1 and 1. Only the return code
 * is verified. */
static void
test_paper_ex_Y3(void)
{
    int ret;
    double y3;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t sample_set_sizes[] = { 2, 1, 1 };
    tsk_id_t index_triple[] = { 0, 1, 2 };

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    ret = tsk_treeseq_Y3(&ts, 3, sample_set_sizes, samples, 1, index_triple, 0,
        NULL, TSK_STAT_SITE, &y3);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_treeseq_free(&ts);
}
static void
test_paper_ex_f3_errors(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_three_way_stat_func_errors(&ts, tsk_treeseq_f3);
tsk_treeseq_free(&ts);
}
/*
 * Computes the site-mode f3 statistic on the paper example for one triple of
 * sample sets, then checks that shrinking the first set to a single sample
 * yields NaN.
 */
static void
test_paper_ex_f3(void)
{
    int ret;
    double f3;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t sample_set_sizes[] = { 2, 1, 1 };
    tsk_id_t index_triple[] = { 0, 1, 2 };

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    ret = tsk_treeseq_f3(&ts, 3, sample_set_sizes, samples, 1, index_triple, 0,
        NULL, TSK_STAT_SITE, &f3);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* sample_set_size of 1 leads to NaN */
    sample_set_sizes[0] = 1;
    ret = tsk_treeseq_f3(&ts, 3, sample_set_sizes, samples, 1, index_triple, 0,
        NULL, TSK_STAT_SITE, &f3);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT(tsk_isnan(f3));

    tsk_treeseq_free(&ts);
}
static void
test_paper_ex_f4_errors(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
verify_four_way_stat_func_errors(&ts, tsk_treeseq_f4);
tsk_treeseq_free(&ts);
}
/* Checks that the site-mode f4 statistic can be computed on the paper example
 * for one quadruple of single-sample sets. Only the return code is
 * verified. */
static void
test_paper_ex_f4(void)
{
    int ret;
    double f4;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t sample_set_sizes[] = { 1, 1, 1, 1 };
    tsk_id_t index_quad[] = { 0, 1, 2, 3 };

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    ret = tsk_treeseq_f4(&ts, 4, sample_set_sizes, samples, 1, index_quad, 0,
        NULL, TSK_STAT_SITE, &f4);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_treeseq_free(&ts);
}
/*
 * Error handling of tsk_treeseq_allele_frequency_spectrum on the paper
 * example: the shared one-way checks, followed by the AFS-specific rejection
 * of node mode, of two modes at once, and of time windows in site mode.
 */
static void
test_paper_ex_afs_errors(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t sample_set_sizes[] = { 2, 2 };
    double time_windows[] = { 0, 1 };
    double result[10]; /* not thinking too hard about the actual value needed */

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    verify_one_way_stat_func_errors_tw(&ts, tsk_treeseq_allele_frequency_spectrum);

    /* Node mode is rejected. */
    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes,
        samples, 0, NULL, 0, NULL, TSK_STAT_NODE, result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

    /* Only one mode flag may be given. */
    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes,
        samples, 0, NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_SITE, result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);

    /* Supplying time_windows together with site mode is rejected. */
    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 2, sample_set_sizes,
        samples, 0, NULL, 1, time_windows, TSK_STAT_SITE, result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

    tsk_treeseq_free(&ts);
}
/*
 * Checks the allele frequency spectrum of the paper example for a single
 * sample set holding all four samples, with default options and with
 * TSK_STAT_POLARISED, then runs the generic AFS verification.
 */
static void
test_paper_ex_afs(void)
{
    int ret;
    size_t j;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t sample_set_sizes[] = { 4, 0 };
    double result[25];
    /* Leading entries of the output expected from each call. */
    const double expected_default[] = { 0, 3.0, 0 };
    const double expected_polarised[] = { 0, 2.0, 0, 1.0, 0 };

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    /* we have two singletons and one tripleton */
    ret = tsk_treeseq_allele_frequency_spectrum(
        &ts, 1, sample_set_sizes, samples, 0, NULL, 0, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < sizeof(expected_default) / sizeof(*expected_default); j++) {
        CU_ASSERT_EQUAL_FATAL(result[j], expected_default[j]);
    }

    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 1, sample_set_sizes,
        samples, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < sizeof(expected_polarised) / sizeof(*expected_polarised); j++) {
        CU_ASSERT_EQUAL_FATAL(result[j], expected_polarised[j]);
    }

    verify_afs(&ts);
    tsk_treeseq_free(&ts);
}
/* Runs verify_divergence_matrix on the paper example in branch and site
 * modes, each with and without TSK_STAT_SPAN_NORMALISE. */
static void
test_paper_ex_divergence_matrix(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL,
        paper_ex_sites, paper_ex_mutations, paper_ex_individuals, NULL, 0);

    /* Branch mode */
    verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
    verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);

    /* Site mode */
    verify_divergence_matrix(&ts, TSK_STAT_SITE);
    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);

    tsk_treeseq_free(&ts);
}
/*
 * Checks the polarised allele frequency spectrum of the unary-node example
 * for the sample set { 0, 2, 3 } in site mode (exact values) and in branch
 * mode (sign of the entries only), then runs the generic AFS verification.
 */
static void
test_unary_ex_afs(void)
{
    int ret;
    size_t j;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 2, 3 };
    tsk_size_t sample_set_sizes[] = { 3, 0 };
    double result[25];
    const double expected_site[] = { 0, 1.0, 1.0, 0.0 };

    tsk_treeseq_from_text(&ts, 100, unary_ex_nodes, unary_ex_edges, NULL,
        unary_ex_sites, unary_ex_mutations, NULL, NULL, 0);

    /* we have a singleton and a doubleton */
    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 1, sample_set_sizes,
        samples, 0, NULL, 0, NULL, TSK_STAT_POLARISED, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < sizeof(expected_site) / sizeof(*expected_site); j++) {
        CU_ASSERT_EQUAL_FATAL(result[j], expected_site[j]);
    }

    /* In branch mode the first three entries are positive and the fourth is
     * exactly zero. */
    ret = tsk_treeseq_allele_frequency_spectrum(&ts, 1, sample_set_sizes,
        samples, 0, NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_POLARISED, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < 3; j++) {
        CU_ASSERT_TRUE_FATAL(result[j] > 0);
    }
    CU_ASSERT_EQUAL_FATAL(result[3], 0.0);

    verify_afs(&ts);
    tsk_treeseq_free(&ts);
}
/* Runs the generic LD verification on the non-binary example tree
 * sequence. */
static void
test_nonbinary_ex_ld(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges,
        NULL, nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);

    verify_ld(&ts);

    tsk_treeseq_free(&ts);
}
/* Runs the generic mean-descendants verification on the non-binary example
 * tree sequence. */
static void
test_nonbinary_ex_mean_descendants(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges,
        NULL, nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);

    verify_mean_descendants(&ts);

    tsk_treeseq_free(&ts);
}
/* Runs the generic genealogical-nearest-neighbours verification on the
 * non-binary example tree sequence. */
static void
test_nonbinary_ex_genealogical_nearest_neighbours(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges,
        NULL, nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);

    verify_genealogical_nearest_neighbours(&ts);

    tsk_treeseq_free(&ts);
}
/* Runs the general-stat verification helpers on the non-binary example: the
 * branch identity check, the default-options check, and one pass for each of
 * the branch, site and node modes. */
static void
test_nonbinary_ex_general_stat(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges,
        NULL, nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);

    verify_branch_general_stat_identity(&ts);
    verify_default_general_stat(&ts);

    /* One pass per statistic mode. */
    verify_general_stat(&ts, TSK_STAT_BRANCH);
    verify_general_stat(&ts, TSK_STAT_SITE);
    verify_general_stat(&ts, TSK_STAT_NODE);

    tsk_treeseq_free(&ts);
}
static void
test_nonbinary_ex_general_stat_errors(void)
{
tsk_treeseq_t ts;
tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,
nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);
verify_branch_general_stat_errors(&ts);
verify_site_general_stat_errors(&ts);
verify_node_general_stat_errors(&ts);
tsk_treeseq_free(&ts);
}
/*
 * LD checks on a tree sequence built by caterpillar_tree(50, 20, 1): the
 * generic LD verification, plus a forward and a reverse r2 array query that
 * each ask for at most 5 values and must return exactly 5.
 */
static void
test_caterpillar_tree_ld(void)
{
    int ret;
    double r2[20];
    tsk_size_t num_r2_values;
    tsk_ld_calc_t ld_calc;
    tsk_treeseq_t *ts = caterpillar_tree(50, 20, 1);

    ret = tsk_ld_calc_init(&ld_calc, ts);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    verify_ld(ts);

    /* Forwards from site 0. */
    ret = tsk_ld_calc_get_r2_array(
        &ld_calc, 0, TSK_DIR_FORWARD, 5, DBL_MAX, r2, &num_r2_values);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(num_r2_values, 5);

    /* Backwards from site 10. */
    ret = tsk_ld_calc_get_r2_array(
        &ld_calc, 10, TSK_DIR_REVERSE, 5, DBL_MAX, r2, &num_r2_values);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(num_r2_values, 5);

    tsk_ld_calc_free(&ld_calc);
    tsk_treeseq_free(ts);
    free(ts);
}
/* Checks that tsk_ld_calc_get_r2 reports TSK_ERR_ONLY_INFINITE_SITES on the
 * tree sequence built by caterpillar_tree(4, 2, 2). */
static void
test_ld_multi_mutations(void)
{
    int ret;
    double r2;
    tsk_ld_calc_t ld_calc;
    tsk_treeseq_t *ts = caterpillar_tree(4, 2, 2);

    ret = tsk_ld_calc_init(&ld_calc, ts);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_ld_calc_get_r2(&ld_calc, 0, 1, &r2);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_ONLY_INFINITE_SITES);

    tsk_ld_calc_free(&ld_calc);
    tsk_treeseq_free(ts);
    free(ts);
}
/*
 * Checks that tsk_ld_calc_get_r2 reports
 * TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED after one byte of the mutation
 * derived-state data of a caterpillar tree sequence is overwritten with '0'.
 */
static void
test_ld_silent_mutations(void)
{
    int ret;
    double r2;
    tsk_ld_calc_t ld_calc;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_treeseq_t *base_ts = caterpillar_tree(4, 2, 1);

    /* Work on a private copy of the tables so they can be edited. */
    ret = tsk_table_collection_copy(base_ts->tables, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Presumably this makes the second mutation silent (derived state equal
     * to the ancestral state); confirm against caterpillar_tree. */
    tables.mutations.derived_state[1] = '0';

    ret = tsk_treeseq_init(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_ld_calc_init(&ld_calc, &ts);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_ld_calc_get_r2(&ld_calc, 0, 1, &r2);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED);

    tsk_ld_calc_free(&ld_calc);
    tsk_treeseq_free(&ts);
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(base_ts);
    free(base_ts);
}
/*
 * Two-locus site statistics on the paper example. Computes r2 over all pairs
 * of sites with tsk_treeseq_r2 for one, two and three sample sets, then with
 * tsk_treeseq_r2_ij for one, two and three index tuples, comparing each
 * output against a hard-coded truth table.
 *
 * Sample sets are added cumulatively: sets 0 and 1 both hold every sample and
 * set 2 holds the first two samples only.
 */
static void
test_paper_ex_two_site(void)
{
tsk_treeseq_t ts;
/* Sized for the largest case below: 27 values, matching the 27-entry truth
 * tables for three sample sets / three index tuples. */
double result[27];
tsk_size_t s, result_size, num_sample_sets;
int ret;
double truth_one_set[9] = { 1, 0.1111111111111111, 0.1111111111111111,
0.1111111111111111, 1, 1, 0.1111111111111111, 1, 1 };
/* Sets 0 and 1 are identical, so each one-set value appears twice in a row. */
double truth_two_sets[18] = { 1, 1, 0.1111111111111111, 0.1111111111111111,
0.1111111111111111, 0.1111111111111111, 0.1111111111111111, 0.1111111111111111,
1, 1, 1, 1, 0.1111111111111111, 0.1111111111111111, 1, 1, 1, 1 };
double truth_three_sets[27] = { 1, 1, NAN, 0.1111111111111111, 0.1111111111111111,
NAN, 0.1111111111111111, 0.1111111111111111, NAN, 0.1111111111111111,
0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1, 0.1111111111111111,
0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1 };
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
double truth_three_index_tuples[27] = { 1, 1, NAN, 0.1111111111111111,
0.1111111111111111, NAN, 0.1111111111111111, 0.1111111111111111, NAN,
0.1111111111111111, 0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1,
0.1111111111111111, 0.1111111111111111, NAN, 1, 1, 1, 1, 1, 1 };
tsk_size_t sample_set_sizes[3], num_index_tuples;
/* sample_sets is sized at run time and so must be declared after ts is
 * loaded. index_tuples holds the pairs (0, 1), (0, 0), (0, 2). */
tsk_id_t sample_sets[ts.num_samples * 3], index_tuples[2 * 3] = { 0, 1, 0, 0, 0, 2 };
tsk_size_t num_sites = ts.tables->sites.num_rows;
tsk_id_t *sites = tsk_malloc(num_sites * sizeof(*sites));
// First sample set contains all of the samples
sample_set_sizes[0] = ts.num_samples;
num_sample_sets = 1;
for (s = 0; s < ts.num_samples; s++) {
sample_sets[s] = (tsk_id_t) s;
}
// Every site is used for both the row and the column sites
for (s = 0; s < num_sites; s++) {
sites[s] = (tsk_id_t) s;
}
// One value per (row site, column site) pair, per sample set or index tuple
result_size = num_sites * num_sites;
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_sample_sets, result, truth_one_set);
// Second sample set contains all of the samples
sample_set_sizes[1] = ts.num_samples;
num_sample_sets = 2;
for (s = ts.num_samples; s < ts.num_samples * 2; s++) {
sample_sets[s] = (tsk_id_t) s - (tsk_id_t) ts.num_samples;
}
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_sample_sets, result, truth_two_sets);
// Third sample set contains the first two samples
sample_set_sizes[2] = 2;
num_sample_sets = 3;
// Only two entries are written; the last two slots of sample_sets stay unused
for (s = ts.num_samples * 2; s < (ts.num_samples * 3) - 2; s++) {
sample_sets[s] = (tsk_id_t) s - (tsk_id_t) ts.num_samples * 2;
}
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// The expected values for the third set include NaNs, hence the _nan comparison
assert_arrays_almost_equal_nan(
result_size * num_sample_sets, result, truth_three_sets);
// Two-way stats: we'll reuse all sample sets from the first 3 tests
num_sample_sets = 3;
num_index_tuples = 1;
// We'll compute r2 between sample set 0 and sample set 1
tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);
ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_sites, sites, NULL, num_sites, sites, NULL,
0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_index_tuples, result, truth_one_set);
// Compare sample sets [(0, 1), (0, 0)]
num_index_tuples = 2;
tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);
ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_sites, sites, NULL, num_sites, sites, NULL,
0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_index_tuples, result, truth_two_sets);
// Compare sample sets [(0, 1), (0, 0), (0, 2)]
num_index_tuples = 3;
tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);
ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_sites, sites, NULL, num_sites, sites, NULL,
0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal_nan(
result_size * num_index_tuples, result, truth_three_index_tuples);
tsk_treeseq_free(&ts);
tsk_safe_free(sites);
}
/*
 * Two-locus branch statistics on the paper example. Computes D2 in
 * TSK_STAT_BRANCH mode with tsk_treeseq_D2 for one, two and three sample
 * sets over positions taken from the tree breakpoints, then over three
 * hand-picked pairs of positions, and finally with tsk_treeseq_D2_ij for
 * one, two and three index tuples. Each output is compared against a
 * hard-coded truth table.
 *
 * Sample sets are added cumulatively: sets 0 and 1 both hold every sample and
 * set 2 holds the first two samples only.
 */
static void
test_paper_ex_two_branch(void)
{
int ret;
tsk_treeseq_t ts;
/* Sized for the largest case below: 27 values, matching the 27-entry truth
 * tables for three sample sets / three index tuples. */
double result[27];
tsk_size_t i, result_size, num_sample_sets;
tsk_flags_t options = 0;
double truth_one_set[9] = { 0.008890640625, 0.004624203125, 0.005215703125,
0.004624203125, 0.003737578125, 0.004377078125, 0.005215703125,
0.004377078124999999, 0.005160578124999998 };
/* Sets 0 and 1 are identical, so each one-set value appears twice in a row. */
double truth_two_sets[18] = { 0.008890640625, 0.008890640625, 0.004624203125,
0.004624203125, 0.005215703125, 0.005215703125, 0.004624203125, 0.004624203125,
0.003737578125, 0.003737578125, 0.004377078125, 0.004377078125, 0.005215703125,
0.005215703125, 0.004377078124999999, 0.004377078124999999, 0.005160578124999998,
0.005160578124999998 };
double truth_three_sets[27]
= { 0.008890640625, 0.008890640625, 0.007225, 0.004624203125000001,
0.004624203125, 0.007225, 0.005215703125000002, 0.005215703125, 0.008585,
0.004624203125, 0.004624203125, 0.007225, 0.003737578125, 0.003737578125,
0.007225, 0.004377078125, 0.004377078125, 0.008585, 0.005215703125,
0.005215703125, 0.008585, 0.004377078124999999, 0.004377078124999999,
0.008585, 0.005160578124999998, 0.005160578124999998, 0.010201 };
/* Expected output (three sample sets) for each positions_subset_N below; in
 * every table the same three values repeat for all four position pairs. */
double truth_positions_subset_1[12] = { 0.008890640625, 0.008890640625, 0.007225,
0.008890640625, 0.008890640625, 0.007225, 0.008890640625, 0.008890640625,
0.007225, 0.008890640625, 0.008890640625, 0.007225 };
double truth_positions_subset_2[12] = { 0.003737578125, 0.003737578125, 0.007225,
0.003737578125, 0.003737578125, 0.007225, 0.003737578125, 0.003737578125,
0.007225, 0.003737578125, 0.003737578125, 0.007225 };
double truth_positions_subset_3[12] = { 0.005160578125, 0.005160578125, 0.010201,
0.005160578125, 0.005160578125, 0.010201, 0.005160578125, 0.005160578125,
0.010201, 0.005160578125, 0.005160578125, 0.010201 };
double truth_three_index_tuples[27] = { 0.008890640625, 0.008890640625, 0.0039125,
0.004624203125, 0.004624203125, 0.0038125, 0.005215703125, 0.005215703125,
0.0045725, 0.004624203125, 0.004624203125, 0.0038125, 0.003737578125,
0.003737578125, 0.0040125, 0.004377078125, 0.004377078125, 0.0048525,
0.005215703125, 0.005215703125, 0.0045725, 0.004377078125, 0.004377078125,
0.0048525, 0.005160578125, 0.005160578125, 0.0058845 };
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
tsk_size_t sample_set_sizes[3], num_index_tuples;
/* sample_sets is sized at run time and so must be declared after ts is
 * loaded. index_tuples holds the pairs (0, 1), (0, 0), (0, 2). */
tsk_id_t sample_sets[ts.num_samples * 3], index_tuples[2 * 3] = { 0, 1, 0, 0, 0, 2 };
tsk_size_t num_trees = ts.num_trees;
double *positions = tsk_malloc(num_trees * sizeof(*positions));
/* Each subset is a pair of positions used for both rows and columns. */
double positions_subset_1[2] = { 0., 0.1 };
double positions_subset_2[2] = { 2., 6. };
double positions_subset_3[2] = { 9., 9.999 };
// First sample set contains all of the samples
sample_set_sizes[0] = ts.num_samples;
num_sample_sets = 1;
for (i = 0; i < ts.num_samples; i++) {
sample_sets[i] = (tsk_id_t) i;
}
// One position per tree: the first num_trees entries of ts.breakpoints
for (i = 0; i < num_trees; i++) {
positions[i] = ts.breakpoints[i];
}
options |= TSK_STAT_BRANCH;
result_size = num_trees * num_trees * num_sample_sets;
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_trees,
NULL, positions, num_trees, NULL, positions, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_one_set);
// Second sample set contains all of the samples
sample_set_sizes[1] = ts.num_samples;
num_sample_sets = 2;
for (i = ts.num_samples; i < ts.num_samples * 2; i++) {
sample_sets[i] = (tsk_id_t) i - (tsk_id_t) ts.num_samples;
}
result_size = num_trees * num_trees * num_sample_sets;
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_trees,
NULL, positions, num_trees, NULL, positions, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_two_sets);
// Third sample set contains the first two samples
sample_set_sizes[2] = 2;
num_sample_sets = 3;
// Only two entries are written; the last two slots of sample_sets stay unused
for (i = ts.num_samples * 2; i < (ts.num_samples * 3) - 2; i++) {
sample_sets[i] = (tsk_id_t) i - (tsk_id_t) ts.num_samples * 2;
}
result_size = num_trees * num_trees * num_sample_sets;
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_trees,
NULL, positions, num_trees, NULL, positions, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal_nan(result_size, result, truth_three_sets);
// Position subsets: 2 x 2 position pairs for each of the three sample sets
result_size = 4 * num_sample_sets;
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2, NULL,
positions_subset_1, 2, NULL, positions_subset_1, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal_nan(result_size, result, truth_positions_subset_1);
result_size = 4 * num_sample_sets;
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2, NULL,
positions_subset_2, 2, NULL, positions_subset_2, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal_nan(result_size, result, truth_positions_subset_2);
result_size = 4 * num_sample_sets;
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2, NULL,
positions_subset_3, 2, NULL, positions_subset_3, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal_nan(result_size, result, truth_positions_subset_3);
// Two-way stats: we'll reuse all sample sets from the first 3 tests
num_sample_sets = 3;
// From here on result_size counts values per index tuple
result_size = num_trees * num_trees;
num_index_tuples = 1;
// We'll compute D2 between sample set 0 and sample set 1
tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);
ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_trees, NULL, positions, num_trees, NULL,
positions, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_index_tuples, result, truth_one_set);
// Compare sample sets [(0, 1), (0, 0)]
num_index_tuples = 2;
tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);
ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_trees, NULL, positions, num_trees, NULL,
positions, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_index_tuples, result, truth_two_sets);
// Compare sample sets [(0, 1), (0, 0), (0, 2)]
num_index_tuples = 3;
tsk_memset(result, 0, sizeof(*result) * result_size * num_index_tuples);
ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_trees, NULL, positions, num_trees, NULL,
positions, options, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal_nan(
result_size * num_index_tuples, result, truth_three_index_tuples);
tsk_treeseq_free(&ts);
tsk_safe_free(positions);
}
/*
 * Two-locus site statistics on a hand-written tree sequence with nine
 * samples, two sites and two mutations per site. Every one-way two-locus
 * statistic (D, D2, r2, D_prime, r, Dz, pi2 and the unbiased D2/Dz/pi2) is
 * computed over all pairs of sites for a single sample set holding all
 * samples and compared with a hard-coded truth table. The two-way variants
 * (r2_ij, D2_ij, D2_ij_unbiased) are then checked for a sample set paired
 * with itself and for two disjoint sample sets.
 */
static void
test_two_site_correlated_multiallelic(void)
{
/* Nine sample nodes at time 0, followed by eight internal nodes. */
const char *nodes = "1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"0 2 -1\n"
"0 4 -1\n"
"0 6 -1\n"
"0 8 -1\n"
"0 10 -1\n"
"0 12 -1\n"
"0 14 -1\n"
"0 16 -1\n";
/* Edges for parents 15 and 16 change at position 10; all others span the
 * whole [0, 20) interval. */
const char *edges = "0 20 9 0,1\n"
"0 20 10 2,9\n"
"0 20 11 4,5\n"
"0 20 12 6,11\n"
"0 20 13 7,8\n"
"0 20 14 3,10\n"
"0 10 15 12\n"
"10 20 15 13\n"
"0 10 15 14\n"
"10 20 15 14\n"
"10 20 16 12\n"
"0 10 16 13\n"
"0 10 16 15\n"
"10 20 16 15\n";
/* One site on each side of position 10. */
const char *tree_sites = "7 A\n"
"13 G\n";
/* Two mutations per site; the second mutation at each site names the first
 * one at that site as its parent. */
const char *mutations = "0 15 T -1\n"
"0 14 G 0\n"
"1 15 T -1\n"
"1 13 C 2\n";
int ret;
tsk_treeseq_t ts;
tsk_size_t s, result_size;
/* Expected values, one per ordered pair of sites (2 x 2). */
double truth_D[4] = { 0.043209876543209874, -0.018518518518518517,
-0.018518518518518517, 0.05555555555555555 };
double truth_D2[4] = { 0.023844603634269844, 0.02384460363426984,
0.02384460363426984, 0.02384460363426984 };
double truth_r2[4] = { 1, 1, 1, 1 };
double truth_D_prime[4] = { 0, -0.5, -0.5, 0 };
double truth_r[4] = { 0.18377223398316206, -0.12212786219416509,
-0.12212786219416509, 0.2609542781331212 };
double truth_Dz[4] = { 0.0033870175616860566, 0.003387017561686057,
0.003387017561686057, 0.003387017561686057 };
double truth_pi2[4] = { 0.04579247743399549, 0.04579247743399549,
0.04579247743399549, 0.0457924774339955 };
double truth_D2_unbiased[4] = { 0.026455026455026454, 0.026455026455026454,
0.026455026455026454, 0.026455026455026454 };
double truth_Dz_unbiased[4] = { -0.008818342151675485, -0.008818342151675485,
-0.008818342151675485, -0.008818342151675485 };
double truth_pi2_unbiased[4] = { 0.0582010582010582, 0.0582010582010582,
0.0582010582010582, 0.0582010582010582 };
double truth_D2_unbiased_disjoint[4] = { 0.007407407407407407, 0.007407407407407407,
0.007407407407407407, 0.007407407407407407 };
tsk_treeseq_from_text(
&ts, 20, nodes, edges, NULL, tree_sites, mutations, NULL, NULL, 0);
tsk_size_t num_sample_sets = 1;
tsk_size_t sample_set_sizes[2] = { ts.num_samples, ts.num_samples };
/* The arrays below are sized at run time and so must be declared after ts
 * is loaded. */
tsk_id_t sample_sets[ts.num_samples * 2];
tsk_size_t num_sites = ts.tables->sites.num_rows;
tsk_id_t *sites = tsk_malloc(num_sites * sizeof(*sites));
result_size = num_sites * num_sites;
double result[result_size];
// Two sample sets for multipop at the bottom, only presenting one to single pop
// results
for (s = 0; s < ts.num_samples; s++) {
sample_sets[s] = (tsk_id_t) s;
sample_sets[s + ts.num_samples] = (tsk_id_t) s;
}
// Every site is used for both the row and the column sites
for (s = 0; s < num_sites; s++) {
sites[s] = (tsk_id_t) s;
}
// One-way statistics on the single sample set holding all samples
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_D(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_D);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_D2);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_r2);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_D_prime(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_sites, sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_D_prime);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_r(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_r);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_Dz(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_Dz);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_pi2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_pi2);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_D2_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_sites, sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_Dz_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_sites, sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_Dz_unbiased);
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_pi2_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_sites, sites, NULL, num_sites, sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_pi2_unbiased);
// Two-way stats with both sample sets visible. The index tuple used here is
// (0, 0), i.e. sample set 0 paired with itself, so r2 matches the one-set result
num_sample_sets = 2;
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,
(tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,
result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_r2);
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,
(tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,
result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_D2);
// perfectly overlapping sample sets will produce a result equal to the single
// population case
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
1, (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,
result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);
// two disjoint sample sets with 5 and 4 samples {0,1,2,3,4}{5,6,7,8}
// (the new sizes re-partition the first nine entries of sample_sets)
sample_set_sizes[0] = 5;
sample_set_sizes[1] = 4;
tsk_memset(result, 0, sizeof(*result) * result_size);
ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
1, (tsk_id_t[2]) { 0, 1 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,
result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size, result, truth_D2_unbiased_disjoint);
tsk_treeseq_free(&ts);
tsk_safe_free(sites);
}
/*
 * Site-mode two-locus statistics on a hand-built tree sequence with nine
 * samples, two trees (edge rows cover 0-10 and 10-20) and one site in each
 * tree (positions 7 and 13). Each site carries two stacked mutations, so both
 * sites are multiallelic.
 *
 * The single-sample-set statistics are checked first against precomputed
 * 2x2 matrices, then the two-sample-set (`_ij`) variants are checked with
 * identical and with disjoint sample sets.
 */
static void
test_two_site_uncorrelated_multiallelic(void)
{
    const char *nodes = "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "0 2 -1\n"
                        "0 4 -1\n"
                        "0 6 -1\n"
                        "0 8 -1\n"
                        "0 10 -1\n"
                        "0 12 -1\n"
                        "0 14 -1\n"
                        "0 16 -1\n"
                        "0 2 -1\n"
                        "0 4 -1\n"
                        "0 6 -1\n"
                        "0 8 -1\n"
                        "0 10 -1\n"
                        "0 12 -1\n"
                        "0 14 -1\n"
                        "0 16 -1\n";
    const char *edges = "0 10 9 0,1\n"
                        "10 20 17 0,3\n"
                        "0 10 10 2,9\n"
                        "10 20 18 6,17\n"
                        "0 10 11 3,4\n"
                        "10 20 19 1,4\n"
                        "0 10 12 5,11\n"
                        "10 20 20 7,19\n"
                        "0 10 13 6,7\n"
                        "10 20 21 2,5\n"
                        "0 10 14 8,13\n"
                        "10 20 22 8,21\n"
                        "0 10 15 10,12\n"
                        "10 20 23 18,20\n"
                        "0 10 16 14,15\n"
                        "10 20 24 22,23\n";
    const char *tree_sites = "7 A\n"
                             "13 G\n";
    const char *mutations = "0 15 T -1\n"
                            "0 12 G 0\n"
                            "1 23 T -1\n"
                            "1 20 A 2\n";
    tsk_treeseq_t ts;
    int ret;

    /* Expected 2x2 (site x site) matrices for each statistic. */
    double truth_D[4] = { 0.05555555555555555, 0.0, 0.0, 0.05555555555555555 };
    double truth_D2[4] = { 0.024691358024691357, 0.0, 0.0, 0.024691358024691357 };
    double truth_r2[4] = { 1, 0, 0, 1 };
    double truth_D_prime[4] = { 0.0, 0.0, 0.0, 0.0 };
    double truth_r[4] = { 0.25, 0.0, 0.0, 0.25 };
    double truth_Dz[4] = { 0.0, 0.0, 0.0, 0.0 };
    double truth_pi2[4] = { 0.04938271604938272, 0.04938271604938272,
        0.04938271604938272, 0.04938271604938272 };
    double truth_D2_unbiased[4] = { 0.027777777777777776, -0.009259259259259259,
        -0.009259259259259259, 0.027777777777777776 };
    double truth_Dz_unbiased[4] = { -0.015873015873015872, 0.005291005291005289,
        0.005291005291005289, -0.015873015873015872 };
    double truth_pi2_unbiased[4] = { 0.06349206349206349, 0.06216931216931215,
        0.06216931216931215, 0.06349206349206349 };
    double truth_D2_unbiased_disjoint[4] = { 0.008333333333333333,
        -0.0027777777777777775, -0.0027777777777777775, 0.03518518518518518 };

    tsk_treeseq_from_text(
        &ts, 20, nodes, edges, NULL, tree_sites, mutations, NULL, NULL, 0);

    tsk_size_t s;
    tsk_size_t num_sample_sets = 1;
    tsk_size_t num_sites = ts.tables->sites.num_rows;
    tsk_id_t *sites = tsk_malloc(num_sites * sizeof(*sites));
    tsk_size_t sample_set_sizes[2] = { ts.num_samples, ts.num_samples };
    tsk_id_t sample_sets[ts.num_samples * 2];
    tsk_size_t result_size = num_sites * num_sites;
    double result[result_size];

    /* Fail the test cleanly instead of writing through a NULL pointer. */
    CU_ASSERT_FATAL(sites != NULL);

    // Two sample sets for multipop at the bottom, only presenting one to single pop
    // results
    for (s = 0; s < ts.num_samples; s++) {
        sample_sets[s] = (tsk_id_t) s;
        sample_sets[s + ts.num_samples] = (tsk_id_t) s;
    }
    for (s = 0; s < num_sites; s++) {
        sites[s] = (tsk_id_t) s;
    }

    /*
     * Single sample set statistics. `result` holds exactly result_size
     * doubles, so that is the size cleared before every call.
     */
    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_D(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
        sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_D);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_D2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
        sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_D2);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
        sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_r2);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_D_prime(&ts, num_sample_sets, sample_set_sizes, sample_sets,
        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_D_prime);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_r(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
        sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_r);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_Dz(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
        sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_Dz);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_pi2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
        sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_pi2);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_D2_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_Dz_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_Dz_unbiased);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_pi2_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
        num_sites, sites, NULL, num_sites, sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_pi2_unbiased);

    /*
     * Two-sample-set statistics. Both sample sets currently list every
     * sample, and the single index tuple is (0, 0), so the expected values
     * are the single-population ones.
     */
    num_sample_sets = 2;
    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,
        (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,
        result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_r2);

    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,
        (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,
        result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_D2);

    // perfectly overlapping sample sets will produce a result equal to the single
    // population case
    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
        1, (tsk_id_t[2]) { 0, 0 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,
        result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased);

    // two disjoint sample sets with 5 and 4 samples {0,1,2,3,4}{5,6,7,8}
    // (shrinking the sizes re-partitions the first nine entries of sample_sets)
    sample_set_sizes[0] = 5;
    sample_set_sizes[1] = 4;
    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
        1, (tsk_id_t[2]) { 0, 1 }, num_sites, sites, NULL, num_sites, sites, NULL, 0,
        result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_D2_unbiased_disjoint);

    tsk_treeseq_free(&ts);
    tsk_safe_free(sites);
}
/*
 * r2 between two sites that each carry a chain of three mutations, on a
 * single comb-shaped tree of 35 samples (every edge row joins one more sample
 * to the previous internal node).
 *
 * Reading the mutation rows as "site node derived_state parent": site 0
 * (ancestral A) mutates to T, then G, then back to A; site 1 (ancestral T)
 * mutates to G, then back to T, then to A. The test pins the resulting 2x2
 * r2 matrix.
 */
static void
test_two_site_backmutation(void)
{
    const char *nodes
        = "1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n"
          "1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n"
          "1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n"
          "1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n"
          "1 0 -1\n1 0 -1\n1 0 -1\n0 2 -1\n0 4 -1\n0 6 -1\n0 8 -1\n0 10 -1\n"
          "0 12 -1\n0 14 -1\n0 16 -1\n0 18 -1\n0 20 -1\n0 22 -1\n0 24 -1\n0 26 -1\n"
          "0 28 -1\n0 30 -1\n0 32 -1\n0 34 -1\n0 36 -1\n0 38 -1\n0 40 -1\n0 42 -1\n"
          "0 44 -1\n0 46 -1\n0 48 -1\n0 50 -1\n0 52 -1\n0 54 -1\n0 56 -1\n0 58 -1\n"
          "0 60 -1\n0 62 -1\n0 64 -1\n0 66 -1\n0 68 -1\n";
    const char *edges
        = "0 10 35 0,1\n0 10 36 2,35\n0 10 37 3,36\n0 10 38 4,37\n0 10 39 5,38\n"
          "0 10 40 6,39\n0 10 41 7,40\n0 10 42 8,41\n0 10 43 9,42\n0 10 44 10,43\n"
          "0 10 45 11,44\n0 10 46 12,45\n0 10 47 13,46\n0 10 48 14,47\n0 10 49 15,48\n"
          "0 10 50 16,49\n0 10 51 17,50\n0 10 52 18,51\n0 10 53 19,52\n0 10 54 20,53\n"
          "0 10 55 21,54\n0 10 56 22,55\n0 10 57 23,56\n0 10 58 24,57\n0 10 59 25,58\n"
          "0 10 60 26,59\n0 10 61 27,60\n0 10 62 28,61\n0 10 63 29,62\n0 10 64 30,63\n"
          "0 10 65 31,64\n0 10 66 32,65\n0 10 67 33,66\n0 10 68 34,67\n";
    const char *sites = "1 A\n"
                        "4.5 T\n";
    const char *mutations = "0 50 T -1\n"
                            "0 48 G 0\n"
                            "0 46 A 1\n"
                            "1 62 G -1\n"
                            "1 60 T 3\n"
                            "1 58 A 4\n";
    int ret;
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

    tsk_size_t num_sample_sets = 1;
    tsk_size_t num_sites = ts.tables->sites.num_rows;
    tsk_id_t *row_sites = tsk_malloc(num_sites * sizeof(*row_sites));
    tsk_id_t *col_sites = tsk_malloc(num_sites * sizeof(*col_sites));
    tsk_size_t sample_set_sizes[1] = { ts.num_samples };
    tsk_id_t sample_sets[ts.num_samples];
    tsk_size_t result_size = num_sites * num_sites;
    double result[result_size];
    tsk_size_t s;
    double truth_r2[4] = { 0.999999999999999, 0.042923862278701, 0.042923862278701, 1. };

    /* Fail the test cleanly instead of writing through a NULL pointer. */
    CU_ASSERT_FATAL(row_sites != NULL);
    CU_ASSERT_FATAL(col_sites != NULL);

    /* One sample set containing every sample. */
    for (s = 0; s < ts.num_samples; s++) {
        sample_sets[s] = (tsk_id_t) s;
    }
    /* Compare every site against every site. */
    for (s = 0; s < num_sites; s++) {
        row_sites[s] = (tsk_id_t) s;
        col_sites[s] = (tsk_id_t) s;
    }

    /* `result` holds exactly result_size doubles; clear just that much. */
    tsk_memset(result, 0, sizeof(*result) * result_size);
    ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
        row_sites, NULL, num_sites, col_sites, NULL, 0, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(result_size, result, truth_r2);

    tsk_treeseq_free(&ts);
    tsk_safe_free(row_sites);
    tsk_safe_free(col_sites);
}
/*
 * Branch-mode (TSK_STAT_BRANCH) two-locus statistics evaluated at four
 * positions of a ten-sample tree sequence without sites or mutations. Each
 * statistic is compared with a precomputed 4x4 matrix of expected values.
 */
static void
test_two_locus_branch_all_stats(void)
{
    const char *nodes
        = "1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n1 0 -1\n"
          "1 0 -1\n1 0 -1\n0 0.02 -1\n0 0.06 -1\n0 0.08 -1\n0 0.09 -1\n0 0.21 -1\n"
          "0 0.35 -1\n0 0.44 -1\n0 0.69 -1\n0 0.79 -1\n0 0.80 -1\n0 0.84 -1\n"
          "0 1.26 -1\n";
    const char *edges
        = "0 10 10 0,8\n0 10 11 4,7\n0 10 12 3,9\n0 10 13 6,11\n0 10 14 1,2\n"
          "5 10 15 5,10\n0 5 16 5,10\n6 10 17 12,14\n2 6 18 14\n5 10 18 15\n"
          "2 5 18 16\n6 10 18 17\n0 6 19 12\n0 2 19 14\n2 6 19 18\n"
          "0 2 20 13\n0 2 20 16\n2 10 21 13\n6 10 21 18\n0 6 21 19\n"
          "0 2 21 20\n";

    /* Expected matrices, one entry per (row position, column position) pair. */
    double expected_D[16] = { 0 };
    double expected_D2[16] = { 0.21949755999999998, 0.1867003599999999,
        0.18798699999999988, 0.18941379999999983, 0.18670035999999995,
        0.21159555999999993, 0.21257979999999996, 0.21222580000000005, 0.187987,
        0.21257979999999996, 0.21380379999999996, 0.2134714, 0.18941379999999994,
        0.21222579999999996, 0.21347139999999992, 0.21377299999999996 };
    double expected_r2[16] = { 6.286870108969513, 5.742220038107836,
        5.7080225607835695, 5.623290389581752, 5.742220038107832, 6.3274209876543175,
        6.291288603867465, 6.195658345930953, 5.708022560783573, 6.291288603867472,
        6.266256220080618, 6.170677280171318, 5.623290389581758, 6.195658345930966,
        6.170677280171324, 6.094109054547737 };
    double expected_D_prime[16] = { -9.6552, -9.44459999999999, -9.136799999999988,
        -8.680999999999989, -9.444599999999998, -9.240699999999984, -8.937399999999977,
        -8.488499999999984, -9.136799999999996, -8.93739999999999, -8.658399999999984,
        -8.219399999999993, -8.68099999999999, -8.488499999999991, -8.21939999999999,
        -7.814699999999995 };
    double expected_r[16] = { 0.023193673439522472, 0.023272634599981495,
        0.021243465874728862, 0.01919099466703808, 0.023272634599981454,
        0.023358527073393587, 0.021370047752011, 0.019268461077492888,
        0.021243465874728862, 0.021370047752011012, 0.020359977803327087,
        0.01793842604857987, 0.019190994667037817, 0.019268461077492804,
        0.017938426048579773, 0.0160605735196305 };
    double expected_Dz[16] = { 0.01958895999999996, -0.007941440000000037,
        -0.007572800000000046, -0.010558400000000029, -0.007941440000000022,
        0.01385535999999997, 0.014569599999999966, 0.015529599999999963,
        -0.007572800000000024, 0.01456959999999996, 0.015426399999999951,
        0.016271199999999948, -0.010558400000000011, 0.01552959999999999,
        0.016271199999999986, 0.017607999999999985 };
    double expected_pi2[16] = { 0.7201219600000001, 0.6895723600000001,
        0.6865174000000006, 0.6780314000000008, 0.6895723600000002, 0.6603187600000002,
        0.6573934000000002, 0.6492674000000002, 0.6865174000000002, 0.6573934000000003,
        0.6544810000000003, 0.6463910000000003, 0.6780314000000002, 0.6492674000000004,
        0.6463910000000005, 0.6384010000000007 };
    double expected_Dz_unbiased[16] = { -0.06387380952380949, -0.09312571428571428,
        -0.09361428571428566, -0.10075682539682536, -0.09312571428571428,
        -0.0734419047619048, -0.0730733333333334, -0.07171301587301597,
        -0.0936142857142857, -0.07307333333333343, -0.07261476190476202,
        -0.07147730158730167, -0.10075682539682543, -0.07171301587301596,
        -0.07147730158730159, -0.06988666666666674 };
    double expected_D2_unbiased[16] = { 0.19576484126984134, 0.1586769841269842,
        0.16093412698412704, 0.16485253968253985, 0.15867698412698414,
        0.1949926984126984, 0.19673555555555555, 0.19734825396825403,
        0.16093412698412699, 0.1967355555555555, 0.19879341269841264,
        0.19945182539682532, 0.16485253968253968, 0.19734825396825395,
        0.1994518253968253, 0.20091222222222213 };
    double expected_pi2_unbiased[16] = { 0.8910765079365083, 0.8571103174603181,
        0.853337460317461, 0.8434880952380959, 0.8571103174603178, 0.8182193650793657,
        0.8145322222222225, 0.8043504761904768, 0.8533374603174609, 0.8145322222222225,
        0.8108450793650795, 0.800729047619048, 0.8434880952380955, 0.8043504761904766,
        0.8007290476190477, 0.7906733333333332 };

    /* Inputs shared by every call: one sample set holding all ten samples. */
    tsk_id_t samples[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
    tsk_size_t set_sizes[1] = { 10 };
    double positions[4] = { 0.0, 2.0, 5.0, 6.0 };
    tsk_size_t num_positions = 4;
    double observed[16];
    tsk_size_t num_values = 16;
    tsk_treeseq_t ts;
    int ret;

    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);

    /* The output buffer is zeroed before each statistic is computed. */
    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_D(&ts, 1, set_sizes, samples, num_positions, NULL, positions,
        num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_D);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_D2(&ts, 1, set_sizes, samples, num_positions, NULL, positions,
        num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_D2);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_r2(&ts, 1, set_sizes, samples, num_positions, NULL, positions,
        num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_r2);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_D_prime(&ts, 1, set_sizes, samples, num_positions, NULL,
        positions, num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_D_prime);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_r(&ts, 1, set_sizes, samples, num_positions, NULL, positions,
        num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_r);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_Dz(&ts, 1, set_sizes, samples, num_positions, NULL, positions,
        num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_Dz);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_pi2(&ts, 1, set_sizes, samples, num_positions, NULL, positions,
        num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_pi2);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_Dz_unbiased(&ts, 1, set_sizes, samples, num_positions, NULL,
        positions, num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_Dz_unbiased);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_D2_unbiased(&ts, 1, set_sizes, samples, num_positions, NULL,
        positions, num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_D2_unbiased);

    tsk_memset(observed, 0, sizeof(observed));
    ret = tsk_treeseq_pi2_unbiased(&ts, 1, set_sizes, samples, num_positions, NULL,
        positions, num_positions, NULL, positions, TSK_STAT_BRANCH, observed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(num_values, observed, expected_pi2_unbiased);

    tsk_treeseq_free(&ts);
}
static void
test_paper_ex_two_site_subset(void)
{
tsk_treeseq_t ts;
double result[4];
int ret;
tsk_size_t s, result_size;
tsk_size_t sample_set_sizes[1];
tsk_size_t num_sample_sets;
tsk_id_t row_sites[2] = { 0, 1 };
tsk_id_t col_sites[2] = { 1, 2 };
double result_truth_1[4] = { 0.1111111111111111, 0.1111111111111111, 1, 1 };
double result_truth_2[1] = { 0.1111111111111111 };
double result_truth_3[4] = { 0.1111111111111111, 1, 0.1111111111111111, 1 };
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
tsk_id_t sample_sets[ts.num_samples];
sample_set_sizes[0] = ts.num_samples;
num_sample_sets = 1;
for (s = 0; s < ts.num_samples; s++) {
sample_sets[s] = (tsk_id_t) s;
}
result_size = 2 * 2;
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2,
row_sites, NULL, 2, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_sample_sets, result, result_truth_1);
result_size = 1 * 1;
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
col_sites[0] = 2;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 1,
row_sites, NULL, 1, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_sample_sets, result, result_truth_2);
result_size = 2 * 2;
tsk_memset(result, 0, sizeof(*result) * result_size * num_sample_sets);
row_sites[0] = 1;
row_sites[1] = 2;
col_sites[0] = 0;
col_sites[1] = 1;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 2,
row_sites, NULL, 2, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
assert_arrays_almost_equal(result_size * num_sample_sets, result, result_truth_3);
tsk_treeseq_free(&ts);
}
static void
test_two_locus_stat_input_errors(void)
{
tsk_treeseq_t ts;
int ret;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
tsk_size_t num_sites = ts.tables->sites.num_rows;
tsk_id_t *row_sites = tsk_malloc(num_sites * sizeof(*row_sites));
tsk_id_t *col_sites = tsk_malloc(num_sites * sizeof(*col_sites));
tsk_size_t sample_set_sizes[2] = { ts.num_samples, ts.num_samples };
tsk_size_t num_sample_sets = 1;
tsk_id_t index_tuples[2] = { 0 };
tsk_size_t num_index_tuples = 1;
tsk_id_t sample_sets[ts.num_samples * 2]; // need 2 sample sets for multipop
double positions[10] = { 0., 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9 };
double bad_col_positions[2] = { 0., 0. }; // used in 1 test to cover column check
double result[100];
tsk_size_t s;
for (s = 0; s < ts.num_samples; s++) {
sample_sets[s] = (tsk_id_t) s;
sample_sets[s + ts.num_samples] = (tsk_id_t) s;
}
for (s = 0; s < num_sites; s++) {
row_sites[s] = (tsk_id_t) s;
col_sites[s] = (tsk_id_t) s;
}
// begin with the happy path
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_two_locus_count_stat(&ts, num_sample_sets, sample_set_sizes,
sample_sets, 0, NULL, NULL, NULL, num_sites, row_sites, NULL, num_sites,
col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_RESULT_DIMS);
ret = tsk_treeseq_r2(&ts, 1, sample_set_sizes, sample_sets, num_sites, row_sites,
NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
sample_sets[1] = 0;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
sample_sets[1] = 1;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, TSK_STAT_SITE | TSK_STAT_BRANCH,
result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);
ret = tsk_treeseq_r2(&ts, 0, sample_set_sizes, sample_sets, num_sites, row_sites,
NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
sample_set_sizes[0] = 0;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EMPTY_SAMPLE_SET);
sample_set_sizes[0] = ts.num_samples;
sample_sets[1] = 10;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
sample_sets[1] = 1;
row_sites[0] = 1000;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
row_sites[0] = 0;
col_sites[num_sites - 1] = (tsk_id_t) num_sites;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
col_sites[num_sites - 1] = (tsk_id_t) num_sites - 1;
row_sites[0] = 1;
row_sites[1] = 0;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_UNSORTED_SITES);
row_sites[0] = 0;
row_sites[1] = 1;
row_sites[0] = 1;
row_sites[1] = 1;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, num_sites,
row_sites, NULL, num_sites, col_sites, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_DUPLICATE_SITES);
row_sites[0] = 0;
row_sites[1] = 1;
// Not an error condition, but we want to record this behavior. The method is robust
// to zero-length site/position inputs.
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL,
NULL, 0, NULL, NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 0, NULL,
NULL, 0, NULL, NULL, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, 0);
positions[9] = 1;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,
positions, 10, NULL, positions, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POSITION_OUT_OF_BOUNDS);
positions[9] = 0.9;
positions[0] = -0.1;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,
positions, 10, NULL, positions, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POSITION_OUT_OF_BOUNDS);
positions[0] = 0;
positions[0] = 0.1;
positions[1] = 0;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,
positions, 10, NULL, positions, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_UNSORTED_POSITIONS);
positions[0] = 0;
positions[1] = 0.1;
// rows always fail first, check columns
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,
positions, 2, NULL, bad_col_positions, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_DUPLICATE_POSITIONS);
positions[0] = 0;
positions[1] = 0;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,
positions, 10, NULL, positions, TSK_STAT_BRANCH, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_DUPLICATE_POSITIONS);
positions[0] = 0;
positions[1] = 0.1;
ret = tsk_treeseq_r2(&ts, num_sample_sets, sample_set_sizes, sample_sets, 10, NULL,
positions, 10, NULL, positions, TSK_STAT_NODE, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);
num_sample_sets = 2;
num_index_tuples = 0;
ret = tsk_treeseq_r2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_sites, row_sites, NULL, num_sites, col_sites,
NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_INDEX_TUPLES);
num_sample_sets = 0;
num_index_tuples = 1;
ret = tsk_treeseq_D2_ij(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_sites, row_sites, NULL, num_sites, col_sites,
NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
num_sample_sets = 2;
index_tuples[0] = 2;
ret = tsk_treeseq_D2_ij_unbiased(&ts, num_sample_sets, sample_set_sizes, sample_sets,
num_index_tuples, index_tuples, num_sites, row_sites, NULL, num_sites, col_sites,
NULL, 0, result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLE_SET_INDEX);
tsk_treeseq_free(&ts);
tsk_safe_free(row_sites);
tsk_safe_free(col_sites);
}
/*
 * Divergence matrix on the smallest interesting tree: two samples joined by
 * one parent, with one mutation on node 0 at each of two sites. Covers the
 * branch and site modes, span normalisation, the "no sample list" call form,
 * and the error codes for unsupported options and bad sample ids.
 */
static void
test_simplest_divergence_matrix(void)
{
    int ret;
    tsk_treeseq_t ts;
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 2 0,1\n";
    const char *sites = "0.1 A\n"
                        "0.6 A\n";
    const char *mutations = "0 0 B -1\n"
                            "1 0 B -1\n";
    double expected_branch[4] = { 0, 2, 2, 0 };
    double expected_site[4] = { 0, 2, 2, 0 };
    double D[4];
    tsk_id_t samples[] = { 0, 1 };

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

    /* Branch mode, with and without span normalisation. */
    ret = tsk_treeseq_divergence_matrix(
        &ts, 2, NULL, samples, 0, NULL, TSK_STAT_BRANCH, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_branch, D);

    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 0, NULL,
        TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_branch, D);

    /* Options 0, span normalisation alone, and explicit site mode are all
     * checked against the site expectations. */
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 0, NULL, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_site, D);

    ret = tsk_treeseq_divergence_matrix(
        &ts, 2, NULL, samples, 0, NULL, TSK_STAT_SPAN_NORMALISE, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_site, D);

    ret = tsk_treeseq_divergence_matrix(
        &ts, 2, NULL, samples, 0, NULL, TSK_STAT_SITE, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_site, D);

    /* Without a sample list the same 2x2 expectations are asserted. */
    ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_branch, D);

    ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_site, D);

    /* Option combinations that must be rejected. */
    ret = tsk_treeseq_divergence_matrix(&ts, 0, NULL, NULL, 0, NULL, TSK_STAT_NODE, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSUPPORTED_STAT_MODE);

    ret = tsk_treeseq_divergence_matrix(
        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_POLARISED, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_STAT_POLARISED_UNSUPPORTED);

    ret = tsk_treeseq_divergence_matrix(
        &ts, 0, NULL, NULL, 0, NULL, TSK_STAT_SITE | TSK_STAT_BRANCH, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MULTIPLE_STAT_MODES);

    /* Bad sample ids: negative, past the node table, duplicated, and an
     * existing node (id 2) that is rejected as a bad sample. */
    samples[0] = -1;
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 0, NULL, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    samples[0] = 3;
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 0, NULL, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    samples[0] = 1;
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 0, NULL, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
    ret = tsk_treeseq_divergence_matrix(
        &ts, 2, NULL, samples, 0, NULL, TSK_STAT_BRANCH, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);

    samples[0] = 2;
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 0, NULL, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SAMPLES);

    tsk_treeseq_free(&ts);
}
/*
 * Windowed divergence matrix on the two-sample tree. The two windows
 * {0, 0.5, 1} each contain one site, so the expected output is two stacked
 * 2x2 matrices. Also checks a single window starting at windows[1] and the
 * error codes for malformed window arrays.
 */
static void
test_simplest_divergence_matrix_windows(void)
{
    int ret;
    tsk_treeseq_t ts;
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 2 0,1\n";
    const char *sites = "0.1 A\n"
                        "0.6 A\n";
    const char *mutations = "0 0 B -1\n"
                            "1 0 B -1\n";
    tsk_id_t samples[] = { 0, 1 };
    double breakpoints[] = { 0, 0.5, 1 };
    double expected_branch[8] = { 0, 1, 1, 0, 0, 1, 1, 0 };
    double expected_site[8] = { 0, 1, 1, 0, 0, 1, 1, 0 };
    double D[8];

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

    /* Both windows: 2 x (2x2) values. */
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 2, breakpoints, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(8, expected_site, D);

    ret = tsk_treeseq_divergence_matrix(
        &ts, 2, NULL, samples, 2, breakpoints, TSK_STAT_BRANCH, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(8, expected_branch, D);

    /* Windows for the second half */
    ret = tsk_treeseq_divergence_matrix(
        &ts, 2, NULL, samples, 1, breakpoints + 1, TSK_STAT_SITE, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_site, D);

    ret = tsk_treeseq_divergence_matrix(
        &ts, 2, NULL, samples, 1, breakpoints + 1, TSK_STAT_BRANCH, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(4, expected_branch, D);

    /* A window array passed together with a window count of zero. */
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 0, breakpoints, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NUM_WINDOWS);

    /* Negative left edge: {-1, 0.5, 1}. */
    breakpoints[0] = -1;
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 2, breakpoints, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* Right edge beyond 1: {0.45, 0.5, 1.5}. */
    breakpoints[0] = 0.45;
    breakpoints[2] = 1.5;
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 2, breakpoints, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    /* Breakpoints not increasing: {0.55, 0.5, 1.0}. */
    breakpoints[0] = 0.55;
    breakpoints[2] = 1.0;
    ret = tsk_treeseq_divergence_matrix(&ts, 2, NULL, samples, 2, breakpoints, 0, D);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_WINDOWS);

    tsk_treeseq_free(&ts);
}
/*
 * Divergence matrix when the parent node (id 2) is itself flagged as a
 * sample. No sites are defined, so the site-mode matrix is expected to be
 * all zeros, while the branch-mode matrix is checked against fixed values.
 */
static void
test_simplest_divergence_matrix_internal_sample(void)
{
    int ret;
    tsk_treeseq_t ts;
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 1 0\n";
    const char *edges = "0 1 2 0,1\n";
    tsk_id_t samples[] = { 0, 1, 2 };
    double expected_branch[9] = { 0, 2, 1, 2, 0, 1, 1, 1, 0 };
    double expected_site[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
    double D[9];

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);

    ret = tsk_treeseq_divergence_matrix(
        &ts, 3, NULL, samples, 0, NULL, TSK_STAT_BRANCH, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(9, expected_branch, D);

    ret = tsk_treeseq_divergence_matrix(
        &ts, 3, NULL, samples, 0, NULL, TSK_STAT_SITE, D);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_arrays_almost_equal(9, expected_site, D);

    tsk_treeseq_free(&ts);
}
/* Runs the shared divergence-matrix verifier on the multiroot example, in
 * branch and site mode, each with and without span normalisation. */
static void
test_multiroot_divergence_matrix(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL,
        multiroot_ex_sites, multiroot_ex_mutations, NULL, NULL, 0);

    /* Branch mode */
    verify_divergence_matrix(&ts, TSK_STAT_BRANCH);
    verify_divergence_matrix(&ts, TSK_STAT_BRANCH | TSK_STAT_SPAN_NORMALISE);

    /* Site mode */
    verify_divergence_matrix(&ts, TSK_STAT_SITE);
    verify_divergence_matrix(&ts, TSK_STAT_SITE | TSK_STAT_SPAN_NORMALISE);

    tsk_treeseq_free(&ts);
}
/* Runs the pair-coalescence-count verifier on the nonbinary example for every
 * combination of the span- and pair-normalisation flags. */
static void
test_pair_coalescence_counts(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,
        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);

    /* No normalisation, then each flag alone, then both together. */
    verify_pair_coalescence_counts(&ts, 0);
    verify_pair_coalescence_counts(&ts, TSK_STAT_SPAN_NORMALISE);
    verify_pair_coalescence_counts(&ts, TSK_STAT_PAIR_NORMALISE);
    verify_pair_coalescence_counts(
        &ts, TSK_STAT_SPAN_NORMALISE | TSK_STAT_PAIR_NORMALISE);

    tsk_treeseq_free(&ts);
}
/* Runs the pair-coalescence-count verifier on the "missing" example (nodes
 * and edges only), with and without span normalisation. */
static void
test_pair_coalescence_counts_missing(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(
        &ts, 5, missing_ex_nodes, missing_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);

    verify_pair_coalescence_counts(&ts, 0);
    verify_pair_coalescence_counts(&ts, TSK_STAT_SPAN_NORMALISE);

    tsk_treeseq_free(&ts);
}
/* Runs the pair-coalescence-quantile verifier on the nonbinary example. */
static void
test_pair_coalescence_quantiles(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,
        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);

    verify_pair_coalescence_quantiles(&ts);

    tsk_treeseq_free(&ts);
}
/* Runs the pair-coalescence-rate verifier on the nonbinary example. */
static void
test_pair_coalescence_rates(void)
{
    tsk_treeseq_t ts;

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,
        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);

    verify_pair_coalescence_rates(&ts);

    tsk_treeseq_free(&ts);
}
/* Test-suite entry point: builds the table of (name, function) pairs and
 * hands it to test_main together with the command-line arguments.
 *
 * Each entry's name is the identifier of the function it runs, so the name
 * shown for a test tells you which function was executed. The table is
 * terminated by a { NULL, NULL } sentinel. */
int
main(int argc, char **argv)
{
    CU_TestInfo tests[] = {
        { "test_general_stat_input_errors", test_general_stat_input_errors },

        { "test_empty_ts_ld", test_empty_ts_ld },
        { "test_empty_ts_mean_descendants", test_empty_ts_mean_descendants },
        { "test_empty_ts_genealogical_nearest_neighbours",
            test_empty_ts_genealogical_nearest_neighbours },
        { "test_empty_ts_general_stat", test_empty_ts_general_stat },
        { "test_empty_ts_afs", test_empty_ts_afs },

        { "test_single_tree_ld", test_single_tree_ld },
        { "test_single_tree_mean_descendants", test_single_tree_mean_descendants },
        { "test_single_tree_genealogical_nearest_neighbours",
            test_single_tree_genealogical_nearest_neighbours },
        { "test_single_tree_general_stat", test_single_tree_general_stat },
        { "test_single_tree_general_stat_errors", test_single_tree_general_stat_errors },
        { "test_single_tree_divergence_matrix", test_single_tree_divergence_matrix },
        { "test_single_tree_divergence_matrix_internal_samples",
            test_single_tree_divergence_matrix_internal_samples },
        { "test_single_tree_divergence_matrix_multi_root",
            test_single_tree_divergence_matrix_multi_root },

        { "test_paper_ex_ld", test_paper_ex_ld },
        { "test_paper_ex_mean_descendants", test_paper_ex_mean_descendants },
        { "test_paper_ex_genealogical_nearest_neighbours",
            test_paper_ex_genealogical_nearest_neighbours },
        { "test_paper_ex_general_stat_errors", test_paper_ex_general_stat_errors },
        { "test_paper_ex_general_stat", test_paper_ex_general_stat },
        { "test_paper_ex_trait_covariance_errors",
            test_paper_ex_trait_covariance_errors },
        { "test_paper_ex_trait_covariance", test_paper_ex_trait_covariance },
        { "test_paper_ex_trait_correlation_errors",
            test_paper_ex_trait_correlation_errors },
        { "test_paper_ex_trait_correlation", test_paper_ex_trait_correlation },
        { "test_paper_ex_trait_linear_model_errors",
            test_paper_ex_trait_linear_model_errors },
        { "test_paper_ex_trait_linear_model", test_paper_ex_trait_linear_model },
        { "test_paper_ex_diversity_errors", test_paper_ex_diversity_errors },
        { "test_paper_ex_diversity", test_paper_ex_diversity },
        { "test_paper_ex_segregating_sites_errors",
            test_paper_ex_segregating_sites_errors },
        { "test_paper_ex_segregating_sites", test_paper_ex_segregating_sites },
        { "test_paper_ex_Y1_errors", test_paper_ex_Y1_errors },
        { "test_paper_ex_Y1", test_paper_ex_Y1 },
        { "test_paper_ex_divergence_errors", test_paper_ex_divergence_errors },
        { "test_paper_ex_divergence", test_paper_ex_divergence },
        { "test_paper_ex_genetic_relatedness_errors",
            test_paper_ex_genetic_relatedness_errors },
        { "test_paper_ex_genetic_relatedness", test_paper_ex_genetic_relatedness },
        { "test_paper_ex_genetic_relatedness_weighted",
            test_paper_ex_genetic_relatedness_weighted },
        { "test_paper_ex_genetic_relatedness_weighted_errors",
            test_paper_ex_genetic_relatedness_weighted_errors },
        { "test_empty_genetic_relatedness_vector",
            test_empty_genetic_relatedness_vector },
        { "test_paper_ex_genetic_relatedness_vector",
            test_paper_ex_genetic_relatedness_vector },
        { "test_paper_ex_genetic_relatedness_vector_errors",
            test_paper_ex_genetic_relatedness_vector_errors },
        { "test_paper_ex_genetic_relatedness_vector_node_errors",
            test_paper_ex_genetic_relatedness_vector_node_errors },
        { "test_paper_ex_Y2_errors", test_paper_ex_Y2_errors },
        { "test_paper_ex_Y2", test_paper_ex_Y2 },
        { "test_paper_ex_f2_errors", test_paper_ex_f2_errors },
        { "test_paper_ex_f2", test_paper_ex_f2 },
        { "test_paper_ex_Y3_errors", test_paper_ex_Y3_errors },
        { "test_paper_ex_Y3", test_paper_ex_Y3 },
        { "test_paper_ex_f3_errors", test_paper_ex_f3_errors },
        { "test_paper_ex_f3", test_paper_ex_f3 },
        { "test_paper_ex_f4_errors", test_paper_ex_f4_errors },
        { "test_paper_ex_f4", test_paper_ex_f4 },
        { "test_paper_ex_afs_errors", test_paper_ex_afs_errors },
        { "test_paper_ex_afs", test_paper_ex_afs },
        { "test_paper_ex_divergence_matrix", test_paper_ex_divergence_matrix },

        { "test_unary_ex_afs", test_unary_ex_afs },

        { "test_nonbinary_ex_ld", test_nonbinary_ex_ld },
        { "test_nonbinary_ex_mean_descendants", test_nonbinary_ex_mean_descendants },
        { "test_nonbinary_ex_genealogical_nearest_neighbours",
            test_nonbinary_ex_genealogical_nearest_neighbours },
        { "test_nonbinary_ex_general_stat", test_nonbinary_ex_general_stat },
        { "test_nonbinary_ex_general_stat_errors",
            test_nonbinary_ex_general_stat_errors },

        { "test_caterpillar_tree_ld", test_caterpillar_tree_ld },
        { "test_ld_multi_mutations", test_ld_multi_mutations },
        { "test_ld_silent_mutations", test_ld_silent_mutations },

        { "test_paper_ex_two_site", test_paper_ex_two_site },
        { "test_paper_ex_two_branch", test_paper_ex_two_branch },
        { "test_two_site_correlated_multiallelic",
            test_two_site_correlated_multiallelic },
        { "test_two_site_uncorrelated_multiallelic",
            test_two_site_uncorrelated_multiallelic },
        { "test_two_site_backmutation", test_two_site_backmutation },
        /* The name must match the function actually run; this entry was
         * previously labelled "test_two_locus_site_all_stats". */
        { "test_two_locus_branch_all_stats", test_two_locus_branch_all_stats },
        { "test_paper_ex_two_site_subset", test_paper_ex_two_site_subset },
        { "test_two_locus_stat_input_errors", test_two_locus_stat_input_errors },

        { "test_simplest_divergence_matrix", test_simplest_divergence_matrix },
        { "test_simplest_divergence_matrix_windows",
            test_simplest_divergence_matrix_windows },
        { "test_simplest_divergence_matrix_internal_sample",
            test_simplest_divergence_matrix_internal_sample },
        { "test_multiroot_divergence_matrix", test_multiroot_divergence_matrix },

        { "test_pair_coalescence_counts", test_pair_coalescence_counts },
        { "test_pair_coalescence_counts_missing", test_pair_coalescence_counts_missing },
        { "test_pair_coalescence_quantiles", test_pair_coalescence_quantiles },
        { "test_pair_coalescence_rates", test_pair_coalescence_rates },

        { NULL, NULL },
    };

    return test_main(tests, argc, argv);
}
================================================
FILE: c/tests/test_tables.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2023 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
#include "tskit/core.h"
#include
#include
#include
#include
/* Rewrites tables->migrations in place so that its rows appear in the
 * opposite order. Any failure aborts the current test via a fatal assert. */
static void
reverse_migrations(tsk_table_collection_t *tables)
{
    tsk_migration_table_t reversed;
    tsk_migration_t row;
    tsk_id_t next, ret_id;
    int ret;

    /* Copy and then clear: an easy way to carry over the metadata schema
     * while starting from zero rows. */
    ret = tsk_migration_table_copy(&tables->migrations, &reversed, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_migration_table_clear(&reversed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Walk the source table from its last row down to row 0. */
    next = (tsk_id_t) tables->migrations.num_rows;
    while (next > 0) {
        next--;
        ret = tsk_migration_table_get_row(&tables->migrations, next, &row);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret_id = tsk_migration_table_add_row(&reversed, row.left, row.right, row.node,
            row.source, row.dest, row.time, row.metadata, row.metadata_length);
        CU_ASSERT_FATAL(ret_id >= 0);
    }

    /* The destination is already initialised, hence TSK_NO_INIT. */
    ret = tsk_migration_table_copy(&reversed, &tables->migrations, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_migration_table_free(&reversed);
}
/* Rewrites tables->edges in place so that its rows appear in the opposite
 * order. Any failure aborts the current test via a fatal assert. */
static void
reverse_edges(tsk_table_collection_t *tables)
{
    tsk_edge_table_t reversed;
    tsk_edge_t row;
    tsk_id_t next, ret_id;
    int ret;

    /* Copy and then clear: an easy way to carry over the metadata schema
     * while starting from zero rows. */
    ret = tsk_edge_table_copy(&tables->edges, &reversed, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_edge_table_clear(&reversed);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Walk the source table from its last row down to row 0. */
    next = (tsk_id_t) tables->edges.num_rows;
    while (next > 0) {
        next--;
        ret = tsk_edge_table_get_row(&tables->edges, next, &row);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret_id = tsk_edge_table_add_row(&reversed, row.left, row.right, row.parent,
            row.child, row.metadata, row.metadata_length);
        CU_ASSERT_FATAL(ret_id >= 0);
    }

    /* The destination is already initialised, hence TSK_NO_INIT. */
    ret = tsk_edge_table_copy(&reversed, &tables->edges, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_edge_table_free(&reversed);
}
/* Rewrites tables->mutations in place so that its rows appear in the opposite
 * order, remapping each non-null parent ID to that parent's new row index.
 * Any failure aborts the current test via a fatal assert. */
static void
reverse_mutations(tsk_table_collection_t *tables)
{
    int ret;
    tsk_mutation_table_t mutations;
    tsk_mutation_t mutation;
    tsk_id_t j, ret_id;
    tsk_id_t new_parent;
    tsk_id_t n = (tsk_id_t) tables->mutations.num_rows;

    /* Copy and then clear, as reverse_edges and reverse_migrations do: an
     * easy way to carry over the metadata schema. A freshly initialised
     * scratch table would not carry it, so the copy back below would
     * replace the caller's schema. */
    ret = tsk_mutation_table_copy(&tables->mutations, &mutations, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_mutation_table_clear(&mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    for (j = n - 1; j >= 0; j--) {
        ret = tsk_mutation_table_get_row(&tables->mutations, j, &mutation);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* Old row p lands at row n - p - 1 once the order is reversed. */
        new_parent = (mutation.parent == TSK_NULL) ? TSK_NULL : n - mutation.parent - 1;
        ret_id = tsk_mutation_table_add_row(&mutations, mutation.site, mutation.node,
            new_parent, mutation.time, mutation.derived_state,
            mutation.derived_state_length, mutation.metadata, mutation.metadata_length);
        CU_ASSERT_FATAL(ret_id >= 0);
    }

    /* The destination is already initialised, hence TSK_NO_INIT. */
    ret = tsk_mutation_table_copy(&mutations, &tables->mutations, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_mutation_table_free(&mutations);
}
/* Rebuilds tables->edges with the same left/right/parent/child values, giving
 * row j the metadata string "md_<j>\n". Any failure aborts the current test
 * via a fatal assert. */
static void
insert_edge_metadata(tsk_table_collection_t *tables)
{
    tsk_edge_table_t annotated;
    tsk_edge_t row;
    tsk_id_t k, ret_id;
    const tsk_id_t num_edges = (tsk_id_t) tables->edges.num_rows;
    char label[100];
    int ret;

    ret = tsk_edge_table_init(&annotated, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    k = 0;
    while (k < num_edges) {
        ret = tsk_edge_table_get_row(&tables->edges, k, &row);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* The metadata is derived from the row index alone. */
        snprintf(label, sizeof(label), "md_%lld\n", (long long) k);
        ret_id = tsk_edge_table_add_row(&annotated, row.left, row.right, row.parent,
            row.child, label, (tsk_size_t) strlen(label));
        CU_ASSERT_FATAL(ret_id >= 0);
        k++;
    }

    /* The destination is already initialised, hence TSK_NO_INIT. */
    ret = tsk_edge_table_copy(&annotated, &tables->edges, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_edge_table_free(&annotated);
}
/* Exercises tsk_table_collection_equals with each TSK_CMP_* option: two
 * collections are made to differ in exactly one respect (time units,
 * top-level metadata, metadata schema, provenance, table rows, reference
 * sequence) and the comparison is checked with and without the option that
 * should hide that difference. */
static void
test_table_collection_equals_options(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_table_collection_t tc1, tc2;

    char example_time_units[100] = "An example of time units with unicode ⏰";
    char example_metadata[100] = "An example of metadata with unicode 🎄🌳🌴🌲🎋";
    char example_metadata_schema[100]
        = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_time_units_length = (tsk_size_t) strlen(example_time_units);
    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);
    tsk_size_t example_metadata_schema_length
        = (tsk_size_t) strlen(example_metadata_schema);

    // Test equality empty tables
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_init(&tc2, 0);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_TRUE(ret);

    // Adding some meat to the tables
    ret_id = tsk_node_table_add_row(&tc1.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_node_table_add_row(&tc1.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id
        = tsk_individual_table_add_row(&tc1.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_population_table_add_row(&tc1.populations, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&tc1.edges, 0.0, 1.0, 1, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tc1.sites, 0.2, "A", 1, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tc1.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);

    // Equality of empty vs non-empty
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_FALSE(ret);
    ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT);
    CU_ASSERT_EQUAL(ret, 0);

    // Equivalent except for time_units. Set the time units (not the
    // metadata) on one side, check the difference is seen, then bring the
    // other side back in line so the remaining checks start from equality.
    ret = tsk_table_collection_set_time_units(
        &tc1, example_time_units, example_time_units_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_FALSE(ret);
    ret = tsk_table_collection_set_time_units(
        &tc2, example_time_units, example_time_units_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_TRUE(ret);

    // Equivalent except for metadata
    ret = tsk_table_collection_set_metadata(
        &tc1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA);
    CU_ASSERT_TRUE(ret);
    /* TSK_CMP_IGNORE_METADATA implies TSK_CMP_IGNORE_TS_METADATA */
    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_METADATA);
    CU_ASSERT_TRUE(ret);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_FALSE(ret);
    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE);
    CU_ASSERT_FALSE(ret);
    ret = tsk_table_collection_set_metadata(
        &tc2, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_TRUE(ret);

    // Equivalent except for metadata schema
    ret = tsk_table_collection_set_metadata_schema(
        &tc1, example_metadata_schema, example_metadata_schema_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA);
    CU_ASSERT_TRUE(ret);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_FALSE(ret);
    ret = tsk_table_collection_set_metadata_schema(
        &tc2, example_metadata_schema, example_metadata_schema_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_TRUE(ret);

    // Ignore provenance
    ret_id = tsk_provenance_table_add_row(&tc1.provenances, "time", 4, "record", 6);
    CU_ASSERT_EQUAL(ret_id, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE);
    CU_ASSERT_TRUE(ret);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_FALSE(ret);
    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA);
    CU_ASSERT_FALSE(ret);
    ret_id = tsk_provenance_table_add_row(&tc2.provenances, "time", 4, "record", 6);
    /* Check the ID just returned (first provenance row in tc2), not the
     * stale result of the previous comparison. */
    CU_ASSERT_EQUAL(ret_id, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE);
    CU_ASSERT_TRUE(ret);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_TRUE(ret);

    // Ignore provenance timestamp
    ret_id = tsk_provenance_table_add_row(&tc1.provenances, "time", 4, "record", 6);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_provenance_table_add_row(&tc2.provenances, "other", 5, "record", 6);
    CU_ASSERT_FATAL(ret_id >= 0);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_PROVENANCE));
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TIMESTAMPS));

    // Ignore provenance and top-level metadata.
    ret = tsk_provenance_table_clear(&tc1.provenances);
    CU_ASSERT_EQUAL(ret, 0);
    example_metadata[0] = 'J';
    ret = tsk_table_collection_set_metadata(
        &tc1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_equals(&tc1, &tc2, 0);
    CU_ASSERT_FALSE(ret);
    ret = tsk_table_collection_equals(
        &tc1, &tc2, TSK_CMP_IGNORE_TS_METADATA | TSK_CMP_IGNORE_PROVENANCE);
    CU_ASSERT_TRUE(ret);

    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);

    // Check what happens when one of the tables just differs by metadata.
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_init(&tc2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Each population table is empty here, so the new row must get ID 0. */
    ret_id = tsk_population_table_add_row(&tc1.populations, "metadata", 8);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_population_table_add_row(&tc2.populations, "", 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_METADATA));
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);

    // Ignore tables
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_init(&tc2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_metadata(
        &tc1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_set_metadata(
        &tc2, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
    // Add one row for each table we're ignoring
    ret_id
        = tsk_individual_table_add_row(&tc1.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_node_table_add_row(&tc1.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&tc1.edges, 0.0, 1.0, 1, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_migration_table_add_row(&tc1.migrations, 0, 0, 0, 0, 0, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tc1.sites, 0.2, "A", 1, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tc1.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    ret_id = tsk_population_table_add_row(&tc1.populations, NULL, 0);
    CU_ASSERT(ret_id >= 0);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_TABLES));
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);

    // Ignore reference sequence
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_init(&tc2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_metadata(
        &tc1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_table_collection_set_metadata(
        &tc2, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
    ret = tsk_reference_sequence_set_data(&tc1.reference_sequence, "A", 1);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
    CU_ASSERT_TRUE(
        tsk_table_collection_equals(&tc1, &tc2, TSK_CMP_IGNORE_REFERENCE_SEQUENCE));
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);
}
static void
test_table_collection_simplify_errors(void)
{
int ret;
tsk_table_collection_t tables;
tsk_id_t samples[] = { 0, 1 };
tsk_id_t ret_id;
const char *individuals = "1 0.25 -2\n";
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
/* Bad samples */
samples[0] = -1;
ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
samples[0] = 10;
ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
samples[0] = 0;
/* Duplicate samples */
samples[0] = 0;
samples[1] = 0;
ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
samples[0] = 0;
ret_id = tsk_site_table_add_row(&tables.sites, 0, "A", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tables.sites, 0, "A", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SITE_POSITION);
/* Out of order positions */
tables.sites.position[0] = 0.5;
ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_SITES);
/* Position out of bounds */
tables.sites.position[0] = 1.5;
ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION);
tsk_site_table_truncate(&tables.sites, 0);
tables.sites.position[0] = 0;
/* Individual out of bounds */
parse_individuals(individuals, &tables.individuals);
CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 1);
ret = tsk_table_collection_simplify(&tables, samples, 0, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
/* TODO More tests for this: see
* https://github.com/tskit-dev/msprime/issues/517 */
tsk_table_collection_free(&tables);
}
/* Tracks when a reference sequence counts as "null" according to
 * tsk_reference_sequence_is_null as its data, url, metadata and metadata
 * schema fields are set and cleared. */
static void
test_reference_sequence_state_machine(void)
{
    tsk_reference_sequence_t r1;

    /* A freshly initialised object has no fields set and is null. */
    tsk_reference_sequence_init(&r1, 0);
    CU_ASSERT_EQUAL(r1.data, NULL);
    CU_ASSERT_EQUAL(r1.url, NULL);
    CU_ASSERT_EQUAL(r1.metadata, NULL);
    CU_ASSERT_EQUAL(r1.metadata_schema, NULL);
    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));

    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, "x", 1), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    /* Setting the value back to NULL makes the whole reference object NULL */
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, NULL, 0), 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));
    tsk_reference_sequence_free(&r1);
    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));

    /* Any empty string is the same thing. */
    tsk_reference_sequence_init(&r1, 0);
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, "", 0), 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));
    tsk_reference_sequence_free(&r1);

    /* Setting any single one of the other fields makes it non-null. */
    tsk_reference_sequence_init(&r1, 0);
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_url(&r1, "x", 1), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    tsk_reference_sequence_free(&r1);

    tsk_reference_sequence_init(&r1, 0);
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata(&r1, "x", 1), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    tsk_reference_sequence_free(&r1);

    tsk_reference_sequence_init(&r1, 0);
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata_schema(&r1, "x", 1), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    tsk_reference_sequence_free(&r1);

    /* Set all four fields, then empty them one at a time: the object only
     * becomes null again once the last field has been emptied. */
    tsk_reference_sequence_init(&r1, 0);
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata(&r1, "x", 1), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata_schema(&r1, "x", 1), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_url(&r1, "x", 1), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, "x", 1), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));

    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata(&r1, "", 0), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_metadata_schema(&r1, "", 0), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_url(&r1, "", 0), 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_is_null(&r1));
    CU_ASSERT_EQUAL(tsk_reference_sequence_set_data(&r1, "", 0), 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_is_null(&r1));

    tsk_reference_sequence_free(&r1);
}
/* Checks that the "takeset" setters, which are handed heap buffers, leave a
 * reference sequence equal to one built with the ordinary setters, and that
 * the two kinds of setter can overwrite one another in either order. */
static void
test_reference_sequence_take(void)
{
    int ret;
    tsk_reference_sequence_t r1;
    tsk_reference_sequence_t r2;
    const char *const_data = "data";
    const char *const_metadata = "metadata";
    char *takeset_data = strdup(const_data);
    char *takeset_metadata = strdup(const_metadata);

    /* strdup can fail; the buffers are passed to strlen below, so stop here
     * rather than dereference NULL. */
    CU_ASSERT_FATAL(takeset_data != NULL);
    CU_ASSERT_FATAL(takeset_metadata != NULL);

    ret = tsk_reference_sequence_init(&r1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_set_data(&r1, const_data, strlen(const_data));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_set_metadata(
        &r1, const_metadata, strlen(const_metadata));
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Build r2 field by field with takeset; it matches r1 only once both
     * fields are in place. */
    ret = tsk_reference_sequence_init(&r2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));
    ret = tsk_reference_sequence_takeset_data(&r2, takeset_data, strlen(takeset_data));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));
    ret = tsk_reference_sequence_takeset_metadata(
        &r2, takeset_metadata, strlen(takeset_metadata));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    /* Writing over these with copies doesn't lose memory */
    ret = tsk_reference_sequence_set_data(&r2, const_data, strlen(const_data));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_set_metadata(
        &r2, const_metadata, strlen(const_metadata));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    /* The original copies are gone, make some new ones */
    takeset_data = strdup(const_data);
    takeset_metadata = strdup(const_metadata);
    CU_ASSERT_FATAL(takeset_data != NULL);
    CU_ASSERT_FATAL(takeset_metadata != NULL);
    ret = tsk_reference_sequence_takeset_data(&r1, takeset_data, strlen(takeset_data));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_takeset_metadata(
        &r1, takeset_metadata, strlen(takeset_metadata));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    tsk_reference_sequence_free(&r1);
    tsk_reference_sequence_free(&r2);
}
/* Checks tsk_reference_sequence_equals and tsk_reference_sequence_copy: two
 * reference sequences are updated one field at a time (data, url, metadata,
 * metadata schema) and compared after every step. */
static void
test_reference_sequence(void)
{
    const char example_data[100] = "An example string with unicode 🎄🌳🌴🌲🎋";
    const char example_url[100] = "An example url with unicode 🎄🌳🌴🌲🎋";
    const char example_metadata[100] = "An example metadata with unicode 🎄🌳🌴🌲🎋";
    const char example_schema[100] = "An example schema with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_data_length = (tsk_size_t) strlen(example_data);
    tsk_size_t example_url_length = (tsk_size_t) strlen(example_url);
    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);
    tsk_size_t example_schema_length = (tsk_size_t) strlen(example_schema);
    tsk_reference_sequence_t r1;
    tsk_reference_sequence_t r2;
    int ret;

    tsk_reference_sequence_init(&r1, 0);
    tsk_reference_sequence_init(&r2, 0);

    /* NULL sequences are initially equal */
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    /* Data: setting it creates a difference; emptying it removes it again. */
    ret = tsk_reference_sequence_set_data(&r1, example_data, example_data_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));

    ret = tsk_reference_sequence_set_data(&r1, "", 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    ret = tsk_reference_sequence_set_data(&r2, "", 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    ret = tsk_reference_sequence_set_data(&r1, example_data, example_data_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));

    ret = tsk_reference_sequence_set_data(&r2, example_data, example_data_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    /* URL */
    ret = tsk_reference_sequence_set_url(&r1, example_url, example_url_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));

    ret = tsk_reference_sequence_set_url(&r2, example_url, example_url_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    /* Metadata: a difference here is hidden by TSK_CMP_IGNORE_METADATA. */
    ret = tsk_reference_sequence_set_metadata(
        &r1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, TSK_CMP_IGNORE_METADATA));

    ret = tsk_reference_sequence_set_metadata(
        &r2, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, TSK_CMP_IGNORE_METADATA));

    /* Metadata schema: likewise hidden by TSK_CMP_IGNORE_METADATA. */
    ret = tsk_reference_sequence_set_metadata_schema(
        &r1, example_schema, example_schema_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_reference_sequence_equals(&r1, &r2, 0));
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, TSK_CMP_IGNORE_METADATA));

    ret = tsk_reference_sequence_set_metadata_schema(
        &r2, example_schema, example_schema_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, TSK_CMP_IGNORE_METADATA));

    // Test copy
    tsk_reference_sequence_free(&r1);
    tsk_reference_sequence_free(&r2);

    /* The first copy is made without TSK_NO_INIT and r2 is not
     * re-initialised by hand beforehand; later copies reuse r2 and so pass
     * TSK_NO_INIT. */
    tsk_reference_sequence_init(&r1, 0);
    ret = tsk_reference_sequence_set_data(&r1, example_data, example_data_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_copy(&r1, &r2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    ret = tsk_reference_sequence_set_url(&r1, example_url, example_url_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_copy(&r1, &r2, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    ret = tsk_reference_sequence_set_metadata(
        &r1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_copy(&r1, &r2, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    ret = tsk_reference_sequence_set_metadata_schema(
        &r1, example_schema, example_schema_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_copy(&r1, &r2, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_reference_sequence_equals(&r1, &r2, 0));

    tsk_reference_sequence_free(&r1);
    tsk_reference_sequence_free(&r2);
}
static void
test_table_collection_reference_sequence(void)
{
int ret;
tsk_table_collection_t tc1, tc2;
char example_data[100] = "An example string with unicode 🎄🌳🌴🌲🎋";
tsk_size_t example_data_length = (tsk_size_t) strlen(example_data);
char example_url[100] = "An example url with unicode 🎄🌳🌴🌲🎋";
tsk_size_t example_url_length = (tsk_size_t) strlen(example_url);
char example_metadata[100] = "An example metadata with unicode 🎄🌳🌴🌲🎋";
tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);
char example_schema[100] = "An example schema with unicode 🎄🌳🌴🌲🎋";
tsk_size_t example_schema_length = (tsk_size_t) strlen(example_schema);
// Test equality
ret = tsk_table_collection_init(&tc1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_init(&tc2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_data(
&tc1.reference_sequence, example_data, example_data_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_data(
&tc2.reference_sequence, example_data, example_data_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_url(
&tc1.reference_sequence, example_url, example_url_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_url(
&tc2.reference_sequence, example_url, example_url_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_metadata(
&tc1.reference_sequence, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_metadata(
&tc2.reference_sequence, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_metadata_schema(
&tc1.reference_sequence, example_schema, example_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_metadata_schema(
&tc2.reference_sequence, example_schema, example_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
// Test copy
tsk_table_collection_free(&tc1);
tsk_table_collection_free(&tc2);
ret = tsk_table_collection_init(&tc1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_reference_sequence_set_data(
&tc1.reference_sequence, example_data, example_data_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_copy(&tc1, &tc2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_url(
&tc1.reference_sequence, example_url, example_url_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_metadata(
&tc1.reference_sequence, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
ret = tsk_reference_sequence_set_metadata_schema(
&tc1.reference_sequence, example_schema, example_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_copy(&tc1, &tc2, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
tsk_table_collection_free(&tc1);
tsk_table_collection_free(&tc2);
// Test dump and load
ret = tsk_table_collection_init(&tc1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tc1.sequence_length = 1.0;
ret = tsk_reference_sequence_set_data(
&tc1.reference_sequence, example_data, example_data_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_reference_sequence_set_url(
&tc1.reference_sequence, example_url, example_url_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_reference_sequence_set_metadata(
&tc1.reference_sequence, example_metadata, example_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_reference_sequence_set_metadata_schema(
&tc1.reference_sequence, example_schema, example_schema_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
tsk_table_collection_free(&tc1);
tsk_table_collection_free(&tc2);
}
/* Checks that tsk_table_collection_has_reference_sequence reports false for
 * a fresh collection, true once reference data is set, and false again after
 * the data is reset to an empty string. */
static void
test_table_collection_has_reference_sequence(void)
{
    tsk_table_collection_t tables;
    int status;

    status = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tables.sequence_length = 1.0;

    /* Nothing has been set yet. */
    CU_ASSERT_FALSE(tsk_table_collection_has_reference_sequence(&tables));

    /* A single byte of data is enough to count as having a reference. */
    status = tsk_reference_sequence_set_data(&tables.reference_sequence, "A", 1);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_TRUE(tsk_table_collection_has_reference_sequence(&tables));

    /* Setting an empty string takes it back to the "no reference" state. See
     * test_reference_sequence_state_machine for detailed tests. */
    status = tsk_reference_sequence_set_data(&tables.reference_sequence, "", 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_FALSE(tsk_table_collection_has_reference_sequence(&tables));

    tsk_table_collection_free(&tables);
}
/* Exercises top-level metadata and metadata schema on a table collection:
 * equality, copy, dump/load round trips (with and without values set), and
 * tsk_table_collection_takeset_metadata. */
static void
test_table_collection_metadata(void)
{
    int ret;
    tsk_table_collection_t tc1, tc2;
    char example_metadata[100] = "An example of metadata with unicode 🎄🌳🌴🌲🎋";
    char *takeset_metadata;
    char example_metadata_schema[100]
        = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);
    tsk_size_t example_metadata_schema_length
        = (tsk_size_t) strlen(example_metadata_schema);

    // Test equality
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_init(&tc2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
    /* Metadata on one side only must break equality; matching it restores it */
    ret = tsk_table_collection_set_metadata(
        &tc1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
    ret = tsk_table_collection_set_metadata(
        &tc2, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
    /* Same again for the metadata schema */
    ret = tsk_table_collection_set_metadata_schema(
        &tc1, example_metadata_schema, example_metadata_schema_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
    ret = tsk_table_collection_set_metadata_schema(
        &tc2, example_metadata_schema, example_metadata_schema_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));

    // Test copy
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_metadata(
        &tc1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_copy(&tc1, &tc2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
    ret = tsk_table_collection_set_metadata_schema(
        &tc1, example_metadata_schema, example_metadata_schema_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Free the destination so the second copy can initialise it afresh */
    tsk_table_collection_free(&tc2);
    ret = tsk_table_collection_copy(&tc1, &tc2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));

    // Test dump and load with empty metadata and schema
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tc1.sequence_length = 1.0;
    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));

    // Test dump and load with set metadata and schema
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tc1.sequence_length = 1.0;
    ret = tsk_table_collection_set_metadata(
        &tc1, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_metadata_schema(
        &tc1, example_metadata_schema, example_metadata_schema_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);

    // Test takeset
    /* The buffer is heap allocated and is not freed by this test after the
     * takeset call; presumably ownership passes to the collection, which
     * releases it in tsk_table_collection_free. */
    takeset_metadata = tsk_malloc(example_metadata_length * sizeof(char));
    CU_ASSERT_FATAL(takeset_metadata != NULL);
    memcpy(takeset_metadata, example_metadata,
        (size_t) (example_metadata_length * sizeof(char)));
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_takeset_metadata(
        &tc1, takeset_metadata, example_metadata_length);
    /* Fail here rather than on the comparison below if takeset errors */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(tc1.metadata, example_metadata, example_metadata_length), 0);
    tsk_table_collection_free(&tc1);
}
/* Exercises the time_units field of a table collection: equality, copy, the
 * default value after init, and dump/load round trips with the default and
 * with an explicitly set value. */
static void
test_table_collection_time_units(void)
{
    int ret;
    tsk_table_collection_t tc1, tc2;
    char example_time_units[100] = "An example of time units with unicode ⏰";
    tsk_size_t example_time_units_length = (tsk_size_t) strlen(example_time_units);

    // Test equality
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_init(&tc2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
    /* Time units on one side only must break equality; matching restores it */
    ret = tsk_table_collection_set_time_units(
        &tc1, example_time_units, example_time_units_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&tc1, &tc2, 0));
    ret = tsk_table_collection_set_time_units(
        &tc2, example_time_units, example_time_units_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));

    // Test copy
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_time_units(
        &tc1, example_time_units, example_time_units_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_copy(&tc1, &tc2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));

    // Test dump and load with default time_units
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* A freshly initialised collection must start with the "unknown" units.
     * Compare the strncmp result with 0 directly and derive the length from
     * the constant instead of hard-coding it. */
    CU_ASSERT_EQUAL_FATAL(strncmp(tc1.time_units, TSK_TIME_UNITS_UNKNOWN,
                              strlen(TSK_TIME_UNITS_UNKNOWN)),
        0);
    tc1.sequence_length = 1.0;
    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));

    // Test dump and load with set time_units
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);
    ret = tsk_table_collection_init(&tc1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tc1.sequence_length = 1.0;
    ret = tsk_table_collection_set_time_units(
        &tc1, example_time_units, example_time_units_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_dump(&tc1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_load(&tc2, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tc1, &tc2, 0));
    tsk_table_collection_free(&tc1);
    tsk_table_collection_free(&tc2);
}
/* Exercises tsk_node_table_t end to end: row-by-row insertion and retrieval,
 * equality with and without metadata, bulk set/append/truncate of columns,
 * defaults applied when optional columns are NULL, tsk_node_table_extend,
 * and the metadata schema. */
static void
test_node_table(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_node_table_t table, table2;
    tsk_node_t node, node2;
    tsk_size_t num_rows = 100;
    tsk_id_t j;
    tsk_flags_t *flags;
    tsk_id_t *population;
    double *time;
    tsk_id_t *individual;
    char *metadata;
    tsk_size_t *metadata_offset;
    const char *test_metadata = "test";
    tsk_size_t test_metadata_length = 4;
    char metadata_copy[test_metadata_length + 1];
    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };
    tsk_size_t num_row_subset = 6;

    /* Terminate once up front; only the first test_metadata_length bytes are
     * overwritten inside the loop below. */
    metadata_copy[test_metadata_length] = '\0';
    ret = tsk_node_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Increments of 1 are requested for both the row and metadata buffers
     * before rows are added one at a time. */
    tsk_node_table_set_max_rows_increment(&table, 1);
    tsk_node_table_set_max_metadata_length_increment(&table, 1);
    tsk_node_table_print_state(&table, _devnull);
    ret = tsk_node_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        ret_id = tsk_node_table_add_row(&table, (tsk_flags_t) j, (double) j, j, j,
            test_metadata, test_metadata_length);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
        CU_ASSERT_EQUAL(table.flags[j], (tsk_flags_t) j);
        CU_ASSERT_EQUAL(table.time[j], j);
        CU_ASSERT_EQUAL(table.population[j], j);
        CU_ASSERT_EQUAL(table.individual[j], j);
        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);
        CU_ASSERT_EQUAL(
            table.metadata_length, (tsk_size_t) (j + 1) * test_metadata_length);
        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], table.metadata_length);
        /* check the metadata */
        tsk_memcpy(metadata_copy, table.metadata + table.metadata_offset[j],
            test_metadata_length);
        CU_ASSERT_NSTRING_EQUAL(metadata_copy, test_metadata, test_metadata_length);
        ret = tsk_node_table_get_row(&table, (tsk_id_t) j, &node);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(node.id, j);
        CU_ASSERT_EQUAL(node.flags, (tsk_size_t) j);
        CU_ASSERT_EQUAL(node.time, j);
        CU_ASSERT_EQUAL(node.population, j);
        CU_ASSERT_EQUAL(node.individual, j);
        CU_ASSERT_EQUAL(node.metadata_length, test_metadata_length);
        CU_ASSERT_NSTRING_EQUAL(node.metadata, test_metadata, test_metadata_length);
    }

    /* Test equality with and without metadata */
    ret = tsk_node_table_copy(&table, &table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the metadata values */
    table2.metadata[0] = 0;
    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the last metadata entry */
    table2.metadata_offset[table2.num_rows]
        = table2.metadata_offset[table2.num_rows - 1];
    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Delete all metadata */
    tsk_memset(table2.metadata_offset, 0,
        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));
    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    tsk_node_table_free(&table2);

    /* One past the last row must be rejected */
    CU_ASSERT_EQUAL(tsk_node_table_get_row(&table, (tsk_id_t) num_rows, &node),
        TSK_ERR_NODE_OUT_OF_BOUNDS);

    tsk_node_table_print_state(&table, _devnull);
    ret = tsk_node_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_node_table_clear(&table);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    /* Build column arrays for the bulk APIs. The memset calls fill byte-wise;
     * the tests below only compare the table contents against these same
     * arrays. */
    num_rows *= 2;
    flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));
    CU_ASSERT_FATAL(flags != NULL);
    tsk_memset(flags, 1, num_rows * sizeof(tsk_flags_t));
    population = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(population != NULL);
    tsk_memset(population, 2, num_rows * sizeof(tsk_id_t));
    time = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(time != NULL);
    tsk_memset(time, 0, num_rows * sizeof(double));
    individual = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(individual != NULL);
    tsk_memset(individual, 3, num_rows * sizeof(tsk_id_t));
    metadata = tsk_malloc(num_rows * sizeof(char));
    /* Check the allocation before writing through the pointer */
    CU_ASSERT_FATAL(metadata != NULL);
    tsk_memset(metadata, 'a', num_rows * sizeof(char));
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);
    /* One byte of metadata per row: offsets are 0, 1, ..., num_rows */
    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {
        metadata_offset[j] = (tsk_size_t) j;
    }

    ret = tsk_node_table_set_columns(&table, num_rows, flags, time, population,
        individual, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.population, population, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.individual, individual, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);
    tsk_node_table_print_state(&table, _devnull);
    ret = tsk_node_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Append another num_rows onto the end */
    ret = tsk_node_table_append_columns(&table, num_rows, flags, time, population,
        individual, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.flags + num_rows, flags, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.population, population, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.population + num_rows, population, num_rows * sizeof(tsk_id_t)),
        0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.individual, individual, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.individual + num_rows, individual, num_rows * sizeof(tsk_id_t)),
        0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);
    tsk_node_table_print_state(&table, _devnull);
    ret = tsk_node_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Truncate back to the original number of rows. */
    ret = tsk_node_table_truncate(&table, num_rows);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.population, population, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.individual, individual, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);

    /* Truncating to more rows than the table holds is an error */
    ret = tsk_node_table_truncate(&table, num_rows + 1);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);

    /* If population is NULL it should be set to -1. If metadata is NULL all metadatas
     * should be set to the empty string. If individual is NULL it should be set to -1.
     */
    num_rows = 10;
    /* Filling every byte with 0xff gives the expected -1 columns to compare
     * against. */
    tsk_memset(population, 0xff, num_rows * sizeof(tsk_id_t));
    tsk_memset(individual, 0xff, num_rows * sizeof(tsk_id_t));
    ret = tsk_node_table_set_columns(
        &table, num_rows, flags, time, NULL, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.population, population, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.individual, individual, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    /* Include the terminating offset, as the other set_columns checks do */
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);

    /* flags and time cannot be NULL */
    ret = tsk_node_table_set_columns(
        &table, num_rows, NULL, time, population, individual, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_node_table_set_columns(&table, num_rows, flags, NULL, population,
        individual, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    /* metadata and metadata_offset must be both NULL or both non-NULL */
    ret = tsk_node_table_set_columns(
        &table, num_rows, flags, time, population, individual, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_node_table_set_columns(
        &table, num_rows, flags, time, population, individual, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* if metadata and metadata_offset are both null, all metadatas are zero length */
    num_rows = 10;
    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));
    ret = tsk_node_table_set_columns(
        &table, num_rows, flags, time, NULL, NULL, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    ret = tsk_node_table_append_columns(
        &table, num_rows, flags, time, NULL, NULL, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.flags + num_rows, flags, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset + num_rows, metadata_offset,
                        num_rows * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    tsk_node_table_print_state(&table, _devnull);
    ret = tsk_node_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Test extend method */
    ret = tsk_node_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_node_table_init(&table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Can't extend from self */
    ret = tsk_node_table_extend(&table, &table, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);

    /* Two empty tables */
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));
    ret = tsk_node_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));

    /* Row out of bounds */
    ret = tsk_node_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* Num rows out of bounds */
    ret = tsk_node_table_extend(&table, &table2, num_rows * 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* Copy rows in order if index NULL */
    ret = tsk_node_table_set_columns(&table2, num_rows, flags, time, population,
        individual, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));
    ret = tsk_node_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));

    /* Copy nothing if index not NULL but length zero */
    ret = tsk_node_table_extend(&table, &table2, 0, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));

    /* Copy first N rows in order if index NULL */
    ret = tsk_node_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_node_table_extend(&table, &table2, num_rows / 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_node_table_truncate(&table2, num_rows / 2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));
    /* Restore table2 to its full set of rows for the subset test */
    ret = tsk_node_table_set_columns(&table2, num_rows, flags, time, population,
        individual, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Copy a subset */
    ret = tsk_node_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));
    ret = tsk_node_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Row j of the destination must match row row_subset[j] of the source */
    for (j = 0; j < (tsk_id_t) num_row_subset; j++) {
        ret = tsk_node_table_get_row(&table, j, &node);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_node_table_get_row(&table2, row_subset[j], &node2);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(node.flags, node2.flags);
        CU_ASSERT_EQUAL(node.time, node2.time);
        CU_ASSERT_EQUAL(node.population, node2.population);
        CU_ASSERT_EQUAL(node.individual, node2.individual);
        CU_ASSERT_EQUAL(node.metadata_length, node2.metadata_length);
        CU_ASSERT_EQUAL(tsk_memcmp(node.metadata, node2.metadata,
                            node.metadata_length * sizeof(*node.metadata)),
            0);
    }

    /* Test the metadata schema: unset by default, then set, copy and compare */
    ret = tsk_node_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);
    CU_ASSERT_EQUAL(table.metadata_schema, NULL);
    const char *example = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_length = (tsk_size_t) strlen(example);
    const char *example2 = "A different example 🎄🌳🌴🌲🎋";
    /* Must be the length of example2 itself: it is shorter than example, so
     * using example's length would read past the end of the literal. */
    tsk_size_t example2_length = (tsk_size_t) strlen(example2);
    ret = tsk_node_table_set_metadata_schema(&table, example, example_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);
    ret = tsk_node_table_copy(&table, &table2, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);
    ret = tsk_node_table_set_metadata_schema(&table2, example, example_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, 0));
    /* A different schema breaks equality unless metadata is ignored */
    ret = tsk_node_table_set_metadata_schema(&table2, example2, example2_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_node_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_node_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));

    /* Capture each return value so the assertion checks that call */
    ret = tsk_node_table_clear(&table);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_node_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_node_table_free(&table2);
    CU_ASSERT_EQUAL(ret, 0);
    free(flags);
    free(population);
    free(time);
    free(metadata);
    free(metadata_offset);
    free(individual);
}
static void
test_node_table_takeset(void)
{
int ret = 0;
tsk_id_t ret_id;
tsk_node_table_t source_table, table;
tsk_size_t num_rows = 100;
tsk_id_t j;
tsk_flags_t *flags;
double *time;
tsk_id_t *population;
tsk_id_t *individual;
char *metadata;
tsk_size_t *metadata_offset;
const char *test_metadata = "test";
tsk_size_t test_metadata_length = 4;
tsk_size_t zeros[num_rows + 1];
tsk_id_t neg_ones[num_rows];
tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));
tsk_memset(neg_ones, 0xff, num_rows * sizeof(tsk_id_t));
/* Make a table to copy from */
ret = tsk_node_table_init(&source_table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (j = 0; j < (tsk_id_t) num_rows; j++) {
ret_id = tsk_node_table_add_row(&source_table, (tsk_flags_t) j, (double) j + 1,
j + 2, j + 3, test_metadata, test_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret_id, j);
}
/* Prepare arrays to be taken */
flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));
CU_ASSERT_FATAL(flags != NULL);
tsk_memcpy(flags, source_table.flags, num_rows * sizeof(tsk_flags_t));
time = tsk_malloc(num_rows * sizeof(double));
CU_ASSERT_FATAL(time != NULL);
tsk_memcpy(time, source_table.time, num_rows * sizeof(double));
population = tsk_malloc(num_rows * sizeof(tsk_id_t));
CU_ASSERT_FATAL(population != NULL);
tsk_memcpy(population, source_table.population, num_rows * sizeof(tsk_id_t));
individual = tsk_malloc(num_rows * sizeof(tsk_id_t));
CU_ASSERT_FATAL(individual != NULL);
tsk_memcpy(individual, source_table.individual, num_rows * sizeof(tsk_id_t));
metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));
CU_ASSERT_FATAL(metadata != NULL);
tsk_memcpy(
metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));
metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
CU_ASSERT_FATAL(metadata_offset != NULL);
tsk_memcpy(metadata_offset, source_table.metadata_offset,
(num_rows + 1) * sizeof(tsk_size_t));
ret = tsk_node_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Add one row so that we can check takeset frees it */
ret_id = tsk_node_table_add_row(
&table, (tsk_flags_t) 1, 2, 3, 4, test_metadata, test_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret = tsk_node_table_takeset_columns(&table, num_rows, flags, time, population,
individual, metadata, metadata_offset);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_node_table_equals(&source_table, &table, 0));
/* Test error states, all of these must not take the array, or free existing */
/* metadata and metadata offset must be simultaneously NULL or not */
ret = tsk_node_table_takeset_columns(
&table, num_rows, NULL, time, population, individual, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_node_table_takeset_columns(&table, num_rows, flags, NULL, population,
individual, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_node_table_takeset_columns(
&table, num_rows, flags, time, population, individual, NULL, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_node_table_takeset_columns(
&table, num_rows, flags, time, population, individual, metadata, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
/* Truncation after takeset keeps memory and max_rows */
ret = tsk_node_table_clear(&table);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);
flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));
CU_ASSERT_FATAL(flags != NULL);
tsk_memcpy(flags, source_table.flags, num_rows * sizeof(tsk_flags_t));
time = tsk_malloc(num_rows * sizeof(double));
CU_ASSERT_FATAL(time != NULL);
tsk_memcpy(time, source_table.time, num_rows * sizeof(double));
/* if metadata and offset are both null, all entries are zero length,
individual and population default to -1 */
num_rows = 10;
ret = tsk_node_table_takeset_columns(
&table, num_rows, flags, time, NULL, NULL, NULL, NULL);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(table.num_rows, num_rows);
CU_ASSERT_EQUAL(
tsk_memcmp(table.population, neg_ones, num_rows * sizeof(tsk_id_t)), 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.individual, neg_ones, num_rows * sizeof(tsk_id_t)), 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.metadata_length, 0);
ret = tsk_node_table_free(&table);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_node_table_free(&source_table);
CU_ASSERT_EQUAL(ret, 0);
}
static void
test_node_table_update_row(void)
{
int ret;
tsk_id_t ret_id;
tsk_node_table_t table;
tsk_node_t row;
const char *metadata = "ABC";
ret = tsk_node_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_node_table_add_row(&table, 0, 1.0, 2, 3, metadata, 1);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(&table, 1, 2.0, 3, 4, metadata, 2);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(&table, 2, 3.0, 4, 5, metadata, 3);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_node_table_update_row(&table, 0, 1, 2.0, 3, 4, &metadata[1], 1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 1);
CU_ASSERT_EQUAL_FATAL(row.time, 2.0);
CU_ASSERT_EQUAL_FATAL(row.population, 3);
CU_ASSERT_EQUAL_FATAL(row.individual, 4);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_node_table_update_row(&table, 0, row.flags + 1, row.time + 1,
row.population + 1, row.individual + 1, row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 2);
CU_ASSERT_EQUAL_FATAL(row.time, 3.0);
CU_ASSERT_EQUAL_FATAL(row.population, 4);
CU_ASSERT_EQUAL_FATAL(row.individual, 5);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_node_table_update_row(&table, 0, 0, 0, 0, 0, metadata, 3);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 0);
CU_ASSERT_EQUAL_FATAL(row.time, 0);
CU_ASSERT_EQUAL_FATAL(row.population, 0);
CU_ASSERT_EQUAL_FATAL(row.individual, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_node_table_update_row(&table, 1, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_get_row(&table, 1, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 0);
CU_ASSERT_EQUAL_FATAL(row.time, 0);
CU_ASSERT_EQUAL_FATAL(row.population, 0);
CU_ASSERT_EQUAL_FATAL(row.individual, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);
ret = tsk_node_table_get_row(&table, 2, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 2);
CU_ASSERT_EQUAL_FATAL(row.time, 3.0);
CU_ASSERT_EQUAL_FATAL(row.population, 4);
CU_ASSERT_EQUAL_FATAL(row.individual, 5);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_node_table_update_row(&table, 3, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_node_table_free(&table);
}
/* Verifies tsk_node_table_keep_rows on a three-row table: keeping every row,
 * keeping none, keeping only the middle row, and keeping the first n rows
 * (which must match tsk_node_table_truncate). Where an id_map is supplied it
 * is checked to map dropped rows to -1 and kept rows to their new ids. */
static void
test_node_table_keep_rows(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_size_t j;
    tsk_node_table_t source, t1, t2;
    tsk_node_t row;
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t id_map[3];
    const char *metadata = "ABC";
    tsk_id_t indexes[] = { 0, 1, 2 };

    ret = tsk_node_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret_id = tsk_node_table_add_row(&source, 0, 1.0, 2, 3, metadata, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_node_table_add_row(&source, 1, 2.0, 3, 4, metadata, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_node_table_add_row(&source, 2, 3.0, 4, 5, metadata, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret = tsk_node_table_copy(&source, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Keeping every row leaves the table unchanged, with or without an
     * id_map. The id_map values checked below were written by the first
     * call; the second call passes NULL. */
    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &source, 0));

    ret = tsk_node_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &source, 0));

    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Keeping no rows empties the table and maps every old id to -1. */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Refill t1 from source without re-initialising it, then keep only the
     * middle row. */
    ret = tsk_node_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_node_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* The surviving row must be the old row 1. Check the return value before
     * reading row so a failed lookup cannot leave us inspecting an
     * uninitialised struct. */
    ret = tsk_node_table_get_row(&t1, 0, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.flags, 1);
    CU_ASSERT_EQUAL_FATAL(row.time, 2.0);
    CU_ASSERT_EQUAL_FATAL(row.population, 3);
    CU_ASSERT_EQUAL_FATAL(row.individual, 4);
    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');

    tsk_node_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_node_table_copy(&source, &t2, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_node_table_copy(&source, &t1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_node_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept. */
        keep[j] = 1;
        ret = tsk_node_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_node_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_node_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_node_table_equals(&source, &t2, 0));

        tsk_node_table_free(&t1);
        tsk_node_table_free(&t2);
    }

    tsk_node_table_free(&source);
}
/* Exercises the tsk_edge_table_t API (add_row, get_row, set_columns,
 * append_columns, truncate, copy, equals, extend, set_metadata_schema, clear
 * and free) on a table initialised with the given options.
 *
 * When options contains TSK_TABLE_NO_METADATA, supplying metadata is expected
 * to fail with TSK_ERR_METADATA_DISABLED and the metadata columns are expected
 * to remain NULL; otherwise metadata round-trips through every operation. */
static void
test_edge_table_with_options(tsk_flags_t options)
{
    int ret;
    tsk_edge_table_t table, table2;
    tsk_size_t num_rows = 100;
    tsk_id_t j, ret_id;
    tsk_edge_t edge, edge2;
    tsk_id_t *parent, *child;
    double *left, *right;
    char *metadata;
    tsk_size_t *metadata_offset;
    const char *test_metadata = "test";
    tsk_size_t test_metadata_length = 4;
    char metadata_copy[test_metadata_length + 1];
    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };
    tsk_size_t num_row_subset = 6;

    metadata_copy[test_metadata_length] = '\0';
    ret = tsk_edge_table_init(&table, options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Increments of 1 so that the table has to grow repeatedly below. */
    tsk_edge_table_set_max_rows_increment(&table, 1);
    tsk_edge_table_set_max_metadata_length_increment(&table, 1);
    tsk_edge_table_print_state(&table, _devnull);
    ret = tsk_edge_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Add rows one at a time and check both the raw columns and get_row. */
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        if (options & TSK_TABLE_NO_METADATA) {
            ret_id = tsk_edge_table_add_row(&table, (double) j, (double) j, j, j,
                test_metadata, test_metadata_length);
            CU_ASSERT_EQUAL(ret_id, TSK_ERR_METADATA_DISABLED);
            ret_id
                = tsk_edge_table_add_row(&table, (double) j, (double) j, j, j, NULL, 0);
        } else {
            ret_id = tsk_edge_table_add_row(&table, (double) j, (double) j, j, j,
                test_metadata, test_metadata_length);
        }
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
        CU_ASSERT_EQUAL(table.left[j], j);
        CU_ASSERT_EQUAL(table.right[j], j);
        CU_ASSERT_EQUAL(table.parent[j], j);
        CU_ASSERT_EQUAL(table.child[j], j);
        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);
        if (options & TSK_TABLE_NO_METADATA) {
            CU_ASSERT_EQUAL(table.metadata_length, 0);
            CU_ASSERT_EQUAL(table.metadata, NULL);
            CU_ASSERT_EQUAL(table.metadata_offset, NULL);
        } else {
            CU_ASSERT_EQUAL(
                table.metadata_length, (tsk_size_t) (j + 1) * test_metadata_length);
            CU_ASSERT_EQUAL(table.metadata_offset[j + 1], table.metadata_length);
            /* check the metadata */
            tsk_memcpy(metadata_copy, table.metadata + table.metadata_offset[j],
                test_metadata_length);
            CU_ASSERT_NSTRING_EQUAL(metadata_copy, test_metadata, test_metadata_length);
        }
        ret = tsk_edge_table_get_row(&table, (tsk_id_t) j, &edge);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(edge.id, j);
        CU_ASSERT_EQUAL(edge.left, j);
        CU_ASSERT_EQUAL(edge.right, j);
        CU_ASSERT_EQUAL(edge.parent, j);
        CU_ASSERT_EQUAL(edge.child, j);
        if (options & TSK_TABLE_NO_METADATA) {
            CU_ASSERT_EQUAL(edge.metadata_length, 0);
            CU_ASSERT_EQUAL(edge.metadata, NULL);
        } else {
            CU_ASSERT_EQUAL(edge.metadata_length, test_metadata_length);
            CU_ASSERT_NSTRING_EQUAL(edge.metadata, test_metadata, test_metadata_length);
        }
    }
    ret = tsk_edge_table_get_row(&table, (tsk_id_t) num_rows, &edge);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
    tsk_edge_table_print_state(&table, _devnull);
    ret = tsk_edge_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Build column arrays twice as long as the current table; each row gets
     * one byte of metadata so metadata_offset[j] == j. */
    num_rows *= 2;
    left = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(left != NULL);
    tsk_memset(left, 0, num_rows * sizeof(double));
    right = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(right != NULL);
    tsk_memset(right, 0, num_rows * sizeof(double));
    parent = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(parent != NULL);
    tsk_memset(parent, 1, num_rows * sizeof(tsk_id_t));
    child = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(child != NULL);
    tsk_memset(child, 1, num_rows * sizeof(tsk_id_t));
    metadata = tsk_malloc(num_rows * sizeof(char));
    /* Check the allocation before writing through the pointer. */
    CU_ASSERT_FATAL(metadata != NULL);
    tsk_memset(metadata, 'a', num_rows * sizeof(char));
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);
    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {
        metadata_offset[j] = (tsk_size_t) j;
    }

    if (options & TSK_TABLE_NO_METADATA) {
        ret = tsk_edge_table_set_columns(
            &table, num_rows, left, right, parent, child, metadata, metadata_offset);
        CU_ASSERT_EQUAL(ret, TSK_ERR_METADATA_DISABLED);
        ret = tsk_edge_table_set_columns(
            &table, num_rows, left, right, parent, child, NULL, NULL);
    } else {
        ret = tsk_edge_table_set_columns(
            &table, num_rows, left, right, parent, child, metadata, metadata_offset);
    }
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);
    if (options & TSK_TABLE_NO_METADATA) {
        CU_ASSERT_EQUAL(table.metadata, NULL);
        CU_ASSERT_EQUAL(table.metadata_offset, NULL);
        CU_ASSERT_EQUAL(table.metadata_length, 0);
    } else {
        CU_ASSERT_EQUAL(
            tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                            (num_rows + 1) * sizeof(tsk_size_t)),
            0);
        CU_ASSERT_EQUAL(table.metadata_length, num_rows);
    }
    CU_ASSERT_EQUAL(table.num_rows, num_rows);

    /* Append another num_rows to the end. */
    if (options & TSK_TABLE_NO_METADATA) {
        ret = tsk_edge_table_append_columns(
            &table, num_rows, left, right, parent, child, metadata, metadata_offset);
        CU_ASSERT_EQUAL(ret, TSK_ERR_METADATA_DISABLED);
        ret = tsk_edge_table_append_columns(
            &table, num_rows, left, right, parent, child, NULL, NULL);
    } else {
        ret = tsk_edge_table_append_columns(
            &table, num_rows, left, right, parent, child, metadata, metadata_offset);
    }
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.left + num_rows, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.right + num_rows, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.parent + num_rows, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.child + num_rows, child, num_rows * sizeof(tsk_id_t)), 0);
    if (options & TSK_TABLE_NO_METADATA) {
        CU_ASSERT_EQUAL(table.metadata, NULL);
        CU_ASSERT_EQUAL(table.metadata_offset, NULL);
        CU_ASSERT_EQUAL(table.metadata_length, 0);
    } else {
        CU_ASSERT_EQUAL(
            tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
        CU_ASSERT_EQUAL(
            tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);
        CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);
    }
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);

    /* Truncate back to num_rows */
    ret = tsk_edge_table_truncate(&table, num_rows);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);
    if (options & TSK_TABLE_NO_METADATA) {
        CU_ASSERT_EQUAL(table.metadata, NULL);
        CU_ASSERT_EQUAL(table.metadata_offset, NULL);
        CU_ASSERT_EQUAL(table.metadata_length, 0);
    } else {
        CU_ASSERT_EQUAL(
            tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                            (num_rows + 1) * sizeof(tsk_size_t)),
            0);
        CU_ASSERT_EQUAL(table.metadata_length, num_rows);
    }
    CU_ASSERT_EQUAL(table.num_rows, num_rows);

    /* Truncating to more rows than the table holds is an error. */
    ret = tsk_edge_table_truncate(&table, num_rows + 1);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);

    /* Test equality with and without metadata */
    ret = tsk_edge_table_copy(&table, &table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    if (!(options & TSK_TABLE_NO_METADATA)) {
        /* Change the metadata values */
        table2.metadata[0] = 0;
        CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));
        CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
        /* Change the last metadata entry */
        table2.metadata_offset[table2.num_rows]
            = table2.metadata_offset[table2.num_rows - 1];
        CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));
        CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
        /* Delete all metadata */
        tsk_memset(table2.metadata_offset, 0,
            (table2.num_rows + 1) * sizeof(*table2.metadata_offset));
        CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));
        CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    }
    tsk_edge_table_free(&table2);

    /* Inputs cannot be NULL */
    ret = tsk_edge_table_set_columns(
        &table, num_rows, NULL, right, parent, child, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_edge_table_set_columns(
        &table, num_rows, left, NULL, parent, child, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_edge_table_set_columns(
        &table, num_rows, left, right, NULL, child, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_edge_table_set_columns(
        &table, num_rows, left, right, parent, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_edge_table_set_columns(
        &table, num_rows, left, right, parent, child, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_edge_table_set_columns(
        &table, num_rows, left, right, parent, child, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* if metadata and metadata_offset are both null, all metadatas are zero length */
    num_rows = 10;
    /* metadata_offset now doubles as the expected all-zero offset column. */
    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));
    ret = tsk_edge_table_set_columns(
        &table, num_rows, left, right, parent, child, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);
    if (options & TSK_TABLE_NO_METADATA) {
        CU_ASSERT_EQUAL(table.metadata, NULL);
        CU_ASSERT_EQUAL(table.metadata_offset, NULL);
    } else {
        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                            (num_rows + 1) * sizeof(tsk_size_t)),
            0);
    }
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    ret = tsk_edge_table_append_columns(
        &table, num_rows, left, right, parent, child, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.left + num_rows, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.right + num_rows, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.parent + num_rows, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.child, child, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.child + num_rows, child, num_rows * sizeof(tsk_id_t)), 0);
    if (options & TSK_TABLE_NO_METADATA) {
        CU_ASSERT_EQUAL(table.metadata, NULL);
        CU_ASSERT_EQUAL(table.metadata_offset, NULL);
    } else {
        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                            (num_rows + 1) * sizeof(tsk_size_t)),
            0);
        CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset + num_rows, metadata_offset,
                            num_rows * sizeof(tsk_size_t)),
            0);
    }
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
    tsk_edge_table_print_state(&table, _devnull);
    ret = tsk_edge_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Test extend method */
    ret = tsk_edge_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_edge_table_init(&table2, options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Can't extend from self */
    ret = tsk_edge_table_extend(&table, &table, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);

    /* Two empty tables */
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));
    ret = tsk_edge_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));

    /* Row out of bounds */
    ret = tsk_edge_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);

    /* Num rows out of bounds */
    ret = tsk_edge_table_extend(&table, &table2, num_rows * 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);

    /* Copy rows in order if index NULL */
    if (options & TSK_TABLE_NO_METADATA) {
        ret = tsk_edge_table_set_columns(
            &table2, num_rows, left, right, parent, child, NULL, NULL);
    } else {
        ret = tsk_edge_table_set_columns(
            &table2, num_rows, left, right, parent, child, metadata, metadata_offset);
    }
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));
    ret = tsk_edge_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));

    /* Copy nothing if index not NULL but length zero */
    ret = tsk_edge_table_extend(&table, &table2, 0, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));

    /* Copy first N rows in order if index NULL */
    ret = tsk_edge_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_edge_table_extend(&table, &table2, num_rows / 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_edge_table_truncate(&table2, num_rows / 2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));
    /* Restore table2 to its full num_rows for the subset test. */
    if (options & TSK_TABLE_NO_METADATA) {
        ret = tsk_edge_table_set_columns(
            &table2, num_rows, left, right, parent, child, NULL, NULL);
    } else {
        ret = tsk_edge_table_set_columns(
            &table2, num_rows, left, right, parent, child, metadata, metadata_offset);
    }
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Copy a subset */
    ret = tsk_edge_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));
    ret = tsk_edge_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < (tsk_id_t) num_row_subset; j++) {
        ret = tsk_edge_table_get_row(&table, j, &edge);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_edge_table_get_row(&table2, row_subset[j], &edge2);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(edge.parent, edge2.parent);
        CU_ASSERT_EQUAL(edge.child, edge2.child);
        CU_ASSERT_EQUAL(edge.left, edge2.left);
        CU_ASSERT_EQUAL(edge.right, edge2.right);
        CU_ASSERT_EQUAL(edge.metadata_length, edge2.metadata_length);
        CU_ASSERT_EQUAL(tsk_memcmp(edge.metadata, edge2.metadata,
                            edge.metadata_length * sizeof(*edge.metadata)),
            0);
    }

    /* Metadata schema: starts empty, survives a copy, and takes part in
     * equality unless TSK_CMP_IGNORE_METADATA is given. */
    ret = tsk_edge_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);
    CU_ASSERT_EQUAL(table.metadata_schema, NULL);
    const char *example = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_length = (tsk_size_t) strlen(example);
    const char *example2 = "A different example 🎄🌳🌴🌲🎋";
    /* Measure example2 itself: using example's (longer) length here would
     * make the schema calls read past the end of the example2 literal. */
    tsk_size_t example2_length = (tsk_size_t) strlen(example2);
    ret = tsk_edge_table_set_metadata_schema(&table, example, example_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);

    ret = tsk_edge_table_copy(&table, &table2, TSK_NO_INIT | options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);
    ret = tsk_edge_table_set_metadata_schema(&table2, example, example_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, 0));
    ret = tsk_edge_table_set_metadata_schema(&table2, example2, example2_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_edge_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_edge_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));

    ret = tsk_edge_table_clear(&table);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_edge_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_edge_table_free(&table2);
    CU_ASSERT_EQUAL(ret, 0);
    free(left);
    free(right);
    free(parent);
    free(child);
    free(metadata);
    free(metadata_offset);
}
/* Runs the shared edge table checks twice: first with default options, then
 * with TSK_TABLE_NO_METADATA. */
static void
test_edge_table(void)
{
    int pass;

    for (pass = 0; pass < 2; pass++) {
        test_edge_table_with_options(pass == 0 ? 0 : TSK_TABLE_NO_METADATA);
    }
}
/* Verifies tsk_edge_table_update_row on a three-row table with metadata
 * enabled: replacing a row's fields and metadata, feeding a fetched row's own
 * metadata back in, growing and emptying the metadata, leaving other rows
 * untouched, and rejecting an out-of-bounds row id. */
static void
test_edge_table_update_row(void)
{
    int err;
    tsk_id_t new_id;
    tsk_edge_table_t edges;
    tsk_edge_t e;
    const char *abc = "ABC";

    err = tsk_edge_table_init(&edges, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Rows 0, 1 and 2 carry the 1-, 2- and 3-byte prefixes of "ABC". */
    new_id = tsk_edge_table_add_row(&edges, 0, 1.0, 2, 3, abc, 1);
    CU_ASSERT_FATAL(new_id >= 0);
    new_id = tsk_edge_table_add_row(&edges, 1, 2.0, 3, 4, abc, 2);
    CU_ASSERT_FATAL(new_id >= 0);
    new_id = tsk_edge_table_add_row(&edges, 2, 3.0, 4, 5, abc, 3);
    CU_ASSERT_FATAL(new_id >= 0);

    /* Overwrite every field of row 0; its metadata becomes the byte 'B'. */
    err = tsk_edge_table_update_row(&edges, 0, 1, 2.0, 3, 4, abc + 1, 1);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_edge_table_get_row(&edges, 0, &e);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(e.left, 1);
    CU_ASSERT_EQUAL_FATAL(e.right, 2.0);
    CU_ASSERT_EQUAL_FATAL(e.parent, 3);
    CU_ASSERT_EQUAL_FATAL(e.child, 4);
    CU_ASSERT_EQUAL_FATAL(e.metadata_length, 1);
    CU_ASSERT_EQUAL_FATAL(e.metadata[0], 'B');

    /* Bump each scalar by one while passing back the metadata pointer and
     * length that get_row just returned for this same row. */
    err = tsk_edge_table_update_row(&edges, 0, e.left + 1, e.right + 1, e.parent + 1,
        e.child + 1, e.metadata, e.metadata_length);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_edge_table_get_row(&edges, 0, &e);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(e.left, 2);
    CU_ASSERT_EQUAL_FATAL(e.right, 3.0);
    CU_ASSERT_EQUAL_FATAL(e.parent, 4);
    CU_ASSERT_EQUAL_FATAL(e.child, 5);
    CU_ASSERT_EQUAL_FATAL(e.metadata_length, 1);
    CU_ASSERT_EQUAL_FATAL(e.metadata[0], 'B');

    /* Grow row 0's metadata from one byte to the full three bytes. */
    err = tsk_edge_table_update_row(&edges, 0, 0, 0, 0, 0, abc, 3);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_edge_table_get_row(&edges, 0, &e);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(e.left, 0);
    CU_ASSERT_EQUAL_FATAL(e.right, 0);
    CU_ASSERT_EQUAL_FATAL(e.parent, 0);
    CU_ASSERT_EQUAL_FATAL(e.child, 0);
    CU_ASSERT_EQUAL_FATAL(e.metadata_length, 3);
    CU_ASSERT_EQUAL_FATAL(e.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(e.metadata[1], 'B');
    CU_ASSERT_EQUAL_FATAL(e.metadata[2], 'C');

    /* Empty row 1's metadata by passing NULL with zero length. */
    err = tsk_edge_table_update_row(&edges, 1, 0, 0, 0, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_edge_table_get_row(&edges, 1, &e);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(e.left, 0);
    CU_ASSERT_EQUAL_FATAL(e.right, 0);
    CU_ASSERT_EQUAL_FATAL(e.parent, 0);
    CU_ASSERT_EQUAL_FATAL(e.child, 0);
    CU_ASSERT_EQUAL_FATAL(e.metadata_length, 0);

    /* Row 2 was never updated, so it must still hold the values it was
     * added with. */
    err = tsk_edge_table_get_row(&edges, 2, &e);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(e.left, 2);
    CU_ASSERT_EQUAL_FATAL(e.right, 3.0);
    CU_ASSERT_EQUAL_FATAL(e.parent, 4);
    CU_ASSERT_EQUAL_FATAL(e.child, 5);
    CU_ASSERT_EQUAL_FATAL(e.metadata_length, 3);
    CU_ASSERT_EQUAL_FATAL(e.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(e.metadata[1], 'B');
    CU_ASSERT_EQUAL_FATAL(e.metadata[2], 'C');

    /* Row id 3 is one past the last row and must be rejected. */
    err = tsk_edge_table_update_row(&edges, 3, 0, 0, 0, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_EDGE_OUT_OF_BOUNDS);

    tsk_edge_table_free(&edges);
}
/* Verifies tsk_edge_table_update_row on a table initialised with
 * TSK_TABLE_NO_METADATA: updates with empty metadata succeed, and an update
 * that supplies metadata fails with TSK_ERR_METADATA_DISABLED. */
static void
test_edge_table_update_row_no_metadata(void)
{
    int err;
    tsk_id_t new_id;
    tsk_edge_table_t edges;
    tsk_edge_t e;
    const char *abc = "ABC";

    err = tsk_edge_table_init(&edges, TSK_TABLE_NO_METADATA);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Three rows, all added with NULL / zero-length metadata. */
    new_id = tsk_edge_table_add_row(&edges, 0, 1.0, 2, 3, NULL, 0);
    CU_ASSERT_FATAL(new_id >= 0);
    new_id = tsk_edge_table_add_row(&edges, 1, 2.0, 3, 4, NULL, 0);
    CU_ASSERT_FATAL(new_id >= 0);
    new_id = tsk_edge_table_add_row(&edges, 2, 3.0, 4, 5, NULL, 0);
    CU_ASSERT_FATAL(new_id >= 0);

    /* Overwrite the scalar fields of row 0. */
    err = tsk_edge_table_update_row(&edges, 0, 1, 2.0, 3, 4, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_edge_table_get_row(&edges, 0, &e);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(e.left, 1);
    CU_ASSERT_EQUAL_FATAL(e.right, 2.0);
    CU_ASSERT_EQUAL_FATAL(e.parent, 3);
    CU_ASSERT_EQUAL_FATAL(e.child, 4);
    CU_ASSERT_EQUAL_FATAL(e.metadata_length, 0);

    /* Bump each scalar by one, passing back the (zero-length) metadata that
     * get_row returned for this row. */
    err = tsk_edge_table_update_row(&edges, 0, e.left + 1, e.right + 1, e.parent + 1,
        e.child + 1, e.metadata, e.metadata_length);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_edge_table_get_row(&edges, 0, &e);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(e.left, 2);
    CU_ASSERT_EQUAL_FATAL(e.right, 3.0);
    CU_ASSERT_EQUAL_FATAL(e.parent, 4);
    CU_ASSERT_EQUAL_FATAL(e.child, 5);
    CU_ASSERT_EQUAL_FATAL(e.metadata_length, 0);

    /* Supplying real metadata to a no-metadata table must be refused. */
    err = tsk_edge_table_update_row(&edges, 1, 0, 0, 0, 0, abc, 3);
    CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_METADATA_DISABLED);

    tsk_edge_table_free(&edges);
}
/* Verifies tsk_edge_table_keep_rows on a three-row table with metadata:
 * keeping every row, keeping none, keeping only the middle row, and keeping
 * the first n rows (which must match tsk_edge_table_truncate). Where an
 * id_map is supplied it is checked to map dropped rows to -1 and kept rows to
 * their new ids. */
static void
test_edge_table_keep_rows(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_size_t j;
    tsk_edge_table_t source, t1, t2;
    tsk_edge_t row;
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t id_map[3];
    const char *metadata = "ABC";
    tsk_id_t indexes[] = { 0, 1, 2 };

    ret = tsk_edge_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret_id = tsk_edge_table_add_row(&source, 0, 1.0, 2, 3, metadata, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&source, 1, 2.0, 3, 4, metadata, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&source, 2, 3.0, 4, 5, metadata, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret = tsk_edge_table_copy(&source, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Keeping every row leaves the table unchanged, with or without an
     * id_map. The id_map values checked below were written by the first
     * call; the second call passes NULL. */
    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));

    ret = tsk_edge_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));

    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Keeping no rows empties the table and maps every old id to -1. */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Refill t1 from source without re-initialising it, then keep only the
     * middle row. */
    ret = tsk_edge_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* The surviving row must be the old row 1. Check the return value before
     * reading row so a failed lookup cannot leave us inspecting an
     * uninitialised struct. */
    ret = tsk_edge_table_get_row(&t1, 0, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.left, 1);
    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);
    CU_ASSERT_EQUAL_FATAL(row.parent, 3);
    CU_ASSERT_EQUAL_FATAL(row.child, 4);
    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');

    tsk_edge_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_edge_table_copy(&source, &t2, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_edge_table_copy(&source, &t1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_edge_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept. */
        keep[j] = 1;
        ret = tsk_edge_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_edge_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_edge_table_equals(&source, &t2, 0));

        tsk_edge_table_free(&t1);
        tsk_edge_table_free(&t2);
    }

    tsk_edge_table_free(&source);
}
/* Verifies tsk_edge_table_keep_rows on tables copied with
 * TSK_TABLE_NO_METADATA: keeping every row, keeping none, keeping only the
 * middle row, and keeping the first n rows (which must match
 * tsk_edge_table_truncate).
 *
 * The source table is initialised with default options but every row is
 * added with NULL / zero-length metadata; the working copies t1 and t2 are
 * the ones created with TSK_TABLE_NO_METADATA. */
static void
test_edge_table_keep_rows_no_metadata(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_size_t j;
    tsk_edge_table_t source, t1, t2;
    tsk_edge_t row;
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t id_map[3];
    tsk_id_t indexes[] = { 0, 1, 2 };

    ret = tsk_edge_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret_id = tsk_edge_table_add_row(&source, 0, 1.0, 2, 3, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&source, 1, 2.0, 3, 4, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&source, 2, 3.0, 4, 5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret = tsk_edge_table_copy(&source, &t1, TSK_TABLE_NO_METADATA);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Keeping every row leaves the table unchanged, with or without an
     * id_map. The id_map values checked below were written by the first
     * call; the second call passes NULL. */
    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));

    ret = tsk_edge_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &source, 0));

    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Keeping no rows empties the table and maps every old id to -1. */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Refill t1 from source without re-initialising it, then keep only the
     * middle row. */
    ret = tsk_edge_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_edge_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* The surviving row must be the old row 1. Check the return value before
     * reading row so a failed lookup cannot leave us inspecting an
     * uninitialised struct. */
    ret = tsk_edge_table_get_row(&t1, 0, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.left, 1);
    CU_ASSERT_EQUAL_FATAL(row.right, 2.0);
    CU_ASSERT_EQUAL_FATAL(row.parent, 3);
    CU_ASSERT_EQUAL_FATAL(row.child, 4);
    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);

    tsk_edge_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_edge_table_copy(&source, &t2, TSK_TABLE_NO_METADATA);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_edge_table_copy(&source, &t1, TSK_TABLE_NO_METADATA);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_edge_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept. */
        keep[j] = 1;
        ret = tsk_edge_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_edge_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_edge_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_edge_table_equals(&source, &t2, 0));

        tsk_edge_table_free(&t1);
        tsk_edge_table_free(&t2);
    }

    tsk_edge_table_free(&source);
}
/*
 * Exercises tsk_edge_table_takeset_columns on tables initialised with
 * ``table_options``.
 *
 * A 100 row reference table is built with add_row, its columns are duplicated
 * into freshly allocated arrays, and those arrays are handed over to a second
 * table with takeset. The two tables must then compare equal. Afterwards the
 * error paths are checked (a NULL required column, or inconsistent metadata
 * arguments) and finally that clearing the table leaves max_rows at the size
 * that was taken.
 *
 * If TSK_TABLE_NO_METADATA is set in ``table_options`` no metadata arrays are
 * allocated and NULL is passed for both metadata arguments.
 */
static void
test_edge_table_takeset_with_options(tsk_flags_t table_options)
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_edge_table_t source_table, table;
    tsk_size_t num_rows = 100;
    tsk_id_t j;
    double *left;
    double *right;
    tsk_id_t *parent;
    tsk_id_t *child;
    char *metadata;
    tsk_size_t *metadata_offset;
    const char *test_metadata = "test";
    tsk_size_t test_metadata_length = 4;

    /* Make a table to copy from */
    ret = tsk_edge_table_init(&source_table, table_options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        if (table_options & TSK_TABLE_NO_METADATA) {
            ret_id = tsk_edge_table_add_row(
                &source_table, (double) j, (double) j + 1, j + 2, j + 3, NULL, 0);
        } else {
            ret_id = tsk_edge_table_add_row(&source_table, (double) j, (double) j + 1,
                j + 2, j + 3, test_metadata, test_metadata_length);
        }
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Prepare arrays to be taken */
    left = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(left != NULL);
    tsk_memcpy(left, source_table.left, num_rows * sizeof(double));
    right = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(right != NULL);
    tsk_memcpy(right, source_table.right, num_rows * sizeof(double));
    parent = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(parent != NULL);
    tsk_memcpy(parent, source_table.parent, num_rows * sizeof(tsk_id_t));
    child = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(child != NULL);
    tsk_memcpy(child, source_table.child, num_rows * sizeof(tsk_id_t));
    if (table_options & TSK_TABLE_NO_METADATA) {
        /* No metadata columns exist, so every later call passes NULL/0 */
        metadata = NULL;
        metadata_offset = NULL;
        test_metadata = NULL;
        test_metadata_length = 0;
    } else {
        metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));
        CU_ASSERT_FATAL(metadata != NULL);
        tsk_memcpy(metadata, source_table.metadata,
            num_rows * test_metadata_length * sizeof(char));
        metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
        CU_ASSERT_FATAL(metadata_offset != NULL);
        tsk_memcpy(metadata_offset, source_table.metadata_offset,
            (num_rows + 1) * sizeof(tsk_size_t));
    }

    ret = tsk_edge_table_init(&table, table_options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Add one row so that we can check takeset frees it */
    ret_id = tsk_edge_table_add_row(
        &table, 1, 2, 3, 4, test_metadata, test_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = tsk_edge_table_takeset_columns(
        &table, num_rows, left, right, parent, child, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&source_table, &table, 0));

    /* Test error states, all of these must not take the array, or free existing */
    /* metadata and metadata offset must be simultaneously NULL or not */
    ret = tsk_edge_table_takeset_columns(
        &table, num_rows, NULL, right, parent, child, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_edge_table_takeset_columns(
        &table, num_rows, left, NULL, parent, child, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_edge_table_takeset_columns(
        &table, num_rows, left, right, NULL, child, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_edge_table_takeset_columns(
        &table, num_rows, left, right, parent, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    if (table_options & TSK_TABLE_NO_METADATA) {
        /* It isn't used, so any pointer does for testing that presence of metadata
           fails */
        ret = tsk_edge_table_takeset_columns(
            &table, num_rows, left, right, parent, child, (char *) child, NULL);
        CU_ASSERT_EQUAL(ret, TSK_ERR_METADATA_DISABLED);
    } else {
        ret = tsk_edge_table_takeset_columns(
            &table, num_rows, left, right, parent, child, NULL, metadata_offset);
        CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
        ret = tsk_edge_table_takeset_columns(
            &table, num_rows, left, right, parent, child, metadata, NULL);
        CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    }

    /* Truncation after takeset keeps memory and max_rows */
    ret = tsk_edge_table_clear(&table);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);

    /* The arrays taken above now belong to the table and are released with it */
    ret = tsk_edge_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_edge_table_free(&source_table);
    CU_ASSERT_EQUAL(ret, 0);
}
static void
test_edge_table_takeset(void)
{
test_edge_table_takeset_with_options(TSK_TABLE_NO_METADATA);
test_edge_table_takeset_with_options(0);
}
/*
 * Checks how the presence of edge metadata interacts with copying, both for
 * whole table collections and for a single edge table.
 */
static void
test_edge_table_copy_semantics(void)
{
    tsk_treeseq_t ts;
    tsk_table_collection_t tables, copy;
    tsk_edge_table_t edges_copy;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);

    /* --- Source edges carry metadata --- */
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    insert_edge_metadata(&tables);

    /* Copying into a collection that has metadata enabled must succeed. */
    ret = tsk_table_collection_copy(&tables, &copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));
    tsk_table_collection_free(&copy);

    /* Copying into a collection without edge metadata must be rejected. */
    ret = tsk_table_collection_copy(&tables, &copy, TSK_TC_NO_EDGE_METADATA);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_METADATA_DISABLED);
    tsk_table_collection_free(&copy);
    tsk_table_collection_free(&tables);

    /* --- Source edges have metadata enabled, but none stored --- */
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Destination with metadata enabled. */
    ret = tsk_table_collection_copy(&tables, &copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));
    tsk_table_collection_free(&copy);

    /* Destination with edge metadata disabled. */
    ret = tsk_table_collection_copy(&tables, &copy, TSK_TC_NO_EDGE_METADATA);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));
    tsk_table_collection_free(&copy);

    /* The edge table can also be copied on its own. */
    ret = tsk_edge_table_copy(&tables.edges, &edges_copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&tables.edges, &edges_copy, 0));
    tsk_edge_table_free(&edges_copy);

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/*
 * Squashing four edges, two per child with intervals that meet at position 2,
 * must leave two edges.
 */
static void
test_edge_table_squash(void)
{
    tsk_table_collection_t tables;
    int ret;
    /* Three nodes; in the edges below node 2 is the parent of nodes 0 and 1. */
    const char *node_text = "1 0 -1 -1\n"
                            "1 0 -1 -1\n"
                            "0 0.253 -1 -1\n";
    /* Each child has one edge ending at 2 and another starting at 2. */
    const char *edge_text = "0 2 2 0\n"
                            "2 10 2 0\n"
                            "0 2 2 1\n"
                            "2 10 2 1\n";

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 10;

    parse_nodes(node_text, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);
    parse_edges(edge_text, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 4);

    ret = tsk_edge_table_squash(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* One edge per child should remain. */
    CU_ASSERT_EQUAL(tables.edges.num_rows, 2);

    tsk_table_collection_free(&tables);
}
/*
 * Squash with two parents: eight edges (two per child) must be reduced to
 * four.
 */
static void
test_edge_table_squash_multiple_parents(void)
{
    tsk_table_collection_t tables;
    int ret;
    /* Six nodes; in the edges below node 4 is the parent of nodes 0 and 1,
     * and node 5 is the parent of nodes 2 and 3. */
    const char *node_text = "1 0.000 -1 -1\n"
                            "1 0.000 -1 -1\n"
                            "1 0.000 -1 -1\n"
                            "1 0.000 -1 -1\n"
                            "0 1.000 -1 -1\n"
                            "0 1.000 -1 -1\n";
    /* Every child is covered by two edges whose intervals meet (at 5 for
     * parent 5, at 4 for parent 4). */
    const char *edge_text = "5 10 5 3\n"
                            "5 10 5 2\n"
                            "0 5 5 3\n"
                            "0 5 5 2\n"
                            "4 10 4 1\n"
                            "0 4 4 1\n"
                            "4 10 4 0\n"
                            "0 4 4 0\n";

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 10;

    parse_nodes(node_text, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 6);
    parse_edges(edge_text, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 8);

    ret = tsk_edge_table_squash(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* One edge per child should remain. */
    CU_ASSERT_EQUAL(tables.edges.num_rows, 4);

    tsk_table_collection_free(&tables);
}
/* Squashing an edge table with no rows must succeed. */
static void
test_edge_table_squash_empty(void)
{
    tsk_table_collection_t tables;
    int ret;
    const char *node_text = "1 0 -1 -1\n"
                            "1 0 -1 -1\n"
                            "0 0.253 -1 -1\n";
    const char *edge_text = "";

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 10;

    parse_nodes(node_text, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);
    /* Empty edge text leaves the edge table without rows. */
    parse_edges(edge_text, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 0);

    ret = tsk_edge_table_squash(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_table_collection_free(&tables);
}
/* Squashing an edge table that holds exactly one edge must succeed. */
static void
test_edge_table_squash_single_edge(void)
{
    tsk_table_collection_t tables;
    int ret;
    const char *node_text = "1 0 -1 -1\n"
                            "0 0 -1 -1\n";
    const char *edge_text = "0 1 1 0\n";

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    parse_nodes(node_text, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 2);
    parse_edges(edge_text, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 1);

    ret = tsk_edge_table_squash(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_table_collection_free(&tables);
}
/*
 * Two edges for the same parent/child pair whose intervals overlap
 * (0 to 0.6 and 0.4 to 1) must make squash fail with
 * TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN.
 */
static void
test_edge_table_squash_bad_intervals(void)
{
    tsk_table_collection_t tables;
    int ret;
    const char *node_text = "1 0 -1 -1\n"
                            "0 0 -1 -1\n";
    const char *edge_text = "0 0.6 1 0\n"
                            "0.4 1 1 0\n";

    /* This collection is created with edge metadata disabled. */
    ret = tsk_table_collection_init(&tables, TSK_TC_NO_EDGE_METADATA);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    parse_nodes(node_text, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 2);
    parse_edges(edge_text, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);

    ret = tsk_edge_table_squash(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);

    tsk_table_collection_free(&tables);
}
/*
 * Squash must refuse an edge table whose row carries metadata, and must
 * accept the same row when the collection is created without edge metadata.
 */
static void
test_edge_table_squash_metadata(void)
{
    tsk_table_collection_t tables;
    tsk_id_t edge_id;
    int ret;

    /* Metadata enabled and present on the edge: squash reports an error. */
    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 10;
    edge_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 1, 1, "metadata", 8);
    CU_ASSERT_EQUAL_FATAL(edge_id, 0);
    ret = tsk_edge_table_squash(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);
    tsk_table_collection_free(&tables);

    /* Edge metadata disabled: the same edge squashes without error. */
    ret = tsk_table_collection_init(&tables, TSK_TC_NO_EDGE_METADATA);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 10;
    edge_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 1, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(edge_id, 0);
    ret = tsk_edge_table_squash(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_table_collection_free(&tables);
}
/*
 * General exercise of the site table API.
 *
 * Covers, in order: adding and reading rows, clearing, set_columns and
 * append_columns, truncation, equality with and without metadata, NULL
 * argument handling, extending from another table (whole table, prefix and
 * indexed subset), rejection of malformed offset arrays and finally the
 * metadata schema.
 */
static void
test_site_table(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_site_table_t table, table2;
    tsk_size_t num_rows, j;
    char *ancestral_state;
    char *metadata;
    double *position;
    tsk_site_t site, site2;
    tsk_size_t *ancestral_state_offset;
    tsk_size_t *metadata_offset;
    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };
    tsk_size_t num_row_subset = 6;

    ret = tsk_site_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_site_table_set_max_rows_increment(&table, 1);
    tsk_site_table_set_max_metadata_length_increment(&table, 1);
    tsk_site_table_set_max_ancestral_state_length_increment(&table, 1);
    tsk_site_table_print_state(&table, _devnull);
    ret = tsk_site_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Row 0: one byte of ancestral state, no metadata */
    ret_id = tsk_site_table_add_row(&table, 0, "A", 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    CU_ASSERT_EQUAL(table.position[0], 0);
    CU_ASSERT_EQUAL(table.ancestral_state_offset[0], 0);
    CU_ASSERT_EQUAL(table.ancestral_state_offset[1], 1);
    CU_ASSERT_EQUAL(table.ancestral_state_length, 1);
    CU_ASSERT_EQUAL(table.metadata_offset[0], 0);
    CU_ASSERT_EQUAL(table.metadata_offset[1], 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    CU_ASSERT_EQUAL(table.num_rows, 1);

    ret = tsk_site_table_get_row(&table, 0, &site);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(site.position, 0);
    CU_ASSERT_EQUAL(site.ancestral_state_length, 1);
    CU_ASSERT_NSTRING_EQUAL(site.ancestral_state, "A", 1);
    CU_ASSERT_EQUAL(site.metadata_length, 0);

    /* Row 1: two bytes of ancestral state, two bytes of metadata */
    ret_id = tsk_site_table_add_row(&table, 1, "AA", 2, "{}", 2);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    CU_ASSERT_EQUAL(table.position[1], 1);
    CU_ASSERT_EQUAL(table.ancestral_state_offset[2], 3);
    CU_ASSERT_EQUAL(table.metadata_offset[1], 0);
    CU_ASSERT_EQUAL(table.metadata_offset[2], 2);
    CU_ASSERT_EQUAL(table.metadata_length, 2);
    CU_ASSERT_EQUAL(table.num_rows, 2);

    ret = tsk_site_table_get_row(&table, 1, &site);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(site.position, 1);
    CU_ASSERT_EQUAL(site.ancestral_state_length, 2);
    CU_ASSERT_NSTRING_EQUAL(site.ancestral_state, "AA", 2);
    CU_ASSERT_EQUAL(site.metadata_length, 2);
    CU_ASSERT_NSTRING_EQUAL(site.metadata, "{}", 2);

    /* Row 2: one byte of ancestral state, eight bytes of metadata */
    ret_id = tsk_site_table_add_row(&table, 2, "A", 1, "metadata", 8);
    CU_ASSERT_EQUAL_FATAL(ret_id, 2);
    CU_ASSERT_EQUAL(table.position[2], 2);
    CU_ASSERT_EQUAL(table.ancestral_state_offset[3], 4);
    CU_ASSERT_EQUAL(table.ancestral_state_length, 4);
    CU_ASSERT_EQUAL(table.metadata_offset[3], 10);
    CU_ASSERT_EQUAL(table.metadata_length, 10);
    CU_ASSERT_EQUAL(table.num_rows, 3);

    ret = tsk_site_table_get_row(&table, 3, &site);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);

    tsk_site_table_print_state(&table, _devnull);
    ret = tsk_site_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_site_table_clear(&table);
    CU_ASSERT_EQUAL(table.num_rows, 0);
    CU_ASSERT_EQUAL(table.ancestral_state_length, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    CU_ASSERT_EQUAL(table.ancestral_state_offset[0], 0);
    CU_ASSERT_EQUAL(table.metadata_offset[0], 0);

    /* Build column arrays in which every row has exactly one byte of
     * ancestral state and one byte of metadata */
    num_rows = 100;
    position = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(position != NULL);
    ancestral_state = tsk_malloc(num_rows * sizeof(char));
    CU_ASSERT_FATAL(ancestral_state != NULL);
    ancestral_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(ancestral_state_offset != NULL);
    metadata = tsk_malloc(num_rows * sizeof(char));
    CU_ASSERT_FATAL(metadata != NULL);
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);

    for (j = 0; j < num_rows; j++) {
        position[j] = (double) j;
        ancestral_state[j] = (char) j;
        ancestral_state_offset[j] = (tsk_size_t) j;
        metadata[j] = (char) ('A' + j);
        metadata_offset[j] = (tsk_size_t) j;
    }
    ancestral_state_offset[num_rows] = num_rows;
    metadata_offset[num_rows] = num_rows;

    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.position, position, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.ancestral_state, ancestral_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.ancestral_state_length, num_rows);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);

    /* Append another num rows */
    ret = tsk_site_table_append_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.position, position, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.position + num_rows, position, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.ancestral_state, ancestral_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.ancestral_state + num_rows, ancestral_state,
                        num_rows * sizeof(char)),
        0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
    CU_ASSERT_EQUAL(table.ancestral_state_length, 2 * num_rows);

    /* truncate back to num_rows */
    ret = tsk_site_table_truncate(&table, num_rows);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.position, position, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.ancestral_state, ancestral_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.ancestral_state_length, num_rows);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);

    ret = tsk_site_table_truncate(&table, num_rows + 1);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);

    /* Test equality with and without metadata */
    ret = tsk_site_table_copy(&table, &table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the metadata values */
    table2.metadata[0] = 0;
    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the last metadata entry */
    table2.metadata_offset[table2.num_rows]
        = table2.metadata_offset[table2.num_rows - 1];
    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Delete all metadata */
    tsk_memset(table2.metadata_offset, 0,
        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));
    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    tsk_site_table_free(&table2);

    /* Inputs cannot be NULL */
    ret = tsk_site_table_set_columns(&table, num_rows, NULL, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_site_table_set_columns(&table, num_rows, position, NULL,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_site_table_set_columns(
        &table, num_rows, position, ancestral_state, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    /* Metadata and metadata_offset must both be null */
    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Set metadata to NULL */
    ret = tsk_site_table_set_columns(
        &table, num_rows, position, ancestral_state, ancestral_state_offset, NULL, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* From here on metadata_offset is all zeros, i.e. it describes rows with
     * empty metadata, until it is rebuilt for the bad offset checks below */
    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_EQUAL(tsk_memcmp(table.position, position, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.ancestral_state, ancestral_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.ancestral_state_length, num_rows);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);

    /* Test extend method */
    ret = tsk_site_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_site_table_init(&table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Can't extend from self */
    ret = tsk_site_table_extend(&table, &table, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);

    /* Two empty tables */
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));
    ret = tsk_site_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));

    /* Row out of bounds */
    ret = tsk_site_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);

    /* Num rows out of bounds */
    ret = tsk_site_table_extend(&table, &table2, num_rows * 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);

    /* Copy rows in order if index NULL */
    ret = tsk_site_table_set_columns(&table2, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));
    ret = tsk_site_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));

    /* Copy nothing if index not NULL but length zero */
    ret = tsk_site_table_extend(&table, &table2, 0, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));

    /* Copy first N rows in order if index NULL */
    ret = tsk_site_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_site_table_extend(&table, &table2, num_rows / 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_site_table_truncate(&table2, num_rows / 2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));
    ret = tsk_site_table_set_columns(&table2, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Copy a subset */
    ret = tsk_site_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));
    ret = tsk_site_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < num_row_subset; j++) {
        ret = tsk_site_table_get_row(&table, (tsk_id_t) j, &site);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_site_table_get_row(&table2, row_subset[j], &site2);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(site.position, site2.position);
        CU_ASSERT_EQUAL(site.ancestral_state_length, site2.ancestral_state_length);
        CU_ASSERT_EQUAL(site.metadata_length, site2.metadata_length);
        CU_ASSERT_EQUAL(tsk_memcmp(site.ancestral_state, site2.ancestral_state,
                            site.ancestral_state_length * sizeof(*site.ancestral_state)),
            0);
        CU_ASSERT_EQUAL(tsk_memcmp(site.metadata, site2.metadata,
                            site.metadata_length * sizeof(*site.metadata)),
            0);
    }

    /* Test for bad offsets. The metadata offsets were zeroed earlier, so
     * rebuild them first: each check below must start from two valid offset
     * arrays and corrupt exactly one entry, otherwise the error could come
     * from the other column. */
    for (j = 0; j <= num_rows; j++) {
        metadata_offset[j] = j;
    }
    ancestral_state_offset[0] = 1;
    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);
    ancestral_state_offset[0] = 0;
    ancestral_state_offset[num_rows] = 0;
    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);
    ancestral_state_offset[num_rows] = num_rows;

    metadata_offset[0] = 1;
    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);
    metadata_offset[0] = 0;
    metadata_offset[num_rows] = 0;
    ret = tsk_site_table_set_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);
    metadata_offset[num_rows] = num_rows;

    ret = tsk_site_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Metadata schema: unset on this table so far, then set, copied and
     * compared */
    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);
    CU_ASSERT_EQUAL(table.metadata_schema, NULL);
    const char *example = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_length = (tsk_size_t) strlen(example);
    const char *example2 = "A different example 🎄🌳🌴🌲🎋";
    /* Must be the length of example2 itself: it is the shorter string, so
     * using the length of example would read past its end */
    tsk_size_t example2_length = (tsk_size_t) strlen(example2);
    tsk_site_table_set_metadata_schema(&table, example, example_length);
    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);

    ret = tsk_site_table_copy(&table, &table2, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);
    tsk_site_table_set_metadata_schema(&table2, example, example_length);
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, 0));
    tsk_site_table_set_metadata_schema(&table2, example2, example2_length);
    CU_ASSERT_FALSE(tsk_site_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_site_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));

    ret = tsk_site_table_clear(&table);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, 0);
    CU_ASSERT_EQUAL(table.ancestral_state_length, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_site_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_site_table_free(&table2);
    CU_ASSERT_EQUAL(ret, 0);
    free(position);
    free(ancestral_state);
    free(ancestral_state_offset);
    free(metadata);
    free(metadata_offset);
}
/*
 * Exercises tsk_site_table_takeset_columns.
 *
 * A 100 row reference table is built with add_row, its columns are duplicated
 * into freshly allocated arrays, and those arrays are handed over to a second
 * table with takeset. The two tables must then compare equal. Afterwards the
 * error paths are checked (NULL columns, inconsistent metadata arguments, a
 * bad ancestral state offset), then that clearing keeps max_rows, and finally
 * that passing NULL for both metadata arguments yields all-zero metadata
 * offsets.
 */
static void
test_site_table_takeset(void)
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_site_table_t source_table, table;
    tsk_size_t num_rows = 100;
    tsk_id_t j;
    double *position;
    char *ancestral_state;
    tsk_size_t *ancestral_state_offset;
    char *metadata;
    tsk_size_t *metadata_offset;
    const char *test_ancestral_state = "red";
    tsk_size_t test_ancestral_state_length = 3;
    const char *test_metadata = "test";
    tsk_size_t test_metadata_length = 4;
    /* Reference for the expected metadata offsets when no metadata is given */
    tsk_size_t zeros[num_rows + 1];

    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));

    /* Make a table to copy from */
    ret = tsk_site_table_init(&source_table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        ret_id = tsk_site_table_add_row(&source_table, (double) j, test_ancestral_state,
            test_ancestral_state_length, test_metadata, test_metadata_length);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Prepare arrays to be taken */
    position = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(position != NULL);
    tsk_memcpy(position, source_table.position, num_rows * sizeof(double));
    ancestral_state = tsk_malloc(num_rows * test_ancestral_state_length * sizeof(char));
    CU_ASSERT_FATAL(ancestral_state != NULL);
    tsk_memcpy(ancestral_state, source_table.ancestral_state,
        num_rows * test_ancestral_state_length * sizeof(char));
    ancestral_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(ancestral_state_offset != NULL);
    tsk_memcpy(ancestral_state_offset, source_table.ancestral_state_offset,
        (num_rows + 1) * sizeof(tsk_size_t));
    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));
    CU_ASSERT_FATAL(metadata != NULL);
    tsk_memcpy(
        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);
    tsk_memcpy(metadata_offset, source_table.metadata_offset,
        (num_rows + 1) * sizeof(tsk_size_t));

    ret = tsk_site_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Add one row so that we can check takeset frees it */
    ret_id = tsk_site_table_add_row(&table, 1, test_ancestral_state,
        test_ancestral_state_length, test_metadata, test_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = tsk_site_table_takeset_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&source_table, &table, 0));

    /* Test error states, all of these must not take the array, or free existing */
    /* metadata and metadata offset must be simultaneously NULL or not */
    ret = tsk_site_table_takeset_columns(&table, num_rows, NULL, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_site_table_takeset_columns(&table, num_rows, position, NULL,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_site_table_takeset_columns(
        &table, num_rows, position, ancestral_state, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_site_table_takeset_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_site_table_takeset_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Check bad offset in ancestral_state */
    ancestral_state_offset[0] = 1;
    ret = tsk_site_table_takeset_columns(&table, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);

    /* Truncation after takeset keeps memory and max_rows */
    ret = tsk_site_table_clear(&table);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);

    /* The first set of arrays was handed to the table above, so allocate a
     * fresh set for the second takeset */
    position = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(position != NULL);
    tsk_memcpy(position, source_table.position, num_rows * sizeof(double));
    ancestral_state = tsk_malloc(num_rows * test_ancestral_state_length * sizeof(char));
    CU_ASSERT_FATAL(ancestral_state != NULL);
    tsk_memcpy(ancestral_state, source_table.ancestral_state,
        num_rows * test_ancestral_state_length * sizeof(char));
    ancestral_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(ancestral_state_offset != NULL);
    tsk_memcpy(ancestral_state_offset, source_table.ancestral_state_offset,
        (num_rows + 1) * sizeof(tsk_size_t));

    /* if metadata and offset are both null, all entries are zero length*/
    num_rows = 10;
    ret = tsk_site_table_takeset_columns(
        &table, num_rows, position, ancestral_state, ancestral_state_offset, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_site_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_site_table_free(&source_table);
    CU_ASSERT_EQUAL(ret, 0);
}
static void
test_site_table_update_row(void)
{
int ret;
tsk_id_t ret_id;
tsk_site_table_t table;
tsk_site_t row;
const char *ancestral_state = "XYZ";
const char *metadata = "ABC";
ret = tsk_site_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_site_table_add_row(&table, 0, ancestral_state, 1, metadata, 1);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&table, 1, ancestral_state, 2, metadata, 2);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&table, 2, ancestral_state, 3, metadata, 3);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_site_table_update_row(
&table, 0, 1, &ancestral_state[1], 1, &metadata[1], 1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.position, 1);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 1);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'Y');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_site_table_update_row(&table, 0, row.position + 1, row.ancestral_state,
row.ancestral_state_length, row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.position, 2);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 1);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'Y');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_site_table_update_row(&table, 0, row.position, row.ancestral_state,
row.ancestral_state_length, row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.position, 2);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 1);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'Y');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_site_table_update_row(
&table, 0, row.position, NULL, 0, row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.position, 2);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_site_table_update_row(&table, 0, 2, ancestral_state, 3, metadata, 3);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.position, 2);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 3);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'X');
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[1], 'Y');
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[2], 'Z');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_site_table_update_row(&table, 1, 5, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_get_row(&table, 1, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.position, 5);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);
ret = tsk_site_table_get_row(&table, 2, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.position, 2);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 3);
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'X');
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[1], 'Y');
CU_ASSERT_EQUAL_FATAL(row.ancestral_state[2], 'Z');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_site_table_update_row(&table, 3, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tsk_site_table_free(&table);
}
/* Exercises tsk_site_table_keep_rows on a three-row table: keeping every
 * row, keeping no rows, keeping only the middle row, and keeping a leading
 * prefix of rows (which must give the same table as truncating). Where an
 * id_map is supplied, the test checks that kept rows map to their new ID
 * and dropped rows map to -1. */
static void
test_site_table_keep_rows(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_size_t j;
    tsk_site_table_t source, t1, t2;
    tsk_site_t row;
    const char *ancestral_state = "XYZ";
    const char *metadata = "ABC";
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t id_map[3];
    tsk_id_t indexes[] = { 0, 1, 2 };

    /* Row k has position k and the first k + 1 bytes of each string. */
    ret = tsk_site_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_site_table_add_row(&source, 0, ancestral_state, 1, metadata, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&source, 1, ancestral_state, 2, metadata, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&source, 2, ancestral_state, 3, metadata, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    /* Keeping everything leaves the table unchanged, with or without an
     * id_map being requested. */
    ret = tsk_site_table_copy(&source, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &source, 0));
    ret = tsk_site_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &source, 0));
    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Keeping nothing empties the table. */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Keeping only the middle row moves it to ID 0. */
    ret = tsk_site_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_site_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
    ret = tsk_site_table_get_row(&t1, 0, &row);
    /* Make sure the row was actually fetched before inspecting it. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.position, 1);
    CU_ASSERT_EQUAL_FATAL(row.ancestral_state_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[0], 'X');
    CU_ASSERT_EQUAL_FATAL(row.ancestral_state[1], 'Y');
    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');

    tsk_site_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_site_table_copy(&source, &t2, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_site_table_copy(&source, &t1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_site_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept. */
        keep[j] = 1;
        ret = tsk_site_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_site_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_site_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_site_table_equals(&source, &t2, 0));

        tsk_site_table_free(&t1);
        tsk_site_table_free(&t2);
    }

    tsk_site_table_free(&source);
}
/* End-to-end test of the mutation table: row-by-row insertion and
 * retrieval, set_columns/append_columns/truncate (with and without the
 * optional parent, time and metadata columns), equality comparisons with
 * and without metadata, NULL-argument and bad-offset error paths, extend,
 * metadata schema handling, and clear. */
static void
test_mutation_table(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_mutation_table_t table, table2;
    tsk_size_t num_rows = 100;
    tsk_size_t max_len = 20;
    tsk_size_t k, len;
    tsk_id_t j;
    tsk_id_t *node;
    tsk_id_t *parent;
    tsk_id_t *site;
    double *time;
    char *derived_state, *metadata;
    char c[max_len + 1];
    tsk_size_t *derived_state_offset, *metadata_offset;
    tsk_mutation_t mutation, mutation2;
    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };
    tsk_size_t num_row_subset = 6;
    const char *example = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    const char *example2 = "A different example 🎄🌳🌴🌲🎋";
    tsk_size_t example_length, example2_length;

    for (j = 0; j < (tsk_id_t) max_len; j++) {
        c[j] = (char) ('A' + j);
    }

    ret = tsk_mutation_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Increments of 1 so that the arrays have to grow repeatedly as rows
     * are added below. */
    tsk_mutation_table_set_max_rows_increment(&table, 1);
    tsk_mutation_table_set_max_metadata_length_increment(&table, 1);
    tsk_mutation_table_set_max_derived_state_length_increment(&table, 1);
    tsk_mutation_table_print_state(&table, _devnull);
    ret = tsk_mutation_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Add rows one at a time; row j carries the first min(j + 1, max_len)
     * bytes of c as both derived state and metadata. */
    len = 0;
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        k = TSK_MIN((tsk_size_t) j + 1, max_len);
        ret_id = tsk_mutation_table_add_row(&table, j, j, j, (double) j, c, k, c, k);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
        CU_ASSERT_EQUAL(table.site[j], j);
        CU_ASSERT_EQUAL(table.node[j], j);
        CU_ASSERT_EQUAL(table.parent[j], j);
        CU_ASSERT_EQUAL(table.time[j], j);
        CU_ASSERT_EQUAL(table.derived_state_offset[j], len);
        CU_ASSERT_EQUAL(table.metadata_offset[j], len);
        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);
        len += k;
        CU_ASSERT_EQUAL(table.derived_state_offset[j + 1], len);
        CU_ASSERT_EQUAL(table.derived_state_length, len);
        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], len);
        CU_ASSERT_EQUAL(table.metadata_length, len);

        ret = tsk_mutation_table_get_row(&table, (tsk_id_t) j, &mutation);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(mutation.id, j);
        CU_ASSERT_EQUAL(mutation.site, j);
        CU_ASSERT_EQUAL(mutation.node, j);
        CU_ASSERT_EQUAL(mutation.parent, j);
        CU_ASSERT_EQUAL(mutation.time, j);
        CU_ASSERT_EQUAL(mutation.metadata_length, k);
        CU_ASSERT_NSTRING_EQUAL(mutation.metadata, c, k);
        CU_ASSERT_EQUAL(mutation.derived_state_length, k);
        CU_ASSERT_NSTRING_EQUAL(mutation.derived_state, c, k);
    }
    ret = tsk_mutation_table_get_row(&table, (tsk_id_t) num_rows, &mutation);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
    tsk_mutation_table_print_state(&table, _devnull);
    ret = tsk_mutation_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Build column arrays in which every row has exactly one byte of
     * derived state and one byte of metadata. */
    num_rows *= 2;
    site = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(site != NULL);
    node = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(node != NULL);
    parent = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(parent != NULL);
    time = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(time != NULL);
    derived_state = tsk_malloc(num_rows * sizeof(char));
    CU_ASSERT_FATAL(derived_state != NULL);
    derived_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(derived_state_offset != NULL);
    metadata = tsk_malloc(num_rows * sizeof(char));
    CU_ASSERT_FATAL(metadata != NULL);
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);

    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        node[j] = j;
        site[j] = j + 1;
        parent[j] = j + 2;
        time[j] = (double) (j + 3);
        derived_state[j] = 'Y';
        derived_state_offset[j] = (tsk_size_t) j;
        metadata[j] = 'M';
        metadata_offset[j] = (tsk_size_t) j;
    }

    derived_state_offset[num_rows] = num_rows;
    metadata_offset[num_rows] = num_rows;
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.derived_state_length, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);

    /* Append another num_rows */
    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.site + num_rows, site, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.node + num_rows, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.parent + num_rows, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);
    /* Check both the original and the appended half of the ragged columns. */
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.derived_state + num_rows, derived_state,
                        num_rows * sizeof(char)),
        0);
    CU_ASSERT_EQUAL(table.derived_state_length, 2 * num_rows);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);

    /* Truncate back to num_rows */
    ret = tsk_mutation_table_truncate(&table, num_rows);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.derived_state_length, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);

    /* Test equality with and without metadata */
    ret = tsk_mutation_table_copy(&table, &table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the metadata values */
    table2.metadata[0] = 0;
    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the last metadata entry */
    table2.metadata_offset[table2.num_rows]
        = table2.metadata_offset[table2.num_rows - 1];
    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Delete all metadata */
    tsk_memset(table2.metadata_offset, 0,
        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));
    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    tsk_mutation_table_free(&table2);

    ret = tsk_mutation_table_truncate(&table, num_rows + 1);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);

    /* Check all this again, except with parent == NULL, time == NULL
     * and metadata == NULL. */
    /* The local arrays are set to the values the table is expected to hold
     * for the omitted columns: all-ones bytes for parent, TSK_UNKNOWN_TIME
     * for time and all-zero metadata offsets. */
    tsk_memset(parent, 0xff, num_rows * sizeof(tsk_id_t));
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        time[j] = TSK_UNKNOWN_TIME;
    }
    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, NULL, NULL,
        derived_state, derived_state_offset, NULL, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.derived_state_offset, derived_state_offset,
                        num_rows * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.derived_state_length, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    /* Append another num_rows */
    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, NULL, NULL,
        derived_state, derived_state_offset, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.site, site, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.site + num_rows, site, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.node + num_rows, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.parent + num_rows, parent, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.derived_state, derived_state, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.derived_state + num_rows, derived_state,
                        num_rows * sizeof(char)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
    CU_ASSERT_EQUAL(table.derived_state_length, 2 * num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    /* Inputs except parent, time, metadata and metadata_offset cannot be NULL*/
    ret = tsk_mutation_table_set_columns(&table, num_rows, NULL, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, NULL, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,
        NULL, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,
        derived_state, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    /* metadata and metadata_offset must be both NULL or both non-NULL */
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Inputs except parent, time, metadata and metadata_offset cannot be NULL*/
    ret = tsk_mutation_table_append_columns(&table, num_rows, NULL, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_append_columns(&table, num_rows, site, NULL, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,
        NULL, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,
        derived_state, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_append_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Test extend method */
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        parent[j] = j + 2;
        time[j] = (double) (j + 3);
        metadata[j] = (char) ('A' + j);
        metadata_offset[j] = (tsk_size_t) j;
    }
    metadata_offset[num_rows] = num_rows;
    ret = tsk_mutation_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_mutation_table_init(&table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Can't extend from self */
    ret = tsk_mutation_table_extend(&table, &table, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);

    /* Two empty tables */
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));
    ret = tsk_mutation_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));

    /* Row out of bounds */
    ret = tsk_mutation_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);

    /* Num rows out of bounds */
    ret = tsk_mutation_table_extend(&table, &table2, num_rows * 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);

    /* Copy rows in order if index NULL */
    ret = tsk_mutation_table_set_columns(&table2, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));
    ret = tsk_mutation_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));

    /* Copy nothing if index not NULL but length zero */
    ret = tsk_mutation_table_extend(&table, &table2, 0, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));

    /* Copy first N rows in order if index NULL */
    ret = tsk_mutation_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_mutation_table_extend(&table, &table2, num_rows / 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_mutation_table_truncate(&table2, num_rows / 2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));
    ret = tsk_mutation_table_set_columns(&table2, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Copy a subset */
    ret = tsk_mutation_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));
    ret = tsk_mutation_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (k = 0; k < num_row_subset; k++) {
        ret = tsk_mutation_table_get_row(&table, (tsk_id_t) k, &mutation);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_mutation_table_get_row(&table2, row_subset[k], &mutation2);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(mutation.site, mutation2.site);
        CU_ASSERT_EQUAL(mutation.node, mutation2.node);
        CU_ASSERT_EQUAL(mutation.parent, mutation2.parent);
        CU_ASSERT_EQUAL(mutation.time, mutation2.time);
        CU_ASSERT_EQUAL(mutation.derived_state_length, mutation2.derived_state_length);
        CU_ASSERT_EQUAL(mutation.metadata_length, mutation2.metadata_length);
        CU_ASSERT_EQUAL(
            tsk_memcmp(mutation.derived_state, mutation2.derived_state,
                mutation.derived_state_length * sizeof(*mutation.derived_state)),
            0);
        CU_ASSERT_EQUAL(tsk_memcmp(mutation.metadata, mutation2.metadata,
                            mutation.metadata_length * sizeof(*mutation.metadata)),
            0);
    }

    /* Test for bad offsets */
    derived_state_offset[0] = 1;
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, NULL, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);
    derived_state_offset[0] = 0;
    derived_state_offset[num_rows] = 0;
    ret = tsk_mutation_table_set_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, NULL, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);

    ret = tsk_mutation_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Metadata schema: unset at this point, then set, copied and compared. */
    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);
    CU_ASSERT_EQUAL(table.metadata_schema, NULL);
    example_length = (tsk_size_t) strlen(example);
    /* Each length must come from its own string: example is the longer one,
     * so using its length for example2 would read past the end of example2. */
    example2_length = (tsk_size_t) strlen(example2);
    tsk_mutation_table_set_metadata_schema(&table, example, example_length);
    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);

    ret = tsk_mutation_table_copy(&table, &table2, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);
    tsk_mutation_table_set_metadata_schema(&table2, example, example_length);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, 0));
    tsk_mutation_table_set_metadata_schema(&table2, example2, example2_length);
    CU_ASSERT_FALSE(tsk_mutation_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));

    ret = tsk_mutation_table_clear(&table);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, 0);
    CU_ASSERT_EQUAL(table.derived_state_length, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_mutation_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_mutation_table_free(&table2);
    CU_ASSERT_EQUAL(ret, 0);
    free(site);
    free(node);
    free(parent);
    free(time);
    free(derived_state);
    free(derived_state_offset);
    free(metadata);
    free(metadata_offset);
}
/* Tests tsk_mutation_table_takeset_columns: a successful takeset must give
 * a table equal to the one the arrays were copied from, invalid argument
 * combinations and bad offsets must be rejected, and NULL parent, time and
 * metadata columns must be filled with their default values. */
static void
test_mutation_table_takeset(void)
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_mutation_table_t source_table, table;
    tsk_size_t num_rows = 100;
    tsk_id_t j;
    tsk_id_t *site;
    tsk_id_t *node;
    tsk_id_t *parent;
    double *time;
    char *derived_state;
    tsk_size_t *derived_state_offset;
    char *metadata;
    tsk_size_t *metadata_offset;
    const char *test_derived_state = "red";
    tsk_size_t test_derived_state_length = 3;
    const char *test_metadata = "test";
    tsk_size_t test_metadata_length = 4;
    /* Expected contents of the defaulted columns checked at the end. */
    tsk_size_t zeros[num_rows + 1];
    tsk_id_t neg_ones[num_rows];
    double unknown_times[num_rows];

    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));
    tsk_memset(neg_ones, 0xff, num_rows * sizeof(tsk_id_t));

    /* Make a table to copy from */
    ret = tsk_mutation_table_init(&source_table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        unknown_times[j] = TSK_UNKNOWN_TIME;
        ret_id = tsk_mutation_table_add_row(&source_table, j, j + 1, j + 2,
            (double) j + 3, test_derived_state, test_derived_state_length, test_metadata,
            test_metadata_length);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Prepare arrays to be taken */
    site = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(site != NULL);
    tsk_memcpy(site, source_table.site, num_rows * sizeof(tsk_id_t));
    node = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(node != NULL);
    tsk_memcpy(node, source_table.node, num_rows * sizeof(tsk_id_t));
    parent = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(parent != NULL);
    tsk_memcpy(parent, source_table.parent, num_rows * sizeof(tsk_id_t));
    time = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(time != NULL);
    tsk_memcpy(time, source_table.time, num_rows * sizeof(double));
    derived_state = tsk_malloc(num_rows * test_derived_state_length * sizeof(char));
    CU_ASSERT_FATAL(derived_state != NULL);
    tsk_memcpy(derived_state, source_table.derived_state,
        num_rows * test_derived_state_length * sizeof(char));
    derived_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(derived_state_offset != NULL);
    tsk_memcpy(derived_state_offset, source_table.derived_state_offset,
        (num_rows + 1) * sizeof(tsk_size_t));
    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));
    CU_ASSERT_FATAL(metadata != NULL);
    tsk_memcpy(
        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);
    tsk_memcpy(metadata_offset, source_table.metadata_offset,
        (num_rows + 1) * sizeof(tsk_size_t));

    ret = tsk_mutation_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Add one row so that we can check takeset frees it */
    ret_id = tsk_mutation_table_add_row(&table, 1, 1, 1, 1, test_derived_state,
        test_derived_state_length, test_metadata, test_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&source_table, &table, 0));

    /* Test error states, all of these must not take the array, or free existing */
    /* metadata and metadata offset must be simultaneously NULL or not */
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, NULL, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, NULL, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    /* Parent and time not tested as they have defaults */
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,
        NULL, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,
        derived_state, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Check error on bad derived_state offset */
    derived_state_offset[0] = 1;
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, parent, time,
        derived_state, derived_state_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);

    /* Truncation after takeset keeps memory and max_rows */
    ret = tsk_mutation_table_clear(&table);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);

    /* Re init non-optional arrays: the previous ones were handed to the
     * table by the successful takeset above. */
    site = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(site != NULL);
    tsk_memcpy(site, source_table.site, num_rows * sizeof(tsk_id_t));
    node = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(node != NULL);
    tsk_memcpy(node, source_table.node, num_rows * sizeof(tsk_id_t));
    derived_state = tsk_malloc(num_rows * test_derived_state_length * sizeof(char));
    CU_ASSERT_FATAL(derived_state != NULL);
    tsk_memcpy(derived_state, source_table.derived_state,
        num_rows * test_derived_state_length * sizeof(char));
    derived_state_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(derived_state_offset != NULL);
    tsk_memcpy(derived_state_offset, source_table.derived_state_offset,
        (num_rows + 1) * sizeof(tsk_size_t));

    /* if metadata and offset are both null, all entries are zero length, if parent or
     * time are NULL they default to null values*/
    num_rows = 10;
    ret = tsk_mutation_table_takeset_columns(&table, num_rows, site, node, NULL, NULL,
        derived_state, derived_state_offset, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(tsk_memcmp(table.parent, neg_ones, num_rows * sizeof(tsk_id_t)), 0);
    /* time is a column of doubles, so compare with the double element size
     * to cover every byte of all num_rows entries. */
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.time, unknown_times, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_mutation_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_mutation_table_free(&source_table);
    CU_ASSERT_EQUAL(ret, 0);
}
static void
test_mutation_table_update_row(void)
{
int ret;
tsk_id_t ret_id;
tsk_mutation_table_t table;
tsk_mutation_t row;
const char *derived_state = "XYZ";
const char *metadata = "ABC";
ret = tsk_mutation_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id
= tsk_mutation_table_add_row(&table, 0, 1, 2, 3, derived_state, 1, metadata, 1);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id
= tsk_mutation_table_add_row(&table, 1, 2, 3, 4, derived_state, 2, metadata, 2);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id
= tsk_mutation_table_add_row(&table, 2, 3, 4, 5, derived_state, 3, metadata, 3);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_mutation_table_update_row(
&table, 0, 1, 2, 3, 4, &derived_state[1], 1, &metadata[1], 1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.site, 1);
CU_ASSERT_EQUAL_FATAL(row.node, 2);
CU_ASSERT_EQUAL_FATAL(row.parent, 3);
CU_ASSERT_EQUAL_FATAL(row.time, 4);
CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 1);
CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'Y');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_mutation_table_update_row(&table, 0, row.site + 1, row.node + 1,
row.parent + 1, row.time + 1, row.derived_state, row.derived_state_length,
row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.site, 2);
CU_ASSERT_EQUAL_FATAL(row.node, 3);
CU_ASSERT_EQUAL_FATAL(row.parent, 4);
CU_ASSERT_EQUAL_FATAL(row.time, 5);
CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 1);
CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'Y');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_mutation_table_update_row(&table, 0, row.site, row.node, row.parent,
row.time, row.derived_state, row.derived_state_length, row.metadata,
row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.site, 2);
CU_ASSERT_EQUAL_FATAL(row.node, 3);
CU_ASSERT_EQUAL_FATAL(row.parent, 4);
CU_ASSERT_EQUAL_FATAL(row.time, 5);
CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 1);
CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'Y');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_mutation_table_update_row(&table, 0, row.site, row.node, row.parent,
row.time, NULL, 0, row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.site, 2);
CU_ASSERT_EQUAL_FATAL(row.node, 3);
CU_ASSERT_EQUAL_FATAL(row.parent, 4);
CU_ASSERT_EQUAL_FATAL(row.time, 5);
CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_mutation_table_update_row(
&table, 0, 2, 3, 4, 5, derived_state, 3, metadata, 3);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.site, 2);
CU_ASSERT_EQUAL_FATAL(row.node, 3);
CU_ASSERT_EQUAL_FATAL(row.parent, 4);
CU_ASSERT_EQUAL_FATAL(row.time, 5);
CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 3);
CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'X');
CU_ASSERT_EQUAL_FATAL(row.derived_state[1], 'Y');
CU_ASSERT_EQUAL_FATAL(row.derived_state[2], 'Z');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_mutation_table_update_row(&table, 1, 5, 6, 7, 8, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_get_row(&table, 1, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.site, 5);
CU_ASSERT_EQUAL_FATAL(row.node, 6);
CU_ASSERT_EQUAL_FATAL(row.parent, 7);
CU_ASSERT_EQUAL_FATAL(row.time, 8);
CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);
ret = tsk_mutation_table_get_row(&table, 2, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.site, 2);
CU_ASSERT_EQUAL_FATAL(row.node, 3);
CU_ASSERT_EQUAL_FATAL(row.parent, 4);
CU_ASSERT_EQUAL_FATAL(row.time, 5);
CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 3);
CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'X');
CU_ASSERT_EQUAL_FATAL(row.derived_state[1], 'Y');
CU_ASSERT_EQUAL_FATAL(row.derived_state[2], 'Z');
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_mutation_table_update_row(&table, 3, 0, 0, 0, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
tsk_mutation_table_free(&table);
}
/* Checks tsk_mutation_table_keep_rows on a three-row table: keeping every
 * row, keeping none, keeping only the middle row, and keeping a growing
 * prefix of rows (which is compared against truncate and then rebuilt with
 * extend). */
static void
test_mutation_table_keep_rows(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_size_t j;
    tsk_mutation_table_t source, t1, t2;
    tsk_mutation_t row;
    const char *derived_state = "XYZ";
    const char *metadata = "ABC";
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t id_map[3];
    tsk_id_t indexes[] = { 0, 1, 2 };

    /* Row k stores the first k + 1 bytes of derived_state and metadata */
    ret = tsk_mutation_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &source, 0, 1, -1, 3.0, derived_state, 1, metadata, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &source, 1, 2, -1, 4.0, derived_state, 2, metadata, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &source, 2, 3, 0, 5.0, derived_state, 3, metadata, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    /* Keeping every row leaves the table equal to the source, with or
     * without an id_map being requested */
    ret = tsk_mutation_table_copy(&source, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &source, 0));
    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &source, 0));
    /* id_map was filled in by the first call above */
    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Keeping nothing empties the table and maps every row to -1 */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Keep only the middle row; t1 is reused, hence TSK_NO_INIT */
    ret = tsk_mutation_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_mutation_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
    ret = tsk_mutation_table_get_row(&t1, 0, &row);
    /* Make sure the lookup succeeded before inspecting row */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.site, 1);
    CU_ASSERT_EQUAL_FATAL(row.node, 2);
    CU_ASSERT_EQUAL_FATAL(row.parent, -1);
    CU_ASSERT_EQUAL_FATAL(row.time, 4);
    CU_ASSERT_EQUAL_FATAL(row.derived_state_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.derived_state[0], 'X');
    CU_ASSERT_EQUAL_FATAL(row.derived_state[1], 'Y');
    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');

    tsk_mutation_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_mutation_table_copy(&source, &t2, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_mutation_table_copy(&source, &t1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_mutation_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept */
        keep[j] = 1;
        ret = tsk_mutation_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_mutation_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_mutation_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_mutation_table_equals(&source, &t2, 0));

        tsk_mutation_table_free(&t1);
        tsk_mutation_table_free(&t2);
    }

    tsk_mutation_table_free(&source);
}
/* Checks how tsk_mutation_table_keep_rows treats the parent column:
 * out-of-bounds parents on kept rows are errors, removing a row that a kept
 * row refers to as its parent is an error, parents of kept rows are remapped
 * on success, and the table is left untouched when an error is returned. */
static void
test_mutation_table_keep_rows_parent_references(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_id_t k;
    tsk_mutation_table_t original, work;
    tsk_bool_t keep_flags[4] = { 1, 1, 1, 1 };
    tsk_id_t new_ids[4];
    /* Rows 2 and 3 both name row 1 as their parent */
    const tsk_id_t parents[4] = { -1, -1, 1, 1 };

    ret = tsk_mutation_table_init(&original, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (k = 0; k < 4; k++) {
        ret_id = tsk_mutation_table_add_row(
            &original, k, k + 1, parents[k], 3.0 + (double) k, "A", 1, NULL, 0);
        CU_ASSERT_FATAL(ret_id >= 0);
    }
    ret = tsk_mutation_table_copy(&original, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* A parent below -1 or at/after num_rows is rejected and no rows go away */
    work.parent[0] = -2;
    ret = tsk_mutation_table_keep_rows(&work, keep_flags, 0, new_ids);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
    CU_ASSERT_EQUAL_FATAL(work.num_rows, 4);

    work.parent[0] = 4;
    ret = tsk_mutation_table_keep_rows(&work, keep_flags, 0, new_ids);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
    CU_ASSERT_EQUAL_FATAL(work.num_rows, 4);

    /* The bad parent does not matter once its row is dropped */
    keep_flags[0] = false;
    ret = tsk_mutation_table_keep_rows(&work, keep_flags, 0, new_ids);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_mutation_table_free(&work);

    /* Dropping row 1 while rows 2 and 3 still point at it must fail */
    ret = tsk_mutation_table_copy(&original, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep_flags[0] = true;
    keep_flags[1] = false;
    ret = tsk_mutation_table_keep_rows(&work, keep_flags, 0, new_ids);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&original, &work, 0));
    tsk_mutation_table_free(&work);

    /* Dropping row 0, which nothing refers to, succeeds and shifts parents */
    ret = tsk_mutation_table_copy(&original, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep_flags[0] = false;
    keep_flags[1] = true;
    ret = tsk_mutation_table_keep_rows(&work, keep_flags, 0, new_ids);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(work.num_rows, 3);
    CU_ASSERT_EQUAL_FATAL(work.parent[0], TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(work.parent[1], 0);
    CU_ASSERT_EQUAL_FATAL(work.parent[2], 0);
    tsk_mutation_table_free(&work);

    /* An out-of-bounds parent on the last row: error, table unchanged */
    original.parent[3] = -2;
    ret = tsk_mutation_table_copy(&original, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep_flags[0] = true;
    ret = tsk_mutation_table_keep_rows(&work, keep_flags, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&original, &work, 0));
    tsk_mutation_table_free(&work);

    /* The last row refers to a row being dropped: error, table unchanged */
    original.parent[3] = 0;
    ret = tsk_mutation_table_copy(&original, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep_flags[0] = false;
    ret = tsk_mutation_table_keep_rows(&work, keep_flags, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&original, &work, 0));
    tsk_mutation_table_free(&work);

    tsk_mutation_table_free(&original);
}
/* Exercises the core tsk_migration_table_t API: add_row/get_row, set_columns,
 * append_columns, truncate, equality with and without metadata, NULL-input
 * validation, extend, and metadata schema handling. */
static void
test_migration_table(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_migration_table_t table, table2;
    tsk_size_t num_rows = 100;
    tsk_id_t j;
    tsk_id_t *node;
    tsk_id_t *source, *dest;
    double *left, *right, *time;
    tsk_migration_t migration, migration2;
    char *metadata;
    tsk_size_t *metadata_offset;
    const char *test_metadata = "test";
    tsk_size_t test_metadata_length = 4;
    char metadata_copy[test_metadata_length + 1];
    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };
    tsk_size_t num_row_subset = 6;

    metadata_copy[test_metadata_length] = '\0';
    ret = tsk_migration_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_migration_table_set_max_rows_increment(&table, 1);
    tsk_migration_table_print_state(&table, _devnull);
    ret = tsk_migration_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Add rows one at a time, checking the columns and get_row after each */
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        ret_id = tsk_migration_table_add_row(&table, (double) j, (double) j, j, j, j,
            (double) j, test_metadata, test_metadata_length);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
        CU_ASSERT_EQUAL(table.left[j], j);
        CU_ASSERT_EQUAL(table.right[j], j);
        CU_ASSERT_EQUAL(table.node[j], j);
        CU_ASSERT_EQUAL(table.source[j], j);
        CU_ASSERT_EQUAL(table.dest[j], j);
        CU_ASSERT_EQUAL(table.time[j], j);
        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);
        CU_ASSERT_EQUAL(
            table.metadata_length, (tsk_size_t) (j + 1) * test_metadata_length);
        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], table.metadata_length);
        /* check the metadata */
        tsk_memcpy(metadata_copy, table.metadata + table.metadata_offset[j],
            test_metadata_length);
        CU_ASSERT_NSTRING_EQUAL(metadata_copy, test_metadata, test_metadata_length);

        ret = tsk_migration_table_get_row(&table, (tsk_id_t) j, &migration);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(migration.id, j);
        CU_ASSERT_EQUAL(migration.left, j);
        CU_ASSERT_EQUAL(migration.right, j);
        CU_ASSERT_EQUAL(migration.node, j);
        CU_ASSERT_EQUAL(migration.source, j);
        CU_ASSERT_EQUAL(migration.dest, j);
        CU_ASSERT_EQUAL(migration.time, j);
        CU_ASSERT_EQUAL(migration.metadata_length, test_metadata_length);
        CU_ASSERT_NSTRING_EQUAL(migration.metadata, test_metadata, test_metadata_length);
    }
    ret = tsk_migration_table_get_row(&table, (tsk_id_t) num_rows, &migration);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
    tsk_migration_table_print_state(&table, _devnull);
    ret = tsk_migration_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Build input columns filled with recognisable byte patterns */
    num_rows *= 2;
    left = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(left != NULL);
    tsk_memset(left, 1, num_rows * sizeof(double));
    right = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(right != NULL);
    tsk_memset(right, 2, num_rows * sizeof(double));
    time = tsk_malloc(num_rows * sizeof(double));
    CU_ASSERT_FATAL(time != NULL);
    tsk_memset(time, 3, num_rows * sizeof(double));
    node = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(node != NULL);
    tsk_memset(node, 4, num_rows * sizeof(tsk_id_t));
    source = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(source != NULL);
    tsk_memset(source, 5, num_rows * sizeof(tsk_id_t));
    dest = tsk_malloc(num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(dest != NULL);
    tsk_memset(dest, 6, num_rows * sizeof(tsk_id_t));
    metadata = tsk_malloc(num_rows * sizeof(char));
    /* Check the allocation before writing through the pointer */
    CU_ASSERT_FATAL(metadata != NULL);
    tsk_memset(metadata, 'a', num_rows * sizeof(char));
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);
    /* One byte of metadata per row */
    for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {
        metadata_offset[j] = (tsk_size_t) j;
    }
    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, node, source,
        dest, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);

    /* Append another num_rows */
    ret = tsk_migration_table_append_columns(&table, num_rows, left, right, node, source,
        dest, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.left + num_rows, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.right + num_rows, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.node + num_rows, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.source + num_rows, source, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.dest + num_rows, dest, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);

    /* Truncate back to num_rows */
    ret = tsk_migration_table_truncate(&table, num_rows);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);

    /* Test equality with and without metadata */
    ret = tsk_migration_table_copy(&table, &table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the metadata values */
    table2.metadata[0] = 0;
    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the last metadata entry */
    table2.metadata_offset[table2.num_rows]
        = table2.metadata_offset[table2.num_rows - 1];
    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Delete all metadata */
    tsk_memset(table2.metadata_offset, 0,
        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));
    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    tsk_migration_table_free(&table2);

    ret = tsk_migration_table_truncate(&table, num_rows + 1);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);

    /* inputs cannot be NULL */
    ret = tsk_migration_table_set_columns(&table, num_rows, NULL, right, node, source,
        dest, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_set_columns(&table, num_rows, left, NULL, node, source,
        dest, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, NULL, source,
        dest, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, node, NULL,
        dest, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, node, source,
        NULL, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_set_columns(&table, num_rows, left, right, node, source,
        dest, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    /* metadata and metadata_offset must be supplied together */
    ret = tsk_migration_table_set_columns(
        &table, num_rows, left, right, node, source, dest, time, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_set_columns(
        &table, num_rows, left, right, node, source, dest, time, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    tsk_migration_table_clear(&table);
    CU_ASSERT_EQUAL(table.num_rows, 0);

    /* if metadata and metadata_offset are both null, all metadatas are zero length */
    num_rows = 10;
    tsk_memset(metadata_offset, 0, (num_rows + 1) * sizeof(tsk_size_t));
    ret = tsk_migration_table_set_columns(
        &table, num_rows, left, right, node, source, dest, time, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    ret = tsk_migration_table_append_columns(
        &table, num_rows, left, right, node, source, dest, time, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.left, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.left + num_rows, left, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.right, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.right + num_rows, right, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.time, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.time + num_rows, time, num_rows * sizeof(double)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.node, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.node + num_rows, node, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.source, source, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.source + num_rows, source, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.dest, dest, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.dest + num_rows, dest, num_rows * sizeof(tsk_id_t)), 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset + num_rows, metadata_offset,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    tsk_migration_table_print_state(&table, _devnull);
    ret = tsk_migration_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Test extend method */
    ret = tsk_migration_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_migration_table_init(&table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Can't extend from self */
    ret = tsk_migration_table_extend(&table, &table, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);

    /* Two empty tables */
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));
    ret = tsk_migration_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));

    /* Row out of bounds */
    ret = tsk_migration_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);

    /* Num rows out of bounds */
    ret = tsk_migration_table_extend(&table, &table2, num_rows * 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);

    /* Copy rows in order if index NULL */
    ret = tsk_migration_table_set_columns(&table2, num_rows, left, right, node, source,
        dest, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));
    ret = tsk_migration_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));

    /* Copy nothing if index not NULL but length zero */
    ret = tsk_migration_table_extend(&table, &table2, 0, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));

    /* Copy first N rows in order if index NULL */
    ret = tsk_migration_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_migration_table_extend(&table, &table2, num_rows / 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_migration_table_truncate(&table2, num_rows / 2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));
    ret = tsk_migration_table_set_columns(&table2, num_rows, left, right, node, source,
        dest, time, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Copy a subset */
    ret = tsk_migration_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));
    ret = tsk_migration_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < (tsk_id_t) num_row_subset; j++) {
        ret = tsk_migration_table_get_row(&table, j, &migration);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_migration_table_get_row(&table2, row_subset[j], &migration2);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(migration.source, migration2.source);
        CU_ASSERT_EQUAL(migration.dest, migration2.dest);
        CU_ASSERT_EQUAL(migration.node, migration2.node);
        CU_ASSERT_EQUAL(migration.left, migration2.left);
        CU_ASSERT_EQUAL(migration.right, migration2.right);
        CU_ASSERT_EQUAL(migration.time, migration2.time);
        CU_ASSERT_EQUAL(migration.metadata_length, migration2.metadata_length);
        CU_ASSERT_EQUAL(tsk_memcmp(migration.metadata, migration2.metadata,
                            migration.metadata_length * sizeof(*migration.metadata)),
            0);
    }

    /* Metadata schema: starts empty, can be set, is copied, and takes part
     * in equality unless TSK_CMP_IGNORE_METADATA is given */
    ret = tsk_migration_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);
    CU_ASSERT_EQUAL(table.metadata_schema, NULL);
    const char *example = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_length = (tsk_size_t) strlen(example);
    const char *example2 = "A different example 🎄🌳🌴🌲🎋";
    /* Must be the length of example2 itself: example is the longer string, so
     * using its length here would read past the end of example2 */
    tsk_size_t example2_length = (tsk_size_t) strlen(example2);
    ret = tsk_migration_table_set_metadata_schema(&table, example, example_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);

    ret = tsk_migration_table_copy(&table, &table2, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);
    ret = tsk_migration_table_set_metadata_schema(&table2, example, example_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, 0));
    ret = tsk_migration_table_set_metadata_schema(&table2, example2, example2_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_migration_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(tsk_migration_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));

    /* Capture each return value so the assertions below check these calls
     * rather than a stale ret */
    ret = tsk_migration_table_clear(&table);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_migration_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_migration_table_free(&table2);
    CU_ASSERT_EQUAL(ret, 0);
    free(left);
    free(right);
    free(time);
    free(node);
    free(source);
    free(dest);
    free(metadata);
    free(metadata_offset);
}
/* Checks tsk_migration_table_takeset_columns: a successful takeset makes the
 * table equal to the table the columns were duplicated from, NULL arguments
 * are rejected with TSK_ERR_BAD_PARAM_VALUE, and passing NULL for both
 * metadata arguments yields zero-length metadata for every row. */
static void
test_migration_table_takeset(void)
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_id_t k;
    tsk_migration_table_t expected, target;
    tsk_size_t num_rows = 100;
    const char *row_metadata = "test";
    tsk_size_t row_metadata_length = 4;
    double *left_col;
    double *right_col;
    tsk_id_t *node_col;
    tsk_id_t *source_col;
    tsk_id_t *dest_col;
    double *time_col;
    char *metadata_col;
    tsk_size_t *offset_col;
    tsk_size_t zero_offsets[num_rows + 1];

    tsk_memset(zero_offsets, 0, (num_rows + 1) * sizeof(tsk_size_t));

    /* Build the table whose columns get duplicated below */
    ret = tsk_migration_table_init(&expected, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (k = 0; k < (tsk_id_t) num_rows; k++) {
        ret_id = tsk_migration_table_add_row(&expected, (double) k, (double) k + 1,
            k + 2, k + 3, k + 4, (double) k + 5, row_metadata, row_metadata_length);
        CU_ASSERT_EQUAL_FATAL(ret_id, k);
    }

    /* Heap copies of every column, to be handed over to the target table */
    left_col = tsk_malloc(num_rows * sizeof(*left_col));
    CU_ASSERT_FATAL(left_col != NULL);
    tsk_memcpy(left_col, expected.left, num_rows * sizeof(*left_col));

    right_col = tsk_malloc(num_rows * sizeof(*right_col));
    CU_ASSERT_FATAL(right_col != NULL);
    tsk_memcpy(right_col, expected.right, num_rows * sizeof(*right_col));

    node_col = tsk_malloc(num_rows * sizeof(*node_col));
    CU_ASSERT_FATAL(node_col != NULL);
    tsk_memcpy(node_col, expected.node, num_rows * sizeof(*node_col));

    source_col = tsk_malloc(num_rows * sizeof(*source_col));
    CU_ASSERT_FATAL(source_col != NULL);
    tsk_memcpy(source_col, expected.source, num_rows * sizeof(*source_col));

    dest_col = tsk_malloc(num_rows * sizeof(*dest_col));
    CU_ASSERT_FATAL(dest_col != NULL);
    tsk_memcpy(dest_col, expected.dest, num_rows * sizeof(*dest_col));

    time_col = tsk_malloc(num_rows * sizeof(*time_col));
    CU_ASSERT_FATAL(time_col != NULL);
    tsk_memcpy(time_col, expected.time, num_rows * sizeof(*time_col));

    metadata_col = tsk_malloc(num_rows * row_metadata_length * sizeof(*metadata_col));
    CU_ASSERT_FATAL(metadata_col != NULL);
    tsk_memcpy(metadata_col, expected.metadata,
        num_rows * row_metadata_length * sizeof(*metadata_col));

    offset_col = tsk_malloc((num_rows + 1) * sizeof(*offset_col));
    CU_ASSERT_FATAL(offset_col != NULL);
    tsk_memcpy(
        offset_col, expected.metadata_offset, (num_rows + 1) * sizeof(*offset_col));

    ret = tsk_migration_table_init(&target, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Give the target a row of its own first, so that takeset has existing
     * column storage to release */
    ret_id = tsk_migration_table_add_row(
        &target, 1, 1, 1, 1, 1, 1, row_metadata, row_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);

    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, right_col,
        node_col, source_col, dest_col, time_col, metadata_col, offset_col);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&expected, &target, 0));

    /* Error cases: each call replaces one argument with NULL. None of them may
     * take ownership of the arrays or free what the table already holds. The
     * last two cover metadata and metadata_offset, which must be either both
     * NULL or both non-NULL. */
    ret = tsk_migration_table_takeset_columns(&target, num_rows, NULL, right_col,
        node_col, source_col, dest_col, time_col, metadata_col, offset_col);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, NULL,
        node_col, source_col, dest_col, time_col, metadata_col, offset_col);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, right_col,
        NULL, source_col, dest_col, time_col, metadata_col, offset_col);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, right_col,
        node_col, NULL, dest_col, time_col, metadata_col, offset_col);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, right_col,
        node_col, source_col, NULL, time_col, metadata_col, offset_col);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, right_col,
        node_col, source_col, dest_col, NULL, metadata_col, offset_col);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, right_col,
        node_col, source_col, dest_col, time_col, NULL, offset_col);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, right_col,
        node_col, source_col, dest_col, time_col, metadata_col, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Clearing after takeset leaves max_rows at the taken size */
    ret = tsk_migration_table_clear(&target);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(target.max_rows, num_rows);

    /* Fresh copies of the mandatory columns for the second takeset */
    left_col = tsk_malloc(num_rows * sizeof(*left_col));
    CU_ASSERT_FATAL(left_col != NULL);
    tsk_memcpy(left_col, expected.left, num_rows * sizeof(*left_col));

    right_col = tsk_malloc(num_rows * sizeof(*right_col));
    CU_ASSERT_FATAL(right_col != NULL);
    tsk_memcpy(right_col, expected.right, num_rows * sizeof(*right_col));

    node_col = tsk_malloc(num_rows * sizeof(*node_col));
    CU_ASSERT_FATAL(node_col != NULL);
    tsk_memcpy(node_col, expected.node, num_rows * sizeof(*node_col));

    source_col = tsk_malloc(num_rows * sizeof(*source_col));
    CU_ASSERT_FATAL(source_col != NULL);
    tsk_memcpy(source_col, expected.source, num_rows * sizeof(*source_col));

    dest_col = tsk_malloc(num_rows * sizeof(*dest_col));
    CU_ASSERT_FATAL(dest_col != NULL);
    tsk_memcpy(dest_col, expected.dest, num_rows * sizeof(*dest_col));

    time_col = tsk_malloc(num_rows * sizeof(*time_col));
    CU_ASSERT_FATAL(time_col != NULL);
    tsk_memcpy(time_col, expected.time, num_rows * sizeof(*time_col));

    /* With metadata and its offsets both NULL every row has empty metadata */
    num_rows = 10;
    ret = tsk_migration_table_takeset_columns(&target, num_rows, left_col, right_col,
        node_col, source_col, dest_col, time_col, NULL, NULL);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(target.num_rows, num_rows);
    CU_ASSERT_EQUAL(tsk_memcmp(target.metadata_offset, zero_offsets,
                        (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(target.metadata_length, 0);

    ret = tsk_migration_table_free(&target);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_migration_table_free(&expected);
    CU_ASSERT_EQUAL(ret, 0);
}
static void
test_migration_table_update_row(void)
{
int ret;
tsk_id_t ret_id;
tsk_migration_table_t table;
tsk_migration_t row;
const char *metadata = "ABC";
ret = tsk_migration_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_migration_table_add_row(&table, 0, 1.0, 2, 3, 4, 5, metadata, 1);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_migration_table_add_row(&table, 1, 2.0, 3, 4, 5, 6, metadata, 2);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_migration_table_add_row(&table, 2, 3.0, 4, 5, 6, 7, metadata, 3);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_migration_table_update_row(&table, 0, 1, 2.0, 3, 4, 5, 6, &metadata[1], 1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_migration_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.left, 1);
CU_ASSERT_EQUAL_FATAL(row.right, 2.0);
CU_ASSERT_EQUAL_FATAL(row.node, 3);
CU_ASSERT_EQUAL_FATAL(row.source, 4);
CU_ASSERT_EQUAL_FATAL(row.dest, 5);
CU_ASSERT_EQUAL_FATAL(row.time, 6);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_migration_table_update_row(&table, 0, row.left + 1, row.right + 1,
row.node + 1, row.source + 1, row.dest + 1, row.time + 1, row.metadata,
row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_migration_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.left, 2);
CU_ASSERT_EQUAL_FATAL(row.right, 3.0);
CU_ASSERT_EQUAL_FATAL(row.node, 4);
CU_ASSERT_EQUAL_FATAL(row.source, 5);
CU_ASSERT_EQUAL_FATAL(row.dest, 6);
CU_ASSERT_EQUAL_FATAL(row.time, 7);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_migration_table_update_row(&table, 0, 0, 0, 0, 0, 0, 0, metadata, 3);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_migration_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.left, 0);
CU_ASSERT_EQUAL_FATAL(row.right, 0);
CU_ASSERT_EQUAL_FATAL(row.node, 0);
CU_ASSERT_EQUAL_FATAL(row.source, 0);
CU_ASSERT_EQUAL_FATAL(row.dest, 0);
CU_ASSERT_EQUAL_FATAL(row.time, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_migration_table_update_row(&table, 1, 0, 0, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_migration_table_get_row(&table, 1, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.left, 0);
CU_ASSERT_EQUAL_FATAL(row.right, 0);
CU_ASSERT_EQUAL_FATAL(row.node, 0);
CU_ASSERT_EQUAL_FATAL(row.source, 0);
CU_ASSERT_EQUAL_FATAL(row.dest, 0);
CU_ASSERT_EQUAL_FATAL(row.time, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);
ret = tsk_migration_table_get_row(&table, 2, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.left, 2);
CU_ASSERT_EQUAL_FATAL(row.right, 3.0);
CU_ASSERT_EQUAL_FATAL(row.node, 4);
CU_ASSERT_EQUAL_FATAL(row.source, 5);
CU_ASSERT_EQUAL_FATAL(row.dest, 6);
CU_ASSERT_EQUAL_FATAL(row.time, 7);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_migration_table_update_row(&table, 3, 0, 0, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
tsk_migration_table_free(&table);
}
/* Tests tsk_migration_table_keep_rows on a three-row table: keeping every
 * row, keeping no rows, keeping only the middle row, and keeping a growing
 * prefix of the rows (which must give the same table as truncate). */
static void
test_migration_table_keep_rows(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_size_t j;
    tsk_migration_table_t source, t1, t2;
    tsk_migration_t row;
    const char *metadata = "ABC";
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t id_map[3];
    tsk_id_t indexes[] = { 0, 1, 2 };

    /* Row k has metadata consisting of the first k + 1 bytes of "ABC" */
    ret = tsk_migration_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(&source, 0, 1.0, 2, 3, 4, 5, metadata, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_migration_table_add_row(&source, 1, 2.0, 3, 4, 5, 6, metadata, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_migration_table_add_row(&source, 2, 3.0, 4, 5, 6, 7, metadata, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    /* Keeping every row leaves the table unchanged, with or without an id_map */
    ret = tsk_migration_table_copy(&source, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &source, 0));
    ret = tsk_migration_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &source, 0));
    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Keeping nothing empties the table and maps every old ID to -1 */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Keeping only the middle row: it becomes row 0 */
    ret = tsk_migration_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_migration_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
    ret = tsk_migration_table_get_row(&t1, 0, &row);
    /* Must succeed before row is read; otherwise row is uninitialised */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.left, 1);
    CU_ASSERT_EQUAL_FATAL(row.right, 2);
    CU_ASSERT_EQUAL_FATAL(row.node, 3);
    CU_ASSERT_EQUAL_FATAL(row.source, 4);
    CU_ASSERT_EQUAL_FATAL(row.dest, 5);
    CU_ASSERT_EQUAL_FATAL(row.time, 6);
    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');

    tsk_migration_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_migration_table_copy(&source, &t2, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_migration_table_copy(&source, &t1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_migration_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept */
        keep[j] = 1;
        ret = tsk_migration_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_migration_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_migration_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_migration_table_equals(&source, &t2, 0));

        tsk_migration_table_free(&t1);
        tsk_migration_table_free(&t2);
    }

    tsk_migration_table_free(&source);
}
static void
test_individual_table(void)
{
int ret = 0;
tsk_id_t ret_id;
tsk_individual_table_t table, table2;
tsk_size_t num_rows = 100;
tsk_id_t j;
tsk_size_t k;
tsk_flags_t *flags;
double *location;
tsk_id_t *parents;
char *metadata;
tsk_size_t *metadata_offset;
tsk_size_t *parents_offset;
tsk_size_t *location_offset;
tsk_individual_t individual;
tsk_individual_t individual2;
const char *test_metadata = "test";
tsk_size_t test_metadata_length = 4;
char metadata_copy[test_metadata_length + 1];
tsk_size_t spatial_dimension = 2;
tsk_size_t num_parents = 3;
double test_location[spatial_dimension];
tsk_id_t test_parents[num_parents];
tsk_size_t zeros[num_rows + 1];
tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };
tsk_size_t num_row_subset = 6;
tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));
for (k = 0; k < spatial_dimension; k++) {
test_location[k] = (double) k;
}
for (k = 0; k < num_parents; k++) {
test_parents[k] = (tsk_id_t) k + 42;
}
metadata_copy[test_metadata_length] = '\0';
ret = tsk_individual_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_individual_table_set_max_rows_increment(&table, 1);
tsk_individual_table_set_max_metadata_length_increment(&table, 1);
tsk_individual_table_set_max_location_length_increment(&table, 1);
tsk_individual_table_set_max_parents_length_increment(&table, 1);
tsk_individual_table_print_state(&table, _devnull);
for (j = 0; j < (tsk_id_t) num_rows; j++) {
ret_id = tsk_individual_table_add_row(&table, (tsk_flags_t) j, test_location,
spatial_dimension, test_parents, num_parents, test_metadata,
test_metadata_length);
CU_ASSERT_EQUAL_FATAL(ret_id, j);
CU_ASSERT_EQUAL(table.flags[j], (tsk_flags_t) j);
for (k = 0; k < spatial_dimension; k++) {
test_location[k] = (double) k;
CU_ASSERT_EQUAL(
table.location[spatial_dimension * (size_t) j + k], test_location[k]);
}
CU_ASSERT_EQUAL(
table.metadata_length, (tsk_size_t) (j + 1) * test_metadata_length);
CU_ASSERT_EQUAL(table.metadata_offset[j + 1], table.metadata_length);
/* check the metadata */
tsk_memcpy(metadata_copy, table.metadata + table.metadata_offset[j],
test_metadata_length);
CU_ASSERT_NSTRING_EQUAL(metadata_copy, test_metadata, test_metadata_length);
ret = tsk_individual_table_get_row(&table, (tsk_id_t) j, &individual);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(individual.id, j);
CU_ASSERT_EQUAL(individual.flags, (tsk_flags_t) j);
CU_ASSERT_EQUAL(individual.location_length, spatial_dimension);
CU_ASSERT_NSTRING_EQUAL(
individual.location, test_location, spatial_dimension * sizeof(double));
CU_ASSERT_EQUAL(individual.metadata_length, test_metadata_length);
CU_ASSERT_NSTRING_EQUAL(
individual.metadata, test_metadata, test_metadata_length);
}
/* Test equality with and without metadata */
tsk_individual_table_copy(&table, &table2, 0);
CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
/* Change the metadata values */
table2.metadata[0] = 0;
CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
/* Change the last metadata entry */
table2.metadata_offset[table2.num_rows]
= table2.metadata_offset[table2.num_rows - 1];
CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
/* Delete all metadata */
tsk_memset(table2.metadata_offset, 0,
(table2.num_rows + 1) * sizeof(*table2.metadata_offset));
CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
tsk_individual_table_free(&table2);
ret = tsk_individual_table_get_row(&table, (tsk_id_t) num_rows, &individual);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
tsk_individual_table_print_state(&table, _devnull);
tsk_individual_table_clear(&table);
CU_ASSERT_EQUAL(table.num_rows, 0);
CU_ASSERT_EQUAL(table.metadata_length, 0);
num_rows *= 2;
flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));
CU_ASSERT_FATAL(flags != NULL);
for (k = 0; k < num_rows; k++) {
flags[k] = (tsk_flags_t) (k + num_rows);
}
location = tsk_malloc(spatial_dimension * num_rows * sizeof(double));
CU_ASSERT_FATAL(location != NULL);
for (k = 0; k < spatial_dimension * num_rows; k++) {
location[k] = (double) (k + (num_rows * 2));
}
location_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
CU_ASSERT_FATAL(location_offset != NULL);
for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {
location_offset[j] = (tsk_size_t) j * spatial_dimension;
}
parents = tsk_malloc(num_parents * num_rows * sizeof(tsk_id_t));
CU_ASSERT_FATAL(parents != NULL);
for (k = 0; k < num_parents * num_rows; k++) {
parents[k] = (tsk_id_t) (k + (num_rows * 4));
}
parents_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
CU_ASSERT_FATAL(parents_offset != NULL);
for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {
parents_offset[j] = (tsk_size_t) j * num_parents;
}
metadata = tsk_malloc(num_rows * sizeof(char));
for (k = 0; k < num_rows; k++) {
metadata[k] = (char) ((k % 58) + 65);
}
CU_ASSERT_FATAL(metadata != NULL);
metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
CU_ASSERT_FATAL(metadata_offset != NULL);
for (j = 0; j < (tsk_id_t) num_rows + 1; j++) {
metadata_offset[j] = (tsk_size_t) j;
}
ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,
location_offset, parents, parents_offset, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,
spatial_dimension * num_rows * sizeof(double)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location_offset, location_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.parents_offset, parents_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, num_rows);
CU_ASSERT_EQUAL(table.location_length, spatial_dimension * num_rows);
CU_ASSERT_EQUAL(table.parents_length, num_parents * num_rows);
CU_ASSERT_EQUAL(table.metadata_length, num_rows);
tsk_individual_table_print_state(&table, _devnull);
/* Append another num_rows onto the end */
ret = tsk_individual_table_append_columns(&table, num_rows, flags, location,
location_offset, parents, parents_offset, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.flags + num_rows, flags, num_rows * sizeof(tsk_flags_t)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,
spatial_dimension * num_rows * sizeof(double)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location + spatial_dimension * num_rows, location,
spatial_dimension * num_rows * sizeof(double)),
0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.parents + num_parents * num_rows, parents,
num_parents * num_rows * sizeof(tsk_id_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);
CU_ASSERT_EQUAL(table.parents_length, 2 * num_parents * num_rows);
CU_ASSERT_EQUAL(table.location_length, 2 * spatial_dimension * num_rows);
tsk_individual_table_print_state(&table, _devnull);
ret = tsk_individual_table_dump_text(&table, _devnull);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Truncate back to num_rows */
ret = tsk_individual_table_truncate(&table, num_rows);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,
spatial_dimension * num_rows * sizeof(double)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location_offset, location_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.parents_offset, parents_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset, metadata_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, num_rows);
CU_ASSERT_EQUAL(table.location_length, spatial_dimension * num_rows);
CU_ASSERT_EQUAL(table.parents_length, num_parents * num_rows);
CU_ASSERT_EQUAL(table.metadata_length, num_rows);
tsk_individual_table_print_state(&table, _devnull);
ret = tsk_individual_table_truncate(&table, num_rows + 1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);
/* flags can't be NULL */
ret = tsk_individual_table_set_columns(&table, num_rows, NULL, location,
location_offset, parents, parents_offset, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
/* location and location offset must be simultaneously NULL or not */
ret = tsk_individual_table_set_columns(&table, num_rows, flags, location, NULL,
parents, parents_offset, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_individual_table_set_columns(&table, num_rows, flags, NULL,
location_offset, NULL, NULL, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
/* parents and parents offset must be simultaneously NULL or not */
ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,
location_offset, parents, NULL, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,
location_offset, NULL, parents_offset, metadata, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
/* metadata and metadata offset must be simultaneously NULL or not */
ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,
location_offset, parents, parents_offset, NULL, metadata_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,
location_offset, parents, parents_offset, metadata, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
/* if location and location_offset are both null, all locations are zero length */
num_rows = 10;
ret = tsk_individual_table_set_columns(
&table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.location_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, num_rows);
CU_ASSERT_EQUAL(table.location_length, 0);
ret = tsk_individual_table_append_columns(
&table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.location_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location_offset + num_rows, zeros,
num_rows * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
CU_ASSERT_EQUAL(table.location_length, 0);
tsk_individual_table_print_state(&table, _devnull);
ret = tsk_individual_table_dump_text(&table, _devnull);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* if parents and parents_offset are both null, all parents are zero length */
num_rows = 10;
ret = tsk_individual_table_set_columns(
&table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.parents_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)), 0);
CU_ASSERT_EQUAL(table.num_rows, num_rows);
CU_ASSERT_EQUAL(table.parents_length, 0);
ret = tsk_individual_table_append_columns(
&table, num_rows, flags, NULL, NULL, NULL, NULL, NULL, NULL);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.parents_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.parents_offset + num_rows, zeros,
num_rows * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
CU_ASSERT_EQUAL(table.parents_length, 0);
tsk_individual_table_print_state(&table, _devnull);
ret = tsk_individual_table_dump_text(&table, _devnull);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* if metadata and metadata_offset are both null, all metadatas are zero length */
num_rows = 10;
ret = tsk_individual_table_set_columns(&table, num_rows, flags, location,
location_offset, parents, parents_offset, NULL, NULL);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.flags, flags, num_rows * sizeof(tsk_flags_t)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,
spatial_dimension * num_rows * sizeof(double)),
0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(double)), 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, num_rows);
CU_ASSERT_EQUAL(table.metadata_length, 0);
ret = tsk_individual_table_append_columns(&table, num_rows, flags, location,
location_offset, parents, parents_offset, NULL, NULL);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location, location,
spatial_dimension * num_rows * sizeof(double)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.location + spatial_dimension * num_rows, location,
spatial_dimension * num_rows * sizeof(double)),
0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.parents, parents, num_parents * num_rows * sizeof(tsk_id_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.parents + num_parents * num_rows, parents,
num_parents * num_rows * sizeof(tsk_id_t)),
0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_offset + num_rows, zeros,
num_rows * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
CU_ASSERT_EQUAL(table.metadata_length, 0);
tsk_individual_table_print_state(&table, _devnull);
tsk_individual_table_dump_text(&table, _devnull);
/* Test extend method */
ret = tsk_individual_table_truncate(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_init(&table2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Can't extend from self */
ret = tsk_individual_table_extend(&table, &table, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);
/* Two empty tables */
CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));
ret = tsk_individual_table_extend(&table, &table2, table2.num_rows, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));
/* Row out of bounds */
ret = tsk_individual_table_extend(&table, &table2, num_row_subset, row_subset, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
/* Num rows out of bounds */
ret = tsk_individual_table_extend(&table, &table2, num_rows * 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
/* Copy rows in order if index NULL */
ret = tsk_individual_table_set_columns(&table2, num_rows, flags, location,
location_offset, parents, parents_offset, metadata, metadata_offset);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));
ret = tsk_individual_table_extend(&table, &table2, table2.num_rows, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));
/* Copy nothing if index not NULL but length zero */
ret = tsk_individual_table_extend(&table, &table2, 0, row_subset, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));
/* Copy first N rows in order if index NULL */
ret = tsk_individual_table_truncate(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_extend(&table, &table2, num_rows / 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_truncate(&table2, num_rows / 2);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));
ret = tsk_individual_table_set_columns(&table2, num_rows, flags, location,
location_offset, parents, parents_offset, metadata, metadata_offset);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Copy a subset */
ret = tsk_individual_table_truncate(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));
ret = tsk_individual_table_extend(&table, &table2, num_row_subset, row_subset, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (k = 0; k < num_row_subset; k++) {
ret = tsk_individual_table_get_row(&table, (tsk_id_t) k, &individual);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_get_row(&table2, row_subset[k], &individual2);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(individual.flags, individual2.flags);
CU_ASSERT_EQUAL(individual.location_length, individual2.location_length);
CU_ASSERT_EQUAL(individual.parents_length, individual2.parents_length);
CU_ASSERT_EQUAL(individual.metadata_length, individual2.metadata_length);
CU_ASSERT_EQUAL(tsk_memcmp(individual.location, individual2.location,
individual.location_length * sizeof(*individual.location)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(individual.parents, individual2.parents,
individual.parents_length * sizeof(*individual.parents)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(individual.metadata, individual2.metadata,
individual.metadata_length * sizeof(*individual.metadata)),
0);
}
ret = tsk_individual_table_truncate(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(table.metadata_schema_length, 0);
CU_ASSERT_EQUAL(table.metadata_schema, NULL);
const char *example = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
tsk_size_t example_length = (tsk_size_t) strlen(example);
const char *example2 = "A different example 🎄🌳🌴🌲🎋";
tsk_size_t example2_length = (tsk_size_t) strlen(example);
tsk_individual_table_set_metadata_schema(&table, example, example_length);
CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);
CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);
tsk_individual_table_copy(&table, &table2, TSK_NO_INIT);
CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);
CU_ASSERT_EQUAL(
tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);
tsk_individual_table_set_metadata_schema(&table2, example, example_length);
CU_ASSERT_TRUE(tsk_individual_table_equals(&table, &table2, 0));
tsk_individual_table_set_metadata_schema(&table2, example2, example2_length);
CU_ASSERT_FALSE(tsk_individual_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_individual_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
tsk_individual_table_clear(&table);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(table.num_rows, 0);
CU_ASSERT_EQUAL(table.metadata_length, 0);
ret = tsk_individual_table_free(&table);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_individual_table_free(&table2);
CU_ASSERT_EQUAL(ret, 0);
free(flags);
free(location);
free(location_offset);
free(parents);
free(parents_offset);
free(metadata);
free(metadata_offset);
}
/* Tests tsk_individual_table_takeset_columns: a successful takeset of
 * arrays copied from a reference table, rejection of inconsistent NULL
 * column/offset pairs, retention of max_rows after clear, and the all-NULL
 * case in which every column is zero filled / zero length. */
static void
test_individual_table_takeset(void)
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_individual_table_t source_table, table;
    tsk_size_t num_rows = 100;
    tsk_id_t j;
    tsk_size_t k;
    tsk_flags_t *flags;
    double *location;
    tsk_id_t *parents;
    char *metadata;
    tsk_size_t *metadata_offset;
    tsk_size_t *parents_offset;
    tsk_size_t *location_offset;
    tsk_size_t spatial_dimension = 2;
    tsk_size_t num_parents = 3;
    const char *test_metadata = "test";
    tsk_size_t test_metadata_length = 4;
    double test_location[spatial_dimension];
    tsk_id_t test_parents[num_parents];
    tsk_size_t zeros[num_rows + 1];

    tsk_memset(zeros, 0, (num_rows + 1) * sizeof(tsk_size_t));
    /* Make a table to copy from */
    ret = tsk_individual_table_init(&source_table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (k = 0; k < spatial_dimension; k++) {
        test_location[k] = (double) k;
    }
    for (k = 0; k < num_parents; k++) {
        test_parents[k] = (tsk_id_t) k + 42;
    }
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        ret_id = tsk_individual_table_add_row(&source_table, (tsk_flags_t) j,
            test_location, spatial_dimension, test_parents, num_parents, test_metadata,
            test_metadata_length);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Prepare arrays to be taken */
    flags = tsk_malloc(num_rows * sizeof(tsk_flags_t));
    CU_ASSERT_FATAL(flags != NULL);
    tsk_memcpy(flags, source_table.flags, num_rows * sizeof(tsk_flags_t));
    location = tsk_malloc(spatial_dimension * num_rows * sizeof(double));
    CU_ASSERT_FATAL(location != NULL);
    tsk_memcpy(
        location, source_table.location, spatial_dimension * num_rows * sizeof(double));
    location_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(location_offset != NULL);
    tsk_memcpy(location_offset, source_table.location_offset,
        (num_rows + 1) * sizeof(tsk_size_t));
    parents = tsk_malloc(num_parents * num_rows * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(parents != NULL);
    tsk_memcpy(parents, source_table.parents, num_parents * num_rows * sizeof(tsk_id_t));
    parents_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(parents_offset != NULL);
    tsk_memcpy(parents_offset, source_table.parents_offset,
        (num_rows + 1) * sizeof(tsk_size_t));
    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));
    CU_ASSERT_FATAL(metadata != NULL);
    tsk_memcpy(
        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);
    tsk_memcpy(metadata_offset, source_table.metadata_offset,
        (num_rows + 1) * sizeof(tsk_size_t));

    ret = tsk_individual_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Add one row so that we can check takeset frees it */
    ret_id = tsk_individual_table_add_row(&table, (tsk_flags_t) 1, test_location,
        spatial_dimension, test_parents, num_parents, test_metadata,
        test_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);

    /* NOTE: the arrays are not freed by this test after this call;
     * presumably the table owns them once takeset succeeds */
    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,
        location_offset, parents, parents_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_individual_table_equals(&source_table, &table, 0));

    /* Test error states, all of these must not take the array, or free existing */
    /* location and location offset must be simultaneously NULL or not */
    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location, NULL,
        parents, parents_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, NULL,
        location_offset, NULL, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    /* parents and parents offset must be simultaneously NULL or not */
    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,
        location_offset, parents, NULL, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,
        location_offset, NULL, parents_offset, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    /* metadata and metadata offset must be simultaneously NULL or not */
    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,
        location_offset, parents, parents_offset, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_individual_table_takeset_columns(&table, num_rows, flags, location,
        location_offset, parents, parents_offset, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Truncation after takeset keeps memory and max_rows */
    ret = tsk_individual_table_clear(&table);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);

    /* if ragged array and offset are both null, all entries are zero length,
       NULL flags mean all zero entries */
    num_rows = 10;
    ret = tsk_individual_table_takeset_columns(
        &table, num_rows, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
    /* Verify the call succeeded before inspecting the columns it set */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.flags, zeros, num_rows * sizeof(tsk_flags_t)), 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.location_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.location_length, 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.parents_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)), 0);
    CU_ASSERT_EQUAL(table.parents_length, 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_offset, zeros, (num_rows + 1) * sizeof(tsk_size_t)),
        0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_individual_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_individual_table_free(&source_table);
    CU_ASSERT_EQUAL(ret, 0);
}
static void
test_individual_table_update_row(void)
{
int ret;
tsk_id_t ret_id;
tsk_individual_table_t table;
tsk_individual_t row;
double location[] = { 0, 1, 2 };
tsk_id_t parents[] = { 0, 1, 2 };
const char *metadata = "ABC";
ret = tsk_individual_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id
= tsk_individual_table_add_row(&table, 0, location, 1, parents, 1, metadata, 1);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id
= tsk_individual_table_add_row(&table, 1, location, 2, parents, 2, metadata, 2);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id
= tsk_individual_table_add_row(&table, 2, location, 3, parents, 3, metadata, 3);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_individual_table_update_row(
&table, 0, 1, &location[1], 1, &parents[1], 1, &metadata[1], 1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 1);
CU_ASSERT_EQUAL_FATAL(row.location_length, 1);
CU_ASSERT_EQUAL_FATAL(row.location[0], 1.0);
CU_ASSERT_EQUAL_FATAL(row.parents_length, 1);
CU_ASSERT_EQUAL_FATAL(row.parents[0], 1);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_individual_table_update_row(&table, 0, row.flags + 1, row.location,
row.location_length, row.parents, row.parents_length, row.metadata,
row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 2);
CU_ASSERT_EQUAL_FATAL(row.location_length, 1);
CU_ASSERT_EQUAL_FATAL(row.location[0], 1.0);
CU_ASSERT_EQUAL_FATAL(row.parents_length, 1);
CU_ASSERT_EQUAL_FATAL(row.parents[0], 1);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_individual_table_update_row(&table, 0, row.flags, location, 1, row.parents,
row.parents_length, row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 2);
CU_ASSERT_EQUAL_FATAL(row.location_length, 1);
CU_ASSERT_EQUAL_FATAL(row.location[0], 0.0);
CU_ASSERT_EQUAL_FATAL(row.parents_length, 1);
CU_ASSERT_EQUAL_FATAL(row.parents[0], 1);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_individual_table_update_row(&table, 0, row.flags, NULL, 0, row.parents,
row.parents_length, row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 2);
CU_ASSERT_EQUAL_FATAL(row.location_length, 0);
CU_ASSERT_EQUAL_FATAL(row.parents_length, 1);
CU_ASSERT_EQUAL_FATAL(row.parents[0], 1);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_individual_table_update_row(
&table, 0, 2, location, 3, parents, 3, metadata, 3);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 2);
CU_ASSERT_EQUAL_FATAL(row.location_length, 3);
CU_ASSERT_EQUAL_FATAL(row.location[0], 0);
CU_ASSERT_EQUAL_FATAL(row.location[1], 1);
CU_ASSERT_EQUAL_FATAL(row.location[2], 2);
CU_ASSERT_EQUAL_FATAL(row.parents_length, 3);
CU_ASSERT_EQUAL_FATAL(row.parents[0], 0);
CU_ASSERT_EQUAL_FATAL(row.parents[1], 1);
CU_ASSERT_EQUAL_FATAL(row.parents[2], 2);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_individual_table_update_row(&table, 1, 5, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_get_row(&table, 1, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 5);
CU_ASSERT_EQUAL_FATAL(row.location_length, 0);
CU_ASSERT_EQUAL_FATAL(row.parents_length, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);
ret = tsk_individual_table_get_row(&table, 2, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.flags, 2);
CU_ASSERT_EQUAL_FATAL(row.location_length, 3);
CU_ASSERT_EQUAL_FATAL(row.location[0], 0);
CU_ASSERT_EQUAL_FATAL(row.location[1], 1);
CU_ASSERT_EQUAL_FATAL(row.location[2], 2);
CU_ASSERT_EQUAL_FATAL(row.parents_length, 3);
CU_ASSERT_EQUAL_FATAL(row.parents[0], 0);
CU_ASSERT_EQUAL_FATAL(row.parents[1], 1);
CU_ASSERT_EQUAL_FATAL(row.parents[2], 2);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_individual_table_update_row(&table, 3, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
tsk_individual_table_free(&table);
}
/* Checks tsk_individual_table_keep_rows on a three-row table: keeping every
 * row, dropping every row, keeping only the middle row, and keeping a prefix
 * of rows (compared against tsk_individual_table_truncate). */
static void
test_individual_table_keep_rows(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_individual_t row;
    double location[] = { 0, 1, 2 };
    tsk_id_t parents[] = { -1, 1, -1 };
    const char *metadata = "ABC";
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t indexes[] = { 0, 1, 2 };
    tsk_id_t id_map[3];
    tsk_individual_table_t source, t1, t2;
    tsk_size_t j;

    /* Row j is added with flags == j and j + 1 entries in each ragged column. */
    ret = tsk_individual_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id
        = tsk_individual_table_add_row(&source, 0, location, 1, parents, 1, metadata, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_individual_table_add_row(&source, 1, location, 2, parents, 2, metadata, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_individual_table_add_row(&source, 2, location, 3, parents, 3, metadata, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    /* Keeping everything leaves the table unchanged, with or without an
     * id_map output array. */
    ret = tsk_individual_table_copy(&source, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_individual_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &source, 0));
    ret = tsk_individual_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &source, 0));
    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Dropping everything empties the table and maps every ID to -1. */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_individual_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Keep only the middle row; it becomes row 0 of the result. */
    ret = tsk_individual_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_individual_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
    ret = tsk_individual_table_get_row(&t1, 0, &row);
    /* Make sure the lookup succeeded before inspecting the row contents. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.flags, 1);
    CU_ASSERT_EQUAL_FATAL(row.parents_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.parents[0], -1);
    /* The parent reference to old row 1 is expected under its new ID, 0. */
    CU_ASSERT_EQUAL_FATAL(row.parents[1], 0);
    CU_ASSERT_EQUAL_FATAL(row.location_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.location[0], 0);
    CU_ASSERT_EQUAL_FATAL(row.location[1], 1);
    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');

    tsk_individual_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_individual_table_copy(&source, &t2, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_individual_table_copy(&source, &t1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_individual_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept. */
        keep[j] = 1;
        ret = tsk_individual_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_individual_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_individual_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t2, 0));

        tsk_individual_table_free(&t1);
        tsk_individual_table_free(&t2);
    }

    tsk_individual_table_free(&source);
}
static void
test_individual_table_keep_rows_parent_references(void)
{
int ret;
tsk_id_t ret_id;
tsk_individual_table_t source, t;
tsk_bool_t keep[] = { 1, 1, 1, 1 };
tsk_id_t parents[] = { -1, 1, 2 };
tsk_id_t id_map[4];
ret = tsk_individual_table_init(&source, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 3, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(&source, 0, NULL, 0, parents, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_individual_table_copy(&source, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* OOB errors */
t.parents[0] = -2;
ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);
t.parents[0] = 4;
ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(t.num_rows, 4);
/* But ignored if row is not kept */
keep[0] = false;
ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_individual_table_free(&t);
ret = tsk_individual_table_copy(&source, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Try to remove referenced row 2 */
keep[0] = true;
keep[2] = false;
ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));
tsk_individual_table_free(&t);
ret = tsk_individual_table_copy(&source, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* remove unreferenced row 0 */
keep[0] = false;
keep[2] = true;
ret = tsk_individual_table_keep_rows(&t, keep, 0, id_map);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(t.num_rows, 3);
CU_ASSERT_EQUAL_FATAL(t.parents[0], TSK_NULL);
CU_ASSERT_EQUAL_FATAL(t.parents[1], 0);
CU_ASSERT_EQUAL_FATAL(t.parents[2], 1);
tsk_individual_table_free(&t);
/* Check that we don't change the table in error cases. */
source.parents[1] = -2;
ret = tsk_individual_table_copy(&source, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
keep[0] = true;
ret = tsk_individual_table_keep_rows(&t, keep, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));
tsk_individual_table_free(&t);
/* Check that we don't change the table in error cases. */
source.parents[1] = 0;
ret = tsk_individual_table_copy(&source, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
keep[0] = false;
ret = tsk_individual_table_keep_rows(&t, keep, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
CU_ASSERT_TRUE(tsk_individual_table_equals(&source, &t, 0));
tsk_individual_table_free(&t);
tsk_individual_table_free(&source);
}
/* General test of the population table: adding rows, row retrieval, equality
 * with and without metadata, set/append/truncate of columns, the extend
 * method, bad offsets, and metadata schema handling. */
static void
test_population_table(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_population_table_t table, table2;
    tsk_size_t num_rows = 100;
    tsk_size_t max_len = 20;
    tsk_size_t k, len;
    tsk_id_t j;
    char *metadata;
    char c[max_len + 1];
    tsk_size_t *metadata_offset;
    tsk_population_t population, population2;
    tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };
    tsk_size_t num_row_subset = 6;

    /* Only the first max_len bytes of c are ever read below. */
    for (j = 0; j < (tsk_id_t) max_len; j++) {
        c[j] = (char) ('A' + j);
    }

    ret = tsk_population_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Increments of 1 so the columns have to grow repeatedly as rows are
     * added. */
    tsk_population_table_set_max_rows_increment(&table, 1);
    tsk_population_table_set_max_metadata_length_increment(&table, 1);
    tsk_population_table_print_state(&table, _devnull);
    ret = tsk_population_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Adding zero length metadata with NULL should be fine */

    ret_id = tsk_population_table_add_row(&table, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);
    CU_ASSERT_EQUAL(table.num_rows, 1);
    CU_ASSERT_EQUAL(table.metadata_offset[0], 0);
    CU_ASSERT_EQUAL(table.metadata_offset[1], 0);
    tsk_population_table_clear(&table);
    CU_ASSERT_EQUAL(table.num_rows, 0);

    /* Row j gets the first min(j + 1, max_len) bytes of c as metadata. */
    len = 0;
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        k = TSK_MIN((tsk_size_t) j + 1, max_len);
        ret_id = tsk_population_table_add_row(&table, c, k);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
        CU_ASSERT_EQUAL(table.metadata_offset[j], len);
        CU_ASSERT_EQUAL(table.num_rows, (tsk_size_t) j + 1);
        len += k;
        CU_ASSERT_EQUAL(table.metadata_offset[j + 1], len);
        CU_ASSERT_EQUAL(table.metadata_length, len);

        ret = tsk_population_table_get_row(&table, (tsk_id_t) j, &population);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(population.id, j);
        CU_ASSERT_EQUAL(population.metadata_length, k);
        CU_ASSERT_NSTRING_EQUAL(population.metadata, c, k);
    }

    /* Test equality with and without metadata */
    ret = tsk_population_table_copy(&table, &table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(
        tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the metadata values */
    table2.metadata[0] = 0;
    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(
        tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Change the last metadata entry */
    table2.metadata_offset[table2.num_rows]
        = table2.metadata_offset[table2.num_rows - 1];
    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(
        tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    /* Delete all metadata */
    tsk_memset(table2.metadata_offset, 0,
        (table2.num_rows + 1) * sizeof(*table2.metadata_offset));
    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(
        tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));
    tsk_population_table_free(&table2);

    ret = tsk_population_table_get_row(&table, (tsk_id_t) num_rows, &population);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);

    tsk_population_table_print_state(&table, _devnull);
    ret = tsk_population_table_dump_text(&table, _devnull);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Build columns for 2 * 100 rows, each row holding the single byte 'M'. */
    num_rows *= 2;
    metadata = tsk_malloc(num_rows * sizeof(char));
    CU_ASSERT_FATAL(metadata != NULL);
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);

    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        metadata[j] = 'M';
        metadata_offset[j] = (tsk_size_t) j;
    }

    metadata_offset[num_rows] = num_rows;
    ret = tsk_population_table_set_columns(&table, num_rows, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);

    /* Append another num_rows */
    ret = tsk_population_table_append_columns(
        &table, num_rows, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata + num_rows, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.metadata_length, 2 * num_rows);
    CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);

    /* Truncate back to num_rows */
    ret = tsk_population_table_truncate(&table, num_rows);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata, metadata, num_rows * sizeof(char)), 0);
    CU_ASSERT_EQUAL(table.num_rows, num_rows);
    CU_ASSERT_EQUAL(table.metadata_length, num_rows);

    ret = tsk_population_table_truncate(&table, num_rows + 1);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);

    /* Metadata = NULL gives an error */
    ret = tsk_population_table_set_columns(&table, num_rows, NULL, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_population_table_set_columns(&table, num_rows, metadata, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_population_table_set_columns(&table, num_rows, NULL, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Test extend method */
    ret = tsk_population_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_population_table_init(&table2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Can't extend from self */
    ret = tsk_population_table_extend(&table, &table, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);

    /* Two empty tables */
    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));
    ret = tsk_population_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));

    /* Row out of bounds */
    ret = tsk_population_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);

    /* Num rows out of bounds */
    ret = tsk_population_table_extend(&table, &table2, num_rows * 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);

    /* Copy rows in order if index NULL */
    ret = tsk_population_table_set_columns(&table2, num_rows, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));
    ret = tsk_population_table_extend(&table, &table2, table2.num_rows, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));

    /* Copy nothing if index not NULL but length zero */
    ret = tsk_population_table_extend(&table, &table2, 0, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));

    /* Copy first N rows in order if index NULL */
    ret = tsk_population_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_population_table_extend(&table, &table2, num_rows / 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_population_table_truncate(&table2, num_rows / 2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));
    ret = tsk_population_table_set_columns(&table2, num_rows, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Copy a subset */
    ret = tsk_population_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));
    ret = tsk_population_table_extend(&table, &table2, num_row_subset, row_subset, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (k = 0; k < num_row_subset; k++) {
        ret = tsk_population_table_get_row(&table, (tsk_id_t) k, &population);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_population_table_get_row(&table2, row_subset[k], &population2);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(population.metadata_length, population2.metadata_length);
        CU_ASSERT_EQUAL(tsk_memcmp(population.metadata, population2.metadata,
                            population.metadata_length * sizeof(*population.metadata)),
            0);
    }

    /* Test for bad offsets */
    metadata_offset[0] = 1;
    ret = tsk_population_table_set_columns(&table, num_rows, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);
    metadata_offset[0] = 0;
    metadata_offset[num_rows] = 0;
    ret = tsk_population_table_set_columns(&table, num_rows, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);

    ret = tsk_population_table_truncate(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Metadata schema: starts out empty, is carried over by copy, and takes
     * part in equality unless TSK_CMP_IGNORE_METADATA is given. */
    CU_ASSERT_EQUAL(table.metadata_schema_length, 0);
    CU_ASSERT_EQUAL(table.metadata_schema, NULL);
    const char *example = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_length = (tsk_size_t) strlen(example);
    const char *example2 = "A different example 🎄🌳🌴🌲🎋";
    /* Must be the length of example2 itself: it is shorter than example, so
     * using example's length would read past the end of this literal. */
    tsk_size_t example2_length = (tsk_size_t) strlen(example2);
    tsk_population_table_set_metadata_schema(&table, example, example_length);
    CU_ASSERT_EQUAL(table.metadata_schema_length, example_length);
    CU_ASSERT_EQUAL(tsk_memcmp(table.metadata_schema, example, example_length), 0);

    ret = tsk_population_table_copy(&table, &table2, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(table.metadata_schema_length, table2.metadata_schema_length);
    CU_ASSERT_EQUAL(
        tsk_memcmp(table.metadata_schema, table2.metadata_schema, example_length), 0);
    tsk_population_table_set_metadata_schema(&table2, example, example_length);
    CU_ASSERT_TRUE(tsk_population_table_equals(&table, &table2, 0));
    tsk_population_table_set_metadata_schema(&table2, example2, example2_length);
    CU_ASSERT_FALSE(tsk_population_table_equals(&table, &table2, 0));
    CU_ASSERT_TRUE(
        tsk_population_table_equals(&table, &table2, TSK_CMP_IGNORE_METADATA));

    /* Capture each return value so the assertions check the call just made
     * rather than a stale result. */
    ret = tsk_population_table_clear(&table);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(table.num_rows, 0);
    CU_ASSERT_EQUAL(table.metadata_length, 0);

    ret = tsk_population_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_population_table_free(&table2);
    CU_ASSERT_EQUAL(ret, 0);
    free(metadata);
    free(metadata_offset);
}
/* Tests tsk_population_table_takeset_columns: a successful takeset must make
 * the table equal to the table the arrays were copied from, and invalid
 * arguments must be rejected with the expected error codes. */
static void
test_population_table_takeset(void)
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_population_table_t source_table, table;
    tsk_size_t num_rows = 100;
    tsk_id_t j;
    char *metadata;
    tsk_size_t *metadata_offset;
    const char *test_metadata = "test";
    tsk_size_t test_metadata_length = 4;

    /* Make a table to copy from */
    ret = tsk_population_table_init(&source_table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        ret_id = tsk_population_table_add_row(
            &source_table, test_metadata, test_metadata_length);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Prepare arrays to be taken */
    metadata = tsk_malloc(num_rows * test_metadata_length * sizeof(char));
    CU_ASSERT_FATAL(metadata != NULL);
    tsk_memcpy(
        metadata, source_table.metadata, num_rows * test_metadata_length * sizeof(char));
    metadata_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(metadata_offset != NULL);
    tsk_memcpy(metadata_offset, source_table.metadata_offset,
        (num_rows + 1) * sizeof(tsk_size_t));

    ret = tsk_population_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Add one row so that we can check takeset frees it */
    ret_id = tsk_population_table_add_row(&table, test_metadata, test_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = tsk_population_table_takeset_columns(
        &table, num_rows, metadata, metadata_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_population_table_equals(&source_table, &table, 0));

    /* Test error states, all of these must not take the array, or free existing */
    ret = tsk_population_table_takeset_columns(&table, num_rows, NULL, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_population_table_takeset_columns(&table, num_rows, metadata, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_population_table_takeset_columns(&table, num_rows, NULL, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Test bad offset */
    metadata_offset[0] = 1;
    ret = tsk_population_table_takeset_columns(
        &table, num_rows, metadata, metadata_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);

    /* Truncation after takeset keeps memory and max_rows */
    ret = tsk_population_table_clear(&table);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);

    /* metadata and metadata_offset are not freed here: they were handed to
     * the table by the successful takeset above. */
    ret = tsk_population_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_population_table_free(&source_table);
    CU_ASSERT_EQUAL(ret, 0);
}
static void
test_population_table_update_row(void)
{
int ret;
tsk_id_t ret_id;
tsk_population_table_t table;
tsk_population_t row;
const char *metadata = "ABC";
ret = tsk_population_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_population_table_add_row(&table, metadata, 1);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_population_table_add_row(&table, metadata, 2);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_population_table_add_row(&table, metadata, 3);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_population_table_update_row(&table, 0, &metadata[1], 1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_population_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_population_table_update_row(&table, 0, row.metadata, row.metadata_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_population_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 1);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'B');
ret = tsk_population_table_update_row(&table, 0, metadata, 3);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_population_table_get_row(&table, 0, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_population_table_update_row(&table, 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_population_table_get_row(&table, 1, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 0);
ret = tsk_population_table_get_row(&table, 2, &row);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(row.metadata_length, 3);
CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');
CU_ASSERT_EQUAL_FATAL(row.metadata[2], 'C');
ret = tsk_population_table_update_row(&table, 3, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tsk_population_table_free(&table);
}
/* Checks tsk_population_table_keep_rows on a three-row table: keeping every
 * row, dropping every row, keeping only the middle row, and keeping a prefix
 * of rows (compared against tsk_population_table_truncate). */
static void
test_population_table_keep_rows(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_size_t j;
    tsk_population_table_t source, t1, t2;
    tsk_population_t row;
    const char *metadata = "ABC";
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t id_map[3];
    tsk_id_t indexes[] = { 0, 1, 2 };

    /* Rows 0, 1, 2 hold the metadata "A", "AB" and "ABC". */
    ret = tsk_population_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret_id = tsk_population_table_add_row(&source, metadata, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_population_table_add_row(&source, metadata, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_population_table_add_row(&source, metadata, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    /* Keeping everything leaves the table unchanged, with or without an
     * id_map output array. */
    ret = tsk_population_table_copy(&source, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &source, 0));
    ret = tsk_population_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &source, 0));
    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Dropping everything empties the table and maps every ID to -1. */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Keep only the middle row; it becomes row 0 of the result. */
    ret = tsk_population_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_population_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
    ret = tsk_population_table_get_row(&t1, 0, &row);
    /* Make sure the lookup succeeded before inspecting the row contents. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.metadata_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.metadata[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.metadata[1], 'B');

    tsk_population_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_population_table_copy(&source, &t2, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_population_table_copy(&source, &t1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_population_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept. */
        keep[j] = 1;
        ret = tsk_population_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_population_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_population_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_population_table_equals(&source, &t2, 0));

        tsk_population_table_free(&t1);
        tsk_population_table_free(&t2);
    }

    tsk_population_table_free(&source);
}
static void
test_provenance_table(void)
{
int ret;
tsk_id_t ret_id;
tsk_provenance_table_t table, table2;
tsk_size_t num_rows = 100;
tsk_size_t j;
char *timestamp;
tsk_size_t *timestamp_offset;
const char *test_timestamp = "2017-12-06T20:40:25+00:00";
tsk_size_t test_timestamp_length = (tsk_size_t) strlen(test_timestamp);
char timestamp_copy[test_timestamp_length + 1];
char *record;
tsk_size_t *record_offset;
const char *test_record = "{\"json\"=1234}";
tsk_size_t test_record_length = (tsk_size_t) strlen(test_record);
char record_copy[test_record_length + 1];
tsk_provenance_t provenance, provenance2;
tsk_id_t row_subset[6] = { 1, 9, 1, 0, 2, 2 };
tsk_size_t num_row_subset = 6;
timestamp_copy[test_timestamp_length] = '\0';
record_copy[test_record_length] = '\0';
ret = tsk_provenance_table_init(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_provenance_table_set_max_rows_increment(&table, 1);
tsk_provenance_table_set_max_timestamp_length_increment(&table, 1);
tsk_provenance_table_set_max_record_length_increment(&table, 1);
tsk_provenance_table_print_state(&table, _devnull);
ret = tsk_provenance_table_dump_text(&table, _devnull);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (j = 0; j < num_rows; j++) {
ret_id = tsk_provenance_table_add_row(&table, test_timestamp,
test_timestamp_length, test_record, test_record_length);
CU_ASSERT_EQUAL_FATAL(ret_id, (tsk_id_t) j);
CU_ASSERT_EQUAL(table.timestamp_length, (j + 1) * test_timestamp_length);
CU_ASSERT_EQUAL(table.timestamp_offset[j + 1], table.timestamp_length);
CU_ASSERT_EQUAL(table.record_length, (j + 1) * test_record_length);
CU_ASSERT_EQUAL(table.record_offset[j + 1], table.record_length);
/* check the timestamp */
tsk_memcpy(timestamp_copy, table.timestamp + table.timestamp_offset[j],
test_timestamp_length);
CU_ASSERT_NSTRING_EQUAL(timestamp_copy, test_timestamp, test_timestamp_length);
/* check the record */
tsk_memcpy(
record_copy, table.record + table.record_offset[j], test_record_length);
CU_ASSERT_NSTRING_EQUAL(record_copy, test_record, test_record_length);
ret = tsk_provenance_table_get_row(&table, (tsk_id_t) j, &provenance);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(provenance.id, (tsk_id_t) j);
CU_ASSERT_EQUAL(provenance.timestamp_length, test_timestamp_length);
CU_ASSERT_NSTRING_EQUAL(
provenance.timestamp, test_timestamp, test_timestamp_length);
CU_ASSERT_EQUAL(provenance.record_length, test_record_length);
CU_ASSERT_NSTRING_EQUAL(provenance.record, test_record, test_record_length);
}
ret = tsk_provenance_table_get_row(&table, (tsk_id_t) num_rows, &provenance);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
tsk_provenance_table_print_state(&table, _devnull);
ret = tsk_provenance_table_dump_text(&table, _devnull);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_provenance_table_clear(&table);
CU_ASSERT_EQUAL(table.num_rows, 0);
CU_ASSERT_EQUAL(table.timestamp_length, 0);
CU_ASSERT_EQUAL(table.record_length, 0);
num_rows *= 2;
timestamp = tsk_malloc(num_rows * sizeof(char));
tsk_memset(timestamp, 'a', num_rows * sizeof(char));
CU_ASSERT_FATAL(timestamp != NULL);
timestamp_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
CU_ASSERT_FATAL(timestamp_offset != NULL);
record = tsk_malloc(num_rows * sizeof(char));
tsk_memset(record, 'a', num_rows * sizeof(char));
CU_ASSERT_FATAL(record != NULL);
record_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
CU_ASSERT_FATAL(record_offset != NULL);
for (j = 0; j < num_rows + 1; j++) {
timestamp_offset[j] = j;
record_offset[j] = j;
}
ret = tsk_provenance_table_set_columns(
&table, num_rows, timestamp, timestamp_offset, record, record_offset);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp, timestamp, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp_offset, timestamp_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.record, record, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.record_offset, record_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, num_rows);
CU_ASSERT_EQUAL(table.timestamp_length, num_rows);
CU_ASSERT_EQUAL(table.record_length, num_rows);
tsk_provenance_table_print_state(&table, _devnull);
/* Append another num_rows onto the end */
ret = tsk_provenance_table_append_columns(
&table, num_rows, timestamp, timestamp_offset, record, record_offset);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp, timestamp, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.timestamp + num_rows, timestamp, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.record, record, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(
tsk_memcmp(table.record + num_rows, record, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(table.num_rows, 2 * num_rows);
CU_ASSERT_EQUAL(table.timestamp_length, 2 * num_rows);
CU_ASSERT_EQUAL(table.record_length, 2 * num_rows);
tsk_provenance_table_print_state(&table, _devnull);
/* Truncate back to num_rows */
ret = tsk_provenance_table_truncate(&table, num_rows);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp, timestamp, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.timestamp_offset, timestamp_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(table.record, record, num_rows * sizeof(char)), 0);
CU_ASSERT_EQUAL(tsk_memcmp(table.record_offset, record_offset,
(num_rows + 1) * sizeof(tsk_size_t)),
0);
CU_ASSERT_EQUAL(table.num_rows, num_rows);
CU_ASSERT_EQUAL(table.timestamp_length, num_rows);
CU_ASSERT_EQUAL(table.record_length, num_rows);
tsk_provenance_table_print_state(&table, _devnull);
/* Test equality with and without timestamp */
tsk_provenance_table_copy(&table, &table2, 0);
CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS));
/* Change the timestamp values */
table2.timestamp[0] = 0;
CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS));
/* Change the last timestamp entry */
table2.timestamp_offset[table2.num_rows]
= table2.timestamp_offset[table2.num_rows - 1];
CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS));
/* Delete all timestamps */
tsk_memset(table2.timestamp_offset, 0,
(table2.num_rows + 1) * sizeof(*table2.timestamp_offset));
CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));
CU_ASSERT_TRUE(
tsk_provenance_table_equals(&table, &table2, TSK_CMP_IGNORE_TIMESTAMPS));
tsk_provenance_table_free(&table2);
/* Test equality with and without timestamp */
tsk_provenance_table_copy(&table, &table2, 0);
table2.record_length = 0;
CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));
tsk_provenance_table_free(&table2);
ret = tsk_provenance_table_truncate(&table, num_rows + 1);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_TABLE_POSITION);
/* No arguments can be null */
ret = tsk_provenance_table_set_columns(
&table, num_rows, NULL, timestamp_offset, record, record_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_provenance_table_set_columns(
&table, num_rows, timestamp, NULL, record, record_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_provenance_table_set_columns(
&table, num_rows, timestamp, timestamp_offset, NULL, record_offset);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_provenance_table_set_columns(
&table, num_rows, timestamp, timestamp_offset, record, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
/* Test extend method */
ret = tsk_provenance_table_truncate(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_provenance_table_init(&table2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Can't extend from self */
ret = tsk_provenance_table_extend(&table, &table, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANNOT_EXTEND_FROM_SELF);
/* Two empty tables */
CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));
ret = tsk_provenance_table_extend(&table, &table2, table2.num_rows, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));
/* Row out of bounds */
ret = tsk_provenance_table_extend(&table, &table2, num_row_subset, row_subset, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
/* Num rows out of bounds */
ret = tsk_provenance_table_extend(&table, &table2, num_rows * 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
/* Copy rows in order if index NULL */
ret = tsk_provenance_table_set_columns(
&table2, num_rows, timestamp, timestamp_offset, record, record_offset);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));
ret = tsk_provenance_table_extend(&table, &table2, table2.num_rows, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));
/* Copy nothing if index not NULL but length zero */
ret = tsk_provenance_table_extend(&table, &table2, 0, row_subset, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));
/* Copy first N rows in order if index NULL */
ret = tsk_provenance_table_truncate(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_provenance_table_extend(&table, &table2, num_rows / 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_provenance_table_truncate(&table2, num_rows / 2);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_provenance_table_equals(&table, &table2, 0));
ret = tsk_provenance_table_set_columns(
&table2, num_rows, timestamp, timestamp_offset, record, record_offset);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Copy a subset */
ret = tsk_provenance_table_truncate(&table, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_provenance_table_equals(&table, &table2, 0));
ret = tsk_provenance_table_extend(&table, &table2, num_row_subset, row_subset, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (j = 0; j < num_row_subset; j++) {
ret = tsk_provenance_table_get_row(&table, (tsk_id_t) j, &provenance);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_provenance_table_get_row(&table2, row_subset[j], &provenance2);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(provenance.timestamp_length, provenance2.timestamp_length);
CU_ASSERT_EQUAL(provenance.record_length, provenance2.record_length);
CU_ASSERT_EQUAL(tsk_memcmp(provenance.timestamp, provenance2.timestamp,
provenance.timestamp_length * sizeof(*provenance.timestamp)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(provenance.record, provenance2.record,
provenance.record_length * sizeof(*provenance.record)),
0);
}
tsk_provenance_table_free(&table);
tsk_provenance_table_free(&table2);
free(timestamp);
free(timestamp_offset);
free(record);
free(record_offset);
}
/*
 * Exercises tsk_provenance_table_takeset_columns.
 *
 * A source table of identical rows is built, its four column arrays are
 * duplicated into freshly allocated buffers, and those buffers are handed to
 * a second table via takeset. The test then checks that:
 *   - the receiving table compares equal to the source table;
 *   - passing NULL for any column is rejected with TSK_ERR_BAD_PARAM_VALUE;
 *   - an offset array not starting at zero is rejected with TSK_ERR_BAD_OFFSET;
 *   - clearing the table afterwards leaves max_rows equal to num_rows.
 *
 * The buffers are not freed here directly; they are released through
 * tsk_provenance_table_free on the table that took them.
 */
static void
test_provenance_table_takeset(void)
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_provenance_table_t source_table, table;
    tsk_size_t num_rows = 100;
    tsk_id_t j;
    char *timestamp;
    tsk_size_t *timestamp_offset;
    char *record;
    tsk_size_t *record_offset;
    const char *test_timestamp = "red";
    tsk_size_t test_timestamp_length = 3;
    const char *test_record = "test";
    tsk_size_t test_record_length = 4;

    /* Make a table to copy from */
    ret = tsk_provenance_table_init(&source_table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < (tsk_id_t) num_rows; j++) {
        ret_id = tsk_provenance_table_add_row(&source_table, test_timestamp,
            test_timestamp_length, test_record, test_record_length);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Prepare arrays to be taken */
    timestamp = tsk_malloc(num_rows * test_timestamp_length * sizeof(char));
    CU_ASSERT_FATAL(timestamp != NULL);
    tsk_memcpy(timestamp, source_table.timestamp,
        num_rows * test_timestamp_length * sizeof(char));
    timestamp_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(timestamp_offset != NULL);
    tsk_memcpy(timestamp_offset, source_table.timestamp_offset,
        (num_rows + 1) * sizeof(tsk_size_t));
    record = tsk_malloc(num_rows * test_record_length * sizeof(char));
    CU_ASSERT_FATAL(record != NULL);
    tsk_memcpy(
        record, source_table.record, num_rows * test_record_length * sizeof(char));
    record_offset = tsk_malloc((num_rows + 1) * sizeof(tsk_size_t));
    CU_ASSERT_FATAL(record_offset != NULL);
    tsk_memcpy(
        record_offset, source_table.record_offset, (num_rows + 1) * sizeof(tsk_size_t));

    ret = tsk_provenance_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Add one row so that we can check takeset frees it */
    ret_id = tsk_provenance_table_add_row(
        &table, test_timestamp, test_timestamp_length, test_record, test_record_length);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = tsk_provenance_table_takeset_columns(
        &table, num_rows, timestamp, timestamp_offset, record, record_offset);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_provenance_table_equals(&source_table, &table, 0));

    /* Test error states, all of these must not take the array, or free existing */
    ret = tsk_provenance_table_takeset_columns(
        &table, num_rows, NULL, timestamp_offset, record, record_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_provenance_table_takeset_columns(
        &table, num_rows, timestamp, NULL, record, record_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_provenance_table_takeset_columns(
        &table, num_rows, timestamp, timestamp_offset, NULL, record_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_provenance_table_takeset_columns(
        &table, num_rows, timestamp, timestamp_offset, record, NULL);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);

    /* Bad offsets: the first offset must be zero */
    timestamp_offset[0] = 1;
    ret = tsk_provenance_table_takeset_columns(
        &table, num_rows, timestamp, timestamp_offset, record, record_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);
    timestamp_offset[0] = 0;
    record_offset[0] = 1;
    ret = tsk_provenance_table_takeset_columns(
        &table, num_rows, timestamp, timestamp_offset, record, record_offset);
    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_OFFSET);
    /* Undo the deliberate corruption: this array was handed to the table by
     * the successful takeset above, so leave it valid before clearing. */
    record_offset[0] = 0;

    /* Truncation after takeset keeps memory and max_rows */
    ret = tsk_provenance_table_clear(&table);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(table.max_rows, num_rows);

    ret = tsk_provenance_table_free(&table);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_provenance_table_free(&source_table);
    CU_ASSERT_EQUAL(ret, 0);
}
/*
 * Exercises tsk_provenance_table_update_row on a three-row table.
 *
 * Row 0 is rewritten several times: with shorter data taken from the middle
 * of the source strings, with pointers obtained from tsk_provenance_table_get_row
 * on the same table (so the input aliases the table's own storage), with a
 * zero-length timestamp, and finally with the full three-character strings.
 * Row 1 is then emptied, row 2 is checked to be untouched, and updating the
 * non-existent row 3 must fail with TSK_ERR_PROVENANCE_OUT_OF_BOUNDS.
 */
static void
test_provenance_table_update_row(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_provenance_table_t table;
    tsk_provenance_t row;
    const char *timestamp = "XYZ";
    const char *record = "ABC";

    ret = tsk_provenance_table_init(&table, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Rows of increasing length: "X"/"A", "XY"/"AB", "XYZ"/"ABC" */
    ret_id = tsk_provenance_table_add_row(&table, timestamp, 1, record, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_provenance_table_add_row(&table, timestamp, 2, record, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_provenance_table_add_row(&table, timestamp, 3, record, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    /* Replace row 0 with the single middle character of each string */
    ret = tsk_provenance_table_update_row(&table, 0, &timestamp[1], 1, &record[1], 1);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_provenance_table_get_row(&table, 0, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 1);
    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'Y');
    CU_ASSERT_EQUAL_FATAL(row.record_length, 1);
    CU_ASSERT_EQUAL_FATAL(row.record[0], 'B');

    /* Update row 0 with the values just read back from that same row */
    ret = tsk_provenance_table_update_row(
        &table, 0, row.timestamp, row.timestamp_length, row.record, row.record_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_provenance_table_get_row(&table, 0, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 1);
    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'Y');
    CU_ASSERT_EQUAL_FATAL(row.record_length, 1);
    CU_ASSERT_EQUAL_FATAL(row.record[0], 'B');

    /* Shrink the timestamp to zero length while keeping the record */
    ret = tsk_provenance_table_update_row(&table, 0, row.timestamp,
        row.timestamp_length - 1, row.record, row.record_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_provenance_table_get_row(&table, 0, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 0);
    CU_ASSERT_EQUAL_FATAL(row.record_length, 1);
    CU_ASSERT_EQUAL_FATAL(row.record[0], 'B');

    /* Grow row 0 to the full three-character strings */
    ret = tsk_provenance_table_update_row(&table, 0, timestamp, 3, record, 3);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_provenance_table_get_row(&table, 0, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 3);
    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'X');
    CU_ASSERT_EQUAL_FATAL(row.timestamp[1], 'Y');
    CU_ASSERT_EQUAL_FATAL(row.timestamp[2], 'Z');
    CU_ASSERT_EQUAL_FATAL(row.record_length, 3);
    CU_ASSERT_EQUAL_FATAL(row.record[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.record[1], 'B');
    CU_ASSERT_EQUAL_FATAL(row.record[2], 'C');

    /* Empty row 1 entirely */
    ret = tsk_provenance_table_update_row(&table, 1, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_provenance_table_get_row(&table, 1, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 0);
    CU_ASSERT_EQUAL_FATAL(row.record_length, 0);

    /* Row 2 must still hold its original contents */
    ret = tsk_provenance_table_get_row(&table, 2, &row);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 3);
    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'X');
    CU_ASSERT_EQUAL_FATAL(row.timestamp[1], 'Y');
    CU_ASSERT_EQUAL_FATAL(row.timestamp[2], 'Z');
    CU_ASSERT_EQUAL_FATAL(row.record_length, 3);
    CU_ASSERT_EQUAL_FATAL(row.record[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.record[1], 'B');
    CU_ASSERT_EQUAL_FATAL(row.record[2], 'C');

    /* Only rows 0..2 exist, so index 3 is out of bounds */
    ret = tsk_provenance_table_update_row(&table, 3, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);

    tsk_provenance_table_free(&table);
}
/*
 * Exercises tsk_provenance_table_keep_rows on a three-row table.
 *
 * Checks, in order:
 *   - keeping every row leaves the table equal to the source, with or without
 *     an id_map output array, and id_map is the identity mapping;
 *   - keeping no rows empties the table and maps every row to -1;
 *   - keeping only the middle row leaves that row at index 0 with its data;
 *   - keeping the first n rows matches tsk_provenance_table_truncate, and
 *     re-extending with the dropped rows reproduces the source table.
 */
static void
test_provenance_table_keep_rows(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_size_t j;
    tsk_provenance_table_t source, t1, t2;
    tsk_provenance_t row;
    const char *timestamp = "XYZ";
    const char *record = "ABC";
    tsk_bool_t keep[3] = { 1, 1, 1 };
    tsk_id_t indexes[] = { 0, 1, 2 };
    tsk_id_t id_map[3];

    ret = tsk_provenance_table_init(&source, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Rows of increasing length: "X"/"A", "XY"/"AB", "XYZ"/"ABC" */
    ret_id = tsk_provenance_table_add_row(&source, timestamp, 1, record, 1);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_provenance_table_add_row(&source, timestamp, 2, record, 2);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_provenance_table_add_row(&source, timestamp, 3, record, 3);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret = tsk_provenance_table_copy(&source, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Keep everything, with an id_map ... */
    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &source, 0));

    /* ... and without one; id_map below still holds the earlier result */
    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &source, 0));
    CU_ASSERT_EQUAL_FATAL(id_map[0], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], 2);

    /* Keep nothing */
    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);

    /* Refill t1 (already initialised, hence TSK_NO_INIT) and keep only row 1 */
    ret = tsk_provenance_table_copy(&source, &t1, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    keep[0] = 0;
    keep[1] = 1;
    keep[2] = 0;
    ret = tsk_provenance_table_keep_rows(&t1, keep, 0, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(t1.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(id_map[0], -1);
    CU_ASSERT_EQUAL_FATAL(id_map[1], 0);
    CU_ASSERT_EQUAL_FATAL(id_map[2], -1);
    ret = tsk_provenance_table_get_row(&t1, 0, &row);
    /* Make sure the lookup succeeded before inspecting row */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(row.timestamp_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.timestamp[0], 'X');
    CU_ASSERT_EQUAL_FATAL(row.timestamp[1], 'Y');
    CU_ASSERT_EQUAL_FATAL(row.record_length, 2);
    CU_ASSERT_EQUAL_FATAL(row.record[0], 'A');
    CU_ASSERT_EQUAL_FATAL(row.record[1], 'B');

    tsk_provenance_table_free(&t1);

    keep[0] = 0;
    keep[1] = 0;
    keep[2] = 0;
    /* Keeping first n rows equivalent to truncate */
    for (j = 0; j < source.num_rows; j++) {
        ret = tsk_provenance_table_copy(&source, &t2, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_provenance_table_copy(&source, &t1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_provenance_table_truncate(&t1, j + 1);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* keep[] accumulates across iterations, so rows 0..j are kept */
        keep[j] = 1;
        ret = tsk_provenance_table_keep_rows(&t2, keep, 0, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_provenance_table_equals(&t1, &t2, 0));

        /* Adding the remaining rows back on to the table gives the original
         * table */
        ret = tsk_provenance_table_extend(
            &t2, &source, source.num_rows - j - 1, indexes + j + 1, 0);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_TRUE(tsk_provenance_table_equals(&source, &t2, 0));

        tsk_provenance_table_free(&t1);
        tsk_provenance_table_free(&t2);
    }

    tsk_provenance_table_free(&source);
}
/*
 * Checks the per-table growth-increment settings of a table collection.
 *
 * After tsk_table_collection_init every *_increment field is expected to be
 * zero. Each setter is then called with a non-zero value; it must return 0
 * and the corresponding field must hold exactly the value passed in.
 */
static void
test_table_size_increments(void)
{
    int ret;
    tsk_table_collection_t tables;
    tsk_size_t new_size;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* All increments start out as zero */
    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_metadata_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_location_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.edges.max_metadata_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.sites.max_metadata_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.sites.max_ancestral_state_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_metadata_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_derived_state_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_metadata_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.populations.max_metadata_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_timestamp_length_increment, 0);
    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_record_length_increment, 0);

    /* Setting to non-zero sets to that size */
    new_size = 1;

    /* Individuals */
    ret = tsk_individual_table_set_max_rows_increment(&tables.individuals, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows_increment, new_size);
    ret = tsk_individual_table_set_max_metadata_length_increment(
        &tables.individuals, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_metadata_length_increment, new_size);
    ret = tsk_individual_table_set_max_location_length_increment(
        &tables.individuals, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.individuals.max_location_length_increment, new_size);

    /* Nodes */
    ret = tsk_node_table_set_max_rows_increment(&tables.nodes, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows_increment, new_size);
    ret = tsk_node_table_set_max_metadata_length_increment(&tables.nodes, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length_increment, new_size);

    /* Edges */
    ret = tsk_edge_table_set_max_rows_increment(&tables.edges, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows_increment, new_size);
    ret = tsk_edge_table_set_max_metadata_length_increment(&tables.edges, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.edges.max_metadata_length_increment, new_size);

    /* Sites */
    ret = tsk_site_table_set_max_rows_increment(&tables.sites, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows_increment, new_size);
    ret = tsk_site_table_set_max_metadata_length_increment(&tables.sites, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.sites.max_metadata_length_increment, new_size);
    ret = tsk_site_table_set_max_ancestral_state_length_increment(
        &tables.sites, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.sites.max_ancestral_state_length_increment, new_size);

    /* Mutations */
    ret = tsk_mutation_table_set_max_rows_increment(&tables.mutations, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows_increment, new_size);
    ret = tsk_mutation_table_set_max_metadata_length_increment(
        &tables.mutations, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_metadata_length_increment, new_size);
    ret = tsk_mutation_table_set_max_derived_state_length_increment(
        &tables.mutations, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.max_derived_state_length_increment, new_size);

    /* Migrations */
    ret = tsk_migration_table_set_max_rows_increment(&tables.migrations, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows_increment, new_size);
    ret = tsk_migration_table_set_max_metadata_length_increment(
        &tables.migrations, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.migrations.max_metadata_length_increment, new_size);

    /* Populations */
    ret = tsk_population_table_set_max_rows_increment(&tables.populations, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows_increment, new_size);
    ret = tsk_population_table_set_max_metadata_length_increment(
        &tables.populations, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.populations.max_metadata_length_increment, new_size);

    /* Provenances */
    ret = tsk_provenance_table_set_max_rows_increment(&tables.provenances, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows_increment, new_size);
    ret = tsk_provenance_table_set_max_timestamp_length_increment(
        &tables.provenances, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_timestamp_length_increment, new_size);
    ret = tsk_provenance_table_set_max_record_length_increment(
        &tables.provenances, new_size);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.provenances.max_record_length_increment, new_size);

    tsk_table_collection_free(&tables);
}
static void
test_table_expansion(void)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
tsk_table_collection_t tables2;
ret = tsk_table_collection_init(&tables2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Individual table */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 1);
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/*Extending by a small amount results in 1024 rows in the first case*/
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 1024);
/*Extending by an amount that fits doesn't grow the table*/
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, 1023, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 1024);
/*Extending by an amount that doesn't fit doubles the table*/
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, 1024, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 2048);
/*Extending by an amount greater than the next double extends to that amount*/
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, 4096, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4097);
/*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, 2097152, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 2097153);
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, 2097154, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4194305);
/*Extending by more rows than possible results in overflow*/
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, TSK_MAX_ID, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4194305);
/*Setting a custom extension uses that*/
ret = tsk_individual_table_set_max_rows_increment(&tables.individuals, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, 4194305, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4194305 + 42);
/*Setting a custom extension that overflows errors*/
ret = tsk_individual_table_set_max_rows_increment(&tables.individuals, TSK_MAX_ID);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_individual_table_extend(
&tables.individuals, &tables2.individuals, 4194305 + 42 + 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.individuals.max_rows, 4194305 + 42);
tsk_table_collection_free(&tables);
/* Node table */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 1);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/*Extending by a small amount results in 1024 rows in the first case*/
ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 1024);
/*Extending by an amount that fits doesn't grow the table*/
ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 1023, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 1024);
/*Extending by an amount that doesn't fit doubles the table*/
ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 1024, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 2048);
/*Extending by an amount greater than the next double extends to that amount*/
ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 4096, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4097);
/*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/
ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 2097152, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 2097153);
ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 2097154, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4194305);
/*Extending by more rows than possible results in overflow*/
ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, TSK_MAX_ID, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4194305);
/*Setting a custom extension uses that*/
ret = tsk_node_table_set_max_rows_increment(&tables.nodes, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_extend(&tables.nodes, &tables2.nodes, 4194305, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4194305 + 42);
/*Setting a custom extension that overflows errors*/
ret = tsk_node_table_set_max_rows_increment(&tables.nodes, TSK_MAX_ID);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_extend(
&tables.nodes, &tables2.nodes, 4194305 + 42 + 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_rows, 4194305 + 42);
tsk_table_collection_free(&tables);
/* Edge table */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 1);
ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/*Extending by a small amount results in 1024 rows in the first case*/
ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 1024);
/*Extending by an amount that fits doesn't grow the table*/
ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 1023, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 1024);
/*Extending by an amount that doesn't fit doubles the table*/
ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 1024, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 2048);
/*Extending by an amount greater than the next double extends to that amount*/
ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 4096, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4097);
/*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/
ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 2097152, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 2097153);
ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 2097154, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4194305);
/*Extending by more rows than possible results in overflow*/
ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, TSK_MAX_ID, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4194305);
/*Setting a custom extension uses that*/
ret = tsk_edge_table_set_max_rows_increment(&tables.edges, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_edge_table_extend(&tables.edges, &tables2.edges, 4194305, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4194305 + 42);
/*Setting a custom extension that overflows errors*/
ret = tsk_edge_table_set_max_rows_increment(&tables.edges, TSK_MAX_ID);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_edge_table_extend(
&tables.edges, &tables2.edges, 4194305 + 42 + 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.edges.max_rows, 4194305 + 42);
tsk_table_collection_free(&tables);
/* Migration table */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 1);
ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/*Extending by a small amount results in 1024 rows in the first case*/
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 1024);
/*Extending by an amount that fits doesn't grow the table*/
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, 1023, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 1024);
/*Extending by an amount that doesn't fit doubles the table*/
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, 1024, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 2048);
/*Extending by an amount greater than the next double extends to that amount*/
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, 4096, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4097);
/*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, 2097152, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 2097153);
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, 2097154, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4194305);
/*Extending by more rows than possible results in overflow*/
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, TSK_MAX_ID, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4194305);
/*Setting a custom extension uses that*/
ret = tsk_migration_table_set_max_rows_increment(&tables.migrations, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, 4194305, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4194305 + 42);
/*Setting a custom extension that overflows errors*/
ret = tsk_migration_table_set_max_rows_increment(&tables.migrations, TSK_MAX_ID);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_migration_table_extend(
&tables.migrations, &tables2.migrations, 4194305 + 42 + 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.migrations.max_rows, 4194305 + 42);
tsk_table_collection_free(&tables);
/* Site table */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 1);
ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/*Extending by a small amount results in 1024 rows in the first case*/
ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 1024);
/*Extending by an amount that fits doesn't grow the table*/
ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 1023, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 1024);
/*Extending by an amount that doesn't fit doubles the table*/
ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 1024, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 2048);
/*Extending by an amount greater than the next double extends to that amount*/
ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 4096, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4097);
/*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/
ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 2097152, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 2097153);
ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 2097154, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4194305);
/*Extending by more rows than possible results in overflow*/
ret = tsk_site_table_extend(&tables.sites, &tables2.sites, TSK_MAX_ID, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4194305);
/*Setting a custom extension uses that*/
ret = tsk_site_table_set_max_rows_increment(&tables.sites, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_extend(&tables.sites, &tables2.sites, 4194305, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4194305 + 42);
/*Setting a custom extension that overflows errors*/
ret = tsk_site_table_set_max_rows_increment(&tables.sites, TSK_MAX_ID);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_extend(
&tables.sites, &tables2.sites, 4194305 + 42 + 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.sites.max_rows, 4194305 + 42);
tsk_table_collection_free(&tables);
/* Mutation table */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 1);
ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/*Extending by a small amount results in 1024 rows in the first case*/
ret = tsk_mutation_table_extend(&tables.mutations, &tables2.mutations, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 1024);
/*Extending by an amount that fits doesn't grow the table*/
ret = tsk_mutation_table_extend(
&tables.mutations, &tables2.mutations, 1023, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 1024);
/*Extending by an amount that doesn't fit doubles the table*/
ret = tsk_mutation_table_extend(
&tables.mutations, &tables2.mutations, 1024, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 2048);
/*Extending by an amount greater than the next double extends to that amount*/
ret = tsk_mutation_table_extend(
&tables.mutations, &tables2.mutations, 4096, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4097);
/*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/
ret = tsk_mutation_table_extend(
&tables.mutations, &tables2.mutations, 2097152, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 2097153);
ret = tsk_mutation_table_extend(
&tables.mutations, &tables2.mutations, 2097154, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4194305);
/*Extending by more rows than possible results in overflow*/
ret = tsk_mutation_table_extend(
&tables.mutations, &tables2.mutations, TSK_MAX_ID, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4194305);
/*Setting a custom extension uses that*/
ret = tsk_mutation_table_set_max_rows_increment(&tables.mutations, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_extend(
&tables.mutations, &tables2.mutations, 4194305, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4194305 + 42);
/*Setting a custom extension that overflows errors*/
ret = tsk_mutation_table_set_max_rows_increment(&tables.mutations, TSK_MAX_ID);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_extend(
&tables.mutations, &tables2.mutations, 4194305 + 42 + 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.mutations.max_rows, 4194305 + 42);
tsk_table_collection_free(&tables);
/* Population table */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 1);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/*Extending by a small amount results in 1024 rows in the first case*/
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 1024);
/*Extending by an amount that fits doesn't grow the table*/
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, 1023, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 1024);
/*Extending by an amount that doesn't fit doubles the table*/
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, 1024, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 2048);
/*Extending by an amount greater than the next double extends to that amount*/
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, 4096, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4097);
/*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, 2097152, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 2097153);
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, 2097154, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4194305);
/*Extending by more rows than possible results in overflow*/
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, TSK_MAX_ID, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4194305);
/*Setting a custom extension uses that*/
ret = tsk_population_table_set_max_rows_increment(&tables.populations, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, 4194305, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4194305 + 42);
/*Setting a custom extension that overflows errors*/
ret = tsk_population_table_set_max_rows_increment(&tables.populations, TSK_MAX_ID);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_population_table_extend(
&tables.populations, &tables2.populations, 4194305 + 42 + 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.populations.max_rows, 4194305 + 42);
tsk_table_collection_free(&tables);
/* Provenance table */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 1);
ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/*Extending by a small amount results in 1024 rows in the first case*/
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, 2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 1024);
/*Extending by an amount that fits doesn't grow the table*/
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, 1023, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 1024);
/*Extending by an amount that doesn't fit doubles the table*/
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, 1024, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 2048);
/*Extending by an amount greater than the next double extends to that amount*/
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, 4096, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4097);
/*After extending beyond 2^21 subsequent extension doesn't double but adds 2^21*/
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, 2097152, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 2097153);
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, 2097154, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4194305);
/*Extending by more rows than possible results in overflow*/
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, TSK_MAX_ID, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4194305);
/*Setting a custom extension uses that*/
ret = tsk_provenance_table_set_max_rows_increment(&tables.provenances, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, 4194305, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4194305 + 42);
/*Setting a custom extension that overflows errors*/
ret = tsk_provenance_table_set_max_rows_increment(&tables.provenances, TSK_MAX_ID);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_provenance_table_extend(
&tables.provenances, &tables2.provenances, 4194305 + 42 + 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TABLE_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.provenances.max_rows, 4194305 + 42);
tsk_table_collection_free(&tables);
tsk_table_collection_free(&tables2);
}
static void
test_ragged_expansion(void)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
char *data = tsk_malloc(104857600 * sizeof(char));
/* Test with node table metadata */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 1);
/*Extending by a small amount results in 65536 bytes in the first case*/
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 2);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 65536);
/*Extending by an amount that fits doesn't grow the column*/
ret_id
= tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 65534);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 65536);
/*Extending by an amount that doesn't fit doubles the column*/
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 1);
CU_ASSERT_EQUAL_FATAL(ret_id, 2);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 65536 * 2);
/*Extending by an amount greater than the next double extends to that amount*/
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data,
1 + (65536 * 2 * 2 - 2 - 65534 - 1));
CU_ASSERT_EQUAL_FATAL(ret_id, 3);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 2 + 65534 + 1 + 196608);
/*After extending beyond 100MB subsequent extension doesn't double but adds 100MB*/
ret_id = tsk_node_table_add_row(
&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 104857600);
CU_ASSERT_EQUAL_FATAL(ret_id, 4);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 105119745);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 1);
CU_ASSERT_EQUAL_FATAL(ret_id, 5);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 105119745 + 104857600);
/*Extending by more bytes than possible results in overflow*/
ret_id = tsk_node_table_add_row(
&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, TSK_MAX_SIZE);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 105119745 + 104857600);
tsk_node_table_free(&tables.nodes);
ret = tsk_node_table_init(&tables.nodes, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/*Setting a custom extension uses that*/
ret = tsk_node_table_set_max_metadata_length_increment(&tables.nodes, 42);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 3);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 43);
/*Setting a custom extension that overflows errors*/
ret = tsk_node_table_set_max_metadata_length_increment(&tables.nodes, TSK_MAX_SIZE);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, data, 41);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
CU_ASSERT_EQUAL_FATAL(tables.nodes.max_metadata_length, 43);
tsk_table_collection_free(&tables);
tsk_safe_free(data);
}
static void
test_link_ancestors_input_errors(void)
{
int ret;
tsk_id_t ret_id;
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_edge_table_t result;
tsk_id_t samples[] = { 0, 1 };
tsk_id_t ancestors[] = { 4, 6 };
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Add an edge with some metadata */
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 7);
ret_id = tsk_edge_table_add_row(&tables.edges, 0, 1, 7, 6, "metadata", 8);
CU_ASSERT_FATAL(ret_id > 0);
ret = tsk_edge_table_init(&result, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_link_ancestors(
&tables, NULL, 2, ancestors, 2, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
tsk_edge_table_free(&result);
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_edge_table_init(&result, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_link_ancestors(
&tables, NULL, 2, ancestors, 2, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
/* Bad sample IDs */
samples[0] = -1;
ret = tsk_table_collection_link_ancestors(
&tables, samples, 2, ancestors, 2, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
/* Bad ancestor IDs */
samples[0] = 0;
ancestors[0] = -1;
ret = tsk_table_collection_link_ancestors(
&tables, samples, 2, ancestors, 2, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
/* Duplicate sample IDs */
ancestors[0] = 4;
samples[0] = 1;
ret = tsk_table_collection_link_ancestors(
&tables, samples, 2, ancestors, 2, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
/* Duplicate sample IDs */
ancestors[0] = 6;
samples[0] = 0;
ret = tsk_table_collection_link_ancestors(
&tables, samples, 2, ancestors, 2, 0, &result);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
/* TODO more tests! */
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
tsk_edge_table_free(&result);
}
/* Links samples { 0, 1 } to ancestors { 4, 6 } in the single tree example and
 * checks the resulting edge table row by row. */
static void
test_link_ancestors_single_tree(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_edge_table_t result;
    tsk_id_t samples[] = { 0, 1 };
    tsk_id_t ancestors[] = { 4, 6 };
    /* Expected rows; every row is expected to span [res_left, res_right) */
    tsk_id_t res_parent[] = { 4, 4, 6 };
    tsk_id_t res_child[] = { 0, 1, 4 };
    double res_left = 0;
    double res_right = 1;
    size_t i;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_edge_table_init(&result, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_link_ancestors(
        &tables, samples, 2, ancestors, 2, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Check we get the right result. The row count check is fatal because the
     * loop below indexes the three-element expected arrays by row number. */
    CU_ASSERT_EQUAL_FATAL(result.num_rows, 3);
    for (i = 0; i < result.num_rows; i++) {
        CU_ASSERT_EQUAL(res_parent[i], result.parent[i]);
        CU_ASSERT_EQUAL(res_child[i], result.child[i]);
        CU_ASSERT_EQUAL(res_left, result.left[i]);
        CU_ASSERT_EQUAL(res_right, result.right[i]);
    }

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
    tsk_edge_table_free(&result);
}
/* Runs link_ancestors with sample 2 and ancestor 4 on the single tree example
 * and checks only that the call succeeds. */
static void
test_link_ancestors_no_edges(void)
{
    tsk_id_t ancestors[] = { 4 };
    tsk_id_t samples[] = { 2 };
    tsk_edge_table_t linked;
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int status;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    status = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    status = tsk_edge_table_init(&linked, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    status = tsk_table_collection_link_ancestors(
        &tables, samples, 1, ancestors, 1, 0, &linked);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    tsk_table_collection_free(&tables);
    tsk_edge_table_free(&linked);
    tsk_treeseq_free(&ts);
}
/* Runs link_ancestors where node 2 appears in both the samples and the
 * ancestors lists, and checks the two resulting edges. */
static void
test_link_ancestors_samples_and_ancestors_overlap(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_edge_table_t result;
    tsk_id_t samples[] = { 0, 1, 2, 4 };
    tsk_id_t ancestors[] = { 2 };
    /* Expected rows: both share the same parent and interval */
    tsk_id_t res_parent = 4;
    tsk_id_t res_child[] = { 0, 1 };
    double res_left = 0;
    double res_right = 1;
    size_t i;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_edge_table_init(&result, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_link_ancestors(
        &tables, samples, 4, ancestors, 1, 0, &result);
    // tsk_edge_table_print_state(&result, stdout);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Check we get the right result. The row count check is fatal because the
     * loop below indexes the two-element res_child array by row number. */
    CU_ASSERT_EQUAL_FATAL(result.num_rows, 2);
    for (i = 0; i < result.num_rows; i++) {
        CU_ASSERT_EQUAL(res_parent, result.parent[i]);
        CU_ASSERT_EQUAL(res_child[i], result.child[i]);
        CU_ASSERT_EQUAL(res_left, result.left[i]);
        CU_ASSERT_EQUAL(res_right, result.right[i]);
    }

    tsk_table_collection_free(&tables);
    tsk_edge_table_free(&result);
    tsk_treeseq_free(&ts);
}
/* Links samples { 0, 1, 2 } to ancestors { 5, 6, 7 } in the paper example and
 * checks the six resulting edges row by row. */
static void
test_link_ancestors_paper(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_edge_table_t result;
    tsk_id_t samples[] = { 0, 1, 2 };
    tsk_id_t ancestors[] = { 5, 6, 7 };
    /* Expected rows, in the order they are expected in the result table */
    tsk_id_t res_parent[] = { 5, 5, 6, 6, 7, 7 };
    tsk_id_t res_child[] = { 1, 2, 0, 5, 0, 5 };
    double res_left[] = { 0, 2, 0, 0, 7, 7 };
    double res_right[] = { 10, 10, 7, 7, 10, 10 };
    size_t i;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_edge_table_init(&result, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_link_ancestors(
        &tables, samples, 3, ancestors, 3, 0, &result);
    // tsk_edge_table_print_state(&result, stdout);
    /* Do not inspect the result table if the call itself failed */
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Check we get the right result. The row count check is fatal because the
     * loop below indexes the six-element expected arrays by row number. */
    CU_ASSERT_EQUAL_FATAL(result.num_rows, 6);
    for (i = 0; i < result.num_rows; i++) {
        CU_ASSERT_EQUAL(res_parent[i], result.parent[i]);
        CU_ASSERT_EQUAL(res_child[i], result.child[i]);
        CU_ASSERT_EQUAL(res_left[i], result.left[i]);
        CU_ASSERT_EQUAL(res_right[i], result.right[i]);
    }

    tsk_table_collection_free(&tables);
    tsk_edge_table_free(&result);
    tsk_treeseq_free(&ts);
}
/* Links samples { 1, 3 } to the single ancestor 5 in the paper example and
 * checks that each sample yields one edge spanning [0, 10). */
static void
test_link_ancestors_multiple_to_single_tree(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_edge_table_t result;
    tsk_id_t samples[] = { 1, 3 };
    tsk_id_t ancestors[] = { 5 };
    size_t i;
    /* Expected rows: both share the same parent and interval */
    tsk_id_t res_parent = 5;
    tsk_id_t res_child[] = { 1, 3 };
    double res_left = 0;
    double res_right = 10;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_edge_table_init(&result, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_link_ancestors(
        &tables, samples, 2, ancestors, 1, 0, &result);
    /* Do not inspect the result table if the call itself failed */
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* The row count check is fatal because the loop below indexes the
     * two-element res_child array by row number. */
    CU_ASSERT_EQUAL_FATAL(result.num_rows, 2);
    for (i = 0; i < result.num_rows; i++) {
        CU_ASSERT_EQUAL(res_parent, result.parent[i]);
        CU_ASSERT_EQUAL(res_child[i], result.child[i]);
        CU_ASSERT_EQUAL(res_left, result.left[i]);
        CU_ASSERT_EQUAL(res_right, result.right[i]);
    }

    tsk_table_collection_free(&tables);
    tsk_edge_table_free(&result);
    tsk_treeseq_free(&ts);
}
/* Walks one identity segment list and checks that every segment has
 * left < right and a node id in [0, num_nodes), and that the list's cached
 * total_span and num_segments match what the walk accumulates. */
static void
verify_ibd_segment_list(tsk_identity_segment_list_t *list, tsk_size_t num_nodes)
{
    tsk_identity_segment_t *seg = list->head;
    tsk_size_t seen = 0;
    double span_sum = 0;

    while (seg != NULL) {
        CU_ASSERT_FATAL(seg->left < seg->right);
        CU_ASSERT_FATAL(seg->node >= 0);
        CU_ASSERT_FATAL(seg->node < (tsk_id_t) num_nodes);
        span_sum += seg->right - seg->left;
        seen++;
        /* TODO the segments are not necessarily in order - issue #1682, so
         * no check is made that each segment starts at or after the end of
         * the previous one. */
        seg = seg->next;
    }
    CU_ASSERT_EQUAL_FATAL(span_sum, list->total_span);
    CU_ASSERT_EQUAL_FATAL(seen, list->num_segments);
}
/* Cross-checks an identity segments result that has its pairs stored:
 *  - the keys reported by get_keys agree with the pair_map AVL tree, each
 *    pair (a, b) has a < b and the reversed key is absent;
 *  - get_items reports the same keys, and each segment list passes
 *    verify_ibd_segment_list;
 *  - the per-list segment counts and spans sum to the result-level totals. */
static void
verify_ibd_result(tsk_identity_segments_t *result)
{
    int ret;
    tsk_size_t j;
    tsk_id_t a, b;
    int64_t index;
    tsk_size_t total_segments = 0;
    double total_span = 0;
    tsk_size_t num_pairs = tsk_identity_segments_get_num_pairs(result);
    /* Each pair occupies two consecutive ids in the key buffers */
    tsk_id_t *pairs = tsk_malloc(2 * num_pairs * sizeof(*pairs));
    tsk_id_t *pairs2 = tsk_malloc(2 * num_pairs * sizeof(*pairs2));
    tsk_identity_segment_list_t **lists = tsk_malloc(num_pairs * sizeof(*lists));
    tsk_avl_node_int_t **avl_nodes
        = tsk_malloc(result->pair_map.size * sizeof(*avl_nodes));

    CU_ASSERT_FATAL(pairs != NULL);
    CU_ASSERT_FATAL(pairs2 != NULL);
    CU_ASSERT_FATAL(avl_nodes != NULL);
    CU_ASSERT_FATAL(lists != NULL);
    CU_ASSERT_EQUAL_FATAL(num_pairs, result->pair_map.size);

    tsk_identity_segments_print_state(result, _devnull);

    ret = tsk_identity_segments_get_keys(result, pairs);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_avl_tree_int_ordered_nodes(&result->pair_map, avl_nodes);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    for (j = 0; j < num_pairs; j++) {
        a = pairs[2 * j];
        b = pairs[2 * j + 1];
        /* The map key for (a, b) is a * num_nodes + b; the j-th key must map
         * to the j-th node of the ordered traversal. */
        index = a * (int64_t) result->num_nodes + b;
        CU_ASSERT(a < b);
        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&result->pair_map, index), avl_nodes[j]);
        /* The reversed pair (b, a) must not be present */
        index = b * (int64_t) result->num_nodes + a;
        CU_ASSERT_EQUAL(tsk_avl_tree_int_search(&result->pair_map, index), NULL);
    }

    ret = tsk_identity_segments_get_items(result, pairs2, lists);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    for (j = 0; j < num_pairs; j++) {
        CU_ASSERT_EQUAL_FATAL(pairs[2 * j], pairs2[2 * j]);
        CU_ASSERT_EQUAL_FATAL(pairs[2 * j + 1], pairs2[2 * j + 1]);
        verify_ibd_segment_list(lists[j], result->num_nodes);
        total_segments += lists[j]->num_segments;
        total_span += lists[j]->total_span;
    }
    CU_ASSERT_EQUAL_FATAL(result->num_segments, total_segments);
    CU_ASSERT_DOUBLE_EQUAL(result->total_span, total_span, 1e-6);

    /* Release with the tsk_* wrapper to pair with tsk_malloc above */
    tsk_safe_free(pairs);
    tsk_safe_free(pairs2);
    tsk_safe_free(lists);
    tsk_safe_free(avl_nodes);
}
static void
test_ibd_segments_debug(void)
{
tsk_treeseq_t ts;
int ret;
tsk_identity_segments_t result;
tsk_size_t sizes[] = { 2, 2 };
tsk_id_t samples[] = { 0, 1, 2, 3 };
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
tsk_set_debug_stream(_devnull);
/* Run the DEBUG code */
ret = tsk_table_collection_ibd_within(
ts.tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_DEBUG);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_identity_segments_free(&result);
ret = tsk_table_collection_ibd_between(
ts.tables, &result, 2, sizes, samples, 0.0, DBL_MAX, TSK_DEBUG);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_identity_segments_free(&result);
ret = tsk_table_collection_ibd_within(
ts.tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_DEBUG | TSK_IBD_STORE_PAIRS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_identity_segments_free(&result);
ret = tsk_table_collection_ibd_within(
ts.tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_DEBUG | TSK_IBD_STORE_SEGMENTS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_identity_segments_free(&result);
tsk_set_debug_stream(stdout);
tsk_treeseq_free(&ts);
}
/* Runs ibd_within over a tree sequence built by caterpillar_tree(100, 1, 5).
 * Only success of the call is checked: the aim is to exercise the memory
 * expansion in ibd_finder. */
static void
test_ibd_segments_caterpillar_tree(void)
{
    tsk_treeseq_t *caterpillar = caterpillar_tree(100, 1, 5);
    tsk_identity_segments_t ibd;
    int ret;

    ret = tsk_table_collection_ibd_within(
        caterpillar->tables, &ibd, NULL, 0, 0.0, DBL_MAX, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_identity_segments_free(&ibd);

    /* Release the tree sequence's contents, then the struct itself */
    tsk_treeseq_free(caterpillar);
    free(caterpillar);
}
/* IBD segments on the single tree example: within a chosen pair, within all
 * samples, between two singleton sample sets, and the empty-input cases. */
static void
test_ibd_segments_single_tree(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_id_t samples[] = { 0, 1 };
    tsk_size_t sizes[] = { 1, 1 };
    tsk_identity_segments_t result;
    tsk_identity_segment_list_t *list = NULL;
    tsk_identity_segment_t *seg = NULL;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Only get IBD segs for (0, 1) */
    ret = tsk_table_collection_ibd_within(
        &tables, &result, samples, 2, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_identity_segments_get(&result, samples[0], samples[1], &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(list != NULL);
    seg = list->head;
    /* Guard the dereferences below */
    CU_ASSERT_FATAL(seg != NULL);
    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);
    CU_ASSERT_EQUAL_FATAL(seg->left, 0);
    CU_ASSERT_EQUAL_FATAL(seg->right, 1);
    CU_ASSERT_EQUAL_FATAL(seg->node, 4);

    /* Queries for other sample pairs succeed but yield a NULL list */
    ret = tsk_identity_segments_get(&result, 0, 2, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(list, NULL);
    ret = tsk_identity_segments_get(&result, 1, 3, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(list, NULL);

    tsk_identity_segments_print_state(&result, _devnull);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 1);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_total_span(&result), 1);
    verify_ibd_result(&result);
    tsk_identity_segments_free(&result);

    /* Get IBD segs among all pairs of samples */
    ret = tsk_table_collection_ibd_within(
        &tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* We have 4 samples, so 4 choose 2 sample pairs */
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 6);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_total_span(&result), 6);

    ret = tsk_identity_segments_get(&result, 0, 1, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(list != NULL);
    seg = list->head;
    CU_ASSERT_FATAL(seg != NULL);
    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);
    CU_ASSERT_EQUAL_FATAL(seg->left, 0);
    CU_ASSERT_EQUAL_FATAL(seg->right, 1);
    CU_ASSERT_EQUAL_FATAL(seg->node, 4);

    /* The pair is queried with the larger id first here */
    ret = tsk_identity_segments_get(&result, 3, 0, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(list != NULL);
    seg = list->head;
    CU_ASSERT_FATAL(seg != NULL);
    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);
    CU_ASSERT_EQUAL_FATAL(seg->left, 0);
    CU_ASSERT_EQUAL_FATAL(seg->right, 1);
    CU_ASSERT_EQUAL_FATAL(seg->node, 6);
    verify_ibd_result(&result);
    tsk_identity_segments_free(&result);

    /* Get segs between {0} and {1} */
    ret = tsk_table_collection_ibd_between(
        ts.tables, &result, 2, sizes, samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    verify_ibd_result(&result);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 1);
    ret = tsk_identity_segments_get(&result, 0, 1, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(list != NULL);
    seg = list->head;
    CU_ASSERT_FATAL(seg != NULL);
    CU_ASSERT_EQUAL_FATAL(seg->next, NULL);
    CU_ASSERT_EQUAL_FATAL(seg->left, 0);
    CU_ASSERT_EQUAL_FATAL(seg->right, 1);
    CU_ASSERT_EQUAL_FATAL(seg->node, 4);
    tsk_identity_segments_free(&result);

    /* within an empty list gives no segments */
    ret = tsk_table_collection_ibd_within(&tables, &result, samples, 0, 0.0, DBL_MAX, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 0);
    tsk_identity_segments_free(&result);

    /* Between an empty list gives no segments */
    ret = tsk_table_collection_ibd_between(
        ts.tables, &result, 0, sizes, samples, 0.0, DBL_MAX, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 0);
    tsk_identity_segments_free(&result);

    /* Between one empty list gives no segments*/
    sizes[0] = 0;
    ret = tsk_table_collection_ibd_between(
        ts.tables, &result, 2, sizes, samples, 0.0, DBL_MAX, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 0);
    tsk_identity_segments_free(&result);

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* Checks which queries on an identity segments result are available under
 * each storage option: none, TSK_IBD_STORE_PAIRS, and TSK_IBD_STORE_SEGMENTS
 * (with and without TSK_IBD_STORE_PAIRS). */
static void
test_ibd_segments_single_tree_options(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_identity_segments_t result;
    tsk_identity_segment_list_t *list = NULL;
    /* Room for all 4 choose 2 = 6 sample pairs */
    tsk_id_t pairs[12];
    tsk_identity_segment_list_t *lists[6];
    tsk_flags_t options[2];
    int k;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_ibd_within(&tables, &result, NULL, 0, 0.0, DBL_MAX, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* We have 4 samples, so 4 choose 2 sample pairs */
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 6);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_total_span(&result), 6);
    /* out-of-bounds is still detected */
    ret = tsk_identity_segments_get(&result, 0, 100, &list);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    /* By default all specific queries fail on the ibd_segments result */
    ret = tsk_identity_segments_get(&result, 0, 1, &list);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IBD_PAIRS_NOT_STORED);
    ret = tsk_identity_segments_get_keys(&result, pairs);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IBD_PAIRS_NOT_STORED);
    ret = tsk_identity_segments_get_items(&result, pairs, lists);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_IBD_PAIRS_NOT_STORED);
    tsk_identity_segments_free(&result);

    ret = tsk_table_collection_ibd_within(
        &tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_PAIRS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* out-of-bounds is still detected */
    ret = tsk_identity_segments_get(&result, 0, 100, &list);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    /* Getters for the lists now work, but the lists themselves are NULL */
    ret = tsk_identity_segments_get(&result, 0, 1, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(list != NULL);
    CU_ASSERT_EQUAL_FATAL(list->head, NULL);
    CU_ASSERT_EQUAL_FATAL(list->total_span, 1);
    CU_ASSERT_EQUAL_FATAL(list->num_segments, 1);
    ret = tsk_identity_segments_get_keys(&result, pairs);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(pairs[0], 0);
    CU_ASSERT_EQUAL_FATAL(pairs[1], 1);
    ret = tsk_identity_segments_get_items(&result, pairs, lists);
    /* Check the return code before touching the output buffers: lists[] is
     * uninitialised stack memory until get_items has filled it in. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(pairs[0], 0);
    CU_ASSERT_EQUAL_FATAL(pairs[1], 1);
    CU_ASSERT_FATAL(lists[0] != NULL);
    CU_ASSERT_EQUAL_FATAL(lists[0]->head, NULL);
    CU_ASSERT_EQUAL_FATAL(lists[0]->total_span, 1);
    CU_ASSERT_EQUAL_FATAL(lists[0]->num_segments, 1);
    tsk_identity_segments_free(&result);

    /* store_segments implies store_pairs */
    options[0] = TSK_IBD_STORE_SEGMENTS;
    options[1] = TSK_IBD_STORE_PAIRS | TSK_IBD_STORE_SEGMENTS;
    for (k = 0; k < 2; k++) {
        ret = tsk_table_collection_ibd_within(
            &tables, &result, NULL, 0, 0.0, DBL_MAX, options[k]);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* out-of-bounds is still detected */
        ret = tsk_identity_segments_get(&result, 0, 100, &list);
        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
        ret = tsk_identity_segments_get(&result, 0, 1, &list);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_FATAL(list != NULL);
        CU_ASSERT_FATAL(list->head != NULL);
        CU_ASSERT_EQUAL_FATAL(list->head->left, 0);
        CU_ASSERT_EQUAL_FATAL(list->head->right, 1);
        CU_ASSERT_EQUAL_FATAL(list->head->next, NULL);
        CU_ASSERT_EQUAL_FATAL(list->total_span, 1);
        CU_ASSERT_EQUAL_FATAL(list->num_segments, 1);
        ret = tsk_identity_segments_get_keys(&result, pairs);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL_FATAL(pairs[0], 0);
        CU_ASSERT_EQUAL_FATAL(pairs[1], 1);
        ret = tsk_identity_segments_get_items(&result, pairs, lists);
        /* As above: return code first, then the buffers it filled in */
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL_FATAL(pairs[0], 0);
        CU_ASSERT_EQUAL_FATAL(pairs[1], 1);
        CU_ASSERT_FATAL(lists[0] != NULL);
        CU_ASSERT_FATAL(lists[0]->head != NULL);
        CU_ASSERT_EQUAL_FATAL(lists[0]->head->left, 0);
        CU_ASSERT_EQUAL_FATAL(lists[0]->head->right, 1);
        CU_ASSERT_EQUAL_FATAL(lists[0]->head->next, NULL);
        CU_ASSERT_EQUAL_FATAL(lists[0]->total_span, 1);
        CU_ASSERT_EQUAL_FATAL(lists[0]->num_segments, 1);
        tsk_identity_segments_free(&result);
    }

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* IBD segments between the sample sets {0, 1} and {2, 3} on the single tree
 * example: each of the four cross-set pairs is expected to have exactly one
 * segment, [0, 1) at node 6. */
static void
test_ibd_segments_single_tree_between(void)
{
    int ret;
    size_t j;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t sizes[] = { 2, 2 };
    /* Every pair with one member from each sample set */
    const tsk_id_t expected_pairs[][2] = { { 0, 2 }, { 0, 3 }, { 1, 2 }, { 1, 3 } };
    const size_t num_expected = sizeof(expected_pairs) / sizeof(expected_pairs[0]);
    tsk_identity_segments_t result;
    tsk_identity_segment_list_t *list = NULL;
    tsk_identity_segment_t *seg = NULL;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);

    /* Get segs between {0, 1} and {2, 3} */
    ret = tsk_table_collection_ibd_between(
        ts.tables, &result, 2, sizes, samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    verify_ibd_result(&result);
    CU_ASSERT_EQUAL_FATAL(tsk_identity_segments_get_num_segments(&result), 4);

    for (j = 0; j < num_expected; j++) {
        ret = tsk_identity_segments_get(
            &result, expected_pairs[j][0], expected_pairs[j][1], &list);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* Guard the dereferences below */
        CU_ASSERT_FATAL(list != NULL);
        seg = list->head;
        CU_ASSERT_FATAL(seg != NULL);
        CU_ASSERT_EQUAL_FATAL(seg->next, NULL);
        CU_ASSERT_EQUAL_FATAL(seg->left, 0);
        CU_ASSERT_EQUAL_FATAL(seg->right, 1);
        CU_ASSERT_EQUAL_FATAL(seg->node, 6);
    }

    tsk_identity_segments_free(&result);
    tsk_treeseq_free(&ts);
}
/* IBD segments within samples { 0, 1, 2 } of the multiple tree example. The
 * pairs (0, 1) and (0, 2) are each expected to have two segments, compared
 * here in list order against the true_* tables; the overall result is then
 * cross-checked, both for the explicit sample list and for all samples. */
static void
test_ibd_segments_multiple_trees(void)
{
    int ret;
    tsk_size_t j, k;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_id_t samples[] = { 0, 1, 2 };
    tsk_id_t pairs[][2] = { { 0, 1 }, { 0, 2 } };
    tsk_size_t num_samples = 3;
    tsk_size_t num_pairs = 2;
    tsk_identity_segments_t result;
    /* Indexed as [pair][segment position in the list] */
    double true_left[2][2] = { { 0.0, 0.75 }, { 0.75, 0.0 } };
    double true_right[2][2] = { { 0.75, 1.0 }, { 1.0, 0.75 } };
    tsk_id_t true_node[2][2] = { { 4, 5 }, { 5, 6 } };
    tsk_identity_segment_list_t *list = NULL;
    tsk_identity_segment_t *seg;

    tsk_treeseq_from_text(&ts, 2, multiple_tree_ex_nodes, multiple_tree_ex_edges, NULL,
        NULL, NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_ibd_within(
        &tables, &result, samples, num_samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    for (j = 0; j < num_pairs; j++) {
        ret = tsk_identity_segments_get(&result, pairs[j][0], pairs[j][1], &list);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* Guard the dereferences below */
        CU_ASSERT_FATAL(list != NULL);
        CU_ASSERT_EQUAL_FATAL(list->num_segments, 2);
        k = 0;
        for (seg = list->head; seg != NULL; seg = seg->next) {
            /* Never index past the two expected segments per pair, even if
             * the list is longer than its num_segments claims. */
            CU_ASSERT_FATAL(k < 2);
            CU_ASSERT_EQUAL_FATAL(seg->left, true_left[j][k]);
            CU_ASSERT_EQUAL_FATAL(seg->right, true_right[j][k]);
            CU_ASSERT_EQUAL_FATAL(seg->node, true_node[j][k]);
            k++;
        }
        CU_ASSERT_EQUAL_FATAL(list->num_segments, k);
    }
    verify_ibd_result(&result);
    tsk_identity_segments_free(&result);

    /* Same computation over all samples: consistency checks only */
    ret = tsk_table_collection_ibd_within(
        &tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    verify_ibd_result(&result);
    tsk_identity_segments_free(&result);

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* Runs ibd_within with a single sample (only samples[0] is passed) and checks
 * that looking up the pair (0, 1) succeeds but yields no segment list. */
static void
test_ibd_segments_empty_result(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_id_t samples[] = { 0, 1 };
    tsk_identity_segments_t result;
    tsk_identity_segment_list_t *list;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_ibd_within(
        &tables, &result, samples, 1, 0.0, 0.5, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Capture the lookup's own return code so the check below tests this
     * call rather than the previous one. */
    ret = tsk_identity_segments_get(&result, samples[0], samples[1], &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(list == NULL);
    verify_ibd_result(&result);

    tsk_identity_segments_free(&result);
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* Runs the within-sample IBD query on the multiple-tree example with
 * min_span 0.5 and max_time 3.0, and checks that only the pair (0, 1) keeps
 * a segment ([0, 0.75) on node 4) while (1, 2) and (0, 2) have no list. */
static void
test_ibd_segments_min_span_max_time(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_identity_segments_t result;
    tsk_identity_segment_list_t *list;
    tsk_identity_segment_t *seg;

    tsk_treeseq_from_text(&ts, 2, multiple_tree_ex_nodes, multiple_tree_ex_edges, NULL,
        NULL, NULL, NULL, NULL, 0);
    /* The tree sequence's own tables are queried directly (no copy) */
    ret = tsk_table_collection_ibd_within(
        ts.tables, &result, NULL, 0, 0.5, 3.0, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_identity_segments_get(&result, 0, 1, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* A lookup can return 0 with a NULL list (as asserted for the other
     * pairs below), so guard the dereferences that follow. */
    CU_ASSERT_FATAL(list != NULL);
    CU_ASSERT_EQUAL_FATAL(list->num_segments, 1);
    seg = list->head;
    CU_ASSERT_FATAL(seg != NULL);
    CU_ASSERT_EQUAL_FATAL(seg->left, 0.0);
    CU_ASSERT_EQUAL_FATAL(seg->right, 0.75);
    CU_ASSERT_EQUAL_FATAL(seg->node, 4);

    ret = tsk_identity_segments_get(&result, 1, 2, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(list, NULL);

    ret = tsk_identity_segments_get(&result, 0, 2, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(list, NULL);

    verify_ibd_result(&result);
    tsk_identity_segments_free(&result);
    tsk_treeseq_free(&ts);
}
/* Checks the error codes returned by the IBD "within" and "between" queries
 * for invalid sample IDs, negative min_span / max_time and duplicate samples,
 * and the error codes of tsk_identity_segments_get for bad node pairs. */
static void
test_ibd_segments_errors(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_id_t samples[] = { 0, 1, 2 };
    tsk_id_t duplicate_samples[] = { 0, 1, 0 };
    tsk_id_t samples2[] = { -1, 1 };
    tsk_size_t sample_set_sizes[] = { 3 };
    /* samples2 holds only two IDs, so it gets a matching set size; pairing
     * it with sample_set_sizes would describe one element past its end. */
    tsk_size_t sample_set_sizes2[] = { 2 };
    tsk_identity_segments_t result;
    tsk_identity_segment_list_t *list;

    tsk_treeseq_from_text(&ts, 2, multiple_tree_ex_nodes, multiple_tree_ex_edges, NULL,
        NULL, NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    // Invalid sample IDs
    ret = tsk_table_collection_ibd_within(
        &tables, &result, samples2, 1, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    tsk_identity_segments_free(&result);
    ret = tsk_table_collection_ibd_between(&tables, &result, 1, sample_set_sizes2,
        samples2, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    tsk_identity_segments_free(&result);

    // Bad length or time
    ret = tsk_table_collection_ibd_within(&tables, &result, samples, 2, 0.0, -1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    tsk_identity_segments_free(&result);
    ret = tsk_table_collection_ibd_within(&tables, &result, samples, 2, -1, 0.0, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    tsk_identity_segments_free(&result);
    ret = tsk_table_collection_ibd_between(&tables, &result, 1, sample_set_sizes,
        samples, -1, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    tsk_identity_segments_free(&result);
    ret = tsk_table_collection_ibd_between(
        &tables, &result, 1, sample_set_sizes, samples, 0, -1, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
    tsk_identity_segments_free(&result);

    // Duplicate samples
    ret = tsk_table_collection_ibd_within(
        &tables, &result, duplicate_samples, 3, 0.0, DBL_MAX, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
    tsk_identity_segments_free(&result);
    ret = tsk_table_collection_ibd_between(&tables, &result, 1, sample_set_sizes,
        duplicate_samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
    tsk_identity_segments_free(&result);

    // Check for bad inputs to result
    ret = tsk_table_collection_ibd_within(
        &tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_identity_segments_get(&result, 0, -1, &list);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    ret = tsk_identity_segments_get(&result, -1, 0, &list);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    ret = tsk_identity_segments_get(&result, 0, 100, &list);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    ret = tsk_identity_segments_get(&result, 100, 0, &list);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
    /* An in-range pair without stored segments: success, NULL list */
    ret = tsk_identity_segments_get(&result, 0, 5, &list);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(list, NULL);
    /* TODO add more checks here */
    ret = tsk_identity_segments_get(&result, 0, 0, &list);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SAME_NODES_IN_PAIR);
    tsk_identity_segments_free(&result);

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* On the multi-root example with all six nodes given as samples, checks that
 * each listed pair has exactly one stored segment spanning [0, 1) whose node
 * matches the hard-coded expectation in true_node. */
static void
test_ibd_segments_samples_are_descendants(void)
{
    int ret;
    tsk_treeseq_t ts;
    tsk_id_t samples[] = { 0, 1, 2, 3, 4, 5 };
    tsk_size_t num_samples = 6;
    tsk_identity_segments_t result;
    tsk_id_t pairs[][2] = { { 0, 2 }, { 0, 4 }, { 2, 4 }, { 1, 3 }, { 1, 5 }, { 3, 5 } };
    tsk_size_t num_pairs = 6;
    /* Expected segment node for pairs[j], in the same order */
    tsk_id_t true_node[] = { 2, 4, 4, 3, 5, 5 };
    tsk_size_t j;
    tsk_identity_segment_list_t *list;
    tsk_identity_segment_t *seg;

    tsk_treeseq_from_text(&ts, 1, multi_root_tree_ex_nodes, multi_root_tree_ex_edges,
        NULL, NULL, NULL, NULL, NULL, 0);
    ret = tsk_table_collection_ibd_within(
        ts.tables, &result, samples, num_samples, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    for (j = 0; j < num_pairs; j++) {
        /* Store the lookup's return code; otherwise the assertion below
         * would only re-check the result of the query above. */
        ret = tsk_identity_segments_get(&result, pairs[j][0], pairs[j][1], &list);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_FATAL(list != NULL);
        CU_ASSERT_EQUAL_FATAL(list->num_segments, 1);
        seg = list->head;
        CU_ASSERT_FATAL(seg != NULL);
        CU_ASSERT_EQUAL_FATAL(seg->left, 0);
        CU_ASSERT_EQUAL_FATAL(seg->right, 1);
        CU_ASSERT_EQUAL_FATAL(seg->node, true_node[j]);
    }

    verify_ibd_result(&result);
    tsk_identity_segments_free(&result);
    tsk_treeseq_free(&ts);
}
/* On the multi-path example, checks that each of the pairs (0, 1), (0, 2)
 * and (1, 2) has exactly two stored segments whose left, right and node
 * values match the hard-coded expectations, in list order. */
static void
test_ibd_segments_multiple_ibd_paths(void)
{
    int ret;
    tsk_size_t j, k;
    tsk_treeseq_t ts;
    tsk_id_t pairs[][2] = { { 0, 1 }, { 0, 2 }, { 1, 2 } };
    tsk_size_t num_pairs = 3;
    tsk_identity_segments_t result;
    /* Expected values, indexed as [pair][position in the segment list] */
    double true_left[3][2] = { { 0.2, 0.0 }, { 0.2, 0.0 }, { 0.0, 0.2 } };
    double true_right[3][2] = { { 1.0, 0.2 }, { 1.0, 0.2 }, { 0.2, 1.0 } };
    double true_node[3][2] = { { 4, 5 }, { 3, 5 }, { 4, 4 } };
    tsk_identity_segment_list_t *list;
    tsk_identity_segment_t *seg;

    tsk_treeseq_from_text(&ts, 2, multi_path_tree_ex_nodes, multi_path_tree_ex_edges,
        NULL, NULL, NULL, NULL, NULL, 0);
    ret = tsk_table_collection_ibd_within(
        ts.tables, &result, NULL, 0, 0.0, DBL_MAX, TSK_IBD_STORE_SEGMENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    for (j = 0; j < num_pairs; j++) {
        /* Store the lookup's return code; otherwise the assertion below
         * would only re-check the result of the query above. */
        ret = tsk_identity_segments_get(&result, pairs[j][0], pairs[j][1], &list);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        /* Fail cleanly instead of dereferencing a missing list */
        CU_ASSERT_FATAL(list != NULL);
        k = 0;
        for (seg = list->head; seg != NULL; seg = seg->next) {
            /* Only two entries per pair exist in the expectation arrays */
            CU_ASSERT_FATAL(k < 2);
            CU_ASSERT_EQUAL_FATAL(seg->left, true_left[j][k]);
            CU_ASSERT_EQUAL_FATAL(seg->right, true_right[j][k]);
            CU_ASSERT_EQUAL_FATAL(seg->node, true_node[j][k]);
            k++;
        }
        CU_ASSERT_EQUAL_FATAL(k, 2);
    }

    verify_ibd_result(&result);
    tsk_identity_segments_free(&result);
    tsk_treeseq_free(&ts);
}
static void
test_ibd_segments_odd_topologies(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_id_t samples[] = { 0, 1 };
tsk_id_t samples1[] = { 0, 2 };
tsk_identity_segments_t result;
tsk_treeseq_from_text(
&ts, 1, odd_tree1_ex_nodes, odd_tree1_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// Multiple roots.
ret = tsk_table_collection_ibd_within(
&tables, &result, samples, 1, 0, 0, TSK_IBD_STORE_SEGMENTS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
verify_ibd_result(&result);
tsk_identity_segments_free(&result);
// Parent is a sample.
ret = tsk_table_collection_ibd_within(
&tables, &result, samples1, 1, 0, 0, TSK_IBD_STORE_SEGMENTS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
verify_ibd_result(&result);
tsk_identity_segments_free(&result);
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}
/* Asserts that a table collection copied from a tree sequence reports an
 * index before tsk_table_collection_simplify and no index afterwards. */
static void
test_simplify_tables_drops_indexes(void)
{
    tsk_id_t kept_samples[] = { 0, 1 };
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Before: the copy carries an index */
    CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0));

    ret = tsk_table_collection_simplify(&tables, kept_samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* After: the index is reported as absent */
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* Asserts that simplifying a freshly initialised collection (with only the
 * sequence length set) succeeds and leaves the node and edge tables empty. */
static void
test_simplify_empty_tables(void)
{
    tsk_table_collection_t tables;
    int ret = tsk_table_collection_init(&tables, 0);

    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* NULL sample list with zero samples */
    ret = tsk_table_collection_simplify(&tables, NULL, 0, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 0);

    tsk_table_collection_free(&tables);
}
static void
test_simplify_metadata(void)
{
int ret;
tsk_table_collection_t tables;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 10;
tsk_edge_table_add_row(&tables.edges, 0, 0, 1, 1, "metadata", 8);
ret = tsk_table_collection_simplify(&tables, NULL, 0, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);
tsk_table_collection_free(&tables);
}
static void
test_edge_update_invalidates_index(void)
{
int ret;
tsk_id_t ret_id;
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
/* Any operations on the edge table should now invalidate the index */
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0))
ret = tsk_edge_table_clear(&tables.edges);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));
/* Even though the actual indexes still exist */
CU_ASSERT_FALSE(tables.indexes.edge_insertion_order == NULL);
CU_ASSERT_FALSE(tables.indexes.edge_removal_order == NULL);
CU_ASSERT_EQUAL_FATAL(tables.indexes.num_edges, tsk_treeseq_get_num_edges(&ts));
ret = tsk_treeseq_copy_tables(&ts, &tables, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0))
ret_id = tsk_edge_table_add_row(&tables.edges, 0, 1, 0, 1, NULL, 0);
CU_ASSERT_TRUE(ret_id > 0);
CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));
/* Even though the actual indexes still exist */
CU_ASSERT_FALSE(tables.indexes.edge_insertion_order == NULL);
CU_ASSERT_FALSE(tables.indexes.edge_removal_order == NULL);
CU_ASSERT_EQUAL_FATAL(tables.indexes.num_edges, tsk_treeseq_get_num_edges(&ts));
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}
/* Builds a table collection from the paper example, adds two rows each to
 * the migration, population and provenance tables, copies the collection
 * and asserts that the copy compares equal to the original. */
static void
test_copy_table_collection(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables, tables_copy;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Add some migrations, population and provenance */
    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 1, 2, 3, 4, 5, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_migration_table_add_row(&tables.migrations, 1, 2, 3, 4, 5, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    ret_id = tsk_population_table_add_row(&tables.populations, "metadata", 8);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_population_table_add_row(&tables.populations, "other", 5);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    ret_id = tsk_provenance_table_add_row(&tables.provenances, "time", 4, "record", 6);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_provenance_table_add_row(&tables.provenances, "time ", 5, "record ", 7);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);

    /* Check the copy itself before comparing, so that a failed copy is
     * reported as such rather than as an unequal (or unusable) result. */
    ret = tsk_table_collection_copy(&tables, &tables_copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &tables_copy, 0));

    tsk_table_collection_free(&tables);
    tsk_table_collection_free(&tables_copy);
    tsk_treeseq_free(&ts);
}
static void
test_sort_tables_offsets(void)
{
int ret;
tsk_treeseq_t *ts;
tsk_table_collection_t tables, copy;
tsk_bookmark_t bookmark;
ts = caterpillar_tree(10, 5, 5);
ret = tsk_treeseq_copy_tables(ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Check that setting edge offset = len(edges) does nothing */
reverse_edges(&tables);
ret = tsk_table_collection_copy(&tables, ©, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.edges = tables.edges.num_rows;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, ©, 0));
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Check that setting migration offset = len(migrations) does nothing */
reverse_migrations(&tables);
ret = tsk_table_collection_copy(&tables, ©, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.migrations = tables.migrations.num_rows;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, ©, 0));
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tables.sites.num_rows > 2);
CU_ASSERT_FATAL(tables.mutations.num_rows > 2);
/* Check that setting mutation and site offset = to the len
* of the tables leaves them untouched. */
reverse_mutations(&tables);
/* Swap the positions of the first two sites, as a quick way
* to disorder the site table */
tables.sites.position[0] = tables.sites.position[1];
tables.sites.position[1] = 0;
ret = tsk_table_collection_copy(&tables, ©, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.sites = tables.sites.num_rows;
bookmark.mutations = tables.mutations.num_rows;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, ©, 0));
/* Anything other than len(table) leads to an error for sites
* and mutations, and we can't specify one without the other. */
tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.sites = tables.sites.num_rows;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.mutations = tables.mutations.num_rows;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.sites = tables.sites.num_rows - 1;
bookmark.mutations = tables.mutations.num_rows - 1;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
/* Individuals must either all be sorted or all skipped */
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Add a parent relation that unsorts the table */
tables.individuals.parents[0] = 5;
ret = tsk_table_collection_copy(&tables, ©, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.individuals = tables.individuals.num_rows;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, ©, 0));
/* Check that sorting would have had no effect as individuals not in default sort*/
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, ©, 0));
/* Individual bookmark ignored */
tsk_memset(&bookmark, 0, sizeof(bookmark));
bookmark.individuals = tables.individuals.num_rows - 1;
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_table_collection_free(&tables);
tsk_table_collection_free(©);
tsk_treeseq_free(ts);
free(ts);
}
/* Copies the single-tree example into a table collection using the given
 * copy options, then asserts that the collection reports an index before
 * tsk_table_collection_sort and no index afterwards.
 *
 * tc_options: flags forwarded to tsk_treeseq_copy_tables. */
static void
test_sort_tables_drops_indexes_with_options(tsk_flags_t tc_options)
{
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, tc_options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Before: the copy carries an index */
    CU_ASSERT_TRUE(tsk_table_collection_has_index(&tables, 0));

    ret = tsk_table_collection_sort(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* After: the index is reported as absent */
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
static void
test_sort_tables_drops_indexes(void)
{
test_sort_tables_drops_indexes_with_options(0);
test_sort_tables_drops_indexes_with_options(TSK_TC_NO_EDGE_METADATA);
}
/* After insert_edge_metadata has been applied to the tables, asserts that
 * reversing the edges makes the collection differ from a snapshot and that
 * sorting makes it equal to the snapshot again. */
static void
test_sort_tables_edge_metadata(void)
{
    tsk_table_collection_t shuffled, reference;
    tsk_treeseq_t ts;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &shuffled, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    insert_edge_metadata(&shuffled);

    /* Snapshot taken before the edges are disturbed */
    ret = tsk_table_collection_copy(&shuffled, &reference, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&shuffled, &reference, 0));

    reverse_edges(&shuffled);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&shuffled, &reference, 0));

    ret = tsk_table_collection_sort(&shuffled, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&shuffled, &reference, 0));

    tsk_table_collection_free(&shuffled);
    tsk_table_collection_free(&reference);
    tsk_treeseq_free(&ts);
}
/* Runs the reverse-edges / sort / compare round trip on tables copied with
 * TSK_TC_NO_EDGE_METADATA, first against a snapshot copied with default
 * options and then against one copied with TSK_TC_NO_EDGE_METADATA. */
static void
test_sort_tables_no_edge_metadata(void)
{
    tsk_table_collection_t shuffled, reference;
    tsk_treeseq_t ts;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &shuffled, TSK_TC_NO_EDGE_METADATA);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(shuffled.edges.options & TSK_TABLE_NO_METADATA);

    /* Round 1: the snapshot's edge table does not have the no-metadata flag */
    ret = tsk_table_collection_copy(&shuffled, &reference, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(reference.edges.options & TSK_TABLE_NO_METADATA);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&shuffled, &reference, 0));
    reverse_edges(&shuffled);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&shuffled, &reference, 0));
    ret = tsk_table_collection_sort(&shuffled, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&shuffled, &reference, 0));
    tsk_table_collection_free(&reference);

    /* Round 2: both collections have the no-metadata flag on their edges */
    ret = tsk_table_collection_copy(&shuffled, &reference, TSK_TC_NO_EDGE_METADATA);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(shuffled.edges.options & TSK_TABLE_NO_METADATA);
    CU_ASSERT_TRUE(reference.edges.options & TSK_TABLE_NO_METADATA);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&shuffled, &reference, 0));
    reverse_edges(&shuffled);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&shuffled, &reference, 0));
    ret = tsk_table_collection_sort(&shuffled, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&shuffled, &reference, 0));
    tsk_table_collection_free(&reference);

    tsk_table_collection_free(&shuffled);
    tsk_treeseq_free(&ts);
}
/* Checks the return codes of tsk_table_collection_sort for a range of
 * bookmark ("pos") values on the single-tree example, and for a mutation
 * table expected to fail with TSK_ERR_MUTATION_PARENT_INCONSISTENT.
 * Note that pos is not always reset between steps, so the order of the
 * statements below matters. */
static void
test_sort_tables_errors(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_bookmark_t pos;
    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_memset(&pos, 0, sizeof(pos));
    /* Everything 0 should be fine */
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Everything is sorted already */
    pos.edges = tables.edges.num_rows;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Edge offsets beyond the table length are rejected: first the largest
     * tsk_size_t value, then one past the end */
    pos.edges = (tsk_size_t) -1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
    pos.edges = tables.edges.num_rows + 1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGE_OUT_OF_BOUNDS);
    /* Same two out-of-range checks for the migration offset */
    tsk_memset(&pos, 0, sizeof(pos));
    pos.migrations = (tsk_size_t) -1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
    pos.migrations = tables.migrations.num_rows + 1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
    /* Node, population and provenance positions are ignored */
    tsk_memset(&pos, 0, sizeof(pos));
    pos.nodes = 1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_memset(&pos, 0, sizeof(pos));
    pos.populations = 1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_memset(&pos, 0, sizeof(pos));
    pos.provenances = 1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Specifying only one of sites or mutations is an error */
    tsk_memset(&pos, 0, sizeof(pos));
    pos.sites = 1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
    tsk_memset(&pos, 0, sizeof(pos));
    pos.mutations = 1;
    ret = tsk_table_collection_sort(&tables, &pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
    /* Test TSK_ERR_MUTATION_PARENT_INCONSISTENT */
    /* Start again from cleared tables holding one node and one site */
    ret = tsk_table_collection_clear(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1.0;
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.0, "x", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    /* Four mutation rows that differ only in their third integer argument
     * (2, 3, 1, 2) and derived state. NOTE(review): that argument is
     * presumably the parent mutation ID, in which case the parents refer to
     * each other in a loop - confirm against tsk_mutation_table_add_row. */
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, "a", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 3, 0.0, "b", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 1, 0.0, "c", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, "d", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_INCONSISTENT);
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* Builds tables from the single-tree example (with the times of nodes 4, 5
 * and 6 overridden) plus four sites and seven timed mutations, turns them
 * into a tree sequence, and asserts that reversing the mutations and then
 * sorting restores a collection equal to the snapshot taken beforehand. */
static void
test_sort_tables_mutation_times(void)
{
    tsk_table_collection_t tables, shuffled, reference;
    tsk_treeseq_t ts;
    int ret;
    const char *site_text = "0 0\n"
                            "0.1 0\n"
                            "0.2 0\n"
                            "0.3 0\n";
    const char *mutation_text = "0 0 1 -1 3\n"
                                "1 1 1 -1 3\n"
                                "2 4 1 -1 8\n"
                                "2 1 0 -1 4\n"
                                "2 2 1 -1 3\n"
                                "2 1 1 -1 2\n"
                                "3 6 1 -1 10\n";

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* Nodes, with the times of the last three overridden */
    parse_nodes(single_tree_ex_nodes, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
    tables.nodes.time[4] = 6;
    tables.nodes.time[5] = 8;
    tables.nodes.time[6] = 10;

    parse_edges(single_tree_ex_edges, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);

    parse_sites(site_text, &tables.sites);
    parse_mutations(mutation_text, &tables.mutations);
    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 4);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 7);

    tables.sequence_length = 1.0;
    ret = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Check to make sure we have legal mutations */
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Work on tables copied back out of the tree sequence */
    ret = tsk_treeseq_copy_tables(&ts, &shuffled, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_copy(&shuffled, &reference, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&shuffled, &reference, 0));

    reverse_mutations(&shuffled);
    CU_ASSERT_FALSE(tsk_table_collection_equals(&shuffled, &reference, 0));

    ret = tsk_table_collection_sort(&shuffled, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&shuffled, &reference, 0));

    tsk_table_collection_free(&reference);
    tsk_table_collection_free(&shuffled);
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* Parses a deliberately disordered mutation table, sorts the collection and
 * asserts that the resulting mutation table equals one parsed from the
 * hand-written expected order. Only the mutation tables are compared. */
static void
test_sort_tables_mutations(void)
{
    int ret;
    tsk_table_collection_t tables;
    /* Sorting hierarchy:
     * 1. site
     * 2. time (when known)
     * 3. node_time
     * 4. num_descendants: parent mutations first
     * 5. node_id
     * 6. mutation_id
     */
    const char *sites = "0.0 A\n"
                        "0.5 T\n"
                        "0.75 G\n";
    /* Input rows; the "mut N" labels give each row's position in this
     * input. Rows for site 2 have no trailing time field. */
    const char *mutations_unsorted =
        /* Test site criterion (primary) - site 1 should come after site 0 */
        "1 0 X -1 0.0\n" /* mut 0: site 1, will be sorted after site 0 mutations */
        "0 0 Y -1 0.0\n" /* mut 1: site 0, will be sorted before site 1 mutations */
        /* Test time criterion - within same site, earlier time first */
        "0 4 B -1 2.0\n" /* mut 2: site 0, node 4 (time 1.0), time 2.0 (later time)
                          */
        "0 5 A -1 2.5\n" /* mut 3: site 0, node 5 (time 2.0), time 2.5 (earlier
                            relative) */
        /* Test unknown vs known times - unknown times at site 2, fall back to node_time
           sorting */
        "2 4 U2 -1\n" /* mut 4: site 2, node 4 (time 1.0), unknown time - falls back
                         to node_time */
        "2 4 U3 -1\n" /* mut 5: site 2, node 4 (time 1.0), unknown time - should use
                         mutation_id as tiebreaker */
        "2 5 U1 -1\n" /* mut 6: site 2, node 5 (time 2.0), unknown time - falls back
                         to node_time */
        /* Test node_time criterion - same site, same mut time, different node times */
        "0 4 D -1 1.5\n" /* mut 7: site 0, node 4 (time 1.0), mut time 1.5 */
        "0 5 C -1 2.5\n" /* mut 8: site 0, node 5 (time 2.0), mut time 2.5 - same
                            mut time */
        /* Test num_descendants criterion with mutation parent-child relationships */
        "0 2 P -1 0.0\n" /* mut 9: site 0, node 2, parent mutation (0 descendants
                            initially) */
        "0 1 C1 9 0.0\n" /* mut 10: site 0, node 1, child of mut 9 (parent now has
                            1+ descendants) */
        "0 1 C2 9 0.0\n" /* mut 11: site 0, node 1, another child of mut 9 (parent
                            now has 2+ descendants) */
        "0 3 Q -1 0.0\n" /* mut 12: site 0, node 3, no children (0 descendants) */
        "0 0 C3 10 0.0\n" /* mut 13: site 0, node 0, child of mut 10 (making mut 9 a
                             grandparent) */
        /* Test node and mutation_id criteria for final tiebreaking */
        "0 0 Z1 -1 0.0\n" /* mut 14: site 0, node 0, no parent, will test node+id
                             ordering */
        "0 0 Z2 -1 0.0\n"; /* mut 15: site 0, node 0, no parent, later in input =
                              higher ID */
    /* Expected rows after sorting; the parent column here holds different
     * numbers from the input (e.g. C1's parent is 4 rather than 9). */
    const char *mutations_sorted =
        /* Site 0 mutations - known times first, sorted by time */
        "0 5 A -1 2.5\n"
        "0 5 C -1 2.5\n"
        "0 4 B -1 2.0\n"
        "0 4 D -1 1.5\n"
        "0 2 P -1 0.0\n"
        "0 1 C1 4 0.0\n"
        "0 0 Y -1 0.0\n"
        "0 0 C3 5 0.0\n"
        "0 0 Z1 -1 0.0\n"
        "0 0 Z2 -1 0.0\n"
        "0 1 C2 4 0.0\n"
        "0 3 Q -1 0.0\n"
        /* Site 1 mutations */
        "1 0 X -1 0.0\n"
        /* Site 2 mutations - unknown times, sorted by node_time then other criteria */
        "2 5 U1 -1\n"
        "2 4 U2 -1\n"
        "2 4 U3 -1\n";
    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1.0;
    parse_nodes(single_tree_ex_nodes, &tables.nodes);
    parse_edges(single_tree_ex_edges, &tables.edges);
    parse_sites(sites, &tables.sites);
    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 3);
    parse_mutations(mutations_unsorted, &tables.mutations);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 16);
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Build the expected collection from the same nodes, edges and sites,
     * but with the hand-sorted mutation rows */
    tsk_table_collection_t expected;
    ret = tsk_table_collection_init(&expected, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    expected.sequence_length = 1.0;
    parse_nodes(single_tree_ex_nodes, &expected.nodes);
    parse_edges(single_tree_ex_edges, &expected.edges);
    parse_sites(sites, &expected.sites);
    parse_mutations(mutations_sorted, &expected.mutations);
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&tables.mutations, &expected.mutations, 0));
    tsk_table_collection_free(&expected);
    tsk_table_collection_free(&tables);
}
/* Checks the return codes of tsk_table_collection_canonicalise for a series
 * of mutation and individual tables built up by hand. The tables are
 * modified in place between calls, so each step depends on the rows added
 * (or cleared) by the steps before it. */
static void
test_sort_tables_canonical_errors(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_table_collection_t tables;
    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;
    /* One-element ID arrays passed to tsk_individual_table_add_row below.
     * NOTE(review): presumably the individuals' parent lists, going by the
     * _p suffix and the error codes asserted - confirm against the API. */
    tsk_id_t null_p[] = { -1 };
    tsk_id_t zero_p[] = { 0 };
    tsk_id_t one_p[] = { 1 };
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.0, "x", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    /* Step 1: four mutation rows whose third integer argument is 2, 3, 1, 2;
     * canonicalise is expected to reject them */
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, "a", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 3, 0.0, "b", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 1, 0.0, "c", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, "d", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = tsk_table_collection_canonicalise(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_INCONSISTENT);
    /* Step 2: same rows except that the last one uses -1 instead of 2;
     * canonicalise is expected to succeed */
    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_FATAL(ret == 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 2, 0.0, "a", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 3, 0.0, "b", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 1, 0.0, "c", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, -1, 0.0, "d", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = tsk_table_collection_canonicalise(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* Step 3: two more nodes (fourth numeric argument 0 and 1 - presumably
     * the individual each node belongs to, TODO confirm) and two individuals
     * given one_p and zero_p respectively; a cycle error is expected */
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, one_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = tsk_table_collection_canonicalise(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);
    /* Step 4: both individuals given zero_p; a self-parent error is
     * expected */
    ret = tsk_individual_table_clear(&tables.individuals);
    CU_ASSERT_FATAL(ret == 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = tsk_table_collection_canonicalise(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_SELF_PARENT);
    /* Step 5: first individual given null_p, second given zero_p;
     * canonicalise is expected to succeed */
    ret = tsk_individual_table_clear(&tables.individuals);
    CU_ASSERT_FATAL(ret == 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, null_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = tsk_table_collection_canonicalise(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_table_collection_free(&tables);
}
/*
 * Compares the output of tsk_table_collection_canonicalise() with
 * hand-written expected tables, first with default options and then with
 * TSK_SUBSET_KEEP_UNREFERENCED.
 *
 * In the expected tables for the default run only populations "C" and "A"
 * and three individuals remain; in the KEEP_UNREFERENCED run population "B"
 * and the fourth individual are expected as trailing rows.
 */
static void
test_sort_tables_canonical(void)
{
    int ret;
    tsk_table_collection_t observed, expected;
    /* Input tables: single_tree_ex with individuals and populations. */
    const char *nodes_in = "1 0 -1 1\n"
                           "1 0 2 3\n"
                           "1 0 0 -1\n"
                           "1 0 -1 3\n"
                           "0 1 2 -1\n"
                           "0 2 -1 2\n"
                           "0 3 -1 -1\n";
    const char *individuals_in = "0 0.0 1\n"
                                 "0 1.0 -1\n"
                                 "0 2.0 1,3\n"
                                 "0 3.0 -1,1\n";
    const char *sites_in = "0 0\n"
                           "0.2 0\n"
                           "0.1 0\n";
    const char *mutations_in = "0 0 2 3 0.5\n"
                               "2 1 1 -1 0.5\n"
                               "1 4 3 -1 3\n"
                               "0 4 1 -1 2.5\n"
                               "2 2 1 -1 2\n"
                               "1 1 5 7 0.5\n"
                               "1 2 1 -1 2\n"
                               "1 1 4 2 0.5\n"
                               "1 1 6 7 0.5\n";
    /* Expected tables after canonicalisation. */
    const char *nodes_out = "1 0 -1 0\n"
                            "1 0 0 1\n"
                            "1 0 1 -1\n"
                            "1 0 -1 1\n"
                            "0 1 0 -1\n"
                            "0 2 -1 2\n"
                            "0 3 -1 -1\n";
    const char *individuals_out = "0 1.0 -1\n"
                                  "0 3.0 -1,0\n"
                                  "0 2.0 0,1\n";
    const char *sites_out = "0 0\n"
                            "0.1 0\n"
                            "0.2 0\n";
    const char *mutations_out = "0 4 1 -1 2.5\n"
                                "0 0 2 0 0.5\n"
                                "1 2 1 -1 2\n"
                                "1 1 1 -1 0.5\n"
                                "2 4 3 -1 3\n"
                                "2 2 1 -1 2\n"
                                "2 1 4 4 0.5\n"
                                "2 1 5 6 0.5\n"
                                "2 1 6 6 0.5\n";
    /* Expected individuals when unreferenced rows are kept. */
    const char *individuals_out_kept = "0 1.0 -1\n"
                                       "0 3.0 -1,0\n"
                                       "0 2.0 0,1\n"
                                       "0 0.0 0\n";

    ret = tsk_table_collection_init(&observed, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    observed.sequence_length = 1.0;
    ret = tsk_table_collection_init(&expected, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    expected.sequence_length = 1.0;

    /* --- Default options --- */
    parse_nodes(nodes_in, &observed.nodes);
    CU_ASSERT_EQUAL_FATAL(observed.nodes.num_rows, 7);
    parse_individuals(individuals_in, &observed.individuals);
    CU_ASSERT_EQUAL_FATAL(observed.individuals.num_rows, 4);
    tsk_population_table_add_row(&observed.populations, "A", 1);
    tsk_population_table_add_row(&observed.populations, "B", 1);
    tsk_population_table_add_row(&observed.populations, "C", 1);
    parse_edges(single_tree_ex_edges, &observed.edges);
    CU_ASSERT_EQUAL_FATAL(observed.edges.num_rows, 6);
    parse_sites(sites_in, &observed.sites);
    CU_ASSERT_EQUAL_FATAL(observed.sites.num_rows, 3);
    parse_mutations(mutations_in, &observed.mutations);
    CU_ASSERT_EQUAL_FATAL(observed.mutations.num_rows, 9);

    ret = tsk_table_collection_canonicalise(&observed, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    parse_nodes(nodes_out, &expected.nodes);
    tsk_population_table_add_row(&expected.populations, "C", 1);
    tsk_population_table_add_row(&expected.populations, "A", 1);
    CU_ASSERT_EQUAL_FATAL(expected.nodes.num_rows, 7);
    parse_individuals(individuals_out, &expected.individuals);
    CU_ASSERT_EQUAL_FATAL(expected.individuals.num_rows, 3);
    parse_edges(single_tree_ex_edges, &expected.edges);
    CU_ASSERT_EQUAL_FATAL(expected.edges.num_rows, 6);
    parse_sites(sites_out, &expected.sites);
    parse_mutations(mutations_out, &expected.mutations);
    CU_ASSERT_EQUAL_FATAL(expected.sites.num_rows, 3);
    CU_ASSERT_EQUAL_FATAL(expected.mutations.num_rows, 9);

    CU_ASSERT_TRUE(tsk_table_collection_equals(&observed, &expected, 0));

    /* Reset both collections before the second scenario. */
    ret = tsk_table_collection_clear(&observed, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_clear(&expected, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* --- TSK_SUBSET_KEEP_UNREFERENCED --- */
    parse_nodes(nodes_in, &observed.nodes);
    parse_individuals(individuals_in, &observed.individuals);
    tsk_population_table_add_row(&observed.populations, "A", 1);
    tsk_population_table_add_row(&observed.populations, "B", 1);
    tsk_population_table_add_row(&observed.populations, "C", 1);
    parse_edges(single_tree_ex_edges, &observed.edges);
    parse_sites(sites_in, &observed.sites);
    parse_mutations(mutations_in, &observed.mutations);

    ret = tsk_table_collection_canonicalise(&observed, TSK_SUBSET_KEEP_UNREFERENCED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    parse_nodes(nodes_out, &expected.nodes);
    tsk_population_table_add_row(&expected.populations, "C", 1);
    tsk_population_table_add_row(&expected.populations, "A", 1);
    tsk_population_table_add_row(&expected.populations, "B", 1);
    parse_individuals(individuals_out_kept, &expected.individuals);
    CU_ASSERT_EQUAL_FATAL(expected.individuals.num_rows, 4);
    parse_edges(single_tree_ex_edges, &expected.edges);
    parse_sites(sites_out, &expected.sites);
    parse_mutations(mutations_out, &expected.mutations);

    CU_ASSERT_TRUE(tsk_table_collection_equals(&observed, &expected, 0));

    tsk_table_collection_free(&expected);
    tsk_table_collection_free(&observed);
}
static void
test_sort_tables_migrations(void)
{
int ret;
tsk_treeseq_t *ts;
tsk_table_collection_t tables, copy;
ts = caterpillar_tree(13, 1, 1);
ret = tsk_treeseq_copy_tables(ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tables.migrations.num_rows > 0);
ret = tsk_table_collection_copy(&tables, ©, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, ©, 0));
reverse_migrations(&tables);
CU_ASSERT_FATAL(!tsk_table_collection_equals(&tables, ©, 0));
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_migration_table_equals(&tables.migrations, ©.migrations, 0));
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, ©, 0));
/* Make sure we test the deeper comparison keys. The full key is
* (time, source, dest, left, node) */
tsk_migration_table_clear(&tables.migrations);
/* params = left, right, node, source, dest, time */
tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 0, NULL, 0);
tsk_migration_table_add_row(&tables.migrations, 0, 1, 1, 0, 1, 0, NULL, 0);
ret = tsk_migration_table_copy(&tables.migrations, ©.migrations, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
reverse_migrations(&tables);
CU_ASSERT_FATAL(!tsk_table_collection_equals(&tables, ©, 0));
ret = tsk_table_collection_sort(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(tsk_migration_table_equals(&tables.migrations, ©.migrations, 0));
tsk_table_collection_free(&tables);
tsk_table_collection_free(©);
tsk_treeseq_free(ts);
free(ts);
}
/*
 * Exercises individual ordering:
 *  - tsk_table_collection_sort() leaves the individual table untouched;
 *  - tsk_table_collection_individual_topological_sort() produces an order
 *    that passes TSK_CHECK_INDIVIDUAL_ORDERING and is unchanged by a second
 *    application;
 *  - out-of-bounds parents and parent cycles are reported as errors.
 */
static void
test_sort_tables_individuals(void)
{
    int ret;
    tsk_table_collection_t tables, copy;
    const char *individuals = "1 0.25 2,3 0\n"
                              "2 0.5 5,-1 1\n"
                              "3 0.3 -1 2\n"
                              "4 0.3 -1 3\n"
                              "5 0.3 3 4\n"
                              "6 0.3 4 5\n";
    const char *individuals_cycle = "1 0.2 2 0\n"
                                    "2 0.5 0 1\n"
                                    "3 0.3 1 2\n";
    const tsk_id_t bad_parents[] = { 200 };
    tsk_id_t ret_id;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1.0;
    parse_individuals(individuals, &tables.individuals);

    ret = tsk_table_collection_copy(&tables, &copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Table sort doesn't touch individuals by default */
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));

    /* Not calling with TSK_CHECK_TREES so casting is safe */
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_INDIVIDUAL_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);

    ret = tsk_table_collection_individual_topological_sort(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_INDIVIDUAL_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Check that the sort is stable: sorting already-sorted tables must
     * leave them equal to a snapshot taken beforehand. */
    tsk_table_collection_free(&copy);
    ret = tsk_table_collection_copy(&tables, &copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_individual_topological_sort(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &copy, 0));

    /* Errors on bad table */
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, bad_parents, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 6);
    ret = tsk_table_collection_individual_topological_sort(&tables, 0);
    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);

    /* Errors on cycle */
    ret = tsk_individual_table_clear(&tables.individuals);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    parse_individuals(individuals_cycle, &tables.individuals);
    ret = tsk_table_collection_individual_topological_sort(&tables, 0);
    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_PARENT_CYCLE);

    tsk_table_collection_free(&tables);
    tsk_table_collection_free(&copy);
}
/*
 * Drives the tsk_table_sorter_t interface directly through four scenarios:
 * a plain sort, a sort with the edge callback disabled, a sort with
 * TSK_NO_CHECK_INTEGRITY, and a sort with user_data set.
 */
static void
test_sorter_interface(void)
{
    int ret;
    tsk_treeseq_t reference;
    tsk_table_collection_t work;
    tsk_table_sorter_t srt;

    tsk_treeseq_from_text(&reference, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, NULL, NULL, NULL, NULL, 0);
    ret = tsk_treeseq_copy_tables(&reference, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(reference.tables, &work, 0));

    /* Scenario 1: nominal case, reversed edges get sorted back. */
    reverse_edges(&work);
    CU_ASSERT_FALSE(tsk_table_collection_equals(reference.tables, &work, 0));
    ret = tsk_table_sorter_init(&srt, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_sorter_run(&srt, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(reference.tables, &work, 0));
    CU_ASSERT_EQUAL(srt.user_data, NULL);
    tsk_table_sorter_free(&srt);

    /* Scenario 2: with sort_edges set to NULL the edge table must be left
     * exactly as it was, i.e. still reversed after the run. */
    reverse_edges(&work);
    CU_ASSERT_FALSE(tsk_edge_table_equals(&reference.tables->edges, &work.edges, 0));
    ret = tsk_table_sorter_init(&srt, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    srt.sort_edges = NULL;
    ret = tsk_table_sorter_run(&srt, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_edge_table_equals(&reference.tables->edges, &work.edges, 0));
    tsk_table_sorter_free(&srt);

    /* Undoing the reversal by hand brings the edges back in line. */
    reverse_edges(&work);
    CU_ASSERT_TRUE(tsk_edge_table_equals(&reference.tables->edges, &work.edges, 0));

    /* Scenario 3: skip the integrity check before sorting. */
    reverse_edges(&work);
    CU_ASSERT_FALSE(tsk_table_collection_equals(reference.tables, &work, 0));
    ret = tsk_table_sorter_init(&srt, &work, TSK_NO_CHECK_INTEGRITY);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_sorter_run(&srt, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(reference.tables, &work, 0));
    tsk_table_sorter_free(&srt);

    /* Scenario 4: user_data set by the caller survives the run unchanged. */
    reverse_edges(&work);
    CU_ASSERT_FALSE(tsk_table_collection_equals(reference.tables, &work, 0));
    ret = tsk_table_sorter_init(&srt, &work, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    srt.user_data = (void *) &reference;
    ret = tsk_table_sorter_run(&srt, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(reference.tables, &work, 0));
    CU_ASSERT_EQUAL_FATAL(srt.user_data, &reference);
    tsk_table_sorter_free(&srt);

    tsk_table_collection_free(&work);
    tsk_treeseq_free(&reference);
}
/*
 * Dumps a table collection that has no index and loads it back, checking
 * that neither side acquires an index and that nodes and edges round-trip.
 *
 * tc_options: flags passed to tsk_table_collection_init() for the
 *             collection being dumped.
 */
static void
test_dump_unindexed_with_options(tsk_flags_t tc_options)
{
    int ret;
    tsk_table_collection_t original, reloaded;

    ret = tsk_table_collection_init(&original, tc_options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    original.sequence_length = 1;

    parse_nodes(single_tree_ex_nodes, &original.nodes);
    CU_ASSERT_EQUAL_FATAL(original.nodes.num_rows, 7);
    parse_edges(single_tree_ex_edges, &original.edges);
    CU_ASSERT_EQUAL_FATAL(original.edges.num_rows, 6);

    /* No index before the dump, and dumping must not create one. */
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&original, 0));
    ret = tsk_table_collection_dump(&original, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&original, 0));

    /* The loaded collection is unindexed as well. */
    ret = tsk_table_collection_load(&reloaded, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&reloaded, 0));
    CU_ASSERT_TRUE(tsk_node_table_equals(&original.nodes, &reloaded.nodes, 0));
    CU_ASSERT_TRUE(tsk_edge_table_equals(&original.edges, &reloaded.edges, 0));

    tsk_table_collection_free(&reloaded);
    tsk_table_collection_free(&original);
}
static void
test_dump_unindexed(void)
{
test_dump_unindexed_with_options(0);
test_dump_unindexed_with_options(TSK_TC_NO_EDGE_METADATA);
}
/*
 * Round-trips an empty table collection (only sequence_length set) through
 * dump and load and checks the result compares equal.
 *
 * tc_options: flags passed to tsk_table_collection_init() for the
 *             collection being dumped.
 */
static void
test_dump_load_empty_with_options(tsk_flags_t tc_options)
{
    int ret;
    tsk_table_collection_t written, read_back;

    ret = tsk_table_collection_init(&written, tc_options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    written.sequence_length = 1.0;

    ret = tsk_table_collection_dump(&written, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_load(&read_back, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&written, &read_back, 0));

    tsk_table_collection_free(&written);
    tsk_table_collection_free(&read_back);
}
static void
test_dump_load_empty(void)
{
test_dump_load_empty_with_options(0);
test_dump_load_empty_with_options(TSK_TC_NO_EDGE_METADATA);
}
/*
 * Builds a small collection whose edges are deliberately out of order,
 * then checks that it can be dumped and loaded unchanged and that no index
 * appears on either side.
 *
 * tc_options: flags passed to tsk_table_collection_init() for the
 *             collection being dumped.
 */
static void
test_dump_load_unsorted_with_options(tsk_flags_t tc_options)
{
    /* Five sample nodes; the values are the node times. */
    const double node_times[] = { 0, 0, 0, 1, 2 };
    /* Four edges over [0, 1), listed as (parent, child) pairs. */
    const tsk_id_t edge_parent[] = { 3, 4, 3, 4 };
    const tsk_id_t edge_child[] = { 0, 3, 1, 2 };
    int ret;
    tsk_id_t ret_id, j;
    tsk_table_collection_t t1, t2;

    ret = tsk_table_collection_init(&t1, tc_options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    t1.sequence_length = 1.0;

    for (j = 0; j < 5; j++) {
        ret_id = tsk_node_table_add_row(
            &t1.nodes, TSK_NODE_IS_SAMPLE, node_times[j], TSK_NULL, TSK_NULL, NULL, 0);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }
    for (j = 0; j < 4; j++) {
        ret_id = tsk_edge_table_add_row(
            &t1.edges, 0, 1, edge_parent[j], edge_child[j], NULL, 0);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Confirm the edge table really is unsorted before dumping it. */
    ret = (int) tsk_table_collection_check_integrity(&t1, TSK_CHECK_EDGE_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);

    ret = tsk_table_collection_dump(&t1, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&t1, 0));

    ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&t1, 0));
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&t2, 0));

    tsk_table_collection_free(&t1);
    tsk_table_collection_free(&t2);
}
static void
test_dump_load_unsorted(void)
{
test_dump_load_unsorted_with_options(0);
test_dump_load_unsorted_with_options(TSK_TC_NO_EDGE_METADATA);
}
static void
test_dump_load_metadata_schema(void)
{
int ret;
tsk_table_collection_t t1, t2;
ret = tsk_table_collection_init(&t1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
t1.sequence_length = 1.0;
char example[100] = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
tsk_size_t example_length = (tsk_size_t) strlen(example) + 4;
tsk_node_table_set_metadata_schema(
&t1.nodes, strcat(example, "node"), example_length);
tsk_edge_table_set_metadata_schema(
&t1.edges, strcat(example, "edge"), example_length);
tsk_site_table_set_metadata_schema(
&t1.sites, strcat(example, "site"), example_length);
tsk_mutation_table_set_metadata_schema(
&t1.mutations, strcat(example, "muta"), example_length);
tsk_migration_table_set_metadata_schema(
&t1.migrations, strcat(example, "migr"), example_length);
tsk_individual_table_set_metadata_schema(
&t1.individuals, strcat(example, "indi"), example_length);
tsk_population_table_set_metadata_schema(
&t1.populations, strcat(example, "popu"), example_length);
ret = tsk_table_collection_dump(&t1, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_load(&t2, _tmp_file_name, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));
tsk_table_collection_free(&t1);
tsk_table_collection_free(&t2);
}
/*
 * Builds a collection with unsorted edges, removes the temporary file and
 * checks that the file is absent.
 *
 * NOTE(review): despite the name, no dump is attempted in this function;
 * the final assertion only confirms that the file does not exist after the
 * unlink. Confirm whether a dump call was intended here.
 */
static void
test_dump_fail_no_file(void)
{
    /* Five sample nodes; the values are the node times. */
    const double node_times[] = { 0, 0, 0, 1, 2 };
    /* Four edges over [0, 1), listed as (parent, child) pairs. */
    const tsk_id_t edge_parent[] = { 3, 4, 3, 4 };
    const tsk_id_t edge_child[] = { 0, 3, 1, 2 };
    int ret;
    tsk_id_t ret_id, j;
    tsk_table_collection_t t1;

    ret = tsk_table_collection_init(&t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    t1.sequence_length = 1.0;

    for (j = 0; j < 5; j++) {
        ret_id = tsk_node_table_add_row(
            &t1.nodes, TSK_NODE_IS_SAMPLE, node_times[j], TSK_NULL, TSK_NULL, NULL, 0);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }
    for (j = 0; j < 4; j++) {
        ret_id = tsk_edge_table_add_row(
            &t1.edges, 0, 1, edge_parent[j], edge_child[j], NULL, 0);
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Confirm the edge table really is unsorted. */
    ret = (int) tsk_table_collection_check_integrity(&t1, TSK_CHECK_EDGE_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);

    /* Remove any leftover file; the unlink result is deliberately ignored
     * and errno is reset afterwards. */
    unlink(_tmp_file_name);
    errno = 0;
    CU_ASSERT_EQUAL(access(_tmp_file_name, F_OK), -1);

    tsk_table_collection_free(&t1);
}
/*
 * Checks that an index can be dropped and rebuilt on a loaded table
 * collection, both for a file written from a tree sequence and for a file
 * written after the index was dropped.
 */
static void
test_load_reindex(void)
{
    int ret;
    tsk_treeseq_t source_ts;
    tsk_table_collection_t loaded;

    tsk_treeseq_from_text(&source_ts, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, NULL, NULL, NULL, NULL, 0);
    ret = tsk_treeseq_dump(&source_ts, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* First pass: drop then rebuild the index on the loaded tables. */
    ret = tsk_table_collection_load(&loaded, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_drop_index(&loaded, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&loaded, 0));
    ret = tsk_table_collection_build_index(&loaded, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_has_index(&loaded, 0));

    /* Second pass: write the tables without an index, reload them and
     * build the index from scratch. */
    ret = tsk_table_collection_drop_index(&loaded, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_dump(&loaded, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_free(&loaded);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_load(&loaded, _tmp_file_name, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_table_collection_has_index(&loaded, 0));
    ret = tsk_table_collection_build_index(&loaded, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_has_index(&loaded, 0));

    tsk_table_collection_free(&loaded);
    tsk_treeseq_free(&source_ts);
}
static void
test_table_overflow(void)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
tsk_size_t max_rows = ((tsk_size_t) TSK_MAX_ID);
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Simulate overflows */
tables.individuals.max_rows = max_rows;
tables.individuals.num_rows = max_rows;
ret_id
= tsk_individual_table_add_row(&tables.individuals, 0, 0, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);
tables.nodes.max_rows = max_rows;
tables.nodes.num_rows = max_rows;
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);
tables.edges.max_rows = max_rows;
tables.edges.num_rows = max_rows;
ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);
tables.migrations.max_rows = max_rows;
tables.migrations.num_rows = max_rows;
ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);
tables.sites.max_rows = max_rows;
tables.sites.num_rows = max_rows;
ret_id = tsk_site_table_add_row(&tables.sites, 0, 0, 0, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);
tables.mutations.max_rows = max_rows;
tables.mutations.num_rows = max_rows;
ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, 0, 0, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);
tables.provenances.max_rows = max_rows;
tables.provenances.num_rows = max_rows;
ret_id = tsk_provenance_table_add_row(&tables.provenances, 0, 0, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);
tables.populations.max_rows = max_rows;
tables.populations.num_rows = max_rows;
ret_id = tsk_population_table_add_row(&tables.populations, 0, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLE_OVERFLOW);
tsk_table_collection_free(&tables);
}
/*
 * Checks that every ragged (variable-length) column reports
 * TSK_ERR_COLUMN_OVERFLOW in two situations:
 *  1. a row is added whose column length is TSK_MAX_SIZE;
 *  2. the column's length increment has been set to TSK_MAX_SIZE and a row
 *     with a one-element value is added.
 * A normal row is added to each table first so that the columns are not
 * empty when the overflowing rows are attempted.
 */
static void
test_column_overflow(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_table_collection_t tables;
    tsk_size_t too_big = TSK_MAX_SIZE;
    double zero = 0;
    char zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
    tsk_id_t id_zeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* ---- individuals ---- */
    // location
    /* We can't trigger a column overflow with one element because the parameter
     * value is 32 bit */
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, &zero, 1, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    // Check normal overflow from additional length
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, too_big, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    // Check overflow from minimum increment
    ret = tsk_individual_table_set_max_location_length_increment(
        &tables.individuals, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 1, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    // parents
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, id_zeros, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, too_big, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_individual_table_set_max_parents_length_increment(
        &tables.individuals, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    // metadata
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 0, zeros, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 2);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, too_big);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_individual_table_set_max_metadata_length_increment(
        &tables.individuals, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    /* ---- nodes (metadata) ---- */
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, zeros, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, too_big);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_node_table_set_max_metadata_length_increment(&tables.nodes, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, 0, NULL, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    /* ---- edges (metadata) ---- */
    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, zeros, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, NULL, too_big);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_edge_table_set_max_metadata_length_increment(&tables.edges, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 0, 0, 0, NULL, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    /* ---- sites ---- */
    ret_id = tsk_site_table_add_row(&tables.sites, 0, zeros, 1, zeros, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    // ancestral state
    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, too_big, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_site_table_set_max_ancestral_state_length_increment(
        &tables.sites, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    // metadata
    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, 0, NULL, too_big);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_site_table_set_max_metadata_length_increment(&tables.sites, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0, NULL, 0, NULL, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    /* ---- mutations ---- */
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, zeros, 1, zeros, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    // derived state
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, 0, 0, NULL, too_big, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_mutation_table_set_max_derived_state_length_increment(
        &tables.mutations, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, NULL, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    // metadata
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, 0, 0, NULL, 0, NULL, too_big);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_mutation_table_set_max_metadata_length_increment(
        &tables.mutations, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, 0, NULL, 0, NULL, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    /* ---- provenances ---- */
    ret_id = tsk_provenance_table_add_row(&tables.provenances, zeros, 1, zeros, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    // timestamp
    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, too_big, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_provenance_table_set_max_timestamp_length_increment(
        &tables.provenances, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    // record
    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, 0, NULL, too_big);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_provenance_table_set_max_record_length_increment(
        &tables.provenances, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_provenance_table_add_row(&tables.provenances, NULL, 0, NULL, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    /* ---- populations (metadata) ---- */
    ret_id = tsk_population_table_add_row(&tables.populations, zeros, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, too_big);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_population_table_set_max_metadata_length_increment(
        &tables.populations, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    /* ---- migrations (metadata) ---- */
    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0, zeros, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0, 0, 0, 0, 0, 0, NULL, too_big);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);
    ret = tsk_migration_table_set_max_metadata_length_increment(
        &tables.migrations, too_big);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 0, 0, 0, 0, 0, NULL, 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_COLUMN_OVERFLOW);

    tsk_table_collection_free(&tables);
}
/* Drives tsk_table_collection_check_integrity through a sequence of
 * deliberately malformed node, edge, site, mutation, migration and individual
 * rows, asserting the specific error code reported for each.
 *
 * tc_options is forwarded unchanged to tsk_table_collection_init.
 *
 * Every add_row result is checked through ret_id; ret only ever carries the
 * status of clear/init/check_integrity calls.
 */
static void
test_table_collection_check_integrity_with_options(tsk_flags_t tc_options)
{
    int ret;
    tsk_id_t ret_id;
    tsk_table_collection_t tables;
    const char *individuals = "1 0.25 -1\n"
                              "2 0.5,0.25 2\n"
                              "3 0.5,0.25 0\n";

    ret = tsk_table_collection_init(&tables, tc_options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* nodes */
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, INFINITY, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);

    /* Not calling with TSK_CHECK_TREES so casting is safe */
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);

    /* Node referring to a population that does not exist */
    ret = tsk_node_table_clear(&tables.nodes);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_NO_CHECK_POPULATION_REFS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);

    /* Node referring to an individual that does not exist */
    ret = tsk_node_table_clear(&tables.nodes);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);

    /* Two valid nodes (times 0.0 and 1.0) used by the remaining checks */
    ret = tsk_node_table_clear(&tables.nodes);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* edges */
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, TSK_NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_PARENT);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NULL_CHILD);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 2, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, INFINITY, 1, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, -1.0, 1.0, 1, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_LEFT_LESS_ZERO);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.1, 1, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.5, 0.1, 1, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 0.5, 0, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING);

    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* sites */
    ret_id = tsk_site_table_add_row(&tables.sites, INFINITY, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION);

    ret = tsk_site_table_clear(&tables.sites);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_site_table_add_row(&tables.sites, -0.5, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION);

    ret = tsk_site_table_clear(&tables.sites);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 1.5, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_SITE_POSITION);

    /* Duplicate positions are only reported with TSK_CHECK_SITE_DUPLICATES */
    ret = tsk_site_table_clear(&tables.sites);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_SITE_DUPLICATES);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SITE_POSITION);

    /* Unsorted positions are only reported with TSK_CHECK_SITE_ORDERING */
    ret = tsk_site_table_clear(&tables.sites);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.4, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_SITE_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_SITES);

    /* Two sorted sites (0.5 and 0.6) used by the mutation checks below */
    ret = tsk_site_table_clear(&tables.sites);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.6, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    /* mutations */
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 2, 0, TSK_NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 2, TSK_NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* A mixture of known and unknown times on a site fails */
    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN);

    /* But on different sites, passes */
    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 0, TSK_NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(&tables.mutations, 0, 1, 2, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 1, 0, 1.0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_EQUAL);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id
        = tsk_mutation_table_add_row(&tables.mutations, 0, 1, 1, 1.0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 1, TSK_NULL, 1.0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 1, 0, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);

    /* Unknown times pass */
    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Correctly ordered times pass */
    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, 1, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, 1, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Incorrectly ordered times fail */
    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, 1, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);

    /* Putting incorrectly ordered times on diff sites passes */
    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, 1, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 0, TSK_NULL, 2, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 0, TSK_NULL, 1, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Non-finite mutation times (NAN, then INFINITY) are rejected */
    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, NAN, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, INFINITY, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 1, TSK_NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 1, TSK_NULL, 1, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(&tables.mutations, 1, 1, 0, 2, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION);

    ret = tsk_mutation_table_clear(&tables.mutations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MUTATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* migrations */
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.0, 0.5, 2, 0, 1, 1.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.0, 0.5, 1, 2, 1, 1.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.0, 0.5, 1, 0, 2, 1.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.0, 0.5, 1, 0, 1, INFINITY, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.0, 0.5, 1, 0, 1, 1.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.0, 0.5, 1, 1, 0, 0.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_MIGRATION_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MIGRATIONS);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.0, INFINITY, 1, 0, 1, 1.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, -0.3, 0.5, 1, 0, 1, 1.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_LEFT_LESS_ZERO);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.0, 1.5, 1, 0, 1, 1.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_migration_table_add_row(
        &tables.migrations, 0.6, 0.5, 1, 0, 1, 1.5, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);

    ret = tsk_migration_table_clear(&tables.migrations);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* individuals */
    parse_individuals(individuals, &tables.individuals);
    CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 3);
    ret = (int) tsk_table_collection_check_integrity(
        &tables, TSK_CHECK_INDIVIDUAL_ORDERING);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_INDIVIDUALS);
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Check that an individual can't be its own parent */
    tables.individuals.parents[0] = 0;
    tables.individuals.parents[1] = 1;
    tables.individuals.parents[2] = 2;
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_SELF_PARENT);

    /* A parent ID of -2 is reported as out of bounds */
    tables.individuals.parents[0] = -2;
    ret = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);

    tsk_table_collection_free(&tables);
}
static void
test_table_collection_check_integrity_no_populations(void)
{
int ret;
tsk_id_t ret_id;
tsk_id_t ret_num_trees;
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
paper_ex_mutations, paper_ex_individuals, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Add in some bad population references and check that we can use
* TSK_NO_CHECK_POPULATION_REFS with TSK_CHECK_TREES */
tables.nodes.population[0] = 10;
/* Not calling with TSK_CHECK_TREES so casting is safe */
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
ret_num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT_EQUAL_FATAL(ret_num_trees, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
ret = (int) tsk_table_collection_check_integrity(
&tables, TSK_NO_CHECK_POPULATION_REFS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_num_trees = tsk_table_collection_check_integrity(
&tables, TSK_CHECK_TREES | TSK_NO_CHECK_POPULATION_REFS);
/* CHECK_TREES returns the number of trees */
CU_ASSERT_EQUAL_FATAL(ret_num_trees, 3);
tables.nodes.population[0] = TSK_NULL;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_migration_table_add_row(
&tables.migrations, 0.4, 0.5, 1, 0, 1, 1.5, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
ret_num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT_EQUAL_FATAL(ret_num_trees, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
ret = (int) tsk_table_collection_check_integrity(
&tables, TSK_NO_CHECK_POPULATION_REFS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_num_trees = tsk_table_collection_check_integrity(
&tables, TSK_CHECK_TREES | TSK_NO_CHECK_POPULATION_REFS);
CU_ASSERT_EQUAL_FATAL(ret_num_trees, 3);
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}
static void
test_table_collection_check_integrity(void)
{
test_table_collection_check_integrity_with_options(0);
test_table_collection_check_integrity_with_options(TSK_TC_NO_EDGE_METADATA);
}
/* Concrete example of stale indexes: build and index valid tables, then
 * replace the edges without rebuilding the index, and check that
 * tsk_table_collection_check_integrity with TSK_CHECK_TREES reports the
 * problem. */
static void
test_table_collection_check_integrity_bad_indexes_example(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_table_collection_t tables;

    /* We start with a concrete example where you can get bad trees
     * by building some valid tables, clearing the edges, and then
     * building new ones without rebuilding the indexes. */
    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 5;

    /* nodes */
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 2);

    /* edges */
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 5.0, 2, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 5.0, 2, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);

    /* build index */
    ret = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* okay now build a new table without rebuilding the indexes */
    ret = tsk_edge_table_clear(&tables.edges);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);

    /* make sure we don't use too-long indexes */
    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_NOT_INDEXED);

    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 4.0, 2, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);

    /* should error, as tree sequence will be wrong */
    ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
    CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);

    tsk_table_collection_free(&tables);
}
static void
test_table_collection_check_integrity_bad_indexes(void)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
/* Now hit some other weird cases by manipulating the indexes directly */
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 5;
/* nodes */
ret_id = tsk_node_table_add_row(
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret_id = tsk_node_table_add_row(
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);
ret_id = tsk_node_table_add_row(
&tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 2);
/* edges */
ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret_id = tsk_edge_table_add_row(&tables.edges, 1.0, 2.0, 2, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);
ret_id = tsk_edge_table_add_row(&tables.edges, 2.0, 5.0, 2, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 2);
ret_id = tsk_edge_table_add_row(&tables.edges, 1.0, 3.0, 2, 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 3);
/* build index */
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT(ret_id > 0);
/* edge removed before it is added */
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.indexes.edge_insertion_order[0] = 1;
tables.indexes.edge_insertion_order[2] = 0;
ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);
/* edge added twice (implies another is never added) */
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.indexes.edge_insertion_order[0] = 0;
tables.indexes.edge_insertion_order[1] = 0;
tables.indexes.edge_removal_order[0] = 1;
tables.indexes.edge_removal_order[2] = 2;
ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);
/* edge never removed but should have been */
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.indexes.edge_removal_order[0] = 0;
tables.indexes.edge_removal_order[1] = 1;
tables.indexes.edge_removal_order[2] = 2;
tables.indexes.edge_removal_order[3] = 3;
ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);
/* edge progression out of order */
tables.edges.right[2] = 4.0;
ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);
/* edge never used */
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.indexes.edge_insertion_order[0] = 0;
tables.indexes.edge_insertion_order[1] = 3;
tables.indexes.edge_insertion_order[2] = 0;
tables.indexes.edge_insertion_order[3] = 3;
tables.indexes.edge_removal_order[0] = 0;
tables.indexes.edge_removal_order[1] = 3;
tables.indexes.edge_removal_order[2] = 0;
tables.indexes.edge_removal_order[3] = 3;
ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_BAD_INDEXES);
/* make sure we don't use the too-short indexes */
ret_id = tsk_edge_table_add_row(&tables.edges, 4.0, 5.0, 2, 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 4);
ret_id = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
CU_ASSERT_EQUAL_FATAL(ret_id, TSK_ERR_TABLES_NOT_INDEXED);
tsk_table_collection_free(&tables);
}
/* Checks the error codes that TSK_CHECK_MUTATION_PARENTS produces for three
 * mutation/site layouts loaded onto the single-tree example, and that the
 * same tables still pass a plain TSK_CHECK_TREES check. */
static void
test_check_integrity_bad_mutation_parent_topology(void)
{
    int err;
    tsk_id_t status;
    tsk_table_collection_t tables;
    const char *sites = "0 0\n";
    /* Make a mutation on a parallel branch the parent*/
    const char *bad_mutations = "0 0 1 -1\n"
                                "0 1 1 0\n";
    /* A mutation above is set as child*/
    const char *reverse_mutations = "0 0 1 -1\n"
                                    "0 4 1 0\n";
    const char *reverse_sites = "0.5 0\n"
                                "0 0\n";

    err = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    tables.sequence_length = 1;

    /* Load the single-tree example plus one site carrying two mutations */
    parse_nodes(single_tree_ex_nodes, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
    parse_edges(single_tree_ex_edges, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
    parse_sites(sites, &tables.sites);
    CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 1);
    parse_mutations(bad_mutations, &tables.mutations);
    CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 2);
    tables.sequence_length = 1.0;

    err = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Trees alone are fine; only the mutation-parent check objects */
    status = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    status = tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_BAD_MUTATION_PARENT);

    parse_mutations(reverse_mutations, &tables.mutations);
    status = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
    CU_ASSERT_EQUAL_FATAL(status, 1);
    status = tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);

    /* Now check that TSK_CHECK_MUTATION_PARENTS implies TSK_CHECK_TREES
       by triggering an error with reversed sites */
    parse_sites(reverse_sites, &tables.sites);
    status = tsk_table_collection_check_integrity(&tables, TSK_CHECK_MUTATION_PARENTS);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_UNSORTED_SITES);

    tsk_table_collection_free(&tables);
}
/* Checks that tsk_table_collection_compute_mutation_parents succeeds and
 * overwrites a bogus pre-existing parent value (42) with TSK_NULL for a
 * lone mutation. */
static void
test_table_collection_compute_mutation_parents_tolerates_invalid_input(void)
{
    int err;
    tsk_id_t row;
    tsk_id_t site;
    tsk_table_collection_t tables;

    err = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    tables.sequence_length = 1.0;

    /* Node 0 (time 1.0) is the parent of sample node 1 (time 0.0) */
    row = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(row >= 0);
    row = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(row >= 0);
    row = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 0, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row, 0);

    /* One site with a single mutation on node 1 */
    site = tsk_site_table_add_row(&tables.sites, 0.0, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(site >= 0);
    row = tsk_mutation_table_add_row(
        &tables.mutations, site, 1, TSK_NULL, TSK_UNKNOWN_TIME, "C", 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row, 0);

    err = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Plant an out-of-range parent before recomputing */
    tables.mutations.parent[0] = 42;
    err = tsk_table_collection_compute_mutation_parents(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_FATAL(tables.mutations.parent[0] == TSK_NULL);

    tsk_table_collection_free(&tables);
}
/* Checks that when tsk_table_collection_compute_mutation_parents fails with
 * TSK_ERR_MUTATION_PARENT_AFTER_CHILD, the mutation parent column still
 * holds the values it had before the call. */
static void
test_table_collection_compute_mutation_parents_restores_on_error(void)
{
    int err;
    tsk_id_t row;
    tsk_id_t site;
    tsk_table_collection_t tables;

    err = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    tables.sequence_length = 1.0;

    /* Node 0 (time 1.0) is the parent of sample node 1 (time 0.0) */
    row = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(row >= 0);
    row = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(row >= 0);
    row = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 0, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row, 0);

    /* The mutation on child node 1 is listed before the one on node 0 */
    site = tsk_site_table_add_row(&tables.sites, 0.5, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(site >= 0);
    row = tsk_mutation_table_add_row(
        &tables.mutations, site, 1, TSK_NULL, TSK_UNKNOWN_TIME, "C", 1, NULL, 0);
    CU_ASSERT_FATAL(row >= 0);
    row = tsk_mutation_table_add_row(
        &tables.mutations, site, 0, TSK_NULL, TSK_UNKNOWN_TIME, "G", 1, NULL, 0);
    CU_ASSERT_FATAL(row >= 0);

    err = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Sentinel parent values that must survive the failed call */
    tables.mutations.parent[0] = 111;
    tables.mutations.parent[1] = 222;
    err = tsk_table_collection_compute_mutation_parents(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
    CU_ASSERT_EQUAL(tables.mutations.parent[0], 111);
    CU_ASSERT_EQUAL(tables.mutations.parent[1], 222);

    tsk_table_collection_free(&tables);
}
/* Exercises tsk_table_collection_subset on a small hand-built table
 * collection.
 *
 * `options` is passed to tsk_table_collection_init for both collections and
 * OR-ed with TSK_NO_INIT in every tsk_table_collection_copy, so the same
 * scenarios can be run under different table-collection init flags.
 *
 * Scenarios, in order:
 *  - subsetting a completely empty collection;
 *  - subsetting to no nodes, with and without
 *    TSK_SUBSET_NO_CHANGE_POPULATIONS and TSK_SUBSET_KEEP_UNREFERENCED;
 *  - the identity node list, keeping and then dropping unreferenced rows;
 *  - reversing the node order twice, which must round-trip.
 */
static void
test_table_collection_subset_with_options(tsk_flags_t options)
{
    int ret;
    tsk_id_t ret_id;
    tsk_table_collection_t tables;
    tsk_table_collection_t tables_copy;
    int k;
    tsk_id_t nodes[4];
    tsk_id_t zero_p[] = { 0 };
    tsk_id_t one_p[] = { 1 };

    ret = tsk_table_collection_init(&tables, options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;
    ret = tsk_table_collection_init(&tables_copy, options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    // does not error on empty tables
    ret = tsk_table_collection_subset(&tables, NULL, 0, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    // four nodes from two diploids (individuals 0 and 1); the first
    // diploid is from pop 0, the second has no population
    ret_id
        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 2.0, TSK_NULL, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    // individuals 0 and 1 are the ones referenced by the nodes above;
    // individual 1 lists individual 0 as its parent
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
    // this result used to be overwritten without being checked
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, zero_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    // individuals 2 and 3 are not referenced by any node; both list
    // individual 1 as their parent
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, one_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, one_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    // unused population
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret_id = tsk_site_table_add_row(&tables.sites, 0.2, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.4, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    // unused site
    ret_id = tsk_site_table_add_row(&tables.sites, 0.5, "C", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    // two mutations at site 0 (the second has the first as parent) and
    // one at site 1
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, 0, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    // empty nodes should get empty tables
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(&tables_copy, NULL, 0, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.individuals.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, 0);

    // unless NO_CHANGE_POPULATIONS is provided
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(
        &tables_copy, NULL, 0, TSK_SUBSET_NO_CHANGE_POPULATIONS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.individuals.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, 0);
    CU_ASSERT_FATAL(
        tsk_population_table_equals(&tables.populations, &tables_copy.populations, 0));

    // or KEEP_UNREFERENCED
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(
        &tables_copy, NULL, 0, TSK_SUBSET_KEEP_UNREFERENCED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, 0);
    CU_ASSERT_FATAL(
        tsk_individual_table_equals(&tables.individuals, &tables_copy.individuals, 0));
    CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, 2);
    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, 0);
    CU_ASSERT_FATAL(tsk_site_table_equals(&tables.sites, &tables_copy.sites, 0));

    // or both
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(&tables_copy, NULL, 0,
        TSK_SUBSET_KEEP_UNREFERENCED | TSK_SUBSET_NO_CHANGE_POPULATIONS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, 0);
    CU_ASSERT_FATAL(
        tsk_individual_table_equals(&tables.individuals, &tables_copy.individuals, 0));
    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, 0);
    CU_ASSERT_FATAL(
        tsk_population_table_equals(&tables.populations, &tables_copy.populations, 0));
    CU_ASSERT_FATAL(tsk_site_table_equals(&tables.sites, &tables_copy.sites, 0));

    // the identity transformation, since unused pops are at the end
    for (k = 0; k < 4; k++) {
        nodes[k] = k;
    }
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(
        &tables_copy, nodes, 4, TSK_SUBSET_KEEP_UNREFERENCED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));

    // or, remove unused things:
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables_copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_node_table_equals(&tables.nodes, &tables_copy.nodes, 0));
    CU_ASSERT_EQUAL_FATAL(tables_copy.individuals.num_rows, 2);
    CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, 1);
    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, 2);
    CU_ASSERT_FATAL(
        tsk_mutation_table_equals(&tables.mutations, &tables_copy.mutations, 0));

    // reverse twice should get back to the start, since unused pops are at the end
    for (k = 0; k < 4; k++) {
        nodes[k] = 3 - k;
    }
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(
        &tables_copy, nodes, 4, TSK_SUBSET_KEEP_UNREFERENCED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(
        &tables_copy, nodes, 4, TSK_SUBSET_KEEP_UNREFERENCED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = (int) tsk_table_collection_check_integrity(&tables_copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));

    tsk_table_collection_free(&tables_copy);
    tsk_table_collection_free(&tables);
}
static void
test_table_collection_subset(void)
{
test_table_collection_subset_with_options(0);
test_table_collection_subset_with_options(TSK_TC_NO_EDGE_METADATA);
}
/* Subsetting with the identity node list and TSK_SUBSET_KEEP_UNREFERENCED
 * must leave a table collection unchanged even when its rows are in an
 * arbitrary order.
 *
 * The tables built here are deliberately disordered: an individual lists a
 * parent that appears after it, the edges are not grouped by parent, and the
 * first mutation names a parent mutation that appears after it. */
static void
test_table_collection_subset_unsorted(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_table_collection_t tables;
    tsk_table_collection_t tables_copy;
    int k;
    tsk_id_t nodes[3];
    tsk_id_t one_p[] = { 1 };

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;
    ret = tsk_table_collection_init(&tables_copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    // these tables are a big mess
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, TSK_NULL, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.0, TSK_NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    // individual 0 names individual 1 as parent before it exists; neither
    // of these results used to be checked
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, one_p, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 0.5, 2, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.5, 1.0, 2, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret_id = tsk_site_table_add_row(&tables.sites, 0.2, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.4, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    // mutation 0 points at mutation 2 as its parent
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, 2, TSK_UNKNOWN_TIME, "B", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    // but still, this should leave them unchanged
    for (k = 0; k < 3; k++) {
        nodes[k] = k;
    }
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_subset(
        &tables_copy, nodes, 3, TSK_SUBSET_KEEP_UNREFERENCED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));

    tsk_table_collection_free(&tables_copy);
    tsk_table_collection_free(&tables);
}
static void
test_table_collection_subset_errors(void)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
tsk_table_collection_t tables_copy;
tsk_id_t nodes[4] = { 0, 1, 2, 3 };
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
ret = tsk_table_collection_init(&tables_copy, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// four nodes from two diploids; the first is from pop 0
ret_id
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&tables.nodes, TSK_NODE_IS_SAMPLE, 2.0, TSK_NULL, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Migrations are not supported */
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_migration_table_add_row(&tables_copy.migrations, 0, 1, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables_copy.migrations.num_rows, 1);
ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
// test out of bounds nodes
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
nodes[0] = -1;
ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
nodes[0] = 6;
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
// check integrity
nodes[0] = 0;
nodes[1] = 1;
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_truncate(&tables_copy.nodes, 3);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_node_table_add_row(
&tables_copy.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_subset(&tables_copy, nodes, 4, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tsk_table_collection_free(&tables);
tsk_table_collection_free(&tables_copy);
}
/* Exercises tsk_table_collection_union on a small three-node collection.
 *
 * Scenarios, in order:
 *  - union of two empty collections, also when only one of them carries
 *    top-level metadata;
 *  - union with an empty `other`, and union into an empty `self`, both of
 *    which must reproduce the populated collection;
 *  - union of the collection with itself followed by a subset back to the
 *    first three nodes, which must reproduce the original;
 *  - union sharing only node 2, with and without TSK_UNION_NO_ADD_POP,
 *    checked through the resulting row counts.
 */
static void
test_table_collection_union(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_table_collection_t tables;
    tsk_table_collection_t tables_empty;
    tsk_table_collection_t tables_copy;
    tsk_id_t node_mapping[3];
    tsk_id_t parents[2] = { -1, -1 };
    char example_metadata[100] = "An example of metadata with unicode 🎄🌳🌴🌲🎋";
    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);

    // all-ones bytes, i.e. -1 (presumably TSK_NULL) in every slot
    tsk_memset(node_mapping, 0xff, sizeof(node_mapping));

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;
    ret = tsk_table_collection_init(&tables_empty, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables_empty.sequence_length = 1;
    ret = tsk_table_collection_init(&tables_copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    // does not error on empty tables
    ret = tsk_table_collection_union(&tables, &tables_empty, node_mapping, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    // does not error on empty tables but that differ on top level metadata
    ret = tsk_table_collection_set_metadata(
        &tables, example_metadata, example_metadata_length);
    // setup step: fatal, like every other setup assertion in this test
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_union(&tables, &tables_empty, node_mapping, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    // three nodes, two pop, three ind, two edge, two site, two mut
    ret_id
        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 1, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id
        = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 2, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    // individual parents are {-1, -1}, then {0, -1}, then {0, 1}
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, parents, 2, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    parents[0] = 0;
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, parents, 2, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    parents[1] = 1;
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, parents, 2, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    // sites are added out of position order; the sort below fixes that
    ret_id = tsk_site_table_add_row(&tables.sites, 0.4, "T", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_site_table_add_row(&tables.sites, 0.2, "A", 1, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    // this result used to be overwritten without being checked
    CU_ASSERT_FATAL(ret_id >= 0);
    ret_id = tsk_mutation_table_add_row(
        &tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
    CU_ASSERT_FATAL(ret_id >= 0);

    ret = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_sort(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    // union with empty should not change
    // other is empty
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_union(
        &tables_copy, &tables_empty, node_mapping, TSK_UNION_NO_CHECK_SHARED);
    // the return code used to go unchecked here, so a failing union that
    // left tables_copy untouched would still have passed the comparison
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));

    // self is empty
    ret = tsk_table_collection_clear(&tables_copy, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_union(
        &tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));

    // union of the tables with themselves + subset original nodes = original
    // table; node_mapping still holds -1 in every slot for the union call and
    // is only then reused as the list of node ids to keep
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_union(
        &tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    node_mapping[0] = 0;
    node_mapping[1] = 1;
    node_mapping[2] = 2;
    ret = tsk_table_collection_subset(&tables_copy, node_mapping, 3, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy, 0));

    // union with one shared node
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    node_mapping[0] = TSK_NULL;
    node_mapping[1] = TSK_NULL;
    node_mapping[2] = 2;
    ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(
        tables_copy.populations.num_rows, tables.populations.num_rows + 2);
    CU_ASSERT_EQUAL_FATAL(
        tables_copy.individuals.num_rows, tables.individuals.num_rows + 2);
    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, tables.nodes.num_rows + 2);
    CU_ASSERT_EQUAL_FATAL(tables_copy.edges.num_rows, tables.edges.num_rows + 2);
    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, tables.sites.num_rows);
    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, tables.mutations.num_rows + 2);

    // union with one shared node, but no add pop
    ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    node_mapping[0] = TSK_NULL;
    node_mapping[1] = TSK_NULL;
    node_mapping[2] = 2;
    ret = tsk_table_collection_union(
        &tables_copy, &tables, node_mapping, TSK_UNION_NO_ADD_POP);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, tables.populations.num_rows);
    CU_ASSERT_EQUAL_FATAL(
        tables_copy.individuals.num_rows, tables.individuals.num_rows + 2);
    CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, tables.nodes.num_rows + 2);
    CU_ASSERT_EQUAL_FATAL(tables_copy.edges.num_rows, tables.edges.num_rows + 2);
    CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, tables.sites.num_rows);
    CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, tables.mutations.num_rows + 2);

    tsk_table_collection_free(&tables_copy);
    tsk_table_collection_free(&tables_empty);
    tsk_table_collection_free(&tables);
}
static void
test_table_collection_disjoint_union(void)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
tsk_table_collection_t tables1;
tsk_table_collection_t tables2;
tsk_table_collection_t tables12;
tsk_id_t node_mapping[4];
tsk_memset(node_mapping, 0xff, sizeof(node_mapping));
ret = tsk_table_collection_init(&tables1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables1.sequence_length = 2;
// set up nodes, which will be shared
// flags, time, pop, ind, metadata, metadata_length
ret_id = tsk_node_table_add_row(
&tables1.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&tables1.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(&tables1.nodes, 0, 0.5, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(&tables1.nodes, 0, 1.5, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_copy(&tables1, &tables2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// for tables1:
// on [0, 1] we have 0, 1 inherit from 2
// left, right, parent, child, metadata, metadata_length
ret_id = tsk_edge_table_add_row(&tables1.edges, 0.0, 1.0, 2, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tables1.edges, 0.0, 1.0, 2, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tables1.sites, 0.4, "T", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tables1.mutations, ret_id, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_build_index(&tables1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_sort(&tables1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// all this goes in tables12 so far
ret = tsk_table_collection_copy(&tables1, &tables12, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// for tables2; and need to add to tables12 also:
// on [1, 2] we have 0, 1 inherit from 3
// left, right, parent, child, metadata, metadata_length
ret_id = tsk_edge_table_add_row(&tables2.edges, 1.0, 2.0, 3, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tables2.edges, 1.0, 2.0, 3, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tables2.sites, 1.4, "A", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tables2.mutations, ret_id, 1, TSK_NULL, TSK_UNKNOWN_TIME, "T", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_build_index(&tables2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_sort(&tables2, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// also tables12
ret_id = tsk_edge_table_add_row(&tables12.edges, 1.0, 2.0, 3, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tables12.edges, 1.0, 2.0, 3, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tables12.sites, 1.4, "A", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tables12.mutations, ret_id, 1, TSK_NULL, TSK_UNKNOWN_TIME, "T", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_build_index(&tables12, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_sort(&tables12, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// now disjoint union-ing tables1 and tables2 should get tables12
ret = tsk_table_collection_copy(&tables1, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
node_mapping[0] = 0;
node_mapping[1] = 1;
node_mapping[2] = 2;
node_mapping[3] = 3;
ret = tsk_table_collection_union(&tables, &tables2, node_mapping,
TSK_UNION_NO_CHECK_SHARED | TSK_UNION_ALL_EDGES | TSK_UNION_ALL_MUTATIONS);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FATAL(
tsk_table_collection_equals(&tables, &tables12, TSK_CMP_IGNORE_PROVENANCE));
tsk_table_collection_free(&tables12);
tsk_table_collection_free(&tables2);
tsk_table_collection_free(&tables1);
tsk_table_collection_free(&tables);
}
/* Union of two table collections whose shared history (a single node) lies
 * in the middle of the genealogy, with unshared nodes both above and below
 * it. The expected result `tu` is built by hand alongside the two inputs
 * `ta` and `tb`; after the union, a reordering subset and clearing the
 * provenance table, `ta` must equal `tu`.
 *
 * Rows for the three collections are added in an interleaved order; the
 * order of additions within each table fixes its row ids, so statements
 * must not be reordered within a table. */
static void
test_table_collection_union_middle_merge(void)
{
/* Test ability to have non-shared history both above and below the
 * shared bits. The full genealogy, in `tu`, is:
 *
 *      3   4
 *       \ /
 *        2
 *       / \
 *      0   1
 *
 * and the left lineage (0, 2, 3) is in `ta` and the right lineage
 * (1, 2, 4) in `tb`. */
int ret;
tsk_id_t ret_id;
/* Only b1 is shared: it corresponds to a1 (both are u2). NOTE(review):
 * presumably entry j names the node of `ta` equivalent to node j of `tb`,
 * with TSK_NULL meaning "not shared" - confirm against the union docs. */
tsk_id_t node_mapping[] = { TSK_NULL, 1, TSK_NULL };
/* Node order passed to subset after the union. NOTE(review): presumably the
 * union appends b0 and b2 as nodes 3 and 4 of `ta`, so this order yields
 * u0, u1, u2, u3, u4 - confirm. */
tsk_id_t node_order[] = { 0, 3, 1, 2, 4 };
tsk_table_collection_t ta, tb, tu;
ret = tsk_table_collection_init(&ta, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ta.sequence_length = 1;
ret = tsk_table_collection_init(&tb, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tb.sequence_length = 1;
ret = tsk_table_collection_init(&tu, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tu.sequence_length = 1;
/* The two sample nodes at time 0: u0 goes into ta, u1 into tb. */
ret_id = tsk_node_table_add_row(
&tu.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0); // node u0
CU_ASSERT(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&ta.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0); // node a0 = u0
CU_ASSERT(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&tu.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0); // node u1
CU_ASSERT(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&tb.nodes, TSK_NODE_IS_SAMPLE, 0, TSK_NULL, TSK_NULL, NULL, 0); // node b0 = u1
CU_ASSERT(ret_id >= 0);
/* The shared node u2 at time 1, parent of both samples over [0, 1]. */
ret_id = tsk_node_table_add_row(
&tu.nodes, 0, 1, TSK_NULL, TSK_NULL, NULL, 0); // node u2
CU_ASSERT(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tu.edges, 0, 1, 2, 0, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tu.edges, 0, 1, 2, 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&ta.nodes, 0, 1, TSK_NULL, TSK_NULL, NULL, 0); // node a1 = u2
CU_ASSERT(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&ta.edges, 0, 1, 1, 0, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&tb.nodes, 0, 1, TSK_NULL, TSK_NULL, NULL, 0); // node b1 = u2
CU_ASSERT(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tb.edges, 0, 1, 1, 0, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* u3 at time 2: parent of u2 over [0, 0.5]; present only in ta. */
ret_id = tsk_node_table_add_row(
&tu.nodes, 0, 2, TSK_NULL, TSK_NULL, NULL, 0); // node u3
CU_ASSERT(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tu.edges, 0, 0.5, 3, 2, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&ta.nodes, 0, 2, TSK_NULL, TSK_NULL, NULL, 0); // node a2 = u3
CU_ASSERT(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&ta.edges, 0, 0.5, 2, 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* u4 at time 2: parent of u2 over [0.5, 1]; present only in tb. */
ret_id = tsk_node_table_add_row(
&tu.nodes, 0, 2, TSK_NULL, TSK_NULL, NULL, 0); // node u4
CU_ASSERT(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tu.edges, 0.5, 1, 4, 2, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_node_table_add_row(
&tb.nodes, 0, 2, TSK_NULL, TSK_NULL, NULL, 0); // node b2 = u4
CU_ASSERT(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tb.edges, 0.5, 1, 2, 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* All three collections get the same two sites, at 0.25 and 0.75. */
ret_id = tsk_site_table_add_row(&ta.sites, 0.25, "A", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_site_table_add_row(&ta.sites, 0.75, "X", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tb.sites, 0.25, "A", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tb.sites, 0.75, "X", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tu.sites, 0.25, "A", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tu.sites, 0.75, "X", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* Site 0, "B" at time 3.5 above u3 (a2): only in ta. */
ret_id = tsk_mutation_table_add_row(
&tu.mutations, 0, 3, TSK_NULL, 3.5, "B", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&ta.mutations, 0, 2, TSK_NULL, 3.5, "B", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* Site 0, "D" at time 1.5 above the shared node u2 (a1, b1): in both. */
ret_id = tsk_mutation_table_add_row(
&tu.mutations, 0, 2, TSK_NULL, 1.5, "D", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&ta.mutations, 0, 1, TSK_NULL, 1.5, "D", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tb.mutations, 0, 1, TSK_NULL, 1.5, "D", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* Site 0, "E" at time 1.2 above the shared node u2 (a1, b1): in both. */
ret_id = tsk_mutation_table_add_row(
&tu.mutations, 0, 2, TSK_NULL, 1.2, "E", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&ta.mutations, 0, 1, TSK_NULL, 1.2, "E", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tb.mutations, 0, 1, TSK_NULL, 1.2, "E", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* Site 0, "C" at time 0.5 above u0 (a0): only in ta. */
ret_id = tsk_mutation_table_add_row(
&tu.mutations, 0, 0, TSK_NULL, 0.5, "C", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&ta.mutations, 0, 0, TSK_NULL, 0.5, "C", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* Site 1, "Y" at time 2.4 above u4 (b2): only in tb. */
ret_id = tsk_mutation_table_add_row(
&tu.mutations, 1, 4, TSK_NULL, 2.4, "Y", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tb.mutations, 1, 2, TSK_NULL, 2.4, "Y", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* Site 1, "Z" at time 0.4 above u1 (b0): only in tb. */
ret_id = tsk_mutation_table_add_row(
&tu.mutations, 1, 1, TSK_NULL, 0.4, "Z", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tb.mutations, 1, 0, TSK_NULL, 0.4, "Z", 1, NULL, 0);
CU_ASSERT(ret_id >= 0);
/* Every mutation above was added with parent TSK_NULL; build the index and
 * compute the parent column for each collection. */
ret = tsk_table_collection_build_index(&ta, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_table_collection_compute_mutation_parents(&ta, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_table_collection_build_index(&tb, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_table_collection_compute_mutation_parents(&tb, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_table_collection_build_index(&tu, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_table_collection_compute_mutation_parents(&tu, 0);
CU_ASSERT_EQUAL(ret, 0);
/* Merge tb into ta, then reorder ta's nodes with node_order. */
ret = tsk_table_collection_union(&ta, &tb, node_mapping, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_table_collection_subset(&ta, node_order, 5, 0);
CU_ASSERT_EQUAL(ret, 0);
/* tu never had provenance rows added; empty ta's provenance table before
 * the full comparison below, which passes no "ignore" options. */
ret = tsk_provenance_table_clear(&ta.provenances);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_FATAL(tsk_table_collection_equals(&tu, &ta, 0));
tsk_table_collection_free(&ta);
tsk_table_collection_free(&tb);
tsk_table_collection_free(&tu);
}
static void
test_table_collection_union_errors(void)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
tsk_table_collection_t tables_copy;
tsk_id_t node_mapping[] = { 0, 1 };
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
ret = tsk_table_collection_init(&tables_copy, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
// two nodes, two pop, two ind, one edge, one site, one mut
ret_id
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tables.sites, 0.2, "A", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
// trigger diff histories error
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_mutation_table_add_row(
&tables_copy.mutations, 0, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNION_DIFF_HISTORIES);
// Migrations are not supported
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_migration_table_add_row(&tables_copy.migrations, 0, 1, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(tables_copy.migrations.num_rows, 1);
ret = tsk_table_collection_union(
&tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
// test out of bounds node_mapping
node_mapping[0] = -4;
node_mapping[1] = 6;
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNION_BAD_MAP);
// check integrity
node_mapping[0] = 0;
node_mapping[1] = 1;
ret_id = tsk_node_table_add_row(
&tables_copy.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret = tsk_table_collection_union(&tables, &tables_copy, node_mapping, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tsk_table_collection_free(&tables_copy);
tsk_table_collection_free(&tables);
}
/* Builds a table collection with at least one row in every table, a "test"
 * metadata schema on each table, top-level metadata, metadata schema and time
 * units, an edge index and one provenance record. It then calls
 * tsk_table_collection_clear() with ``options`` and checks what survives. */
static void
test_table_collection_clear_with_options(tsk_flags_t options)
{
int ret;
tsk_id_t ret_id;
tsk_table_collection_t tables;
/* Decode the option bits that change the expected outcome. */
bool clear_provenance = !!(options & TSK_CLEAR_PROVENANCE);
bool clear_metadata_schemas = !!(options & TSK_CLEAR_METADATA_SCHEMAS);
bool clear_ts_metadata = !!(options & TSK_CLEAR_TS_METADATA_AND_SCHEMA);
tsk_bookmark_t num_rows;
/* All fields not named here are zero-initialised, so every table is expected
 * to be empty; the single provenance row remains unless TSK_CLEAR_PROVENANCE
 * was requested. */
tsk_bookmark_t expected_rows = { .provenances = clear_provenance ? 0 : 1 };
/* 4 is the length of the "test" string stored below. */
tsk_size_t expected_len = clear_metadata_schemas ? 0 : 4;
tsk_size_t expected_len_ts = clear_ts_metadata ? 0 : 4;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
/* Add rows to every table. */
ret_id
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id
= tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tables.sites, 0.2, "A", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_mutation_table_add_row(
&tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 0, 0, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
/* Build the index so that we can check clear() removes it. */
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Give every table that has one a metadata schema. */
ret = tsk_individual_table_set_metadata_schema(&tables.individuals, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_node_table_set_metadata_schema(&tables.nodes, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_edge_table_set_metadata_schema(&tables.edges, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_migration_table_set_metadata_schema(&tables.migrations, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_site_table_set_metadata_schema(&tables.sites, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_mutation_table_set_metadata_schema(&tables.mutations, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_population_table_set_metadata_schema(&tables.populations, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Top-level (table collection) attributes. */
ret = tsk_table_collection_set_time_units(&tables, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_set_metadata(&tables, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_set_metadata_schema(&tables, "test", 4);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret_id = tsk_provenance_table_add_row(&tables.provenances, "today", 5, "test", 4);
CU_ASSERT_FATAL(ret_id >= 0);
/* Clear, then compare the row counts with the expectations computed above. */
ret = tsk_table_collection_clear(&tables, options);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_record_num_rows(&tables, &num_rows);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(num_rows.individuals, expected_rows.individuals);
CU_ASSERT_EQUAL(num_rows.nodes, expected_rows.nodes);
CU_ASSERT_EQUAL(num_rows.edges, expected_rows.edges);
CU_ASSERT_EQUAL(num_rows.migrations, expected_rows.migrations);
CU_ASSERT_EQUAL(num_rows.sites, expected_rows.sites);
CU_ASSERT_EQUAL(num_rows.mutations, expected_rows.mutations);
CU_ASSERT_EQUAL(num_rows.populations, expected_rows.populations);
CU_ASSERT_EQUAL(num_rows.provenances, expected_rows.provenances);
/* The index is expected to be gone whatever the options were. */
CU_ASSERT_FALSE(tsk_table_collection_has_index(&tables, 0));
/* Per-table schemas are governed by TSK_CLEAR_METADATA_SCHEMAS only. */
CU_ASSERT_EQUAL(tables.individuals.metadata_schema_length, expected_len);
CU_ASSERT_EQUAL(tables.nodes.metadata_schema_length, expected_len);
CU_ASSERT_EQUAL(tables.edges.metadata_schema_length, expected_len);
CU_ASSERT_EQUAL(tables.migrations.metadata_schema_length, expected_len);
CU_ASSERT_EQUAL(tables.sites.metadata_schema_length, expected_len);
CU_ASSERT_EQUAL(tables.mutations.metadata_schema_length, expected_len);
CU_ASSERT_EQUAL(tables.populations.metadata_schema_length, expected_len);
/* Top-level metadata and schema are governed by
 * TSK_CLEAR_TS_METADATA_AND_SCHEMA only. */
CU_ASSERT_EQUAL(tables.metadata_schema_length, expected_len_ts);
CU_ASSERT_EQUAL(tables.metadata_length, expected_len_ts);
/* Time units are expected to be kept for every option combination. */
CU_ASSERT_EQUAL(tables.time_units_length, 4);
tsk_table_collection_free(&tables);
}
static void
test_table_collection_clear(void)
{
test_table_collection_clear_with_options(0);
test_table_collection_clear_with_options(TSK_CLEAR_PROVENANCE);
test_table_collection_clear_with_options(TSK_CLEAR_METADATA_SCHEMAS);
test_table_collection_clear_with_options(TSK_CLEAR_TS_METADATA_AND_SCHEMA);
test_table_collection_clear_with_options(
TSK_CLEAR_PROVENANCE | TSK_CLEAR_METADATA_SCHEMAS);
test_table_collection_clear_with_options(
TSK_CLEAR_PROVENANCE | TSK_CLEAR_TS_METADATA_AND_SCHEMA);
test_table_collection_clear_with_options(
TSK_CLEAR_METADATA_SCHEMAS | TSK_CLEAR_TS_METADATA_AND_SCHEMA);
test_table_collection_clear_with_options(TSK_CLEAR_PROVENANCE
| TSK_CLEAR_METADATA_SCHEMAS
| TSK_CLEAR_TS_METADATA_AND_SCHEMA);
}
/* Checks that tsk_table_collection_takeset_indexes() installs caller-supplied
 * edge insertion/removal order arrays, and that it rejects a NULL array. */
static void
test_table_collection_takeset_indexes(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t t1, t2;
tsk_id_t *ins;
tsk_id_t *rem;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &t1, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Copy t1's index arrays into freshly allocated buffers, one entry per edge. */
ins = tsk_malloc(t1.edges.num_rows * sizeof(*ins));
CU_ASSERT_FATAL(ins != NULL);
rem = tsk_malloc(t1.edges.num_rows * sizeof(*rem));
CU_ASSERT_FATAL(rem != NULL);
memcpy(ins, t1.indexes.edge_insertion_order,
(size_t) (t1.edges.num_rows * sizeof(*ins)));
memcpy(
rem, t1.indexes.edge_removal_order, (size_t) (t1.edges.num_rows * sizeof(*rem)));
/* t2 is a copy of t1 whose index is dropped and then replaced by ins/rem. */
ret = tsk_table_collection_copy(&t1, &t2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_drop_index(&t2, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* NOTE(review): ins and rem are never freed in this function; presumably
 * takeset hands ownership to t2, which releases them in
 * tsk_table_collection_free() - confirm against the library. */
ret = tsk_table_collection_takeset_indexes(&t2, ins, rem);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* The installed index must match the one it was copied from. */
CU_ASSERT_EQUAL(
tsk_memcmp(t1.indexes.edge_insertion_order, t2.indexes.edge_insertion_order,
t1.edges.num_rows * sizeof(*ins)),
0);
CU_ASSERT_EQUAL(tsk_memcmp(t1.indexes.edge_removal_order,
t2.indexes.edge_removal_order, t1.edges.num_rows * sizeof(*rem)),
0);
/* Supplying only one of the two arrays is an error. */
ret = tsk_table_collection_takeset_indexes(&t2, ins, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_table_collection_takeset_indexes(&t2, NULL, rem);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
tsk_table_collection_free(&t1);
tsk_table_collection_free(&t2);
tsk_treeseq_free(&ts);
}
static void
test_table_collection_delete_older(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t t;
const char *mutations = "0 2 1 -1\n"
"0 2 0 0\n"
"1 0 1 -1\n"
"2 5 1 -1\n";
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
mutations, paper_ex_individuals, NULL, 0);
ret = tsk_treeseq_copy_tables(&ts, &t, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
/* Add some migrations */
tsk_population_table_add_row(&t.populations, NULL, 0);
tsk_population_table_add_row(&t.populations, NULL, 0);
tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 0, 1, 0.05, NULL, 0);
tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 1, 0, 0.09, NULL, 0);
tsk_migration_table_add_row(&t.migrations, 0, 10, 0, 0, 1, 0.10, NULL, 0);
CU_ASSERT_EQUAL(t.migrations.num_rows, 3);
/* Note: trees 1 and 2 are identical now
*
0.09┊ 5 ┊ 5 ┊ 5 ┊
┊ ┏┻┓ ┊ ┏━┻┓ ┊ ┏━┻┓ ┊
0.07┊ ┃ ┃ ┊ ┃ 4 ┊ ┃ 4 ┊
┊ ┃ ┃ ┊ ┃ ┏┻┓ ┊ ┃ ┏┻┓ ┊
0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊
0.00 2.00 7.00 10.00
*/
ret = tsk_table_collection_delete_older(&t, 0.09, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_init(&ts, &t, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 9);
/* Lost the mutation over 5 */
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 3);
/* We delete the migration at exactly 0.09. */
CU_ASSERT_EQUAL(tsk_treeseq_get_num_migrations(&ts), 1);
tsk_table_collection_free(&t);
tsk_treeseq_free(&ts);
}
/* Entry point of the test program: lists the test cases by name and passes
 * the list, together with the command-line arguments, to test_main(). The
 * value returned by test_main() becomes the exit status. */
int
main(int argc, char **argv)
{
CU_TestInfo tests[] = {
{ "test_node_table", test_node_table },
{ "test_node_table_update_row", test_node_table_update_row },
{ "test_node_table_keep_rows", test_node_table_keep_rows },
{ "test_node_table_takeset", test_node_table_takeset },
{ "test_edge_table", test_edge_table },
{ "test_edge_table_update_row", test_edge_table_update_row },
{ "test_edge_table_update_row_no_metadata",
test_edge_table_update_row_no_metadata },
{ "test_edge_table_keep_rows", test_edge_table_keep_rows },
{ "test_edge_table_keep_rows_no_metadata",
test_edge_table_keep_rows_no_metadata },
{ "test_edge_table_takeset", test_edge_table_takeset },
{ "test_edge_table_copy_semantics", test_edge_table_copy_semantics },
{ "test_edge_table_squash", test_edge_table_squash },
{ "test_edge_table_squash_multiple_parents",
test_edge_table_squash_multiple_parents },
{ "test_edge_table_squash_empty", test_edge_table_squash_empty },
{ "test_edge_table_squash_single_edge", test_edge_table_squash_single_edge },
{ "test_edge_table_squash_bad_intervals", test_edge_table_squash_bad_intervals },
{ "test_edge_table_squash_metadata", test_edge_table_squash_metadata },
{ "test_site_table", test_site_table },
{ "test_site_table_update_row", test_site_table_update_row },
{ "test_site_table_keep_rows", test_site_table_keep_rows },
{ "test_site_table_takeset", test_site_table_takeset },
{ "test_mutation_table", test_mutation_table },
{ "test_mutation_table_update_row", test_mutation_table_update_row },
{ "test_mutation_table_takeset", test_mutation_table_takeset },
{ "test_mutation_table_keep_rows", test_mutation_table_keep_rows },
{ "test_mutation_table_keep_rows_parent_references",
test_mutation_table_keep_rows_parent_references },
{ "test_migration_table", test_migration_table },
{ "test_migration_table_update_row", test_migration_table_update_row },
{ "test_migration_table_keep_rows", test_migration_table_keep_rows },
{ "test_migration_table_takeset", test_migration_table_takeset },
{ "test_individual_table", test_individual_table },
{ "test_individual_table_takeset", test_individual_table_takeset },
{ "test_individual_table_update_row", test_individual_table_update_row },
{ "test_individual_table_keep_rows", test_individual_table_keep_rows },
{ "test_individual_table_keep_rows_parent_references",
test_individual_table_keep_rows_parent_references },
{ "test_population_table", test_population_table },
{ "test_population_table_update_row", test_population_table_update_row },
{ "test_population_table_keep_rows", test_population_table_keep_rows },
{ "test_population_table_takeset", test_population_table_takeset },
{ "test_provenance_table", test_provenance_table },
{ "test_provenance_table_update_row", test_provenance_table_update_row },
{ "test_provenance_table_keep_rows", test_provenance_table_keep_rows },
{ "test_provenance_table_takeset", test_provenance_table_takeset },
{ "test_table_size_increments", test_table_size_increments },
{ "test_table_expansion", test_table_expansion },
{ "test_ragged_expansion", test_ragged_expansion },
{ "test_table_collection_equals_options", test_table_collection_equals_options },
{ "test_table_collection_simplify_errors",
test_table_collection_simplify_errors },
{ "test_table_collection_time_units", test_table_collection_time_units },
{ "test_table_collection_reference_sequence",
test_table_collection_reference_sequence },
{ "test_table_collection_has_reference_sequence",
test_table_collection_has_reference_sequence },
{ "test_table_collection_metadata", test_table_collection_metadata },
{ "test_reference_sequence_state_machine",
test_reference_sequence_state_machine },
{ "test_reference_sequence_take", test_reference_sequence_take },
{ "test_reference_sequence", test_reference_sequence },
{ "test_simplify_tables_drops_indexes", test_simplify_tables_drops_indexes },
{ "test_simplify_empty_tables", test_simplify_empty_tables },
{ "test_simplify_metadata", test_simplify_metadata },
{ "test_link_ancestors_no_edges", test_link_ancestors_no_edges },
{ "test_link_ancestors_input_errors", test_link_ancestors_input_errors },
{ "test_link_ancestors_single_tree", test_link_ancestors_single_tree },
{ "test_link_ancestors_paper", test_link_ancestors_paper },
{ "test_link_ancestors_samples_and_ancestors_overlap",
test_link_ancestors_samples_and_ancestors_overlap },
{ "test_link_ancestors_multiple_to_single_tree",
test_link_ancestors_multiple_to_single_tree },
{ "test_ibd_segments_debug", test_ibd_segments_debug },
{ "test_ibd_segments_caterpillar_tree", test_ibd_segments_caterpillar_tree },
{ "test_ibd_segments_single_tree", test_ibd_segments_single_tree },
{ "test_ibd_segments_single_tree_options",
test_ibd_segments_single_tree_options },
{ "test_ibd_segments_multiple_trees", test_ibd_segments_multiple_trees },
{ "test_ibd_segments_empty_result", test_ibd_segments_empty_result },
{ "test_ibd_segments_min_span_max_time", test_ibd_segments_min_span_max_time },
{ "test_ibd_segments_single_tree_between",
test_ibd_segments_single_tree_between },
{ "test_ibd_segments_samples_are_descendants",
test_ibd_segments_samples_are_descendants },
{ "test_ibd_segments_multiple_ibd_paths", test_ibd_segments_multiple_ibd_paths },
{ "test_ibd_segments_odd_topologies", test_ibd_segments_odd_topologies },
{ "test_ibd_segments_errors", test_ibd_segments_errors },
{ "test_sorter_interface", test_sorter_interface },
{ "test_sort_tables_canonical_errors", test_sort_tables_canonical_errors },
{ "test_sort_tables_canonical", test_sort_tables_canonical },
{ "test_sort_tables_drops_indexes", test_sort_tables_drops_indexes },
{ "test_sort_tables_edge_metadata", test_sort_tables_edge_metadata },
{ "test_sort_tables_errors", test_sort_tables_errors },
{ "test_sort_tables_individuals", test_sort_tables_individuals },
{ "test_sort_tables_mutation_times", test_sort_tables_mutation_times },
{ "test_sort_tables_mutations", test_sort_tables_mutations },
{ "test_sort_tables_migrations", test_sort_tables_migrations },
{ "test_sort_tables_no_edge_metadata", test_sort_tables_no_edge_metadata },
{ "test_sort_tables_offsets", test_sort_tables_offsets },
{ "test_edge_update_invalidates_index", test_edge_update_invalidates_index },
{ "test_copy_table_collection", test_copy_table_collection },
{ "test_dump_unindexed", test_dump_unindexed },
{ "test_dump_load_empty", test_dump_load_empty },
{ "test_dump_load_unsorted", test_dump_load_unsorted },
{ "test_dump_load_metadata_schema", test_dump_load_metadata_schema },
{ "test_dump_fail_no_file", test_dump_fail_no_file },
{ "test_load_reindex", test_load_reindex },
{ "test_table_overflow", test_table_overflow },
{ "test_column_overflow", test_column_overflow },
{ "test_table_collection_check_integrity",
test_table_collection_check_integrity },
{ "test_table_collection_check_integrity_no_populations",
test_table_collection_check_integrity_no_populations },
{ "test_table_collection_check_integrity_bad_indexes_example",
test_table_collection_check_integrity_bad_indexes_example },
{ "test_table_collection_check_integrity_bad_indexes",
test_table_collection_check_integrity_bad_indexes },
{ "test_check_integrity_bad_mutation_parent_topology",
test_check_integrity_bad_mutation_parent_topology },
{ "test_table_collection_compute_mutation_parents_tolerates_invalid_input",
test_table_collection_compute_mutation_parents_tolerates_invalid_input },
{ "test_table_collection_compute_mutation_parents_restores_on_error",
test_table_collection_compute_mutation_parents_restores_on_error },
{ "test_table_collection_subset", test_table_collection_subset },
{ "test_table_collection_subset_unsorted",
test_table_collection_subset_unsorted },
{ "test_table_collection_subset_errors", test_table_collection_subset_errors },
{ "test_table_collection_union", test_table_collection_union },
{ "test_table_collection_disjoint_union", test_table_collection_disjoint_union },
{ "test_table_collection_union_middle_merge",
test_table_collection_union_middle_merge },
{ "test_table_collection_union_errors", test_table_collection_union_errors },
{ "test_table_collection_clear", test_table_collection_clear },
{ "test_table_collection_takeset_indexes",
test_table_collection_takeset_indexes },
{ "test_table_collection_delete_older", test_table_collection_delete_older },
/* NULL entry marking the end of the list. */
{ NULL, NULL },
};
return test_main(tests, argc, argv);
}
================================================
FILE: c/tests/test_trees.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
#include
#include
#include
#include
/*=======================================================
* Verification utilities.
*======================================================*/
/* Checks that the specified trees are topologically equivalent, i.e., that
 * they represent the same tree, without checking state specific to seeking.
 * Both trees must come from the same tree sequence. Every check is fatal. */
static void
check_trees_equal(tsk_tree_t *self, tsk_tree_t *other)
{
/* Number of entries compared in the parent arrays below. */
tsk_size_t N = self->num_nodes;
CU_ASSERT_FATAL(self->tree_sequence == other->tree_sequence);
CU_ASSERT_FATAL(self->index == other->index);
CU_ASSERT_FATAL(self->interval.left == other->interval.left);
CU_ASSERT_FATAL(self->interval.right == other->interval.right);
CU_ASSERT_FATAL(self->sites_length == other->sites_length);
CU_ASSERT_FATAL(self->sites == other->sites);
CU_ASSERT_FATAL(self->samples == other->samples);
CU_ASSERT_FATAL(self->num_edges == other->num_edges);
/* The topology itself: bytewise comparison of the parent arrays. */
CU_ASSERT_FATAL(tsk_memcmp(self->parent, other->parent, N * sizeof(tsk_id_t)) == 0);
/* Finally, the library's own equality test must agree. */
CU_ASSERT_FATAL(tsk_tree_equals(self, other));
}
/* Checks that two trees are identical: everything check_trees_equal()
 * verifies, plus the seek state (left_index, right_index, direction), the
 * child/sibling/num_children/edge arrays and, when present, the sample count
 * and sample list arrays. Optional arrays must be either present in both
 * trees or absent from both. Every check is fatal. */
static void
check_trees_identical(tsk_tree_t *self, tsk_tree_t *other)
{
    tsk_size_t N = self->num_nodes;

    check_trees_equal(self, other);
    CU_ASSERT_FATAL(self->left_index == other->left_index);
    CU_ASSERT_FATAL(self->right_index == other->right_index);
    CU_ASSERT_FATAL(self->direction == other->direction);

    CU_ASSERT_FATAL(
        tsk_memcmp(self->left_child, other->left_child, N * sizeof(tsk_id_t)) == 0);
    CU_ASSERT_FATAL(
        tsk_memcmp(self->right_child, other->right_child, N * sizeof(tsk_id_t)) == 0);
    CU_ASSERT_FATAL(
        tsk_memcmp(self->left_sib, other->left_sib, N * sizeof(tsk_id_t)) == 0);
    CU_ASSERT_FATAL(
        tsk_memcmp(self->right_sib, other->right_sib, N * sizeof(tsk_id_t)) == 0);
    CU_ASSERT_FATAL(
        tsk_memcmp(self->num_children, other->num_children, N * sizeof(tsk_id_t)) == 0);
    CU_ASSERT_FATAL(tsk_memcmp(self->edge, other->edge, N * sizeof(tsk_id_t)) == 0);

    /* Sample counts: check presence first, then contents. */
    CU_ASSERT_EQUAL_FATAL(self->num_samples == NULL, other->num_samples == NULL);
    CU_ASSERT_EQUAL_FATAL(
        self->num_tracked_samples == NULL, other->num_tracked_samples == NULL);
    if (self->num_samples != NULL) {
        CU_ASSERT_FATAL(tsk_memcmp(self->num_samples, other->num_samples,
                            N * sizeof(*self->num_samples))
                        == 0);
        CU_ASSERT_FATAL(tsk_memcmp(self->num_tracked_samples, other->num_tracked_samples,
                            N * sizeof(*self->num_tracked_samples))
                        == 0);
    }

    /* Sample lists: check presence first, then contents. Each array is
     * compared with its counterpart in the other tree; right_sample must be
     * checked against other->right_sample, not other->left_sample. */
    CU_ASSERT_EQUAL_FATAL(self->left_sample == NULL, other->left_sample == NULL);
    CU_ASSERT_EQUAL_FATAL(self->right_sample == NULL, other->right_sample == NULL);
    CU_ASSERT_EQUAL_FATAL(self->next_sample == NULL, other->next_sample == NULL);
    if (self->left_sample != NULL) {
        CU_ASSERT_FATAL(tsk_memcmp(self->left_sample, other->left_sample,
                            N * sizeof(*self->left_sample))
                        == 0);
        CU_ASSERT_FATAL(tsk_memcmp(self->right_sample, other->right_sample,
                            N * sizeof(*self->right_sample))
                        == 0);
        /* next_sample has one entry per sample rather than one per node. */
        CU_ASSERT_FATAL(
            tsk_memcmp(self->next_sample, other->next_sample,
                self->tree_sequence->num_samples * sizeof(*self->next_sample))
            == 0);
    }
}
/* Checks that tsk_table_collection_compute_mutation_parents() reproduces the
 * mutation parent column of ``ts``: the column is saved, overwritten with
 * 0xff bytes, recomputed on a copy of the tables and compared bytewise with
 * the saved values. */
static void
verify_compute_mutation_parents(tsk_treeseq_t *ts)
{
int ret;
/* Size in bytes of the mutation parent column. */
tsk_size_t size = tsk_treeseq_get_num_mutations(ts) * sizeof(tsk_id_t);
tsk_id_t *parent = tsk_malloc(size);
tsk_table_collection_t tables;
CU_ASSERT_FATAL(parent != NULL);
ret = tsk_treeseq_copy_tables(ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Keep the expected parents before scribbling over the column. */
tsk_memcpy(parent, tables.mutations.parent, size);
/* tsk_table_collection_print_state(&tables, stdout); */
/* Make sure the tables are actually updated */
tsk_memset(tables.mutations.parent, 0xff, size);
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(tsk_memcmp(parent, tables.mutations.parent, size), 0);
/* printf("after\n"); */
/* tsk_table_collection_print_state(&tables, stdout); */
free(parent);
tsk_table_collection_free(&tables);
}
/* Checks that tsk_table_collection_compute_mutation_times() reproduces the
 * mutation time column of ``ts``: the column is saved, every entry is reset
 * to TSK_UNKNOWN_TIME, the times are recomputed on a copy of the tables and
 * the result is compared bytewise with the saved values. */
static void
verify_compute_mutation_times(tsk_treeseq_t *ts)
{
    int ret;
    tsk_size_t j;
    tsk_size_t num_mutations = tsk_treeseq_get_num_mutations(ts);
    /* The time column holds doubles, so the saved copy must be sized (and
     * typed) accordingly; sizing it by tsk_id_t would save and compare only
     * part of the column. */
    double *time = NULL;
    tsk_size_t size = num_mutations * sizeof(*time);
    tsk_table_collection_t tables;

    time = tsk_malloc(size);
    CU_ASSERT_FATAL(time != NULL);
    ret = tsk_treeseq_copy_tables(ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_memcpy(time, tables.mutations.time, size);

    /* Time should be set to TSK_UNKNOWN_TIME before computing. Iterate over
     * elements, not bytes, so that we never write past the end of the
     * column. */
    for (j = 0; j < num_mutations; j++) {
        tables.mutations.time[j] = TSK_UNKNOWN_TIME;
    }
    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_memcmp(time, tables.mutations.time, size), 0);

    free(time);
    tsk_table_collection_free(&tables);
}
/* Checks that the node list reported for every individual in ``ts`` is
 * consistent with the node table: each listed node ID is below the number of
 * nodes, and the node table's ``individual`` column for that node points back
 * to the individual. */
static void
verify_individual_nodes(tsk_treeseq_t *ts)
{
int ret;
tsk_individual_t individual;
tsk_id_t k;
tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(ts);
tsk_size_t num_individuals = tsk_treeseq_get_num_individuals(ts);
tsk_size_t j;
/* k runs over individual IDs, j over the nodes of individual k. */
for (k = 0; k < (tsk_id_t) num_individuals; k++) {
ret = tsk_treeseq_get_individual(ts, k, &individual);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (j = 0; j < individual.nodes_length; j++) {
/* Fatal: the node ID is used as an array index on the next line. */
CU_ASSERT_FATAL(individual.nodes[j] < (tsk_id_t) num_nodes);
CU_ASSERT_EQUAL_FATAL(k, ts->tables->nodes.individual[individual.nodes[j]]);
}
}
}
/* Checks tsk_tree_position_t against known parent arrays.
 *
 * ``tree_parents`` holds ``num_trees`` consecutive arrays of N parent IDs,
 * where N is the number of nodes in ``ts``; array ``index`` describes tree
 * ``index``. A working ``parent`` array is updated from the edge ranges that
 * the tree position reports and compared with the known parents in four
 * passes:
 *   1. a forward pass using tsk_tree_position_next();
 *   2. a backward pass using tsk_tree_position_prev();
 *   3. a seek_forward to each tree index from a newly initialised position;
 *   4. a seek_backward to each tree index from a newly initialised position.
 */
static void
verify_tree_pos(const tsk_treeseq_t *ts, tsk_size_t num_trees, tsk_id_t *tree_parents)
{
int ret;
const tsk_size_t N = tsk_treeseq_get_num_nodes(ts);
const tsk_id_t *edges_parent = ts->tables->edges.parent;
const tsk_id_t *edges_child = ts->tables->edges.child;
const double *restrict edges_left = ts->tables->edges.left;
const double *restrict edges_right = ts->tables->edges.right;
tsk_tree_position_t tree_pos;
tsk_id_t *known_parent;
tsk_id_t *parent = tsk_malloc(N * sizeof(*parent));
tsk_id_t u, index, j, e;
bool valid;
CU_ASSERT_FATAL(parent != NULL);
ret = tsk_tree_position_init(&tree_pos, ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Start from the empty tree. */
for (u = 0; u < (tsk_id_t) N; u++) {
parent[u] = TSK_NULL;
}
/* Pass 1: forward. For each tree, remove the edges in the "out" range and
 * then insert the edges in the "in" range; both ranges are walked upwards
 * from start to stop (exclusive). */
for (index = 0; index < (tsk_id_t) num_trees; index++) {
known_parent = tree_parents + N * (tsk_size_t) index;
valid = tsk_tree_position_next(&tree_pos);
CU_ASSERT_TRUE(valid);
CU_ASSERT_EQUAL(index, tree_pos.index);
for (j = tree_pos.out.start; j < tree_pos.out.stop; j++) {
e = tree_pos.out.order[j];
parent[edges_child[e]] = TSK_NULL;
}
for (j = tree_pos.in.start; j < tree_pos.in.stop; j++) {
e = tree_pos.in.order[j];
parent[edges_child[e]] = edges_parent[e];
}
for (u = 0; u < (tsk_id_t) N; u++) {
CU_ASSERT_EQUAL(parent[u], known_parent[u]);
}
}
/* Stepping past the last tree reports false; applying the final "out"
 * range must leave the working tree empty again. */
valid = tsk_tree_position_next(&tree_pos);
CU_ASSERT_FALSE(valid);
for (j = tree_pos.out.start; j < tree_pos.out.stop; j++) {
e = tree_pos.out.order[j];
parent[edges_child[e]] = TSK_NULL;
}
for (u = 0; u < (tsk_id_t) N; u++) {
CU_ASSERT_EQUAL(parent[u], TSK_NULL);
}
/* Pass 2: backward. Same procedure, but the ranges are walked downwards
 * from start to stop (exclusive). */
for (index = (tsk_id_t) num_trees - 1; index >= 0; index--) {
known_parent = tree_parents + N * (tsk_size_t) index;
valid = tsk_tree_position_prev(&tree_pos);
CU_ASSERT_TRUE(valid);
CU_ASSERT_EQUAL(index, tree_pos.index);
for (j = tree_pos.out.start; j > tree_pos.out.stop; j--) {
e = tree_pos.out.order[j];
parent[edges_child[e]] = TSK_NULL;
}
for (j = tree_pos.in.start; j > tree_pos.in.stop; j--) {
CU_ASSERT_FATAL(j >= 0);
e = tree_pos.in.order[j];
parent[edges_child[e]] = edges_parent[e];
}
for (u = 0; u < (tsk_id_t) N; u++) {
CU_ASSERT_EQUAL(parent[u], known_parent[u]);
}
}
/* Stepping before the first tree reports false and empties the tree. */
valid = tsk_tree_position_prev(&tree_pos);
CU_ASSERT_FALSE(valid);
for (j = tree_pos.out.start; j > tree_pos.out.stop; j--) {
e = tree_pos.out.order[j];
parent[edges_child[e]] = TSK_NULL;
}
for (u = 0; u < (tsk_id_t) N; u++) {
CU_ASSERT_EQUAL(parent[u], TSK_NULL);
}
/* Pass 3: seek forward to each tree from a newly initialised position.
 * Only edges whose [left, right) interval contains the left coordinate of
 * the tree are applied to the (empty) working tree.
 * NOTE(review): tree_pos is re-initialised here without the position
 * initialised at the top of the function having been freed - confirm that
 * this cannot leak. */
for (index = 0; index < (tsk_id_t) num_trees; index++) {
known_parent = tree_parents + N * (tsk_size_t) index;
ret = tsk_tree_position_init(&tree_pos, ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_position_seek_forward(&tree_pos, index);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(index, tree_pos.index);
for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {
e = tree_pos.in.order[j];
if (edges_left[e] <= tree_pos.interval.left
&& tree_pos.interval.left < edges_right[e]) {
parent[edges_child[e]] = edges_parent[e];
}
}
for (u = 0; u < (tsk_id_t) N; u++) {
CU_ASSERT_EQUAL(parent[u], known_parent[u]);
}
tsk_tree_position_free(&tree_pos);
/* Reset the working tree for the next seek. */
for (u = 0; u < (tsk_id_t) N; u++) {
parent[u] = TSK_NULL;
}
}
/* NOTE(review): tree_pos is used here after tsk_tree_position_free() was
 * called on it in the loop above; presumably free leaves the position state
 * readable - confirm. The position is expected to be on the last tree, so
 * next() must report false. */
valid = tsk_tree_position_next(&tree_pos);
CU_ASSERT_FALSE(valid);
/* Pass 4: seek backward to each tree from a newly initialised position.
 * Only edges whose interval reaches the right coordinate of the tree are
 * applied. */
for (index = (tsk_id_t) num_trees - 1; index >= 0; index--) {
known_parent = tree_parents + N * (tsk_size_t) index;
ret = tsk_tree_position_init(&tree_pos, ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_position_seek_backward(&tree_pos, index);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(index, tree_pos.index);
for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {
e = tree_pos.in.order[j];
if (edges_right[e] >= tree_pos.interval.right
&& tree_pos.interval.right > edges_left[e]) {
parent[edges_child[e]] = edges_parent[e];
}
}
for (u = 0; u < (tsk_id_t) N; u++) {
CU_ASSERT_EQUAL(parent[u], known_parent[u]);
}
for (u = 0; u < (tsk_id_t) N; u++) {
parent[u] = TSK_NULL;
}
tsk_tree_position_free(&tree_pos);
}
tsk_safe_free(parent);
}
/* Walks every tree of ``ts`` in order and checks it against known values.
 *
 * ``num_trees`` is the expected number of trees. ``parents`` holds
 * ``num_trees`` consecutive arrays of parent IDs, one entry per node of
 * ``ts``; array j describes tree j.
 *
 * For each tree this checks the index, the left coordinate against the
 * breakpoints, every node's parent, the edge count, and that site and
 * mutation IDs are numbered consecutively across trees. It also checks that
 * seeking to the tree with TSK_SEEK_SKIP, from both the first and the last
 * tree, gives an equal tree. Finally the same data is checked through
 * verify_tree_pos(). */
static void
verify_trees(tsk_treeseq_t *ts, tsk_size_t num_trees, tsk_id_t *parents)
{
    int ret;
    tsk_id_t u, j, v;
    uint32_t mutation_index, site_index;
    tsk_size_t k, l, tree_sites_length;
    const tsk_site_t *sites = NULL;
    tsk_tree_t tree, skip_tree;
    tsk_size_t num_edges;
    tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(ts);
    tsk_size_t num_sites = tsk_treeseq_get_num_sites(ts);
    tsk_size_t num_mutations = tsk_treeseq_get_num_mutations(ts);
    const double *breakpoints = tsk_treeseq_get_breakpoints(ts);

    /* Both trees are dereferenced throughout the rest of the function, so a
     * failed initialisation of either one must stop the test here. */
    ret = tsk_tree_init(&tree, ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_init(&skip_tree, ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(ts), num_trees);
    CU_ASSERT_EQUAL(tree.index, -1);

    site_index = 0;
    mutation_index = 0;
    j = 0;
    /* ret is reused inside the loop body; the loop condition always sees the
     * value assigned by tsk_tree_first()/tsk_tree_next(). */
    for (ret = tsk_tree_first(&tree); ret == TSK_TREE_OK; ret = tsk_tree_next(&tree)) {
        CU_ASSERT_EQUAL(j, (tsk_id_t) tree.index);
        tsk_tree_print_state(&tree, _devnull);
        /* tsk_tree_print_state(&tree, stdout); */
        CU_ASSERT_EQUAL(tree.interval.left, breakpoints[j]);

        /* Every non-null parent corresponds to one edge in this tree. */
        num_edges = 0;
        for (u = 0; u < (tsk_id_t) num_nodes; u++) {
            ret = tsk_tree_get_parent(&tree, u, &v);
            CU_ASSERT_EQUAL(ret, 0);
            CU_ASSERT_EQUAL(v, parents[j * (tsk_id_t) num_nodes + u]);
            if (v != TSK_NULL) {
                num_edges++;
            }
        }
        CU_ASSERT_EQUAL(num_edges, tree.num_edges);

        /* Site and mutation IDs must be consecutive across all trees. */
        ret = tsk_tree_get_sites(&tree, &sites, &tree_sites_length);
        CU_ASSERT_EQUAL(ret, 0);
        for (k = 0; k < tree_sites_length; k++) {
            CU_ASSERT_EQUAL(sites[k].id, (tsk_id_t) site_index);
            for (l = 0; l < sites[k].mutations_length; l++) {
                CU_ASSERT_EQUAL(sites[k].mutations[l].id, (tsk_id_t) mutation_index);
                CU_ASSERT_EQUAL(sites[k].mutations[l].site, (tsk_id_t) site_index);
                mutation_index++;
            }
            site_index++;
        }

        /* Check the skip tree: seek to this tree starting from the first
         * tree and then from the last tree. */
        ret = tsk_tree_first(&skip_tree);
        CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
        ret = tsk_tree_seek(&skip_tree, breakpoints[j], TSK_SEEK_SKIP);
        CU_ASSERT_EQUAL(ret, 0);
        /* Calling print_state here also verifies the integrity of the tree */
        tsk_tree_print_state(&skip_tree, _devnull);
        check_trees_equal(&tree, &skip_tree);

        ret = tsk_tree_last(&skip_tree);
        CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
        ret = tsk_tree_seek(&skip_tree, breakpoints[j], TSK_SEEK_SKIP);
        CU_ASSERT_EQUAL(ret, 0);
        tsk_tree_print_state(&skip_tree, _devnull);
        check_trees_equal(&tree, &skip_tree);

        j++;
    }
    /* The iteration must have ended normally (0) rather than with an error. */
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(site_index, num_sites);
    CU_ASSERT_EQUAL(mutation_index, num_mutations);
    CU_ASSERT_EQUAL(tree.index, -1);
    /* The breakpoint after the last tree is the sequence length. */
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(ts), breakpoints[j]);

    tsk_tree_free(&tree);
    tsk_tree_free(&skip_tree);

    verify_tree_pos(ts, num_trees, parents);
}
/* Returns a newly allocated array holding a copy of every tree in ``ts``,
 * indexed by tree index. Each copy is checked against the tree it was copied
 * from. The array and the trees in it are not released here; that is left to
 * the caller. */
static tsk_tree_t *
get_tree_list(tsk_treeseq_t *ts)
{
int ret;
tsk_tree_t t, *trees;
tsk_size_t num_trees;
num_trees = tsk_treeseq_get_num_trees(ts);
ret = tsk_tree_init(&t, ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
trees = tsk_malloc(num_trees * sizeof(tsk_tree_t));
CU_ASSERT_FATAL(trees != NULL);
/* ret is reused inside the loop body; the loop condition always sees the
 * value assigned by tsk_tree_first()/tsk_tree_next(). */
for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {
/* Fatal: t.index is used to index the array just below. */
CU_ASSERT_FATAL(t.index < (tsk_id_t) num_trees);
/* Options 0: trees[t.index] is initialised by the copy. */
ret = tsk_tree_copy(&t, &trees[t.index], 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
check_trees_equal(&trees[t.index], &t);
/* Make sure the left and right coordinates are also OK */
CU_ASSERT_EQUAL(trees[t.index].interval.left, t.interval.left);
CU_ASSERT_EQUAL(trees[t.index].interval.right, t.interval.right);
}
/* The iteration must have ended normally (0) rather than with an error. */
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_free(&t);
CU_ASSERT_EQUAL_FATAL(ret, 0);
return trees;
}
/* Checks that moving through the trees of ``ts`` with tsk_tree_first(),
 * tsk_tree_last(), tsk_tree_next() and tsk_tree_prev() always lands on a tree
 * equal to the reference copy returned by get_tree_list(), whatever the
 * direction and order of the moves. */
static void
verify_tree_next_prev(tsk_treeseq_t *ts)
{
int ret;
tsk_tree_t *trees, t;
tsk_id_t j;
tsk_id_t num_trees = (tsk_id_t) tsk_treeseq_get_num_trees(ts);
/* Reference copies, indexed by tree index. */
trees = get_tree_list(ts);
ret = tsk_tree_init(&t, ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Single forward pass */
j = 0;
for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {
CU_ASSERT_EQUAL_FATAL(j, t.index);
check_trees_equal(&t, &trees[t.index]);
j++;
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(j, num_trees);
/* Single reverse pass */
j = num_trees;
for (ret = tsk_tree_last(&t); ret == TSK_TREE_OK; ret = tsk_tree_prev(&t)) {
CU_ASSERT_EQUAL_FATAL(j - 1, t.index);
check_trees_equal(&t, &trees[t.index]);
j--;
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(j, 0);
/* Full forward, then reverse */
j = 0;
for (ret = tsk_tree_first(&t); ret == TSK_TREE_OK; ret = tsk_tree_next(&t)) {
CU_ASSERT_EQUAL_FATAL(j, t.index);
check_trees_equal(&t, &trees[t.index]);
j++;
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(j, num_trees);
/* prev() after running off the right-hand end starts again at the last tree. */
while ((ret = tsk_tree_prev(&t)) == TSK_TREE_OK) {
CU_ASSERT_EQUAL_FATAL(j - 1, t.index);
check_trees_equal(&t, &trees[t.index]);
j--;
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(j, 0);
CU_ASSERT_EQUAL_FATAL(t.index, -1);
/* Full reverse then forward */
j = num_trees;
for (ret = tsk_tree_last(&t); ret == TSK_TREE_OK; ret = tsk_tree_prev(&t)) {
CU_ASSERT_EQUAL_FATAL(j - 1, t.index);
check_trees_equal(&t, &trees[t.index]);
j--;
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(j, 0);
/* next() after running off the left-hand end starts again at the first tree. */
while ((ret = tsk_tree_next(&t)) == TSK_TREE_OK) {
CU_ASSERT_EQUAL_FATAL(j, t.index);
check_trees_equal(&t, &trees[t.index]);
j++;
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(j, num_trees);
CU_ASSERT_EQUAL_FATAL(t.index, -1);
/* Do a zigzagging traversal */
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
/* Round j goes right to tree num_trees - j and then back left to tree j. */
for (j = 1; j < TSK_MIN(10, num_trees / 2); j++) {
while (t.index < num_trees - j) {
ret = tsk_tree_next(&t);
CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
}
CU_ASSERT_EQUAL_FATAL(t.index, num_trees - j);
check_trees_equal(&t, &trees[t.index]);
while (t.index > j) {
ret = tsk_tree_prev(&t);
CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
}
CU_ASSERT_EQUAL_FATAL(t.index, j);
check_trees_equal(&t, &trees[t.index]);
}
ret = tsk_tree_clear(&t);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Calling next() on a cleared tree should be the same as first() */
j = 0;
while ((ret = tsk_tree_next(&t)) == TSK_TREE_OK) {
CU_ASSERT_EQUAL_FATAL(j, t.index);
check_trees_equal(&t, &trees[t.index]);
j++;
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(j, num_trees);
/* Free and re-initialise the tree so that the next check starts from a
 * tree that has not been positioned yet. */
ret = tsk_tree_free(&t);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_init(&t, ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Calling prev() on an uninitialised tree should be the same as last() */
j = num_trees;
while ((ret = tsk_tree_prev(&t)) == TSK_TREE_OK) {
CU_ASSERT_EQUAL_FATAL(j - 1, t.index);
check_trees_equal(&t, &trees[t.index]);
j--;
}
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(j, 0);
/* Free the trees. */
ret = tsk_tree_free(&t);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (j = 0; j < (tsk_id_t) tsk_treeseq_get_num_trees(ts); j++) {
tsk_tree_free(&trees[j]);
}
free(trees);
}
/* Cross-check the edge array of a single tree against the edge table.
 *
 * Every slot c in [0, num_nodes] is inspected (the final slot lies one past
 * the real node IDs). A slot either holds TSK_NULL, in which case c must have
 * no parent in this tree, or it names an edge-table row whose child is c and
 * whose parent is tree->parent[c]. The number of non-null slots must equal
 * tree->num_edges. */
static void
verify_edge_array_single_tree(
    tsk_tree_t *tree, tsk_edge_table_t *edge_table, tsk_size_t num_nodes)
{
    tsk_size_t count_edges = 0;
    tsk_edge_t edge;
    tsk_id_t edge_id;
    tsk_id_t c = 0;
    int ret;

    while (c <= (tsk_id_t) num_nodes) {
        edge_id = tree->edge[c];
        if (edge_id != TSK_NULL) {
            /* The referenced row must describe exactly the c -> parent link. */
            ret = tsk_edge_table_get_row(edge_table, edge_id, &edge);
            CU_ASSERT_EQUAL_FATAL(ret, 0);
            CU_ASSERT_EQUAL(edge.id, edge_id);
            CU_ASSERT_EQUAL(edge.parent, tree->parent[c]);
            CU_ASSERT_EQUAL(edge.child, c);
            count_edges++;
        } else {
            /* c is either the (virtual) root, or is not associated with an
             * edge along this tree */
            CU_ASSERT_EQUAL(tree->parent[c], TSK_NULL);
        }
        c++;
    }
    CU_ASSERT_EQUAL(count_edges, tree->num_edges);
}
/* Walk every tree of ts and validate its edge array, also checking that the
 * array is all TSK_NULL both before the first tree is loaded and after the
 * iteration has run off the end of the sequence. */
static void
verify_edge_array_trees(tsk_treeseq_t *ts)
{
    tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(ts);
    tsk_edge_table_t edge_table = ts->tables->edges;
    tsk_tree_t t;
    tsk_id_t c;
    int ret;

    ret = tsk_tree_init(&t, ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Freshly initialised tree: no edges recorded for any slot. */
    c = 0;
    while (c <= (tsk_id_t) num_nodes) {
        CU_ASSERT_EQUAL(t.edge[c], TSK_NULL);
        c++;
    }

    /* Each tree in the sequence, left to right. */
    ret = tsk_tree_first(&t);
    while (ret == TSK_TREE_OK) {
        verify_edge_array_single_tree(&t, &edge_table, num_nodes);
        ret = tsk_tree_next(&t);
    }
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Iteration finished: the edge array must be cleared again. */
    c = 0;
    while (c <= (tsk_id_t) num_nodes) {
        CU_ASSERT_EQUAL(t.edge[c], TSK_NULL);
        c++;
    }

    tsk_tree_free(&t);
}
/* When we keep all sites in simplify, the genotypes for the subset of the
 * samples should be the same as the original.
 *
 * ts:          the tree sequence that was simplified.
 * subset:      the simplified result (all sites kept).
 * samples:     node IDs in ts that were simplified to; samples[k] is compared
 *              against genotype column k of subset.
 * num_samples: number of entries in samples.
 */
static void
verify_simplify_genotypes(tsk_treeseq_t *ts, tsk_treeseq_t *subset,
    const tsk_id_t *samples, tsk_size_t num_samples)
{
    int ret;
    tsk_size_t m = tsk_treeseq_get_num_sites(ts);
    tsk_vargen_t vargen, subset_vargen;
    tsk_variant_t *variant, *subset_variant;
    tsk_size_t j, k;
    int32_t a1, a2;
    const tsk_id_t *sample_index_map;

    sample_index_map = tsk_treeseq_get_sample_index_map(ts);

    /* Both generators must use the same missing-data policy. If only the
     * subset used TSK_ISOLATED_NOT_MISSING, an isolated sample in ts would
     * decode as a negative (missing) genotype and be used as an array index
     * below. */
    ret = tsk_vargen_init(&vargen, ts, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_vargen_init(
        &subset_vargen, subset, NULL, 0, NULL, TSK_ISOLATED_NOT_MISSING);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(m, tsk_treeseq_get_num_sites(subset));

    for (j = 0; j < m; j++) {
        ret = tsk_vargen_next(&vargen, &variant);
        CU_ASSERT_EQUAL_FATAL(ret, 1);
        ret = tsk_vargen_next(&subset_vargen, &subset_variant);
        CU_ASSERT_EQUAL_FATAL(ret, 1);
        CU_ASSERT_EQUAL(variant->site.id, (tsk_id_t) j);
        CU_ASSERT_EQUAL(subset_variant->site.id, (tsk_id_t) j);
        CU_ASSERT_EQUAL(variant->site.position, subset_variant->site.position);
        for (k = 0; k < num_samples; k++) {
            CU_ASSERT_FATAL(sample_index_map[samples[k]] < (tsk_id_t) ts->num_samples);
            a1 = variant->genotypes[sample_index_map[samples[k]]];
            a2 = subset_variant->genotypes[k];
            /* The allele indexes are used to subscript the allele arrays, so
             * they must be bounded on both sides before we do so. */
            CU_ASSERT_FATAL(a1 >= 0);
            CU_ASSERT_FATAL(a2 >= 0);
            CU_ASSERT_FATAL(a1 < (int) variant->num_alleles);
            CU_ASSERT_FATAL(a2 < (int) subset_variant->num_alleles);
            /* Allele indexes may differ between the two; compare the states. */
            CU_ASSERT_EQUAL_FATAL(
                variant->allele_lengths[a1], subset_variant->allele_lengths[a2]);
            CU_ASSERT_NSTRING_EQUAL_FATAL(variant->alleles[a1],
                subset_variant->alleles[a2], variant->allele_lengths[a1]);
        }
    }
    tsk_vargen_free(&vargen);
    tsk_vargen_free(&subset_vargen);
}
/* Check structural properties of `subset`, the result of simplifying `ts`
 * to the first `num_samples` entries of `samples`.
 *
 * `node_map` is indexed by node ID in `ts` and gives the corresponding node ID
 * in `subset`, or TSK_NULL where a node has no counterpart. The checks are:
 *  - sequence length, sample count and node count relations;
 *  - per-sample and per-mapped-node attribute equality;
 *  - degenerate 0- and 1-sample outputs have no edges;
 *  - pairwise MRCAs of the samples agree (through node_map) in both sequences;
 *  - every site reported by a subset tree lies inside that tree's interval and
 *    the total over all trees equals the subset's site count.
 */
static void
verify_simplify_properties(tsk_treeseq_t *ts, tsk_treeseq_t *subset,
const tsk_id_t *samples, tsk_size_t num_samples, tsk_id_t *node_map)
{
int ret;
tsk_node_t n1, n2;
tsk_tree_t full_tree, subset_tree;
const tsk_site_t *tree_sites;
tsk_size_t tree_sites_length;
uint32_t j, k;
tsk_id_t u, mrca1, mrca2;
tsk_size_t total_sites;
CU_ASSERT_EQUAL(
tsk_treeseq_get_sequence_length(ts), tsk_treeseq_get_sequence_length(subset));
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(subset), num_samples);
CU_ASSERT(tsk_treeseq_get_num_nodes(ts) >= tsk_treeseq_get_num_nodes(subset));
/* NOTE(review): this repeats the sample-count assertion made two lines above. */
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(subset), num_samples);
/* Check the sample properties: sample j of the input must map to node j of
 * the output and carry identical node attributes. */
for (j = 0; j < num_samples; j++) {
ret = tsk_treeseq_get_node(ts, samples[j], &n1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(node_map[samples[j]], (tsk_id_t) j);
ret = tsk_treeseq_get_node(subset, node_map[samples[j]], &n2);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n1.population, n2.population);
CU_ASSERT_EQUAL_FATAL(n1.time, n2.time);
CU_ASSERT_EQUAL_FATAL(n1.flags, n2.flags);
CU_ASSERT_EQUAL_FATAL(n1.metadata_length, n2.metadata_length);
CU_ASSERT_NSTRING_EQUAL(n1.metadata, n2.metadata, n2.metadata_length);
}
/* Check that node mappings are correct: every node that survives (mapping is
 * not TSK_NULL) keeps its attributes. */
for (j = 0; j < tsk_treeseq_get_num_nodes(ts); j++) {
ret = tsk_treeseq_get_node(ts, (tsk_id_t) j, &n1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
if (node_map[j] != TSK_NULL) {
ret = tsk_treeseq_get_node(subset, node_map[j], &n2);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n1.population, n2.population);
CU_ASSERT_EQUAL_FATAL(n1.time, n2.time);
CU_ASSERT_EQUAL_FATAL(n1.flags, n2.flags);
CU_ASSERT_EQUAL_FATAL(n1.metadata_length, n2.metadata_length);
CU_ASSERT_NSTRING_EQUAL(n1.metadata, n2.metadata, n2.metadata_length);
}
}
/* Degenerate outputs: with zero or one sample there is nothing to join, so
 * the result is expected to have no edges and exactly num_samples nodes. */
if (num_samples == 0) {
CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(subset), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(subset), 0);
} else if (num_samples == 1) {
CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(subset), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(subset), 1);
}
/* Check the pairwise MRCAs */
ret = tsk_tree_init(&full_tree, ts, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_tree_init(&subset_tree, subset, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_tree_first(&full_tree);
CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
ret = tsk_tree_first(&subset_tree);
CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
total_sites = 0;
/* Outer loop: one iteration per subset tree. Inner loop: advance the full
 * tree for as long as its right coordinate does not pass the right coordinate
 * of the current subset tree, comparing MRCAs for each such full tree. */
while (1) {
while (full_tree.interval.right <= subset_tree.interval.right) {
for (j = 0; j < num_samples; j++) {
for (k = j + 1; k < num_samples; k++) {
ret = tsk_tree_get_mrca(&full_tree, samples[j], samples[k], &mrca1);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_get_mrca(&subset_tree, node_map[samples[j]],
node_map[samples[k]], &mrca2);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* No common ancestor in the full tree implies none in the subset;
 * otherwise the subset MRCA is the image of the full MRCA. */
if (mrca1 == TSK_NULL) {
CU_ASSERT_EQUAL_FATAL(mrca2, TSK_NULL);
} else {
CU_ASSERT_EQUAL(node_map[mrca1], mrca2);
}
}
}
ret = tsk_tree_next(&full_tree);
CU_ASSERT_FATAL(ret >= 0);
/* Stop once there is no further full tree (1 is presumably TSK_TREE_OK,
 * which the rest of this file compares against -- confirm). */
if (ret != 1) {
break;
}
}
/* Check the sites in this tree */
ret = tsk_tree_get_sites(&subset_tree, &tree_sites, &tree_sites_length);
CU_ASSERT_EQUAL_FATAL(ret, 0);
for (j = 0; j < tree_sites_length; j++) {
CU_ASSERT(subset_tree.interval.left <= tree_sites[j].position);
CU_ASSERT(tree_sites[j].position < subset_tree.interval.right);
/* Each mutation's node must be a valid node for a parent lookup in this
 * tree; the parent value itself (u) is not inspected. */
for (k = 0; k < tree_sites[j].mutations_length; k++) {
ret = tsk_tree_get_parent(
&subset_tree, tree_sites[j].mutations[k].node, &u);
CU_ASSERT_EQUAL(ret, 0);
}
total_sites++;
}
ret = tsk_tree_next(&subset_tree);
if (ret != 1) {
break;
}
}
/* The loop must have ended because the subset trees ran out (ret == 0),
 * not because of an error. */
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(subset), total_sites);
tsk_tree_free(&subset_tree);
tsk_tree_free(&full_tree);
}
/* Simplify ts down to several prefix lengths of its sample list (0, 1, 2, 3,
 * n/2, n-1 and n, skipping any that exceed n) and validate each result, once
 * with TSK_SIMPLIFY_FILTER_SITES and once keeping all sites. For tree
 * sequences with migrations only the expected error code is checked. */
static void
verify_simplify(tsk_treeseq_t *ts)
{
    int ret;
    tsk_size_t n = tsk_treeseq_get_num_samples(ts);
    tsk_size_t num_samples[] = { 0, 1, 2, 3, n / 2, n - 1, n };
    tsk_size_t j;
    const tsk_id_t *sample;
    tsk_id_t *node_map = tsk_malloc(tsk_treeseq_get_num_nodes(ts) * sizeof(tsk_id_t));
    tsk_treeseq_t subset;
    tsk_flags_t options = TSK_SIMPLIFY_FILTER_SITES;

    CU_ASSERT_FATAL(node_map != NULL);
    sample = tsk_treeseq_get_samples(ts);

    if (tsk_treeseq_get_num_migrations(ts) > 0) {
        /* Simplify isn't supported with migrations, so there is nothing more
         * to verify beyond the error code. */
        ret = tsk_treeseq_simplify(ts, sample, 2, 0, &subset, NULL);
        CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED);
    } else {
        for (j = 0; j < sizeof(num_samples) / sizeof(*num_samples); j++) {
            if (num_samples[j] > n) {
                /* Not enough samples for this size (includes the wrapped
                 * value of n - 1 when n is zero). */
                continue;
            }
            /* Filter out unused sites */
            ret = tsk_treeseq_simplify(
                ts, sample, num_samples[j], options, &subset, node_map);
            CU_ASSERT_EQUAL_FATAL(ret, 0);
            verify_simplify_properties(ts, &subset, sample, num_samples[j], node_map);
            tsk_treeseq_free(&subset);

            /* Keep all sites */
            ret = tsk_treeseq_simplify(ts, sample, num_samples[j], 0, &subset, node_map);
            CU_ASSERT_EQUAL_FATAL(ret, 0);
            verify_simplify_properties(ts, &subset, sample, num_samples[j], node_map);
            verify_simplify_genotypes(ts, &subset, sample, num_samples[j]);
            tsk_treeseq_free(&subset);
        }
    }
    free(node_map);
}
/* One expectation consumed by verify_sample_counts. */
typedef struct {
/* Index of the tree to seek to (passed to tsk_tree_seek_index). */
tsk_id_t tree_index;
/* Node whose sample count is queried in that tree. */
tsk_id_t node;
/* Expected number of samples reported for that node. */
tsk_size_t count;
} sample_count_test_t;
/* Check per-node sample counts against a list of expectations, under four
 * different tree configurations.
 *
 * ts:           tree sequence under test.
 * num_tests:    number of entries in tests.
 * tests:        (tree_index, node, count) expectations.
 * seek_options: flags forwarded to tsk_tree_seek_index.
 *
 * Every pass positions the tree on the first tree before seeking so that all
 * four configurations exercise the seek from the same starting state.
 */
static void
verify_sample_counts(tsk_treeseq_t *ts, tsk_size_t num_tests, sample_count_test_t *tests,
    tsk_flags_t seek_options)
{
    int ret;
    tsk_size_t j, num_samples, n, k;
    tsk_id_t stop, sample_index;
    tsk_tree_t tree;
    const tsk_id_t *samples;

    n = tsk_treeseq_get_num_samples(ts);
    samples = tsk_treeseq_get_samples(ts);

    /* First run with the TSK_NO_SAMPLE_COUNTS feature */
    ret = tsk_tree_init(&tree, ts, TSK_NO_SAMPLE_COUNTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
    for (j = 0; j < num_tests; j++) {
        ret = tsk_tree_seek_index(&tree, tests[j].tree_index, seek_options);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_tree_get_num_samples(&tree, tests[j].node, &num_samples);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(tests[j].count, num_samples);
        /* all operations depending on tracked samples should fail. */
        ret = tsk_tree_get_num_tracked_samples(&tree, 0, &num_samples);
        CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);
        /* The root should be NULL */
        CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);
    }
    tsk_tree_free(&tree);

    /* Now run with TSK_SAMPLE_COUNTS but with no samples tracked. */
    ret = tsk_tree_init(&tree, ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
    for (j = 0; j < num_tests; j++) {
        ret = tsk_tree_seek_index(&tree, tests[j].tree_index, seek_options);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_tree_get_num_samples(&tree, tests[j].node, &num_samples);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(tests[j].count, num_samples);
        /* Tracked-sample queries succeed here, but nothing is tracked so the
         * count is zero. */
        ret = tsk_tree_get_num_tracked_samples(&tree, 0, &num_samples);
        CU_ASSERT_EQUAL(ret, 0);
        CU_ASSERT_EQUAL(num_samples, 0);
        /* The virtual root should not be NULL */
        CU_ASSERT_NOT_EQUAL(tree.virtual_root, TSK_NULL);
    }
    tsk_tree_free(&tree);

    /* Run with TSK_SAMPLE_LISTS and TSK_NO_SAMPLE_COUNTS */
    ret = tsk_tree_init(&tree, ts, TSK_SAMPLE_LISTS | TSK_NO_SAMPLE_COUNTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
    for (j = 0; j < num_tests; j++) {
        ret = tsk_tree_seek_index(&tree, tests[j].tree_index, seek_options);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_tree_get_num_samples(&tree, tests[j].node, &num_samples);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(tests[j].count, num_samples);
        /* all operations depending on tracked samples should fail. */
        ret = tsk_tree_get_num_tracked_samples(&tree, 0, &num_samples);
        CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);

        /* Walk the node's sample list from left_sample to right_sample. */
        sample_index = tree.left_sample[tests[j].node];
        k = 0;
        if (sample_index != TSK_NULL) {
            stop = tree.right_sample[tests[j].node];
            while (true) {
                k++;
                /* Bail out rather than run past the end of a broken list. */
                CU_ASSERT_FATAL(k <= tests[j].count);
                if (sample_index == stop) {
                    break;
                }
                sample_index = tree.next_sample[sample_index];
            }
        }
        CU_ASSERT_EQUAL(tests[j].count, k);
    }
    tsk_tree_free(&tree);

    /* Now use TSK_SAMPLE_LISTS */
    ret = tsk_tree_init(&tree, ts, TSK_SAMPLE_LISTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_set_tracked_samples(&tree, n, samples);
    CU_ASSERT_EQUAL(ret, 0);
    ret = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
    for (j = 0; j < num_tests; j++) {
        ret = tsk_tree_seek_index(&tree, tests[j].tree_index, seek_options);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        ret = tsk_tree_get_num_samples(&tree, tests[j].node, &num_samples);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(tests[j].count, num_samples);

        /* We're tracking all samples, so the count should be the same */
        ret = tsk_tree_get_num_tracked_samples(&tree, tests[j].node, &num_samples);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(tests[j].count, num_samples);

        sample_index = tree.left_sample[tests[j].node];
        k = 0;
        if (sample_index != TSK_NULL) {
            stop = tree.right_sample[tests[j].node];
            while (true) {
                k++;
                /* Same guard as the previous pass: a list that never reaches
                 * `stop` must fail the test instead of indexing next_sample
                 * with an invalid value. */
                CU_ASSERT_FATAL(k <= tests[j].count);
                if (sample_index == stop) {
                    break;
                }
                sample_index = tree.next_sample[sample_index];
            }
        }
        CU_ASSERT_EQUAL(tests[j].count, k);
    }
    tsk_tree_free(&tree);
}
/* For every node of the tree's sequence, check that the sample list
 * (left_sample / right_sample / next_sample) enumerates exactly the samples
 * found by an explicit depth-first traversal of the subtree, in the same
 * order, and that tsk_tree_get_num_samples agrees with the traversal.
 *
 * tree: a positioned tree; the list arrays read here are the ones the other
 *       tests in this file obtain by initialising with TSK_SAMPLE_LISTS.
 */
static void
verify_sample_sets_for_tree(tsk_tree_t *tree)
{
    int ret, stack_top, j;
    tsk_id_t u, v;
    tsk_size_t tmp, n, num_nodes, num_samples;
    tsk_id_t *stack, *samples;
    const tsk_treeseq_t *ts = tree->tree_sequence;
    tsk_id_t *sample_index_map = ts->sample_index_map;
    const tsk_id_t *list_left = tree->left_sample;
    const tsk_id_t *list_right = tree->right_sample;
    const tsk_id_t *list_next = tree->next_sample;
    tsk_id_t stop, sample_index;

    n = tsk_treeseq_get_num_samples(ts);
    num_nodes = tsk_treeseq_get_num_nodes(ts);
    /* The traversal pushes every child, not just samples, so the stack has to
     * be able to hold any node of the tree (each is pushed at most once).
     * Sizing it by the sample count would overflow on trees with non-sample
     * leaves or unary nodes. */
    stack = tsk_malloc(num_nodes * sizeof(tsk_id_t));
    samples = tsk_malloc(n * sizeof(tsk_id_t));
    CU_ASSERT_FATAL(stack != NULL);
    CU_ASSERT_FATAL(samples != NULL);

    for (u = 0; u < (tsk_id_t) num_nodes; u++) {
        if (tree->left_child[u] == TSK_NULL && !tsk_treeseq_is_sample(ts, u)) {
            /* A childless non-sample node has an empty sample list. */
            CU_ASSERT_EQUAL(list_left[u], TSK_NULL);
            CU_ASSERT_EQUAL(list_right[u], TSK_NULL);
        } else {
            /* Collect the samples below u. Children are pushed right to left
             * so that they are popped left to right. */
            stack_top = 0;
            num_samples = 0;
            stack[stack_top] = u;
            while (stack_top >= 0) {
                v = stack[stack_top];
                stack_top--;
                if (tsk_treeseq_is_sample(ts, v)) {
                    samples[num_samples] = v;
                    num_samples++;
                }
                for (v = tree->right_child[v]; v != TSK_NULL; v = tree->left_sib[v]) {
                    stack_top++;
                    stack[stack_top] = v;
                }
            }
            ret = tsk_tree_get_num_samples(tree, u, &tmp);
            CU_ASSERT_EQUAL(ret, 0);
            CU_ASSERT_EQUAL_FATAL(num_samples, tmp);

            /* Walk the list for u and compare it entry by entry. */
            j = 0;
            sample_index = list_left[u];
            if (sample_index != TSK_NULL) {
                stop = list_right[u];
                while (true) {
                    /* Only the first num_samples entries of samples[] were
                     * filled in for this node. */
                    CU_ASSERT_TRUE_FATAL(j < (int) num_samples);
                    CU_ASSERT_EQUAL_FATAL(sample_index, sample_index_map[samples[j]]);
                    j++;
                    if (sample_index == stop) {
                        break;
                    }
                    sample_index = list_next[sample_index];
                }
            }
            CU_ASSERT_EQUAL_FATAL(j, (int) num_samples);
        }
    }
    free(stack);
    free(samples);
}
/* Validate the sample lists of every tree in ts, reaching each tree three
 * ways: by forward iteration, by backward iteration, and by seeking to it
 * directly from both the first and the last tree. */
static void
verify_sample_sets(tsk_treeseq_t *ts)
{
    tsk_tree_t t;
    tsk_id_t j;
    int ret;

    ret = tsk_tree_init(&t, ts, TSK_SAMPLE_LISTS);
    CU_ASSERT_EQUAL(ret, 0);

    /* Left to right. */
    ret = tsk_tree_first(&t);
    while (ret == TSK_TREE_OK) {
        verify_sample_sets_for_tree(&t);
        ret = tsk_tree_next(&t);
    }
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Right to left. */
    ret = tsk_tree_last(&t);
    while (ret == TSK_TREE_OK) {
        verify_sample_sets_for_tree(&t);
        ret = tsk_tree_prev(&t);
    }
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Direct seeks to tree j, starting from each end of the sequence. */
    j = 0;
    while (j < (tsk_id_t) tsk_treeseq_get_num_trees(ts)) {
        ret = tsk_tree_first(&t);
        CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
        ret = tsk_tree_seek_index(&t, j, TSK_SEEK_SKIP);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        verify_sample_sets_for_tree(&t);

        ret = tsk_tree_last(&t);
        CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
        ret = tsk_tree_seek_index(&t, j, TSK_SEEK_SKIP);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        verify_sample_sets_for_tree(&t);

        j++;
    }
    tsk_tree_free(&t);
}
/* Assert that ts contains no edges, sites, mutations, migrations or samples,
 * has the given sequence length, and consists of exactly one tree.
 *
 * ts:              tree sequence expected to be empty.
 * sequence_length: the sequence length ts is expected to report.
 */
static void
verify_empty_tree_sequence(tsk_treeseq_t *ts, double sequence_length)
{
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(ts), 0);
    /* The mutation count used to be asserted twice; sites were never checked. */
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_migrations(ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(ts), sequence_length);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(ts), 1);
}
/*=======================================================
* Simplest test cases.
*======================================================*/
/* Exercise tsk_treeseq_get_discrete_genome: starting from a two-sample tree
 * on [0, 1), perturb one coordinate at a time (sequence length, edge left and
 * right, site position, migration left and right) to a non-integer value,
 * check the flag turns off, and restore the value before the next step. */
static void
test_simplest_discrete_genome(void)
{
    int ret;
    tsk_id_t ret_id;
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0";
    const char *edges = "0 1 2 0,1\n";

    /* Baseline: all coordinates are integers. */
    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
    ret = tsk_table_collection_copy(ts.tables, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts);

    /* Non-integer sequence length. */
    tables.sequence_length = 1.001;
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    /* Restored sequence length. */
    tables.sequence_length = 1;
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    /* Non-integer edge right coordinate. */
    tables.edges.right[0] = 0.999;
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    /* Non-integer edge left coordinate. */
    tables.edges.right[0] = 1.0;
    tables.edges.left[0] = 0.999;
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    /* A site at an integer position keeps the genome discrete. */
    tables.edges.left[0] = 0;
    ret_id = tsk_site_table_add_row(&tables.sites, 0, "A", 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    /* Non-integer site position. (ret still holds the previous init result
     * when it is re-asserted here.) */
    tables.sites.position[0] = 0.001;
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    tables.sites.position[0] = 0;
    /* Need another population for a migration */
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    ret_id
        = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    /* Non-integer migration left coordinate. */
    tables.migrations.left[0] = 0.001;
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    /* Non-integer migration right coordinate. */
    tables.migrations.left[0] = 0;
    tables.migrations.right[0] = 0.999;
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    tables.migrations.right[0] = 1;
    /* An empty tree sequence has a discrete genome. */
    tsk_table_collection_clear(&tables, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
    tsk_treeseq_free(&ts);

    tsk_table_collection_free(&tables);
}
static void
test_simplest_discrete_time(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t tables;
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0\n"
"0 0 0\n"
"0 0 0";
const char *edges = "0 1 2 0,1,3,4\n";
const char *sites = "0.1 0\n"
"0.2 0\n"
"0.3 0\n"
"0.4 0\n";
const char *mutations = "0 0 1\n"
"1 1 1\n"
"2 3 1\n"
"3 4 1";
const char *migrations = "0 1 0 0 1 1";
tsk_treeseq_from_text(
&ts, 1, nodes, edges, migrations, sites, mutations, NULL, NULL, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
ret = tsk_table_collection_copy(ts.tables, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
tables.nodes.time[0] = 0.0001;
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
tables.nodes.time[0] = 0;
tables.mutations.time[0] = 0.001;
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
tables.mutations.time[0] = 0;
tables.migrations.time[0] = 0.001;
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
tables.migrations.time[0] = 0;
tables.mutations.time[0] = TSK_UNKNOWN_TIME;
tables.mutations.time[1] = TSK_UNKNOWN_TIME;
tables.mutations.time[2] = TSK_UNKNOWN_TIME;
tables.mutations.time[3] = TSK_UNKNOWN_TIME;
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
/* An empty tree sequence is has a discrete time. */
tsk_table_collection_clear(&tables, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}
/* Exercise tsk_treeseq_get_min_time on a small example whose youngest nodes
 * are at time 0.1: the value must survive a round trip through the tables,
 * be unaffected by unknown mutation times, and be INFINITY once the tables
 * have been cleared. */
static void
test_simplest_min_time(void)
{
    const char *nodes = "1 0.1 0 -1\n"
                        "1 0.1 0 -1\n"
                        "1 0.1 0 -1\n"
                        "0 1 0 -1\n"
                        "0 2 0 -1\n";
    const char *edges = "0 2 3 0,1\n"
                        "0 2 4 2,3\n";
    const char *sites = "0 0\n"
                        "1 0\n";
    const char *mutations = "0 2 1 -1 0.5\n"
                            "1 3 1 -1 1.5\n";
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int ret;

    /* As loaded from text. */
    tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_min_time(&ts), 0.1, 1E-6);
    ret = tsk_table_collection_copy(ts.tables, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts);

    /* Rebuilt from the copied tables. */
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_min_time(&ts), 0.1, 1E-6);
    tsk_treeseq_free(&ts);

    /* Setting mutation times to unknown should have no effect on min time. */
    tables.mutations.time[0] = TSK_UNKNOWN_TIME;
    tables.mutations.time[1] = TSK_UNKNOWN_TIME;
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_min_time(&ts), 0.1, 1E-6);
    tsk_treeseq_free(&ts);
    /* Put the original mutation times back. */
    tables.mutations.time[0] = 0.5;
    tables.mutations.time[1] = 1.5;

    /* An empty tree sequence has infinity min time. */
    tsk_table_collection_clear(&tables, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_min_time(&ts), INFINITY);
    tsk_treeseq_free(&ts);

    tsk_table_collection_free(&tables);
}
/* Exercise tsk_treeseq_get_max_time on a small example whose oldest node is
 * at time 2: the value must survive a round trip through the tables, be
 * unaffected by unknown mutation times, and be -INFINITY once the tables have
 * been cleared. */
static void
test_simplest_max_time(void)
{
    const char *nodes = "1 0.1 0 -1\n"
                        "1 0.1 0 -1\n"
                        "1 0.1 0 -1\n"
                        "0 1 0 -1\n"
                        "0 2 0 -1\n";
    const char *edges = "0 2 3 0,1\n"
                        "0 2 4 2,3\n";
    const char *sites = "0 0\n"
                        "1 0\n";
    const char *mutations = "0 2 1 -1 0.5\n"
                            "1 3 1 -1 1.5\n";
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int ret;

    /* As loaded from text. */
    tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_max_time(&ts), 2.0, 1E-6);
    ret = tsk_table_collection_copy(ts.tables, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts);

    /* Rebuilt from the copied tables. */
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_max_time(&ts), 2.0, 1E-6);
    tsk_treeseq_free(&ts);

    /* Setting mutation times to unknown should have no effect on max time. */
    tables.mutations.time[0] = TSK_UNKNOWN_TIME;
    tables.mutations.time[1] = TSK_UNKNOWN_TIME;
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL(tsk_treeseq_get_max_time(&ts), 2.0, 1E-6);
    tsk_treeseq_free(&ts);
    /* Put the original mutation times back. */
    tables.mutations.time[0] = 0.5;
    tables.mutations.time[1] = 1.5;

    /* An empty tree sequence has negative infinity max time. */
    tsk_table_collection_clear(&tables, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_max_time(&ts), -INFINITY);
    tsk_treeseq_free(&ts);

    tsk_table_collection_free(&tables);
}
static void
test_simplest_records(void)
{
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges = "0 1 2 0,1\n";
tsk_treeseq_t ts, simplified;
tsk_id_t sample_ids[] = { 0, 1 };
int ret;
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);
ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(&ts, sample_ids, 2,
TSK_SIMPLIFY_KEEP_UNARY | TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS, &simplified,
NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE);
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&ts);
}
static void
test_simplest_nonbinary_records(void)
{
const char *nodes = "1 0 0\n"
"1 0 0\n"
"1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges = "0 1 4 0,1,2,3\n";
tsk_treeseq_t ts, simplified;
tsk_tree_t t;
tsk_id_t sample_ids[] = { 0, 1, 2, 3 };
int ret;
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 5);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);
ret = tsk_tree_init(&t, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL(t.num_children[4], 4);
CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1);
tsk_tree_free(&t);
ret = tsk_treeseq_simplify(&ts, sample_ids, 4, 0, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 4, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 4, TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&ts);
}
static void
test_simplest_unary_records(void)
{
int ret;
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0\n"
"0 1 0\n"
"0 2 0";
const char *edges = "0 1 2 0\n"
"0 1 3 1\n"
"0 1 4 2,3\n";
tsk_treeseq_t ts, simplified, simplified_other;
tsk_tree_t t;
tsk_id_t sample_ids[] = { 0, 1 };
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 5);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_populations(&ts), 1);
ret = tsk_tree_init(&t, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL(t.num_children[2], 1);
CU_ASSERT_EQUAL(t.num_children[4], 2);
CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1);
tsk_tree_free(&t);
ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&simplified), 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&simplified), 1);
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified_other, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(
tsk_table_collection_equals(simplified.tables, simplified_other.tables, 0));
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&simplified_other);
tsk_treeseq_free(&ts);
}
/* Simplify a three-tree example to samples 0 and 1 with
 * TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS | TSK_SIMPLIFY_FILTER_INDIVIDUALS and
 * compare the output tables against a hand-written expectation. */
static void
test_simplest_unary_with_individuals(void)
{
    /* Input tables. */
    const char *nodes = "1 0 0 -1\n"
                        "1 0 0 0\n"
                        "0 1 0 -1\n"
                        "0 1 0 1\n"
                        "0 2 0 -1\n"
                        "0 3 0 -1\n"
                        "0 3 0 2\n"
                        "0 1 0 -1\n"
                        "0 1 0 3\n"
                        "0 0 0 -1\n"
                        "0 0 0 4\n"
                        "0 1 0 3\n";
    const char *edges = "0 2 2 0\n"
                        "0 2 3 1\n"
                        "2 3 7 0\n"
                        "2 3 8 1,9\n"
                        "2 3 11 10\n"
                        "0 2 4 2,3\n"
                        "0 1 5 4\n"
                        "1 2 6 4\n";
    const char *individuals = "0 0.5 -1,-1\n"
                              "0 1.5,3.1 -1,-1\n"
                              "0 2.1 0,1\n"
                              "0 3.2 1,2\n"
                              "0 4.2 2,3\n";
    /* Expected tables after simplification. */
    const char *nodes_expect = "1 0 0 -1\n"
                               "1 0 0 0\n"
                               "0 1 0 1\n"
                               "0 1 0 3\n"
                               "0 2 0 -1\n"
                               "0 3 0 2\n";
    const char *edges_expect = "0 2 2 1\n"
                               "2 3 3 1\n"
                               "0 2 4 0,2\n"
                               "1 2 5 4\n";
    const char *individuals_expect = "0 0.5 -1,-1\n"
                                     "0 1.5,3.1 -1,-1\n"
                                     "0 2.1 0,1\n"
                                     "0 3.2 1,2\n";
    tsk_id_t sample_ids[] = { 0, 1 };
    tsk_treeseq_t ts, simplified, expected;
    int ret;

    tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, NULL, NULL, individuals, NULL, 0);
    tsk_treeseq_from_text(&expected, 3, nodes_expect, edges_expect, NULL, NULL, NULL,
        individuals_expect, NULL, 0);

    /* Sanity-check the input before simplifying it. */
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 12);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_individuals(&ts), 5);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_populations(&ts), 1);

    ret = tsk_treeseq_simplify(&ts, sample_ids, 2,
        TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS | TSK_SIMPLIFY_FILTER_INDIVIDUALS,
        &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(simplified.tables, expected.tables, 0));

    tsk_treeseq_free(&simplified);
    tsk_treeseq_free(&expected);
    tsk_treeseq_free(&ts);
}
/* A single tree in which node 2 is the parent of two sample leaves (0, 1) and
 * two non-sample leaves (3, 4), with one mutation above each leaf. Checks the
 * genotypes decoded for the two samples at every site, and that simplifying
 * down to the samples leaves 3 nodes and 2 mutations. */
static void
test_simplest_non_sample_leaf_records(void)
{
    int ret;
    int j;
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 0 0\n"
                        "0 0 0";
    const char *edges = "0 1 2 0,1,3,4\n";
    const char *sites = "0.1 0\n"
                        "0.2 0\n"
                        "0.3 0\n"
                        "0.4 0\n";
    const char *mutations = "0 0 1\n"
                            "1 1 1\n"
                            "2 3 1\n"
                            "3 4 1";
    /* Expected genotypes of samples 0 and 1, one row per site. Only the
     * mutations over the sample nodes (sites 0 and 1) show up as derived. */
    const int expected_genotypes[4][2] = { { 1, 0 }, { 0, 1 }, { 0, 0 }, { 0, 0 } };
    tsk_treeseq_t ts, simplified;
    tsk_id_t sample_ids[] = { 0, 1 };
    tsk_vargen_t vargen;
    tsk_variant_t *var;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 5);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);

    ret = tsk_vargen_init(&vargen, &ts, NULL, 0, NULL, 0);
    /* Stop here if initialisation failed rather than using the generator. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_vargen_print_state(&vargen, _devnull);

    for (j = 0; j < 4; j++) {
        ret = tsk_vargen_next(&vargen, &var);
        CU_ASSERT_EQUAL_FATAL(ret, 1);
        CU_ASSERT_NSTRING_EQUAL(var->alleles[0], "0", 1);
        CU_ASSERT_NSTRING_EQUAL(var->alleles[1], "1", 1);
        CU_ASSERT_EQUAL(var->genotypes[0], expected_genotypes[j][0]);
        CU_ASSERT_EQUAL(var->genotypes[1], expected_genotypes[j][1]);
    }
    /* A fifth call must report that there are no more variants. */
    ret = tsk_vargen_next(&vargen, &var);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_vargen_free(&vargen);

    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&simplified), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&simplified), 1);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&simplified);
}
/* Two samples, each below its own unary parent, so the single tree has two
 * roots (nodes 2 and 3). Default simplification reduces this to the two
 * sample nodes; with TSK_SIMPLIFY_KEEP_UNARY the tables are unchanged. */
static void
test_simplest_degenerate_multiple_root_records(void)
{
    int ret;
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 2 0\n"
                        "0 1 3 1\n";
    tsk_treeseq_t ts, simplified;
    tsk_tree_t t;
    tsk_id_t sample_ids[] = { 0, 1 };

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);

    ret = tsk_tree_init(&t, &ts, 0);
    /* Fatal: the assertions below index directly into the tree's arrays. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 2);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 2);
    CU_ASSERT_EQUAL(tsk_tree_get_right_root(&t), 3);
    CU_ASSERT_EQUAL(t.num_edges, 2);
    CU_ASSERT_EQUAL(t.right_sib[2], 3);
    CU_ASSERT_EQUAL(t.right_sib[3], TSK_NULL);
    CU_ASSERT_EQUAL(t.num_children[2], 1);
    CU_ASSERT_EQUAL(t.num_children[0], 0);

    ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&simplified), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&simplified), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 2);
    tsk_treeseq_free(&simplified);

    ret = tsk_treeseq_simplify(
        &ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
    tsk_treeseq_free(&simplified);

    /* Release the tree before the tree sequence it was initialised from,
     * matching the other tests in this file. */
    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/* Four samples arranged as two disjoint two-leaf subtrees (parents 4 and 5).
 * Simplifying on all four samples keeps all six nodes; simplifying on only
 * the first three leaves four nodes. */
static void
test_simplest_multiple_root_records(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 4 0,1\n"
                        "0 1 5 2,3\n";
    tsk_id_t focal[] = { 0, 1, 2, 3 };
    tsk_treeseq_t full, reduced;
    int status;

    tsk_treeseq_from_text(&full, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&full), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&full), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&full), 6);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&full), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&full), 1);

    /* All four samples: nothing is removed. */
    status = tsk_treeseq_simplify(&full, focal, 4, 0, &reduced, NULL);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&reduced), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&reduced), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&reduced), 6);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&reduced), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&reduced), 1);
    tsk_treeseq_free(&reduced);

    /* Make one tree degenerate by passing only the first three samples. */
    status = tsk_treeseq_simplify(&full, focal, 3, 0, &reduced, NULL);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&reduced), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&reduced), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&reduced), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&reduced), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&reduced), 1);
    tsk_treeseq_free(&reduced);

    tsk_treeseq_free(&full);
}
/* Same topology as the two-subtree case but with no node flagged as a sample:
 * the tree must report zero roots while still tracking its four edges and the
 * sibling/child bookkeeping. */
static void
test_simplest_zero_root_tree(void)
{
    int ret;
    const char *nodes = "0 0 0\n"
                        "0 0 0\n"
                        "0 0 0\n"
                        "0 0 0\n"
                        "0 1 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 4 0,1\n"
                        "0 1 5 2,3\n";
    tsk_treeseq_t ts;
    tsk_tree_t t;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 6);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);

    ret = tsk_tree_init(&t, &ts, 0);
    /* Fatal: the assertions below index directly into the tree's arrays. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 0);
    CU_ASSERT_EQUAL(t.num_edges, 4);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), TSK_NULL);
    CU_ASSERT_EQUAL(tsk_tree_get_right_root(&t), TSK_NULL);
    CU_ASSERT_EQUAL(t.right_sib[2], 3);
    CU_ASSERT_EQUAL(t.right_sib[3], TSK_NULL);
    CU_ASSERT_EQUAL(t.num_children[0], 0);
    CU_ASSERT_EQUAL(t.num_children[4], 2);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/* Three samples where only samples 1 and 2 share a parent (node 3), leaving
 * sample 0 isolated. Checks the root linkage right after init, the two roots
 * seen after tsk_tree_first, and the effect of changing the root threshold. */
static void
test_simplest_multi_root_tree(void)
{
    int ret;
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 3 1,2\n";
    tsk_treeseq_t ts;
    tsk_tree_t t;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);

    ret = tsk_tree_init(&t, &ts, 0);
    /* Check the result before the tree is printed or its arrays are read. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_tree_print_state(&t, _devnull);
    /* Make sure the initial roots are set correctly: before any edges are
     * applied the three samples are chained together as sibling roots. */
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0);
    CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL);
    CU_ASSERT_EQUAL(t.right_sib[0], 1);
    CU_ASSERT_EQUAL(t.left_sib[1], 0);
    CU_ASSERT_EQUAL(t.right_sib[1], 2);
    CU_ASSERT_EQUAL(t.left_sib[2], 1);
    CU_ASSERT_EQUAL(t.right_sib[2], TSK_NULL);

    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 2);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0);
    CU_ASSERT_EQUAL(t.right_sib[0], 3);
    CU_ASSERT_EQUAL(t.num_edges, 2);
    CU_ASSERT_EQUAL(t.num_children[0], 0);
    CU_ASSERT_EQUAL(t.num_children[3], 2);
    tsk_tree_print_state(&t, _devnull);

    /* With the tree positioned on the first tree, changing the threshold is
     * rejected. */
    CU_ASSERT_EQUAL(tsk_tree_set_root_threshold(&t, 1), TSK_ERR_UNSUPPORTED_OPERATION);
    /* Stepping past the only tree returns 0; the threshold can then be set,
     * except to the invalid value 0. */
    ret = tsk_tree_next(&t);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_tree_set_root_threshold(&t, 0), TSK_ERR_BAD_PARAM_VALUE);
    ret = tsk_tree_set_root_threshold(&t, 2);
    CU_ASSERT_EQUAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_tree_get_root_threshold(&t), 2);

    /* With a threshold of 2 only node 3 is reported as a root. */
    ret = tsk_tree_next(&t);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 3);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/* In a tree sequence holding a single sample node and no edges, the MRCA of
 * node 0 with itself must be node 0. */
static void
test_simplest_tree_mrca(void)
{
    tsk_table_collection_t tc;
    tsk_treeseq_t tseq;
    tsk_tree_t tree;
    tsk_id_t new_row, ancestor;
    int rc;

    rc = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    tc.sequence_length = 1;

    /* One sample node at time 0 with no population, individual or metadata. */
    new_row = tsk_node_table_add_row(
        &tc.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(new_row, 0);

    rc = tsk_treeseq_init(&tseq, &tc, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&tseq), 1);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&tseq), 1.0);

    rc = tsk_tree_init(&tree, &tseq, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    rc = tsk_tree_get_mrca(&tree, 0, 0, &ancestor);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL_FATAL(ancestor, 0);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&tseq);
    tsk_table_collection_free(&tc);
}
/* A mutation placed on the root node (2) of a two-sample tree must be kept
 * by simplification: node, site, mutation and tree counts are unchanged. */
static void
test_simplest_root_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 2 0,1\n";
    const char *sites = "0.1 0";
    const char *mutations = "0 2 1";
    tsk_id_t focal[] = { 0, 1 };
    tsk_flags_t simplify_flags = 0;
    tsk_treeseq_t before, after;
    int status;

    tsk_treeseq_from_text(
        &before, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&before), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&before), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&before), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&before), 1);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&before), 1);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&before), 1);

    status = tsk_treeseq_simplify(&before, focal, 2, simplify_flags, &after, NULL);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&after), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&after), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&after), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&after), 1);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&after), 1);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&after), 1);

    tsk_treeseq_free(&after);
    tsk_treeseq_free(&before);
}
/* One site carrying a 0->1 mutation over node 3 and a back mutation to 0 over
 * node 0 (whose parent is the first mutation). The decoded variant must have
 * two alleles and genotypes 0, 1, 0 for the three samples. */
static void
test_simplest_back_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 2 0\n";
    const char *edges = "0 1 3 0,1\n"
                        "0 1 4 2,3\n";
    const char *sites = "0.5 0";
    const char *mutations = "0 3 1 -1\n"
                            "0 0 0 0";
    tsk_treeseq_t tseq;
    tsk_vargen_t gen;
    tsk_variant_t *variant;
    int rc;

    tsk_treeseq_from_text(&tseq, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&tseq), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&tseq), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&tseq), 5);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&tseq), 1);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&tseq), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&tseq), 1);

    rc = tsk_vargen_init(&gen, &tseq, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    rc = tsk_vargen_next(&gen, &variant);
    CU_ASSERT_EQUAL_FATAL(rc, 1);

    /* Alleles. */
    CU_ASSERT_EQUAL(variant->num_alleles, 2);
    CU_ASSERT_NSTRING_EQUAL(variant->alleles[0], "0", 1);
    CU_ASSERT_NSTRING_EQUAL(variant->alleles[1], "1", 1);
    /* Genotypes: only sample 1 still carries the derived state. */
    CU_ASSERT_EQUAL(variant->genotypes[0], 0);
    CU_ASSERT_EQUAL(variant->genotypes[1], 1);
    CU_ASSERT_EQUAL(variant->genotypes[2], 0);
    /* Site record attached to the variant. */
    CU_ASSERT_EQUAL(variant->site.id, 0);
    CU_ASSERT_EQUAL(variant->site.mutations_length, 2);

    tsk_vargen_free(&gen);
    tsk_treeseq_free(&tseq);
}
/* Samples need not occupy the first node IDs: here they are nodes 0 and 2,
 * with the internal node in between. After simplification the sample list
 * is 0, 1. */
static void
test_simplest_general_samples(void)
{
    const char *nodes = "1 0 0\n"
                        "0 1 0\n"
                        "1 0 0";
    const char *edges = "0 1 1 0,2\n";
    const char *sites = "0.5 0\n"
                        "0.75 0\n";
    const char *mutations = "0 2 1\n"
                            "1 0 1";
    const tsk_id_t samples[2] = { 0, 2 };
    const tsk_id_t *listed;
    tsk_treeseq_t original, reduced;
    int status;

    tsk_treeseq_from_text(
        &original, 1, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&original), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&original), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&original), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&original), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&original), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&original), 1);

    /* Sample IDs as stored in the input. */
    listed = tsk_treeseq_get_samples(&original);
    CU_ASSERT_FATAL(listed != NULL);
    CU_ASSERT_EQUAL(listed[0], 0);
    CU_ASSERT_EQUAL(listed[1], 2);

    status = tsk_treeseq_simplify(&original, samples, 2, 0, &reduced, NULL);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    /* Sample IDs after simplification. */
    listed = tsk_treeseq_get_samples(&reduced);
    CU_ASSERT_FATAL(listed != NULL);
    CU_ASSERT_EQUAL(listed[0], 0);
    CU_ASSERT_EQUAL(listed[1], 1);

    tsk_treeseq_free(&reduced);
    tsk_treeseq_free(&original);
}
static void
test_simplest_holey_tree_sequence(void)
{
const char *nodes_txt = "1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges_txt = "0 1 2 0\n"
"2 3 2 0\n"
"0 1 2 1\n"
"2 3 2 1\n";
const char *sites_txt = "0.5 0\n"
"1.5 0\n"
"2.5 0\n";
const char *mutations_txt = "0 0 1\n"
"1 1 1\n"
"2 2 1\n";
int ret;
tsk_treeseq_t ts, simplified;
tsk_id_t sample_ids[] = { 0, 1 };
tsk_treeseq_from_text(
&ts, 3, nodes_txt, edges_txt, NULL, sites_txt, mutations_txt, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);
ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&ts);
}
static void
test_simplest_holey_tsk_treeseq_mutation_parents(void)
{
const char *nodes_txt = "1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges_txt = "0 1 2 0\n"
"2 3 2 0\n"
"0 1 2 1\n"
"2 3 2 1\n";
const char *sites_txt = "0.5 0\n"
"1.5 0\n"
"2.5 0\n";
const char *mutations_txt = "0 0 1\n"
"0 0 1\n"
"1 1 1\n"
"1 1 1\n"
"2 2 1\n"
"2 2 1\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
int ret;
tsk_treeseq_from_text(
&ts, 3, nodes_txt, edges_txt, NULL, sites_txt, mutations_txt, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tables.mutations.parent[0], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[1], 0);
CU_ASSERT_EQUAL(tables.mutations.parent[2], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[3], 2);
CU_ASSERT_EQUAL(tables.mutations.parent[4], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[5], 4);
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}
static void
test_simplest_initial_gap_tree_sequence(void)
{
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges = "2 3 2 0,1\n";
const char *sites = "0.5 0\n"
"1.5 0\n"
"2.5 0\n";
const char *mutations = "0 0 1\n"
"1 1 1\n"
"2 2 1";
int ret;
tsk_treeseq_t ts, simplified;
const tsk_id_t z = TSK_NULL;
tsk_id_t parents[] = {
z,
z,
z,
2,
2,
z,
};
tsk_size_t num_trees = 2;
tsk_id_t sample_ids[] = { 0, 1 };
tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);
verify_trees(&ts, num_trees, parents);
ret = tsk_treeseq_simplify(&ts, sample_ids, 2, 0, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_KEEP_UNARY, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&ts);
}
/* Initial-gap topology (edges only over [2, 3)) with no sample nodes: both
 * trees must report zero roots, while the second tree still records node 2 as
 * the parent of nodes 0 and 1. */
static void
test_simplest_initial_gap_zero_roots(void)
{
    const char *nodes = "0 0 0\n"
                        "0 0 0\n"
                        "0 1 0";
    const char *edges = "2 3 2 0,1\n";
    int ret;
    tsk_treeseq_t ts;
    const tsk_id_t z = TSK_NULL;
    /* Three parent entries per tree: the gap tree first, then the real one. */
    tsk_id_t parents[] = {
        z,
        z,
        z,
        2,
        2,
        z,
    };
    uint32_t num_trees = 2;
    tsk_tree_t tree;

    tsk_treeseq_from_text(&ts, 3, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);
    verify_trees(&ts, num_trees, parents);

    ret = tsk_tree_init(&tree, &ts, 0);
    /* Fatal: the assertions below index directly into the tree's arrays. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* First tree: the gap. */
    ret = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);

    /* Second tree: topology present but still no roots. */
    ret = tsk_tree_next(&tree);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);
    CU_ASSERT_EQUAL(tree.parent[0], 2);
    CU_ASSERT_EQUAL(tree.parent[1], 2);
    CU_ASSERT_EQUAL(tree.num_children[2], 2);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
/* Holey topology (edges over [0, 1) and [2, 3) only) with no sample nodes:
 * all three trees must report zero roots, and the middle tree must have no
 * children under node 2. */
static void
test_simplest_holey_tsk_treeseq_zero_roots(void)
{
    const char *nodes_txt = "0 0 0\n"
                            "0 0 0\n"
                            "0 1 0";
    const char *edges_txt = "0 1 2 0\n"
                            "2 3 2 0\n"
                            "0 1 2 1\n"
                            "2 3 2 1\n";
    int ret;
    tsk_treeseq_t ts;
    const tsk_id_t z = TSK_NULL;
    /* Three parent entries per tree, for the three trees in order. */
    tsk_id_t parents[] = {
        2,
        2,
        z,
        z,
        z,
        z,
        2,
        2,
        z,
    };
    uint32_t num_trees = 3;
    tsk_tree_t tree;

    tsk_treeseq_from_text(&ts, 3, nodes_txt, edges_txt, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 3.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 3);
    verify_trees(&ts, num_trees, parents);

    ret = tsk_tree_init(&tree, &ts, 0);
    /* Fatal: the assertions below index directly into the tree's arrays. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Tree over [0, 1): topology present, no roots. */
    ret = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);
    CU_ASSERT_EQUAL(tree.parent[0], 2);
    CU_ASSERT_EQUAL(tree.parent[1], 2);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);
    CU_ASSERT_EQUAL(tree.num_children[2], 2);

    /* Tree over the hole: no edges at all. */
    ret = tsk_tree_next(&tree);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);
    CU_ASSERT_EQUAL(tree.num_children[2], 0);

    /* Tree over [2, 3): topology restored. */
    ret = tsk_tree_next(&tree);
    CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), TSK_NULL);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 0);
    CU_ASSERT_EQUAL(tree.parent[0], 2);
    CU_ASSERT_EQUAL(tree.parent[1], 2);
    CU_ASSERT_EQUAL(tree.num_children[2], 2);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
static void
test_simplest_initial_gap_tsk_treeseq_mutation_parents(void)
{
const char *nodes_txt = "1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges_txt = "2 3 2 0,1\n";
const char *sites_txt = "0.5 0\n"
"1.5 0\n"
"2.5 0\n";
const char *mutations_txt = "0 0 1\n"
"0 0 1\n"
"1 1 1\n"
"1 1 1\n"
"2 2 1\n"
"2 2 1\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
int ret;
tsk_treeseq_from_text(
&ts, 3, nodes_txt, edges_txt, NULL, sites_txt, mutations_txt, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tables.mutations.parent[0], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[1], 0);
CU_ASSERT_EQUAL(tables.mutations.parent[2], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[3], 2);
CU_ASSERT_EQUAL(tables.mutations.parent[4], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[5], 4);
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}
/* The only edges span [0, 2), so the sequence ends with an empty tree over
 * [2, 3). Checks the summary counts and the per-tree parent arrays. */
static void
test_simplest_final_gap_tree_sequence(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0";
    const char *edges = "0 2 2 0,1\n";
    const char *sites = "0.5 0\n"
                        "1.5 0\n"
                        "2.5 0\n";
    const char *mutations = "0 0 1\n"
                            "1 1 1\n"
                            "2 0 1";
    const tsk_id_t z = TSK_NULL;
    /* Three parent entries per tree: the real tree first, then the gap. */
    tsk_id_t parents[] = {
        2, 2, z,
        z, z, z,
    };
    uint32_t num_trees = 2;
    tsk_treeseq_t tseq;

    tsk_treeseq_from_text(&tseq, 3, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&tseq), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&tseq), 3.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&tseq), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&tseq), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&tseq), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&tseq), 2);
    verify_trees(&tseq, num_trees, parents);

    tsk_treeseq_free(&tseq);
}
static void
test_simplest_final_gap_tsk_treeseq_mutation_parents(void)
{
const char *nodes_txt = "1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges_txt = "0 2 2 0,1\n";
const char *sites_txt = "0.5 0\n"
"1.5 0\n"
"2.5 0\n";
const char *mutations_txt = "0 0 1\n"
"0 0 1\n"
"1 1 1\n"
"1 1 1\n"
"2 0 1\n"
"2 0 1\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
int ret;
tsk_treeseq_from_text(
&ts, 3, nodes_txt, edges_txt, NULL, sites_txt, mutations_txt, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tables.mutations.parent[0], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[1], 0);
CU_ASSERT_EQUAL(tables.mutations.parent[2], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[3], 2);
CU_ASSERT_EQUAL(tables.mutations.parent[4], -1);
CU_ASSERT_EQUAL(tables.mutations.parent[5], 4);
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}
/* Build a tree sequence from three individuals and six nodes (no edges) and
 * check every field returned by tsk_treeseq_get_individual, including the
 * list of nodes that reference each individual. Also checks that a NaN
 * location value is accepted. */
static void
test_simplest_individuals(void)
{
    const char *individuals = "1 0.25 -1,-1\n"
                              "2 0.5,0.25 -1,-1\n"
                              "3 0.75 0,1\n";
    const char *nodes = "1 0 -1 -1\n"
                        "1 0 -1 1\n"
                        "0 0 -1 -1\n"
                        "1 0 -1 0\n"
                        "0 0 -1 1\n"
                        "0 0 -1 2\n";
    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;
    tsk_table_collection_t tc;
    tsk_treeseq_t tseq;
    tsk_node_t node_row;
    tsk_individual_t ind;
    tsk_id_t first_parent, second_parent;
    int rc;

    rc = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    tc.sequence_length = 1.0;
    parse_individuals(individuals, &tc.individuals);
    CU_ASSERT_EQUAL_FATAL(tc.individuals.num_rows, 3);
    parse_nodes(nodes, &tc.nodes);
    CU_ASSERT_EQUAL_FATAL(tc.nodes.num_rows, 6);

    rc = tsk_treeseq_init(&tseq, &tc, load_flags);
    CU_ASSERT_EQUAL_FATAL(rc, 0);

    /* Node -> individual references. */
    rc = tsk_treeseq_get_node(&tseq, 0, &node_row);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL_FATAL(node_row.individual, TSK_NULL);
    rc = tsk_treeseq_get_node(&tseq, 1, &node_row);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL_FATAL(node_row.individual, 1);

    /* Individual 0: one location value, no parents, referenced by node 3. */
    rc = tsk_treeseq_get_individual(&tseq, 0, &ind);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL_FATAL(ind.id, 0);
    CU_ASSERT_EQUAL_FATAL(ind.flags, 1);
    CU_ASSERT_EQUAL_FATAL(ind.location_length, 1);
    CU_ASSERT_EQUAL_FATAL(ind.location[0], 0.25);
    CU_ASSERT_EQUAL_FATAL(ind.parents_length, 2);
    CU_ASSERT_EQUAL_FATAL(ind.parents[0], -1);
    CU_ASSERT_EQUAL_FATAL(ind.parents[1], -1);
    first_parent = ind.id;
    CU_ASSERT_EQUAL_FATAL(ind.nodes_length, 1);
    CU_ASSERT_EQUAL_FATAL(ind.nodes[0], 3);

    /* Individual 1: two location values, no parents, nodes 1 and 4. */
    rc = tsk_treeseq_get_individual(&tseq, 1, &ind);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL_FATAL(ind.id, 1);
    CU_ASSERT_EQUAL_FATAL(ind.flags, 2);
    CU_ASSERT_EQUAL_FATAL(ind.location_length, 2);
    CU_ASSERT_EQUAL_FATAL(ind.location[0], 0.5);
    CU_ASSERT_EQUAL_FATAL(ind.location[1], 0.25);
    CU_ASSERT_EQUAL_FATAL(ind.parents_length, 2);
    CU_ASSERT_EQUAL_FATAL(ind.parents[0], -1);
    CU_ASSERT_EQUAL_FATAL(ind.parents[1], -1);
    second_parent = ind.id;
    CU_ASSERT_EQUAL_FATAL(ind.nodes_length, 2);
    CU_ASSERT_EQUAL_FATAL(ind.nodes[0], 1);
    CU_ASSERT_EQUAL_FATAL(ind.nodes[1], 4);

    /* Individual 2: its parents are the two individuals above; node 5. */
    rc = tsk_treeseq_get_individual(&tseq, 2, &ind);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL_FATAL(ind.id, 2);
    CU_ASSERT_EQUAL_FATAL(ind.flags, 3);
    CU_ASSERT_EQUAL_FATAL(ind.location_length, 1);
    CU_ASSERT_EQUAL_FATAL(ind.location[0], 0.75);
    CU_ASSERT_EQUAL_FATAL(ind.parents_length, 2);
    CU_ASSERT_EQUAL_FATAL(ind.parents[0], first_parent);
    CU_ASSERT_EQUAL_FATAL(ind.parents[1], second_parent);
    CU_ASSERT_EQUAL_FATAL(ind.nodes_length, 1);
    CU_ASSERT_EQUAL_FATAL(ind.nodes[0], 5);

    /* One past the last individual is rejected. */
    rc = tsk_treeseq_get_individual(&tseq, 3, &ind);
    CU_ASSERT_EQUAL_FATAL(rc, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
    tsk_treeseq_free(&tseq);

    /* NaN/infinity values are allowed in locations as they do not
     * affect the integrity of the model. */
    tc.individuals.location[0] = NAN;
    rc = tsk_treeseq_init(&tseq, &tc, load_flags);
    CU_ASSERT_EQUAL(rc, 0);
    rc = tsk_treeseq_get_individual(&tseq, 0, &ind);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT(!tsk_isfinite(ind.location[0]));

    tsk_treeseq_free(&tseq);
    tsk_table_collection_free(&tc);
}
/* Corrupt individual references one at a time and check that tsk_treeseq_init
 * reports the matching error. Each case restores the value it changed so the
 * next case starts from valid tables. */
static void
test_simplest_bad_individuals(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "1 0 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 2 0\n"
                        "0 1 2 1\n"
                        "0 1 4 3\n";
    const char *individuals = "1 0.25 -1\n"
                              "2 0.5,0.25 0\n";
    tsk_treeseq_t ts;
    tsk_table_collection_t tables;
    tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;
    tsk_id_t ret_id;
    int ret;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1.0;
    parse_nodes(nodes, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 5);
    parse_edges(edges, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 3);
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);

    /* Make sure we have a good set of records */
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts);

    /* Bad individual ID: negative and not TSK_NULL */
    tables.nodes.individual[0] = -2;
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
    tsk_treeseq_free(&ts);
    tables.nodes.individual[0] = TSK_NULL;

    /* Bad individual ID: 0 while the individual table is still empty */
    tables.nodes.individual[0] = 0;
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
    tsk_treeseq_free(&ts);
    tables.nodes.individual[0] = TSK_NULL;

    /* Add two individuals */
    parse_individuals(individuals, &tables.individuals);
    CU_ASSERT_EQUAL_FATAL(tables.individuals.num_rows, 2);

    /* Make sure we have a good set of records */
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts);

    /* Bad individual ID: one past the last row */
    tables.nodes.individual[0] = 2;
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
    tsk_treeseq_free(&ts);
    tables.nodes.individual[0] = TSK_NULL;

    /* Bad parent ID: negative and not TSK_NULL, then far out of range */
    tables.individuals.parents[0] = -2;
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
    tsk_treeseq_free(&ts);
    tables.individuals.parents[0] = 42;
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
    tsk_treeseq_free(&ts);
    tables.individuals.parents[0] = TSK_NULL;

    /* Parent is self */
    tables.individuals.parents[0] = 0;
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL(ret, TSK_ERR_INDIVIDUAL_SELF_PARENT);
    tsk_treeseq_free(&ts);
    tables.individuals.parents[0] = TSK_NULL;

    /* Unsorted individuals are OK: individual 0 may name the later row 1 as
     * its parent */
    tables.individuals.parents[0] = 1;
    ret = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL(ret, 0);
    tsk_treeseq_free(&ts);
    tables.individuals.parents[0] = TSK_NULL;

    /* ts was already released after the last init above, so only the tables
     * remain to be freed. */
    tsk_table_collection_free(&tables);
}
static void
test_simplest_bad_edges(void)
{
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0\n"
"1 0 0\n"
"0 1 0\n";
const char *edges = "0 1 2 0\n"
"0 1 2 1\n"
"0 1 4 3\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
int ret;
tsk_id_t ret_id;
tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1.0;
parse_nodes(nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 5);
parse_edges(edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 3);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/* Make sure we have a good set of records */
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);
/* Bad population ID */
tables.nodes.population[0] = -2;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.nodes.population[0] = 0;
/* Bad population ID */
tables.nodes.population[0] = 1;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.nodes.population[0] = 0;
/* Bad interval */
tables.edges.right[0] = 0.0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);
tsk_treeseq_free(&ts);
tables.edges.right[0] = 1.0;
/* Nonfinite coords */
tables.edges.left[0] = NAN;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);
tsk_treeseq_free(&ts);
tables.edges.left[0] = 1.0;
tables.edges.left[0] = INFINITY;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);
tsk_treeseq_free(&ts);
tables.edges.left[0] = 1.0;
tables.edges.right[0] = NAN;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);
tsk_treeseq_free(&ts);
tables.edges.right[0] = 1.0;
tables.edges.right[0] = -INFINITY;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);
tsk_treeseq_free(&ts);
tables.edges.right[0] = 1.0;
/* Left coordinate < 0. */
tables.edges.left[0] = -1;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_LEFT_LESS_ZERO);
tsk_treeseq_free(&ts);
tables.edges.left[0] = 0.0;
/* Right coordinate > sequence length. */
tables.edges.right[0] = 2.0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);
tsk_treeseq_free(&ts);
tables.edges.right[0] = 1.0;
/* Duplicate records */
tables.edges.child[0] = 1;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_DUPLICATE_EDGES);
tsk_treeseq_free(&ts);
tables.edges.child[0] = 0;
/* Duplicate records */
tables.edges.child[0] = 1;
tables.edges.left[0] = 0.5;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_EDGES_NOT_SORTED_LEFT);
tsk_treeseq_free(&ts);
tables.edges.child[0] = 0;
tables.edges.left[0] = 0.0;
/* child node == parent */
tables.edges.child[1] = 2;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_NODE_TIME_ORDERING);
tsk_treeseq_free(&ts);
tables.edges.child[1] = 1;
/* Unsorted child nodes */
tables.edges.child[0] = 1;
tables.edges.child[1] = 0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_EDGES_NOT_SORTED_CHILD);
tsk_treeseq_free(&ts);
tables.edges.child[0] = 0;
tables.edges.child[1] = 1;
/* discontinuous parent nodes */
/* Swap rows 1 and 2 */
tables.edges.parent[1] = 4;
tables.edges.child[1] = 3;
tables.edges.parent[2] = 2;
tables.edges.child[2] = 1;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS);
tsk_treeseq_free(&ts);
tables.edges.parent[2] = 4;
tables.edges.child[2] = 3;
tables.edges.parent[1] = 2;
tables.edges.child[1] = 1;
/* Null parent */
tables.edges.parent[0] = TSK_NULL;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_NULL_PARENT);
tsk_treeseq_free(&ts);
tables.edges.parent[0] = 2;
/* parent not in nodes list */
tables.nodes.num_rows = 2;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.nodes.num_rows = 5;
/* parent negative */
tables.edges.parent[0] = -2;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.edges.parent[0] = 2;
/* Null child */
tables.edges.child[0] = TSK_NULL;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_NULL_CHILD);
tsk_treeseq_free(&ts);
tables.edges.child[0] = 0;
/* child node reference out of bounds */
tables.edges.child[0] = 100;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.edges.child[0] = 0;
/* child node reference negative */
tables.edges.child[0] = -2;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.edges.child[0] = 0;
/* Make sure we've preserved a good tree sequence */
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, 0);
tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}
/*
 * Corrupts single entries of the edge insertion/removal index arrays and
 * checks that the TSK_CHECK_TREES integrity check reports
 * TSK_ERR_EDGE_OUT_OF_BOUNDS. Also checks that TSK_CHECK_INDEXES reports
 * TSK_ERR_TABLES_NOT_INDEXED before the index is built and after it is dropped.
 */
static void
test_simplest_bad_indexes(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "1 0 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 2 0\n"
                        "0 1 2 1\n"
                        "0 1 4 3\n";
    /* Index entries that the check below is expected to reject; the edge
     * table has three rows. */
    const tsk_id_t invalid_entries[] = { -1, 3, 4, 1000 };
    const tsk_size_t num_invalid
        = sizeof(invalid_entries) / sizeof(invalid_entries[0]);
    tsk_table_collection_t tables;
    tsk_size_t k;
    tsk_id_t row_id;
    tsk_id_t num_trees;
    int status;

    status = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tables.sequence_length = 1.0;
    parse_nodes(nodes, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 5);
    parse_edges(edges, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 3);
    row_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);

    /* The untouched tables pass the basic checks, but asking for index
     * checks fails until the index has been built. */
    status = (int) tsk_table_collection_check_integrity(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_TABLES_NOT_INDEXED);
    status = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    /* With TSK_CHECK_TREES a successful check returns the number of trees */
    num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
    CU_ASSERT_EQUAL_FATAL(num_trees, 1);

    for (k = 0; k < num_invalid; k++) {
        /* Break the insertion order, check, then put the entry back. */
        tables.indexes.edge_insertion_order[0] = invalid_entries[k];
        num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
        CU_ASSERT_EQUAL_FATAL(num_trees, TSK_ERR_EDGE_OUT_OF_BOUNDS);
        tables.indexes.edge_insertion_order[0] = 0;

        /* Same again for the removal order. */
        tables.indexes.edge_removal_order[0] = invalid_entries[k];
        num_trees = tsk_table_collection_check_integrity(&tables, TSK_CHECK_TREES);
        CU_ASSERT_EQUAL_FATAL(num_trees, TSK_ERR_EDGE_OUT_OF_BOUNDS);
        tables.indexes.edge_removal_order[0] = 0;
    }

    /* Dropping the index brings back the "not indexed" error. */
    status = tsk_table_collection_drop_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = (int) tsk_table_collection_check_integrity(&tables, TSK_CHECK_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_TABLES_NOT_INDEXED);

    tsk_table_collection_free(&tables);
}
static void
test_simplest_bad_migrations(void)
{
tsk_table_collection_t tables;
int ret;
tsk_id_t ret_id;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
/* insert two populations and one node to refer to. */
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);
/* One migration, node 0 goes from population 0 to 1. */
ret_id
= tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
/* We only need basic intregity checks for migrations */
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Bad node reference */
tables.migrations.node[0] = -1;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tables.migrations.node[0] = 0;
/* Bad node reference */
tables.migrations.node[0] = 1;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tables.migrations.node[0] = 0;
/* Bad population reference */
tables.migrations.source[0] = -1;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tables.migrations.source[0] = 0;
/* Bad population reference */
tables.migrations.source[0] = 2;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tables.migrations.source[0] = 0;
/* Bad population reference */
tables.migrations.dest[0] = -1;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tables.migrations.dest[0] = 1;
/* Bad population reference */
tables.migrations.dest[0] = 2;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tables.migrations.dest[0] = 1;
/* Bad time values */
tables.migrations.time[0] = NAN;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_TIME_NONFINITE);
tables.migrations.time[0] = 1.0;
tables.migrations.time[0] = INFINITY;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_TIME_NONFINITE);
tables.migrations.time[0] = 1.0;
/* Bad left coordinate */
tables.migrations.left[0] = -1;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_LEFT_LESS_ZERO);
tables.migrations.left[0] = 0;
tables.migrations.left[0] = NAN;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);
tables.migrations.left[0] = 0;
tables.migrations.left[0] = -INFINITY;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);
tables.migrations.left[0] = 0;
/* Bad right coordinate */
tables.migrations.right[0] = 2;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);
tables.migrations.right[0] = 1;
tables.migrations.right[0] = NAN;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);
tables.migrations.right[0] = 1;
tables.migrations.right[0] = INFINITY;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_GENOME_COORDS_NONFINITE);
tables.migrations.right[0] = 1;
/* Bad interval coordinate */
tables.migrations.right[0] = 0;
ret = (int) tsk_table_collection_check_integrity(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_EDGE_INTERVAL);
tables.migrations.right[0] = 1;
tsk_table_collection_free(&tables);
}
/*
 * Simplifying a table collection that contains a migration row is expected
 * to fail with TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED.
 */
static void
test_simplest_migration_simplify(void)
{
    tsk_table_collection_t tables;
    tsk_id_t samples[] = { 0, 1 };
    tsk_id_t row_id;
    int status;

    status = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tables.sequence_length = 1;

    /* Two sample nodes and two populations to refer to. */
    row_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);
    row_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);

    /* A single migration: node 0 moves from population 0 to population 1. */
    row_id
        = tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);

    status = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED);

    tsk_table_collection_free(&tables);
}
/*
 * Loads a two-sample, one-parent tree sequence and checks the parent,
 * sibling and child arrays of the first tree.
 */
static void
test_simplest_overlapping_parents(void)
{
    const char *nodes = "1 0 -1\n"
                        "1 0 -1\n"
                        "0 1 -1\n";
    const char *edges = "0 1 2 0\n"
                        "0 1 2 1\n";
    const tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    tsk_tree_t first_tree;
    int status;

    status = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tables.sequence_length = 1;
    parse_nodes(nodes, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);
    parse_edges(edges, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);

    /* Explicitly set the left coordinate and parent of the first edge. */
    tables.edges.left[0] = 0;
    tables.edges.parent[0] = 2;

    status = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_tree_init(&first_tree, &ts, 0);
    CU_ASSERT_EQUAL(status, 0);
    status = tsk_tree_first(&first_tree);
    CU_ASSERT_EQUAL(status, TSK_TREE_OK);

    /* Both samples hang off node 2 ... */
    CU_ASSERT_EQUAL(first_tree.parent[0], 2);
    CU_ASSERT_EQUAL(first_tree.parent[1], 2);
    /* ... which has no siblings and has children 0 (left) and 1 (right). */
    CU_ASSERT_EQUAL(first_tree.left_sib[2], TSK_NULL);
    CU_ASSERT_EQUAL(first_tree.right_sib[2], TSK_NULL);
    CU_ASSERT_EQUAL(first_tree.left_child[2], 0);
    CU_ASSERT_EQUAL(first_tree.right_child[2], 1);
    /* Sibling links between the two samples. */
    CU_ASSERT_EQUAL(first_tree.left_sib[0], TSK_NULL);
    CU_ASSERT_EQUAL(first_tree.right_sib[0], 1);
    CU_ASSERT_EQUAL(first_tree.left_sib[1], 0);
    CU_ASSERT_EQUAL(first_tree.right_sib[1], TSK_NULL);
    CU_ASSERT_EQUAL(first_tree.num_children[2], 2);

    tsk_tree_free(&first_tree);
    tsk_treeseq_free(&ts);
    tsk_table_collection_free(&tables);
}
/*
 * Two edges over the same interval give child 0 two different parents
 * (1 and 2); tree sequence initialisation is expected to fail with
 * TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN.
 */
static void
test_simplest_contradictory_children(void)
{
    const char *nodes = "1 0 -1\n"
                        "1 1 -1\n"
                        "0 1 -1\n";
    const char *edges = "0 1 1 0\n"
                        "0 1 2 0\n";
    const tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int status;

    status = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    parse_nodes(nodes, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);
    parse_edges(edges, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);
    tables.sequence_length = 1.0;

    status = tsk_treeseq_init(&ts, &tables, load_flags);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);

    tsk_treeseq_free(&ts);
    tsk_table_collection_free(&tables);
}
static void
test_simplest_overlapping_edges_simplify(void)
{
const char *nodes = "1 0 -1\n"
"1 0 -1\n"
"1 0 -1\n"
"0 1 -1";
const char *edges = "0 2 3 0\n"
"1 3 3 1\n"
"0 3 3 2\n";
tsk_id_t samples[] = { 0, 1, 2 };
tsk_table_collection_t tables;
int ret;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 3;
parse_nodes(nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 4);
parse_edges(edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 3);
ret = tsk_table_collection_simplify(&tables, samples, 3, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tables.nodes.num_rows, 4);
CU_ASSERT_EQUAL(tables.edges.num_rows, 3);
/* Identical to the input.
0 2 3 0
1 3 3 1
0 3 3 2
*/
CU_ASSERT_EQUAL(tables.edges.left[0], 0);
CU_ASSERT_EQUAL(tables.edges.left[1], 1);
CU_ASSERT_EQUAL(tables.edges.left[2], 0);
CU_ASSERT_EQUAL(tables.edges.right[0], 2);
CU_ASSERT_EQUAL(tables.edges.right[1], 3);
CU_ASSERT_EQUAL(tables.edges.right[2], 3);
CU_ASSERT_EQUAL(tables.edges.parent[0], 3);
CU_ASSERT_EQUAL(tables.edges.parent[1], 3);
CU_ASSERT_EQUAL(tables.edges.parent[2], 3);
CU_ASSERT_EQUAL(tables.edges.child[0], 0);
CU_ASSERT_EQUAL(tables.edges.child[1], 1);
CU_ASSERT_EQUAL(tables.edges.child[2], 2);
tsk_table_collection_free(&tables);
}
static void
test_simplest_overlapping_unary_edges_simplify(void)
{
const char *nodes = "1 0 -1\n"
"1 0 -1\n"
"0 1 -1";
const char *edges = "0 2 2 0\n"
"1 3 2 1\n";
tsk_id_t samples[] = { 0, 1 };
tsk_table_collection_t tables;
int ret;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 3;
parse_nodes(nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);
parse_edges(edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);
ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tables.nodes.num_rows, 3);
CU_ASSERT_EQUAL(tables.edges.num_rows, 2);
/* Because we only sample 0 and 1, the flanking unary edges are removed
1 2 2 0
1 2 2 1
*/
CU_ASSERT_EQUAL(tables.edges.left[0], 1);
CU_ASSERT_EQUAL(tables.edges.right[0], 2);
CU_ASSERT_EQUAL(tables.edges.parent[0], 2);
CU_ASSERT_EQUAL(tables.edges.child[0], 0);
CU_ASSERT_EQUAL(tables.edges.left[1], 1);
CU_ASSERT_EQUAL(tables.edges.right[1], 2);
CU_ASSERT_EQUAL(tables.edges.parent[1], 2);
CU_ASSERT_EQUAL(tables.edges.child[1], 1);
tsk_table_collection_free(&tables);
}
static void
test_simplest_overlapping_unary_edges_internal_samples_simplify(void)
{
const char *nodes = "1 0 -1\n"
"1 0 -1\n"
"1 1 -1";
const char *edges = "0 2 2 0\n"
"1 3 2 1\n";
tsk_id_t samples[] = { 0, 1, 2 };
tsk_table_collection_t tables;
int ret;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 3;
parse_nodes(nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 3);
parse_edges(edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 2);
ret = tsk_table_collection_simplify(&tables, samples, 3, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tables.nodes.num_rows, 3);
CU_ASSERT_EQUAL(tables.edges.num_rows, 2);
/* Identical to the input.
0 2 2 0
1 3 2 1
*/
CU_ASSERT_EQUAL(tables.edges.left[0], 0);
CU_ASSERT_EQUAL(tables.edges.left[1], 1);
CU_ASSERT_EQUAL(tables.edges.right[0], 2);
CU_ASSERT_EQUAL(tables.edges.right[1], 3);
CU_ASSERT_EQUAL(tables.edges.parent[0], 2);
CU_ASSERT_EQUAL(tables.edges.parent[1], 2);
CU_ASSERT_EQUAL(tables.edges.child[0], 0);
CU_ASSERT_EQUAL(tables.edges.child[1], 1);
tsk_table_collection_free(&tables);
}
static void
test_simplest_reduce_site_topology(void)
{
/* Two trees side by side, with a site on the second one. The first
* tree should disappear. */
const char *nodes = "1 0 -1\n"
"1 0 -1\n"
"0 1 -1\n"
"0 2 -1\n";
const char *edges = "0 1 2 0\n"
"0 1 2 1\n"
"1 2 3 0\n"
"1 2 3 1\n";
const char *sites = "1.0 0\n";
tsk_id_t samples[] = { 0, 1 };
tsk_table_collection_t tables;
int ret;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 2;
parse_nodes(nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 4);
parse_edges(edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 4);
parse_sites(sites, &tables.sites);
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 1);
ret = tsk_table_collection_simplify(
&tables, samples, 2, TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tables.nodes.num_rows, 3);
CU_ASSERT_EQUAL(tables.edges.num_rows, 2);
CU_ASSERT_EQUAL(tables.edges.left[0], 0);
CU_ASSERT_EQUAL(tables.edges.left[1], 0);
CU_ASSERT_EQUAL(tables.edges.right[0], 2);
CU_ASSERT_EQUAL(tables.edges.right[1], 2);
CU_ASSERT_EQUAL(tables.edges.parent[0], 2);
CU_ASSERT_EQUAL(tables.edges.parent[1], 2);
CU_ASSERT_EQUAL(tables.edges.child[0], 0);
CU_ASSERT_EQUAL(tables.edges.child[1], 1);
tsk_table_collection_free(&tables);
}
/*
 * Simplifies an 18-node, 18-edge example with respect to its six sample
 * nodes and checks the resulting row counts. The example was chosen as the
 * simplest one found that exercises the inner loops of the
 * simplifier_extract_ancestry function.
 */
static void
test_simplest_simplify_defragment(void)
{
    const char *nodes = "0 2 -1\n"
                        "0 2 -1\n"
                        "0 2 -1\n"
                        "0 2 -1\n"
                        "0 2 -1\n"
                        "0 2 -1\n"
                        "0 1 -1\n"
                        "0 1 -1\n"
                        "0 1 -1\n"
                        "0 1 -1\n"
                        "0 1 -1\n"
                        "0 1 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n"
                        "1 0 -1\n";
    const char *edges = "0.00000000 0.20784841 8 12\n"
                        "0.00000000 0.42202433 8 15\n"
                        "0.00000000 0.63541014 8 16\n"
                        "0.42202433 1.00000000 9 15\n"
                        "0.00000000 1.00000000 9 17\n"
                        "0.00000000 1.00000000 10 14\n"
                        "0.20784841 1.00000000 11 12\n"
                        "0.00000000 1.00000000 11 13\n"
                        "0.63541014 1.00000000 11 16\n"
                        "0.00000000 1.00000000 0 10\n"
                        "0.62102072 1.00000000 1 9\n"
                        "0.00000000 1.00000000 1 11\n"
                        "0.00000000 0.26002984 2 6\n"
                        "0.26002984 1.00000000 2 6\n"
                        "0.00000000 0.62102072 2 9\n"
                        "0.55150554 1.00000000 3 8\n"
                        "0.00000000 1.00000000 4 7\n"
                        "0.00000000 0.55150554 5 8\n";
    tsk_id_t samples[] = { 12, 13, 14, 15, 16, 17 };
    tsk_table_collection_t tables;
    int status;

    status = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tables.sequence_length = 1;

    parse_nodes(nodes, &tables.nodes);
    CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 18);
    parse_edges(edges, &tables.edges);
    CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 18);

    status = tsk_table_collection_simplify(&tables, samples, 6, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL(tables.nodes.num_rows, 10);
    CU_ASSERT_EQUAL(tables.edges.num_rows, 10);

    tsk_table_collection_free(&tables);
}
/*
 * Checks how simplify treats unreferenced populations.
 *
 * Three populations are added with one-byte metadata "0", "1" and "2", and
 * two sample nodes both refer to population 1. A simplify with no options is
 * expected to keep all three population rows, while
 * TSK_SIMPLIFY_FILTER_POPULATIONS is expected to keep only the referenced row
 * and remap the nodes' population to 0.
 */
static void
test_simplest_population_filter(void)
{
    tsk_table_collection_t tables;
    tsk_id_t samples[] = { 0, 1 };
    tsk_id_t ret_id;
    int ret;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* Check every insertion so that a failed add_row is reported here
     * rather than surfacing later as a confusing simplify failure. */
    ret_id = tsk_population_table_add_row(&tables.populations, "0", 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_population_table_add_row(&tables.populations, "1", 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    ret_id = tsk_population_table_add_row(&tables.populations, "2", 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 2);

    /* Two nodes referring to population 1 */
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 1, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 1, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);

    /* No filtering requested: all three populations are retained. */
    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tables.nodes.num_rows, 2);
    CU_ASSERT_EQUAL(tables.populations.num_rows, 3);
    CU_ASSERT_EQUAL(tables.populations.metadata[0], '0');
    CU_ASSERT_EQUAL(tables.populations.metadata[1], '1');
    CU_ASSERT_EQUAL(tables.populations.metadata[2], '2');

    /* Filtering requested: only the referenced population survives and the
     * node references are remapped to its new id. */
    ret = tsk_table_collection_simplify(
        &tables, samples, 2, TSK_SIMPLIFY_FILTER_POPULATIONS, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tables.nodes.num_rows, 2);
    CU_ASSERT_EQUAL(tables.nodes.population[0], 0);
    CU_ASSERT_EQUAL(tables.nodes.population[1], 0);
    CU_ASSERT_EQUAL(tables.populations.num_rows, 1);
    CU_ASSERT_EQUAL(tables.populations.metadata[0], '1');

    tsk_table_collection_free(&tables);
}
/*
 * Checks how simplify treats unreferenced individuals.
 *
 * Three individuals are added with one-byte metadata "0", "1" and "2", and
 * two sample nodes both refer to individual 1. A simplify with no options is
 * expected to keep all three individual rows, while
 * TSK_SIMPLIFY_FILTER_INDIVIDUALS is expected to keep only the referenced row
 * and remap the nodes' individual to 0.
 */
static void
test_simplest_individual_filter(void)
{
    tsk_table_collection_t tables;
    tsk_id_t samples[] = { 0, 1 };
    tsk_id_t ret_id;
    int ret;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* Check every insertion so that a failed add_row is reported here
     * rather than surfacing later as a confusing simplify failure. */
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 0, "0", 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 0, "1", 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    ret_id = tsk_individual_table_add_row(
        &tables.individuals, 0, NULL, 0, NULL, 0, "2", 1);
    CU_ASSERT_EQUAL_FATAL(ret_id, 2);

    /* Two nodes referring to individual 1 */
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_node_table_add_row(
        &tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);

    /* No filtering requested: all three individuals are retained. */
    ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tables.nodes.num_rows, 2);
    CU_ASSERT_EQUAL(tables.individuals.num_rows, 3);
    CU_ASSERT_EQUAL(tables.individuals.metadata[0], '0');
    CU_ASSERT_EQUAL(tables.individuals.metadata[1], '1');
    CU_ASSERT_EQUAL(tables.individuals.metadata[2], '2');

    /* Filtering requested: only the referenced individual survives and the
     * node references are remapped to its new id. */
    ret = tsk_table_collection_simplify(
        &tables, samples, 2, TSK_SIMPLIFY_FILTER_INDIVIDUALS, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tables.nodes.num_rows, 2);
    CU_ASSERT_EQUAL(tables.nodes.individual[0], 0);
    CU_ASSERT_EQUAL(tables.nodes.individual[1], 0);
    CU_ASSERT_EQUAL(tables.individuals.num_rows, 1);
    CU_ASSERT_EQUAL(tables.individuals.metadata[0], '1');

    tsk_table_collection_free(&tables);
}
static void
test_simplest_no_node_filter(void)
{
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0\n"
"0 1 0"; /* unreferenced node */
const char *edges = "0 1 2 0,1\n";
tsk_treeseq_t ts, simplified;
tsk_id_t sample_ids[] = { 0, 1 };
tsk_id_t node_map[] = { -1, -1, -1, -1 };
tsk_id_t j;
int ret;
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
ret = tsk_treeseq_simplify(
&ts, NULL, 0, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
/* Reversing sample order makes no difference */
sample_ids[0] = 1;
sample_ids[1] = 0;
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(
&ts, sample_ids, 1, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, node_map);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 4);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 0);
for (j = 0; j < 4; j++) {
CU_ASSERT_EQUAL(node_map[j], j);
}
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(&ts, sample_ids, 1,
TSK_SIMPLIFY_NO_FILTER_NODES | TSK_SIMPLIFY_KEEP_INPUT_ROOTS
| TSK_SIMPLIFY_KEEP_UNARY,
&simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 4);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 1);
tsk_treeseq_free(&simplified);
sample_ids[0] = 0;
sample_ids[1] = 0;
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&ts);
}
static void
test_simplest_no_update_flags(void)
{
const char *nodes = "0 0 0\n"
"1 0 0\n"
"0 1 0\n";
const char *edges = "0 1 2 0,1\n";
tsk_treeseq_t ts, simplified;
tsk_id_t sample_ids[] = { 0, 1 };
int ret;
tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
/* We have a mixture of sample and non-samples in the input tables */
ret = tsk_treeseq_simplify(
&ts, sample_ids, 2, TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS, &simplified, NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
ret = tsk_treeseq_simplify(&ts, sample_ids, 2,
TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS | TSK_SIMPLIFY_NO_FILTER_NODES, &simplified,
NULL);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
tsk_treeseq_free(&simplified);
tsk_treeseq_free(&ts);
}
/*
 * Exercises tsk_tree_map_mutations on a two-sample tree: all-zero
 * genotypes, a single derived sample, a negative genotype value, the state
 * reached after iterating past the last tree, and the
 * TSK_MM_FIXED_ANCESTRAL_STATE option.
 */
static void
test_simplest_map_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0";
    const char *edges = "0 1 2 0,1\n";
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int32_t genotypes[] = { 0, 0 };
    tsk_state_transition_t *trans;
    tsk_size_t num_trans;
    int32_t anc_state;
    int err;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));

    /* Identical genotypes: no transitions are needed. */
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 0);
    free(trans);

    /* Sample 0 carries state 1: one transition directly above it. */
    genotypes[0] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 0);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    free(trans);

    /* A genotype of -1 for sample 0 produces no transitions. */
    genotypes[0] = -1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 0);
    free(trans);

    /* Check the null tree */
    genotypes[0] = 1;
    CU_ASSERT_FALSE(tsk_tree_next(&tree));
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 0);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    free(trans);

    /* Assign the ancestral_state: it is set to 0 before the call while
     * both samples carry state 1, so each sample gets its own transition. */
    genotypes[0] = 1;
    genotypes[1] = 1;
    anc_state = 0;
    err = tsk_tree_map_mutations(&tree, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,
        &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 2);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    CU_ASSERT_EQUAL_FATAL(trans[1].node, 0);
    CU_ASSERT_EQUAL_FATAL(trans[1].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[1].state, 1);
    free(trans);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
/*
 * tsk_tree_map_mutations on a tree whose single internal node has four
 * sample children.
 */
static void
test_simplest_nonbinary_map_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0";
    const char *edges = "0 1 4 0,1,2,3\n";
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int32_t genotypes[] = { 0, 0, 0, 0 };
    tsk_state_transition_t *trans;
    tsk_size_t num_trans;
    int32_t anc_state;
    int err;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));

    /* Identical genotypes: no transitions are needed. */
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 0);
    free(trans);

    /* Sample 0 carries state 1: one transition directly above it. */
    genotypes[0] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 0);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    free(trans);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
/*
 * tsk_tree_map_mutations on a tree where each sample sits below its own
 * unary node (0 below 2, 1 below 3), both joined at root 4.
 */
static void
test_simplest_unary_map_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 1 0\n"
                        "0 2 0";
    const char *edges = "0 1 2 0\n"
                        "0 1 3 1\n"
                        "0 1 4 2,3\n";
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int32_t genotypes[] = { 0, 0 };
    tsk_state_transition_t *trans;
    tsk_size_t num_trans;
    int32_t anc_state;
    int err;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));

    /* Identical genotypes: no transitions are needed. */
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 0);
    free(trans);

    /* Sample 0 carries state 1: the transition is reported on node 2, the
     * unary node above sample 0. */
    genotypes[0] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 2);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    free(trans);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
/*
 * tsk_tree_map_mutations on a tree whose root has two sample children
 * (0, 1) and two non-sample leaf children (3, 4). Genotypes are supplied
 * for the two samples only.
 */
static void
test_simplest_non_sample_leaf_map_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 0 0\n"
                        "0 0 0";
    const char *edges = "0 1 2 0,1,3,4\n";
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int32_t genotypes[] = { 0, 0 };
    tsk_state_transition_t *trans;
    tsk_size_t num_trans;
    int32_t anc_state;
    int err;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));

    /* Identical genotypes: no transitions are needed. */
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 0);
    free(trans);

    /* Sample 0 carries state 1: one transition directly above it. */
    genotypes[0] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 0);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    free(trans);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
/*
 * tsk_tree_map_mutations on a tree whose root (node 2) is itself a sample,
 * so three genotypes are supplied.
 */
static void
test_simplest_internal_sample_map_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 1 0";
    const char *edges = "0 1 2 0,1\n";
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int32_t genotypes[] = { 0, 0, 0 };
    tsk_state_transition_t *trans;
    tsk_size_t num_trans;
    int32_t anc_state;
    int err;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));

    /* Identical genotypes: no transitions are needed. */
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 0);
    free(trans);

    /* Sample 0 carries state 1: one transition directly above it. */
    genotypes[0] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 0);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    free(trans);

    /* The root sample also carries state 1 (genotypes are now 1, 0, 1):
     * the ancestral state becomes 1 and sample 1 changes back to 0. */
    genotypes[2] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 1);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 0);
    free(trans);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
/*
 * tsk_tree_map_mutations on a tree with two roots: node 4 above samples
 * 0 and 1, and node 5 above samples 2 and 3.
 */
static void
test_simplest_multiple_root_map_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 4 0,1\n"
                        "0 1 5 2,3\n";
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int32_t genotypes[] = { 0, 0, 0, 0 };
    tsk_state_transition_t *trans;
    tsk_size_t num_trans;
    int32_t anc_state;
    int err;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));

    /* Identical genotypes: no transitions are needed. */
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 0);
    free(trans);

    /* Sample 0 carries state 1: one transition directly above it. */
    genotypes[0] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 0);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    free(trans);

    /* Samples 0 and 1 both carry state 1: a single transition on their
     * shared root, node 4. */
    genotypes[1] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 1);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 4);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    free(trans);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
/*
 * tsk_tree_map_mutations on a tree where the internal nodes 2 and 3 are
 * samples as well as the leaves 0 and 1, checking a transition whose
 * parent is another transition.
 */
static void
test_simplest_chained_map_mutations(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 1 0\n"
                        "1 1 0\n"
                        "0 2 0";
    const char *edges = "0 1 2 0\n"
                        "0 1 3 1\n"
                        "0 1 4 2,3\n";
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int32_t genotypes[] = { 0, 0, 0, 0 };
    tsk_state_transition_t *trans;
    tsk_size_t num_trans;
    int32_t anc_state;
    int err;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));

    /* Identical genotypes: no transitions are needed. */
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 0);
    free(trans);

    /* Internal sample 2 carries state 1 while sample 0 below it stays at
     * 0: a transition to 1 on node 2, then a transition back to 0 on node
     * 0 whose parent is transition 0. */
    genotypes[2] = 1;
    err = tsk_tree_map_mutations(
        &tree, genotypes, NULL, 0, &anc_state, &num_trans, &trans);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(anc_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_trans, 2);
    CU_ASSERT_EQUAL_FATAL(trans[0].node, 2);
    CU_ASSERT_EQUAL_FATAL(trans[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(trans[0].state, 1);
    CU_ASSERT_EQUAL_FATAL(trans[1].node, 0);
    CU_ASSERT_EQUAL_FATAL(trans[1].parent, 0);
    CU_ASSERT_EQUAL_FATAL(trans[1].state, 0);
    free(trans);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
static void
test_simplest_mutation_edges(void)
{
const char *nodes = "1 0 0\n"
"0 1 0\n"
"0 1 0";
const char *edges = "0 1 1 0\n"
"1 2 2 0\n";
const char *sites = "0.5 0\n"
"1.5 0\n";
const char *mutations = "0 2 1\n"
"0 1 1\n"
"0 0 1\n"
"1 2 1\n"
"1 1 1\n"
"1 0 1\n";
tsk_treeseq_t ts;
tsk_tree_t tree;
/* We have mutations over roots, samples and just isolated nodes */
tsk_id_t mutation_edges[] = { -1, -1, 0, -1, -1, 1 };
tsk_size_t i, j, k, t;
tsk_mutation_t mut;
tsk_site_t site;
int ret;
tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 2);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
for (j = 0; j < tsk_treeseq_get_num_mutations(&ts); j++) {
ret = tsk_treeseq_get_mutation(&ts, (tsk_id_t) j, &mut);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(mut.edge, mutation_edges[j]);
}
ret = tsk_tree_init(&tree, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);
i = 0;
for (t = 0; t < 2; t++) {
ret = tsk_tree_next(&tree);
CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
for (j = 0; j < tree.sites_length; j++) {
site = tree.sites[j];
for (k = 0; k < site.mutations_length; k++) {
CU_ASSERT_EQUAL(site.mutations[k].edge, mutation_edges[i]);
i++;
}
}
}
CU_ASSERT_EQUAL(i, 6);
tsk_tree_free(&tree);
tsk_treeseq_free(&ts);
}
/*=======================================================
* Single tree tests.
*======================================================*/
/* Load the shared single-tree example (nodes and edges only) and check the
 * summary counts reported by the resulting tree sequence. */
static void
test_single_tree_good_records(void)
{
    tsk_treeseq_t treeseq;

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, NULL, NULL, NULL, NULL, 0);

    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&treeseq), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&treeseq), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&treeseq), 7);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&treeseq), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&treeseq), 1);
    verify_edge_array_trees(&treeseq);

    tsk_treeseq_free(&treeseq);
}
/* Same summary checks as the binary case, but for a single tree whose
 * internal nodes have four, two and three children respectively. */
static void
test_single_nonbinary_tree_good_records(void)
{
    tsk_treeseq_t treeseq;
    const char *edges = "0 1 7 0,1,2,3\n"
                        "0 1 8 4,5\n"
                        "0 1 9 6,7,8";
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 2 0\n"
                        "0 3 0\n";

    tsk_treeseq_from_text(
        &treeseq, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);

    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&treeseq), 7);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&treeseq), 1.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&treeseq), 10);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&treeseq), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&treeseq), 1);
    verify_edge_array_trees(&treeseq);

    tsk_treeseq_free(&treeseq);
}
/* Corrupt one field of the single-tree example at a time and check that
 * tsk_treeseq_init reports the matching error. Each corruption is undone
 * before the next one, and the final init must succeed again. */
static void
test_single_tree_bad_records(void)
{
    tsk_flags_t init_flags = TSK_TS_INIT_BUILD_INDEXES;
    tsk_table_collection_t tc;
    tsk_treeseq_t treeseq;
    int rc = 0;

    rc = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    tc.sequence_length = 1;
    parse_nodes(single_tree_ex_nodes, &tc.nodes);
    CU_ASSERT_EQUAL_FATAL(tc.nodes.num_rows, 7);
    parse_edges(single_tree_ex_edges, &tc.edges);
    CU_ASSERT_EQUAL_FATAL(tc.edges.num_rows, 6);

    /* Parent times no longer in sorted order */
    tc.nodes.time[5] = 0.5;
    rc = tsk_treeseq_init(&treeseq, &tc, init_flags);
    CU_ASSERT_EQUAL(rc, TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);
    tsk_treeseq_free(&treeseq);
    tc.nodes.time[5] = 2.0;

    /* Edge left coordinate beyond the right coordinate */
    tc.edges.left[2] = 2.0;
    rc = tsk_treeseq_init(&treeseq, &tc, init_flags);
    CU_ASSERT_EQUAL(rc, TSK_ERR_BAD_EDGE_INTERVAL);
    tsk_treeseq_free(&treeseq);
    tc.edges.left[2] = 0.0;

    /* Infinite node time */
    tc.nodes.time[5] = INFINITY;
    rc = tsk_treeseq_init(&treeseq, &tc, init_flags);
    CU_ASSERT_EQUAL(rc, TSK_ERR_TIME_NONFINITE);
    tsk_treeseq_free(&treeseq);
    tc.nodes.time[5] = 2.0;

    /* NaN node time */
    tc.nodes.time[5] = NAN;
    rc = tsk_treeseq_init(&treeseq, &tc, init_flags);
    CU_ASSERT_EQUAL(rc, TSK_ERR_TIME_NONFINITE);
    tsk_treeseq_free(&treeseq);
    tc.nodes.time[5] = 2.0;

    /* With everything restored the tables load cleanly. */
    rc = tsk_treeseq_init(&treeseq, &tc, init_flags);
    CU_ASSERT_EQUAL(rc, 0);
    tsk_treeseq_free(&treeseq);

    tsk_table_collection_free(&tc);
}
static void
test_single_tree_good_mutations(void)
{
tsk_treeseq_t ts;
tsk_size_t j;
tsk_size_t num_sites = 3;
tsk_size_t num_mutations = 7;
tsk_site_t other_sites[num_sites];
tsk_mutation_t other_mutations[num_mutations];
int ret;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);
CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 1.0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 7);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), num_sites);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), num_mutations);
for (j = 0; j < num_sites; j++) {
ret = tsk_treeseq_get_site(&ts, (tsk_id_t) j, other_sites + j);
CU_ASSERT_EQUAL(ret, 0);
}
for (j = 0; j < num_mutations; j++) {
ret = tsk_treeseq_get_mutation(&ts, (tsk_id_t) j, other_mutations + j);
CU_ASSERT_EQUAL(ret, 0);
}
CU_ASSERT_EQUAL(other_sites[0].position, 0.125);
CU_ASSERT_NSTRING_EQUAL(other_sites[0].ancestral_state, "0", 1);
CU_ASSERT_EQUAL(other_sites[1].position, 0.25);
CU_ASSERT_NSTRING_EQUAL(other_sites[1].ancestral_state, "0", 1);
CU_ASSERT_EQUAL(other_sites[2].position, 0.5);
CU_ASSERT_NSTRING_EQUAL(other_sites[2].ancestral_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[0].id, 0);
CU_ASSERT_EQUAL(other_mutations[0].node, 2);
CU_ASSERT_NSTRING_EQUAL(other_mutations[0].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[0].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[1].id, 1);
CU_ASSERT_EQUAL(other_mutations[1].node, 4);
CU_ASSERT_NSTRING_EQUAL(other_mutations[1].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[1].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[2].id, 2);
CU_ASSERT_EQUAL(other_mutations[2].node, 0);
CU_ASSERT_NSTRING_EQUAL(other_mutations[2].derived_state, "0", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[2].inherited_state, "1", 1);
CU_ASSERT_EQUAL(other_mutations[3].id, 3);
CU_ASSERT_EQUAL(other_mutations[3].node, 0);
CU_ASSERT_NSTRING_EQUAL(other_mutations[3].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[3].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[4].id, 4);
CU_ASSERT_EQUAL(other_mutations[4].node, 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[4].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[4].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[5].id, 5);
CU_ASSERT_EQUAL(other_mutations[5].node, 2);
CU_ASSERT_NSTRING_EQUAL(other_mutations[5].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[5].inherited_state, "0", 1);
CU_ASSERT_EQUAL(other_mutations[6].id, 6);
CU_ASSERT_EQUAL(other_mutations[6].node, 3);
CU_ASSERT_NSTRING_EQUAL(other_mutations[6].derived_state, "1", 1);
CU_ASSERT_NSTRING_EQUAL(other_mutations[6].inherited_state, "0", 1);
tsk_treeseq_free(&ts);
}
static void
test_single_tree_bad_mutations(void)
{
int ret = 0;
const char *sites = "0 0\n"
"0.1 0\n"
"0.2 0\n";
const char *mutations = "0 0 1 -1 0\n"
"1 1 1 -1 0\n"
"2 4 1 -1 1\n"
"2 1 0 2 0\n"
"2 1 1 3 0\n"
"2 2 1 -1 0\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_flags_t load_flags = TSK_TS_INIT_BUILD_INDEXES;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
parse_nodes(single_tree_ex_nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
parse_edges(single_tree_ex_edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
parse_sites(sites, &tables.sites);
parse_mutations(mutations, &tables.mutations);
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 3);
CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 6);
tables.sequence_length = 1.0;
/* Check to make sure we have legal mutations */
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
tsk_treeseq_free(&ts);
/* negative coordinate */
tables.sites.position[0] = -1.0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);
tsk_treeseq_free(&ts);
tables.sites.position[0] = 0.0;
/* non finite coordinates */
tables.sites.position[0] = NAN;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);
tsk_treeseq_free(&ts);
tables.sites.position[0] = 0.0;
tables.sites.position[0] = INFINITY;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);
tsk_treeseq_free(&ts);
tables.sites.position[0] = 0.0;
/* coordinate == sequence length */
tables.sites.position[2] = 1.0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);
tsk_treeseq_free(&ts);
tables.sites.position[2] = 0.2;
/* coordinate > sequence length */
tables.sites.position[2] = 1.1;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SITE_POSITION);
tsk_treeseq_free(&ts);
tables.sites.position[2] = 0.2;
/* Duplicate positions */
tables.sites.position[0] = 0.1;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_DUPLICATE_SITE_POSITION);
tsk_treeseq_free(&ts);
tables.sites.position[0] = 0.0;
/* Unsorted positions */
tables.sites.position[0] = 0.3;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);
tsk_treeseq_free(&ts);
tables.sites.position[0] = 0.0;
/* site < 0 */
tables.mutations.site[0] = -2;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.mutations.site[0] = 0;
/* site == num_sites */
tables.mutations.site[0] = 3;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.mutations.site[0] = 0;
/* node = NULL */
tables.mutations.node[0] = TSK_NULL;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.mutations.node[0] = 0;
/* node >= num_nodes */
tables.mutations.node[0] = 7;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.mutations.node[0] = 0;
/* parent < -1 */
tables.mutations.parent[0] = -2;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.mutations.parent[0] = TSK_NULL;
/* parent >= num_mutations */
tables.mutations.parent[0] = 7;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_OUT_OF_BOUNDS);
tsk_treeseq_free(&ts);
tables.mutations.parent[0] = TSK_NULL;
/* parent on a different site */
tables.mutations.parent[1] = 0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE);
tsk_treeseq_free(&ts);
tables.mutations.parent[1] = TSK_NULL;
/* parent is the same mutation */
tables.mutations.parent[0] = 0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_EQUAL);
tsk_treeseq_free(&ts);
tables.mutations.parent[0] = TSK_NULL;
/* parent_id > mutation id */
tables.mutations.parent[3] = 4;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
tsk_treeseq_free(&ts);
tables.mutations.parent[3] = 2;
/* time < node time */
tables.mutations.time[2] = 0;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE);
tsk_treeseq_free(&ts);
tables.mutations.time[2] = 1;
/* time > parent mutation */
tables.mutations.time[4] = 0.5;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION);
tsk_treeseq_free(&ts);
tables.mutations.time[4] = 0;
/* time > parent node */
tables.mutations.time[0] = 1.5;
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE);
tsk_treeseq_free(&ts);
tables.mutations.time[0] = 0;
/* Check to make sure we've maintained legal mutations */
ret = tsk_treeseq_init(&ts, &tables, load_flags);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}
/* Position a tree on the only tree of a balanced four-sample sequence and
 * check parents, per-node sample counts and MRCAs, then confirm that
 * advancing past the last tree returns 0. */
static void
test_single_tree_iter(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 2 0\n"
                        "0 3 0\n";
    const char *edges = "0 6 4 0,1\n"
                        "0 6 5 2,3\n"
                        "0 6 6 4,5\n";
    tsk_id_t expected_parent[] = { 4, 4, 5, 5, 6, 6, TSK_NULL };
    tsk_size_t num_nodes = 7;
    tsk_size_t sample_count;
    tsk_id_t node, parent, mrca;
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    int rc;

    tsk_treeseq_from_text(
        &treeseq, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    verify_edge_array_trees(&treeseq);

    rc = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL(rc, 0);
    rc = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL(rc, TSK_TREE_OK);

    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&treeseq), num_nodes);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&treeseq), 1);
    CU_ASSERT_EQUAL(tree.num_children[4], 2);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 1);
    tsk_tree_print_state(&tree, _devnull);

    /* Parent of every node */
    for (node = 0; node < (tsk_id_t) num_nodes; node++) {
        rc = tsk_tree_get_parent(&tree, node, &parent);
        CU_ASSERT_EQUAL(rc, 0);
        CU_ASSERT_EQUAL(parent, expected_parent[node]);
    }

    /* Samples below a leaf, an internal node and the root */
    rc = tsk_tree_get_num_samples(&tree, 0, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 1);
    rc = tsk_tree_get_num_samples(&tree, 4, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 2);
    rc = tsk_tree_get_num_samples(&tree, 6, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 4);

    /* MRCA within one cherry, then across the two cherries */
    rc = tsk_tree_get_mrca(&tree, 0, 1, &mrca);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(mrca, 4);
    rc = tsk_tree_get_mrca(&tree, 0, 2, &mrca);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(mrca, 6);

    /* There is no second tree to move to. */
    rc = tsk_tree_next(&tree);
    CU_ASSERT_EQUAL(rc, 0);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&treeseq);
}
/* Position a tree on a single non-binary tree (nodes 7, 8 and 9 have four,
 * two and three children) and check parents, sample counts, the sibling
 * links under each internal node, the root and two MRCAs. */
static void
test_single_nonbinary_tree_iter(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 2 0\n"
                        "0 3 0\n";
    const char *edges = "0 1 7 0,1,2,3\n"
                        "0 1 8 4,5\n"
                        "0 1 9 6,7,8\n";
    tsk_id_t expected_parent[] = { 7, 7, 7, 7, 8, 8, 9, 9, 9, TSK_NULL };
    tsk_size_t num_nodes = 10;
    tsk_size_t total_samples = 7;
    tsk_size_t sample_count;
    tsk_id_t node, parent, mrca;
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    int rc;

    tsk_treeseq_from_text(
        &treeseq, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    verify_edge_array_trees(&treeseq);

    rc = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL(rc, 0);
    rc = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(rc, TSK_TREE_OK);

    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&treeseq), num_nodes);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&treeseq), 1);
    tsk_tree_print_state(&tree, _devnull);

    /* Parent of every node */
    for (node = 0; node < (tsk_id_t) num_nodes; node++) {
        rc = tsk_tree_get_parent(&tree, node, &parent);
        CU_ASSERT_EQUAL(rc, 0);
        CU_ASSERT_EQUAL(parent, expected_parent[node]);
    }

    /* Each sample is a leaf that counts only itself. */
    for (node = 0; node < (tsk_id_t) total_samples; node++) {
        rc = tsk_tree_get_num_samples(&tree, node, &sample_count);
        CU_ASSERT_EQUAL(rc, 0);
        CU_ASSERT_EQUAL(sample_count, 1);
        CU_ASSERT_EQUAL(tree.left_child[node], TSK_NULL);
    }

    /* Node 7: children 0..3, walked right to left through left_sib */
    node = 7;
    rc = tsk_tree_get_num_samples(&tree, node, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 4);
    CU_ASSERT_EQUAL(tree.right_child[node], 3);
    CU_ASSERT_EQUAL(tree.left_sib[3], 2);
    CU_ASSERT_EQUAL(tree.left_sib[2], 1);
    CU_ASSERT_EQUAL(tree.left_sib[1], 0);
    CU_ASSERT_EQUAL(tree.left_sib[0], TSK_NULL);
    CU_ASSERT_EQUAL(tree.num_children[node], 4);

    /* Node 8: children 4 and 5 */
    node = 8;
    rc = tsk_tree_get_num_samples(&tree, node, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 2);
    CU_ASSERT_EQUAL(tree.right_child[node], 5);
    CU_ASSERT_EQUAL(tree.left_sib[5], 4);
    CU_ASSERT_EQUAL(tree.left_sib[4], TSK_NULL);
    CU_ASSERT_EQUAL(tree.num_children[node], 2);

    /* Node 9: children 6, 7 and 8; it is the single root */
    node = 9;
    rc = tsk_tree_get_num_samples(&tree, node, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 7);
    CU_ASSERT_EQUAL(tree.right_child[node], 8);
    CU_ASSERT_EQUAL(tree.left_sib[8], 7);
    CU_ASSERT_EQUAL(tree.left_sib[7], 6);
    CU_ASSERT_EQUAL(tree.left_sib[6], TSK_NULL);
    CU_ASSERT_EQUAL(tree.num_children[node], 3);
    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&tree), 1);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&tree), 9);

    /* MRCA under node 7, then across nodes 7 and 8 */
    rc = tsk_tree_get_mrca(&tree, 0, 1, &mrca);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(mrca, 7);
    rc = tsk_tree_get_mrca(&tree, 0, 4, &mrca);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(mrca, 9);

    /* There is no second tree to move to. */
    rc = tsk_tree_next(&tree);
    CU_ASSERT_EQUAL(rc, 0);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&treeseq);
}
/* Same balanced tree as test_single_tree_iter, but with the node table laid
 * out so the root is node 0 and the samples are the last four nodes (3..6). */
static void
test_single_tree_general_samples_iter(void)
{
    const char *nodes = "0 3 0\n"
                        "0 2 0\n"
                        "0 1 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n";
    const char *edges = "0 6 2 3,4\n"
                        "0 6 1 5,6\n"
                        "0 6 0 1,2\n";
    tsk_id_t expected_parent[] = { TSK_NULL, 0, 0, 2, 2, 1, 1 };
    tsk_size_t num_nodes = 7;
    tsk_size_t sample_count;
    const tsk_id_t *sample_ids;
    tsk_id_t node, parent, mrca;
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    int rc;

    tsk_treeseq_from_text(
        &treeseq, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);

    /* The sample list holds the flagged nodes in id order. */
    sample_ids = tsk_treeseq_get_samples(&treeseq);
    CU_ASSERT_EQUAL(sample_ids[0], 3);
    CU_ASSERT_EQUAL(sample_ids[1], 4);
    CU_ASSERT_EQUAL(sample_ids[2], 5);
    CU_ASSERT_EQUAL(sample_ids[3], 6);
    verify_edge_array_trees(&treeseq);

    rc = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL(rc, 0);
    rc = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL(rc, TSK_TREE_OK);

    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&treeseq), num_nodes);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&treeseq), 1);
    tsk_tree_print_state(&tree, _devnull);

    /* Parent of every node */
    for (node = 0; node < (tsk_id_t) num_nodes; node++) {
        rc = tsk_tree_get_parent(&tree, node, &parent);
        CU_ASSERT_EQUAL(rc, 0);
        CU_ASSERT_EQUAL(parent, expected_parent[node]);
    }

    /* Samples below a leaf, an internal node and the root */
    rc = tsk_tree_get_num_samples(&tree, 3, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 1);
    rc = tsk_tree_get_num_samples(&tree, 2, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 2);
    rc = tsk_tree_get_num_samples(&tree, 0, &sample_count);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(sample_count, 4);

    /* MRCA within one cherry, then across the two cherries */
    rc = tsk_tree_get_mrca(&tree, 3, 4, &mrca);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(mrca, 2);
    rc = tsk_tree_get_mrca(&tree, 3, 6, &mrca);
    CU_ASSERT_EQUAL(rc, 0);
    CU_ASSERT_EQUAL(mrca, 0);

    /* There is no second tree to move to. */
    rc = tsk_tree_next(&tree);
    CU_ASSERT_EQUAL(rc, 0);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&treeseq);
}
/* Single tree in which two of the samples have non-zero times: check that
 * the parent and the time reported for every node match the node table. */
static void
test_single_tree_iter_times(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 2 0\n"
                        "1 3 0\n"
                        "0 1 0\n"
                        "0 4 0\n"
                        "0 5 0\n";
    const char *edges = "0 6 4 0,1\n"
                        "0 6 5 2,3\n"
                        "0 6 6 4,5\n";
    tsk_id_t expected_parent[] = { 4, 4, 5, 5, 6, 6, TSK_NULL };
    double expected_time[] = { 0.0, 0.0, 2.0, 3.0, 1.0, 4.0, 5.0 };
    uint32_t num_nodes = 7;
    tsk_id_t node, parent;
    double node_time;
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    int rc = 0;

    tsk_treeseq_from_text(
        &treeseq, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    rc = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL(rc, 0);
    rc = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL(rc, TSK_TREE_OK);

    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&treeseq), num_nodes);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&treeseq), 1);
    tsk_tree_print_state(&tree, _devnull);

    for (node = 0; node < (tsk_id_t) num_nodes; node++) {
        rc = tsk_tree_get_parent(&tree, node, &parent);
        CU_ASSERT_EQUAL(rc, 0);
        CU_ASSERT_EQUAL(parent, expected_parent[node]);
        rc = tsk_tree_get_time(&tree, node, &node_time);
        CU_ASSERT_EQUAL(rc, 0);
        CU_ASSERT_EQUAL(node_time, expected_time[node]);
    }

    /* There is no second tree to move to. */
    rc = tsk_tree_next(&tree);
    CU_ASSERT_EQUAL(rc, 0);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&treeseq);
}
/* Check tsk_tree_get_depth for every node of a balanced four-sample tree
 * (root at depth 0, leaves at depth 2) and for two invalid node ids. */
static void
test_single_tree_iter_depths(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 2 0\n"
                        "0 3 0\n";
    const char *edges = "0 6 4 0,1\n"
                        "0 6 5 2,3\n"
                        "0 6 6 4,5\n";
    int expected_depth[] = { 2, 2, 2, 2, 1, 1, 0 };
    uint32_t num_nodes = 7;
    int node_depth;
    tsk_id_t node;
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    int rc = 0;

    tsk_treeseq_from_text(
        &treeseq, 6, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    rc = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL(rc, 0);
    rc = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL(rc, TSK_TREE_OK);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&treeseq), num_nodes);

    for (node = 0; node < (tsk_id_t) num_nodes; node++) {
        rc = tsk_tree_get_depth(&tree, node, &node_depth);
        CU_ASSERT_EQUAL(rc, 0);
        CU_ASSERT_EQUAL(node_depth, expected_depth[node]);
    }

    /* Node id num_nodes + 1 and TSK_NULL are both rejected. */
    rc = tsk_tree_get_depth(&tree, (tsk_id_t) num_nodes + 1, &node_depth);
    CU_ASSERT_EQUAL(rc, TSK_ERR_NODE_OUT_OF_BOUNDS);
    rc = tsk_tree_get_depth(&tree, TSK_NULL, &node_depth);
    CU_ASSERT_EQUAL(rc, TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* There is no second tree to move to. */
    rc = tsk_tree_next(&tree);
    CU_ASSERT_EQUAL(rc, 0);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&treeseq);
}
/* Simplify the single-tree example down to samples 0 and 1, then to no
 * samples at all, and finally check the errors raised for several kinds of
 * damaged edge table. The table collection is refilled from the tree
 * sequence (TSK_NO_INIT) before each damage case. */
static void
test_single_tree_simplify(void)
{
    tsk_id_t samples[] = { 0, 1 };
    tsk_table_collection_t tc;
    tsk_treeseq_t treeseq;
    int rc;

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    verify_simplify(&treeseq);

    /* Two samples leave three nodes joined by two edges. */
    rc = tsk_treeseq_copy_tables(&treeseq, &tc, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    rc = tsk_table_collection_simplify(&tc, samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL(tc.nodes.num_rows, 3);
    CU_ASSERT_EQUAL(tc.edges.num_rows, 2);

    /* An empty sample list produces empty node and edge tables. */
    rc = tsk_table_collection_simplify(&tc, samples, 0, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_EQUAL(tc.nodes.num_rows, 0);
    CU_ASSERT_EQUAL(tc.edges.num_rows, 0);

    /* Edges shuffled out of order */
    rc = tsk_treeseq_copy_tables(&treeseq, &tc, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    unsort_edges(&tc.edges, 0);
    rc = tsk_table_collection_simplify(&tc, samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, TSK_ERR_EDGES_NOT_SORTED_CHILD);

    /* Edge with a null parent */
    rc = tsk_treeseq_copy_tables(&treeseq, &tc, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    tc.edges.parent[0] = -1;
    rc = tsk_table_collection_simplify(&tc, samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, TSK_ERR_NULL_PARENT);

    /* Edge with a null child */
    rc = tsk_treeseq_copy_tables(&treeseq, &tc, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    tc.edges.child[0] = -1;
    rc = tsk_table_collection_simplify(&tc, samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, TSK_ERR_NULL_CHILD);

    /* Edge whose child is its own parent */
    rc = tsk_treeseq_copy_tables(&treeseq, &tc, TSK_NO_INIT);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    tc.edges.child[0] = tc.edges.parent[0];
    rc = tsk_table_collection_simplify(&tc, samples, 2, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, TSK_ERR_BAD_NODE_TIME_ORDERING);

    tsk_treeseq_free(&treeseq);
    tsk_table_collection_free(&tc);
}
/* Run simplify with TSK_DEBUG while the debug stream points at a temporary
 * file, and check that something was written to that file. */
static void
test_single_tree_simplify_debug(void)
{
    tsk_id_t samples[] = { 0, 1 };
    tsk_treeseq_t treeseq, simplified;
    FILE *debug_out;
    int rc;

    debug_out = fopen(_tmp_file_name, "w");
    CU_ASSERT_FATAL(debug_out != NULL);

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);

    /* Redirect debug output only for the duration of the simplify call. */
    tsk_set_debug_stream(debug_out);
    rc = tsk_treeseq_simplify(&treeseq, samples, 2, TSK_DEBUG, &simplified, NULL);
    tsk_set_debug_stream(stdout);
    CU_ASSERT_EQUAL_FATAL(rc, 0);

    /* A non-zero file position means debug text was emitted. */
    CU_ASSERT_TRUE(ftell(debug_out) > 0);
    fclose(debug_out);

    tsk_treeseq_free(&treeseq);
    tsk_treeseq_free(&simplified);
}
/* Simplify to samples 0 and 1 with TSK_SIMPLIFY_KEEP_INPUT_ROOTS and check
 * the resulting row counts of the node, edge, site and mutation tables. */
static void
test_single_tree_simplify_keep_input_roots(void)
{
    tsk_id_t samples[] = { 0, 1 };
    tsk_table_collection_t tc;
    tsk_treeseq_t treeseq;
    int rc;

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    verify_simplify(&treeseq);

    rc = tsk_treeseq_copy_tables(&treeseq, &tc, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    rc = tsk_table_collection_simplify(
        &tc, samples, 2, TSK_SIMPLIFY_KEEP_INPUT_ROOTS, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, 0);

    /* One node and one edge more than the plain two-sample simplify test
     * expects (3 nodes, 2 edges). */
    CU_ASSERT_EQUAL(tc.nodes.num_rows, 4);
    CU_ASSERT_EQUAL(tc.edges.num_rows, 3);
    CU_ASSERT_EQUAL(tc.sites.num_rows, 3);
    CU_ASSERT_EQUAL(tc.mutations.num_rows, 4);

    tsk_treeseq_free(&treeseq);
    tsk_table_collection_free(&tc);
}
/* Simplify must give the same result when the sample flags have been cleared
 * from the node table, as long as the samples are passed in explicitly. */
static void
test_single_tree_simplify_no_sample_nodes(void)
{
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_table_collection_t unflagged, reference;
    tsk_treeseq_t treeseq;
    int rc;

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, NULL, NULL, NULL, NULL, 0);
    rc = tsk_treeseq_copy_tables(&treeseq, &unflagged, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    rc = tsk_treeseq_copy_tables(&treeseq, &reference, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);

    /* Zero the whole flags column of one copy, simplify it with all four
     * samples, and expect to end up with the untouched copy again. */
    tsk_memset(unflagged.nodes.flags, 0,
        sizeof(*unflagged.nodes.flags) * unflagged.nodes.num_rows);
    rc = tsk_table_collection_simplify(&unflagged, samples, 4, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&unflagged, &reference, 0));

    tsk_table_collection_free(&unflagged);
    tsk_table_collection_free(&reference);
    tsk_treeseq_free(&treeseq);
}
/* Simplifying with a NULL sample list must leave the single-tree example's
 * tables equal to an unmodified copy. */
static void
test_single_tree_simplify_null_samples(void)
{
    tsk_table_collection_t simplified, reference;
    tsk_treeseq_t treeseq;
    int rc;

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges,
        NULL, NULL, NULL, NULL, NULL, 0);
    rc = tsk_treeseq_copy_tables(&treeseq, &simplified, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    rc = tsk_treeseq_copy_tables(&treeseq, &reference, 0);
    CU_ASSERT_EQUAL_FATAL(rc, 0);

    rc = tsk_table_collection_simplify(&simplified, NULL, 0, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(rc, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(&simplified, &reference, 0));

    tsk_table_collection_free(&simplified);
    tsk_table_collection_free(&reference);
    tsk_treeseq_free(&treeseq);
}
static void
test_single_tree_compute_mutation_parents(void)
{
int ret = 0;
const char *sites = "0 0\n"
"0.1 0\n"
"0.2 0\n";
const char *mutations = "0 0 1 -1\n"
"1 1 1 -1\n"
"2 4 1 -1\n"
"2 1 0 2 \n"
"2 1 1 3 \n"
"2 2 1 -1\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
parse_nodes(single_tree_ex_nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
parse_edges(single_tree_ex_edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
parse_sites(sites, &tables.sites);
parse_mutations(mutations, &tables.mutations);
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 3);
CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 6);
tables.sequence_length = 1.0;
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Check to make sure we have legal mutations */
ret = tsk_treeseq_init(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
/* Compute the mutation parents */
verify_compute_mutation_parents(&ts);
tsk_treeseq_free(&ts);
/* Bad site reference */
tables.mutations.site[0] = -1;
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tables.mutations.site[0] = 0;
/* Bad site reference */
tables.mutations.site[0] = -1;
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tables.mutations.site[0] = 0;
/* mutation sites out of order */
tables.mutations.site[0] = 2;
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_MUTATIONS);
tables.mutations.site[0] = 0;
/* sites out of order */
tables.sites.position[0] = 0.11;
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);
tables.sites.position[0] = 0;
/* Bad node reference */
tables.mutations.node[0] = -1;
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tables.mutations.node[0] = 0;
/* Bad node reference */
tables.mutations.node[0] = (tsk_id_t) tables.nodes.num_rows;
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tables.mutations.node[0] = 0;
/* Mutations not ordered by tree */
tables.mutations.node[2] = 1;
tables.mutations.node[3] = 4;
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
tables.mutations.node[2] = 4;
tables.mutations.node[3] = 1;
/* Need to reset the parent field here */
tsk_memset(
tables.mutations.parent, 0xff, tables.mutations.num_rows * sizeof(tsk_id_t));
/* Mutations not ordered by site */
tables.mutations.site[3] = 1;
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);
tables.mutations.site[3] = 2;
/* Check to make sure we still have legal mutations */
ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_init(&ts, &tables, 0);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 3);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 6);
tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}
static void
test_single_tree_compute_mutation_times(void)
{
int ret = 0;
const char *sites = "0 0\n"
"0.1 0\n"
"0.2 0\n"
"0.3 0\n";
const char *mutations = "0 0 1 -1 3\n"
"1 1 1 -1 3\n"
"2 4 1 -1 8\n"
"2 1 0 2 4\n"
"2 2 1 -1 4\n"
"2 1 1 3 2\n"
"3 6 1 -1 10\n";
/* 6 */
/* 6 */
/* / \ */
/* / \ */
/* 2 \ */
/* / 5 */
/* 4 / \ */
/* 0 1,3,4 5 \ */
/* 0 1 2 3 */
tsk_treeseq_t ts;
tsk_table_collection_t tables;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
parse_nodes(single_tree_ex_nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
tables.nodes.time[4] = 6;
tables.nodes.time[5] = 8;
tables.nodes.time[6] = 10;
parse_edges(single_tree_ex_edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
parse_sites(sites, &tables.sites);
parse_mutations(mutations, &tables.mutations);
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 4);
CU_ASSERT_EQUAL_FATAL(tables.mutations.num_rows, 7);
tables.sequence_length = 1.0;
ret = tsk_table_collection_build_index(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
/* Check to make sure we have legal mutations */
ret = tsk_treeseq_init(&ts, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 4);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 7);
/* Compute the mutation times */
verify_compute_mutation_times(&ts);
/* Verify consistency of individuals */
verify_individual_nodes(&ts);
tsk_treeseq_free(&ts);
/* Bad random param */
ret = tsk_table_collection_compute_mutation_times(&tables, (double *) 1, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
/* Bad site reference */
tables.mutations.site[0] = -1;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tables.mutations.site[0] = 0;
/* Bad site reference */
tables.mutations.site[0] = -1;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_SITE_OUT_OF_BOUNDS);
tables.mutations.site[0] = 0;
/* mutation sites out of order */
tables.mutations.site[0] = 2;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_MUTATIONS);
tables.mutations.site[0] = 0;
/* sites out of order */
tables.sites.position[0] = 0.11;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSORTED_SITES);
tables.sites.position[0] = 0;
/* Bad node reference */
tables.mutations.node[0] = -1;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tables.mutations.node[0] = 0;
/* Bad node reference */
tables.mutations.node[0] = (tsk_id_t) tables.nodes.num_rows;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tables.mutations.node[0] = 0;
/* Mutations not ordered by site */
tables.mutations.site[2] = 0;
ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_MUTATIONS);
tables.mutations.site[2] = 2;
ret = tsk_treeseq_init(&ts, &tables, 0);
CU_ASSERT_EQUAL(ret, 0);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 4);
CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 7);
tsk_treeseq_free(&ts);
tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}
static void
test_single_tree_mutation_edges(void)
{
int ret = 0;
tsk_size_t i, j, k;
tsk_treeseq_t ts;
tsk_tree_t tree;
tsk_mutation_t mut;
tsk_site_t site;
tsk_id_t mutation_edges[] = { 2, 4, 0, 0, 1, 2, 3 };
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
for (j = 0; j < 7; j++) {
ret = tsk_treeseq_get_mutation(&ts, (tsk_id_t) j, &mut);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL(mut.edge, mutation_edges[j]);
}
ret = tsk_tree_init(&tree, &ts, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_tree_first(&tree);
CU_ASSERT_EQUAL(ret, TSK_TREE_OK);
i = 0;
for (j = 0; j < tree.sites_length; j++) {
site = tree.sites[j];
for (k = 0; k < site.mutations_length; k++) {
CU_ASSERT_EQUAL(site.mutations[k].edge, mutation_edges[i]);
i++;
}
}
CU_ASSERT_EQUAL(i, 7);
tsk_tree_free(&tree);
tsk_treeseq_free(&ts);
}
/* Check tsk_tree_is_descendant on the first tree of the single-tree example:
 * genuine descendant pairs, a node paired with itself, unrelated pairs and
 * node IDs that are out of bounds. */
static void
test_single_tree_is_descendant(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    int err;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    err = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(err, TSK_TREE_OK);

    /* Pairs expected to be (descendant, ancestor). */
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 0, 4));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 1, 4));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 0, 6));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 1, 6));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 4, 6));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 2, 5));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 3, 5));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 2, 6));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 3, 6));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 5, 6));

    /* A node counts as a descendant of itself. */
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 0, 0));
    CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, 1, 1));

    /* Pairs expected not to be related this way. */
    CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, 0, 1));
    CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, 0, 2));
    CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, 0, 5));

    /* Out-of-bounds node IDs always give false. */
    CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, -1, 5));
    CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, 100, 5));
    CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, -1, -1));

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/* Check tsk_tree_get_total_branch_length on the first tree of the
 * single-tree example for TSK_NULL, the virtual root (by literal ID and via
 * tree.virtual_root), internal nodes, a leaf, and out-of-bounds node IDs. */
static void
test_single_tree_total_branch_length(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    double total;
    int err;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    err = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(err, TSK_TREE_OK);

    /* TSK_NULL, 7 and tree.virtual_root are all expected to give 9. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&t, TSK_NULL, &total), 0);
    CU_ASSERT_EQUAL_FATAL(total, 9);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&t, 7, &total), 0);
    CU_ASSERT_EQUAL_FATAL(total, 9);
    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_get_total_branch_length(&t, t.virtual_root, &total), 0);
    CU_ASSERT_EQUAL_FATAL(total, 9);

    /* Individual nodes. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&t, 4, &total), 0);
    CU_ASSERT_EQUAL_FATAL(total, 2);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&t, 0, &total), 0);
    CU_ASSERT_EQUAL_FATAL(total, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_total_branch_length(&t, 5, &total), 0);
    CU_ASSERT_EQUAL_FATAL(total, 4);

    /* Node IDs outside the valid range are rejected. */
    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_get_total_branch_length(&t, -2, &total), TSK_ERR_NODE_OUT_OF_BOUNDS);
    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_get_total_branch_length(&t, 8, &total), TSK_ERR_NODE_OUT_OF_BOUNDS);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/* Check tsk_tree_num_lineages on the first tree of the single-tree example
 * at a range of times, including a negative time, times at and beyond 3,
 * and the non-finite inputs INFINITY and NAN. */
static void
test_single_tree_num_lineages(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    tsk_size_t count;
    int err;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    err = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(err, TSK_TREE_OK);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&t, 0, &count), 0);
    CU_ASSERT_EQUAL_FATAL(count, 4);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&t, -1, &count), 0);
    CU_ASSERT_EQUAL_FATAL(count, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&t, 1, &count), 0);
    CU_ASSERT_EQUAL_FATAL(count, 3);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&t, 2, &count), 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&t, 2.999, &count), 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&t, 3, &count), 0);
    CU_ASSERT_EQUAL_FATAL(count, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_num_lineages(&t, 300, &count), 0);
    CU_ASSERT_EQUAL_FATAL(count, 0);

    /* Non-finite times are reported as errors. */
    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_num_lineages(&t, INFINITY, &count), TSK_ERR_TIME_NONFINITE);
    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_num_lineages(&t, NAN, &count), TSK_ERR_TIME_NONFINITE);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/* Exercise tsk_tree_map_mutations on the single-tree example (4 samples)
 * with a series of genotype patterns, then with a caller-fixed ancestral
 * state, and finally with invalid inputs.
 *
 * The genotypes array is modified in place between calls, so each case
 * depends on the values left behind by the previous one. The transitions
 * array returned by each successful call is released with free(); no free()
 * follows the calls that are expected to fail. */
static void
test_single_tree_map_mutations(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    int32_t genotypes[] = { 0, 1, 1, 1 };
    int ret = 0;
    tsk_size_t num_transitions;
    tsk_state_transition_t *transitions;
    int32_t ancestral_state, j;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);
    ret = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&t));

    /* Genotypes { 0, 1, 1, 1 }: ancestral state 1 and a single transition
     * to state 0 on node 0. */
    ret = tsk_tree_map_mutations(
        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(ancestral_state, 1);
    CU_ASSERT_EQUAL_FATAL(num_transitions, 1);
    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);
    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 0);
    free(transitions);

    /* Genotypes { 1, 1, 1, 1 }: ancestral state 1, no transitions. */
    genotypes[0] = 1;
    ret = tsk_tree_map_mutations(
        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(ancestral_state, 1);
    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);
    free(transitions);

    /* Genotypes { 0, 0, 0, 0 }: ancestral state 0, no transitions. */
    genotypes[0] = 0;
    genotypes[1] = 0;
    genotypes[2] = 0;
    genotypes[3] = 0;
    ret = tsk_tree_map_mutations(
        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_transitions, 0);
    free(transitions);

    /* Genotypes { j, 0, 0, 0 } for every j in 1..63: ancestral state 0 and
     * a single transition to state j on node 0. */
    for (j = 1; j < 64; j++) {
        genotypes[0] = j;
        genotypes[1] = 0;
        genotypes[2] = 0;
        genotypes[3] = 0;
        ret = tsk_tree_map_mutations(
            &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);
        CU_ASSERT_EQUAL_FATAL(num_transitions, 1);
        CU_ASSERT_EQUAL_FATAL(transitions[0].node, 0);
        CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);
        CU_ASSERT_EQUAL_FATAL(transitions[0].state, j);
        free(transitions);
    }

    /* Genotypes { 2, 1, 0, 0 } (entries 2 and 3 are left at 0 by the loop
     * above): two transitions, the second of which has parent 0. */
    genotypes[0] = 2;
    genotypes[1] = 1;
    ret = tsk_tree_map_mutations(
        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_transitions, 2);
    CU_ASSERT_EQUAL_FATAL(transitions[0].node, 4);
    CU_ASSERT_EQUAL_FATAL(transitions[0].parent, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(transitions[0].state, 1);
    CU_ASSERT_EQUAL_FATAL(transitions[1].node, 0);
    CU_ASSERT_EQUAL_FATAL(transitions[1].parent, 0);
    CU_ASSERT_EQUAL_FATAL(transitions[1].state, 2);
    free(transitions);

    /* Genotypes { 1, 2, 3, 4 }: four distinct states give 3 transitions
     * when the ancestral state is chosen by the call. */
    genotypes[0] = 1;
    genotypes[1] = 2;
    genotypes[2] = 3;
    genotypes[3] = 4;
    ret = tsk_tree_map_mutations(
        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(num_transitions, 3);
    free(transitions);

    /* With TSK_MM_FIXED_ANCESTRAL_STATE the value passed in ancestral_state
     * (5, which no sample carries) is kept and 4 transitions are reported. */
    ancestral_state = 5;
    ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,
        &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(num_transitions, 4);
    CU_ASSERT_EQUAL_FATAL(ancestral_state, 5);
    free(transitions);

    /* Fixed ancestral states of -1 and 64 are rejected. */
    ancestral_state = -1;
    ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,
        &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_ANCESTRAL_STATE);

    ancestral_state = 64;
    ret = tsk_tree_map_mutations(&t, genotypes, NULL, TSK_MM_FIXED_ANCESTRAL_STATE,
        &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_ANCESTRAL_STATE);

    /* Genotype values of 64 and -2 are rejected. */
    genotypes[0] = 64;
    ret = tsk_tree_map_mutations(
        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_GENOTYPE);

    genotypes[0] = -2;
    ret = tsk_tree_map_mutations(
        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_GENOTYPE);

    /* Genotypes { -1, -1, -1, -1 } are reported as all missing. */
    genotypes[0] = -1;
    genotypes[1] = -1;
    genotypes[2] = -1;
    genotypes[3] = -1;
    ret = tsk_tree_map_mutations(
        &t, genotypes, NULL, 0, &ancestral_state, &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_GENOTYPES_ALL_MISSING);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/* Regression test for tsk_tree_map_mutations on a tree whose samples include
 * internal nodes; the tables are derived from a case that provoked a
 * segfault. */
static void
test_single_tree_map_mutations_internal_samples(void)
{
    /* Example derived from test case provoking a segfault */
    const char *node_text = "0 0.00000000000000 0\n"
                            "0 0.00000000000000 0\n"
                            "1 0.00000000000000 0\n"
                            "1 0.00000000000000 0\n"
                            "1 0.00000000000000 0\n"
                            "0 0.10792116530237 0\n"
                            "1 1.00674711128465 0\n"
                            "1 1.24675560985525 0\n"
                            "0 1.78536352520779 0\n";
    const char *edge_text = "0.00000000 1.00000000 5 0\n"
                            "0.00000000 1.00000000 5 2\n"
                            "0.00000000 1.00000000 6 4\n"
                            "0.00000000 1.00000000 6 5\n"
                            "0.00000000 1.00000000 7 1\n"
                            "0.00000000 1.00000000 7 3\n"
                            "0.00000000 1.00000000 8 6\n"
                            "0.00000000 1.00000000 8 7\n";
    int32_t sample_states[] = { 0, 2, 2, 1, 0 };
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    tsk_state_transition_t *transitions;
    tsk_size_t num_transitions;
    int32_t ancestral_state;
    int err = 0;

    tsk_treeseq_from_text(
        &ts, 1, node_text, edge_text, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 5);
    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));

    /* Expect ancestral state 0 and four transitions. */
    err = tsk_tree_map_mutations(&tree, sample_states, NULL, 0, &ancestral_state,
        &num_transitions, &transitions);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(ancestral_state, 0);
    CU_ASSERT_EQUAL_FATAL(num_transitions, 4);
    free(transitions);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&tree);
}
/* Check tsk_tree_get_num_tracked_samples on the single-tree example while
 * samples 0 and 1 are tracked: before the tree is positioned, on the first
 * tree, after stepping past it, after stepping onto it again, and finally
 * after the tracked set is cleared. */
static void
test_single_tree_tracked_samples(void)
{
    tsk_id_t tracked[] = { 0, 1 };
    tsk_treeseq_t ts;
    tsk_tree_t t;
    tsk_size_t count;
    int err;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
    err = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Tracked samples set before any call to first/next. */
    err = tsk_tree_set_tracked_samples(&t, 2, tracked);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_tree_get_num_tracked_samples(&t, 0, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 1);
    err = tsk_tree_get_num_tracked_samples(&t, 4, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 0);
    err = tsk_tree_get_num_tracked_samples(&t, t.virtual_root, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);

    /* On the first tree the counts propagate up through nodes 4 and 6. */
    err = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(err, TSK_TREE_OK);
    err = tsk_tree_get_num_tracked_samples(&t, 0, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 1);
    err = tsk_tree_get_num_tracked_samples(&t, 4, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);
    err = tsk_tree_get_num_tracked_samples(&t, 5, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 0);
    err = tsk_tree_get_num_tracked_samples(&t, 6, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);
    err = tsk_tree_get_num_tracked_samples(&t, t.virtual_root, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);

    /* next returns 0 here; the counts match the unpositioned state again. */
    err = tsk_tree_next(&t);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_tree_get_num_tracked_samples(&t, 0, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 1);
    err = tsk_tree_get_num_tracked_samples(&t, 4, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 0);
    err = tsk_tree_get_num_tracked_samples(&t, t.virtual_root, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);

    /* A further next returns TSK_TREE_OK and the on-tree counts return. */
    err = tsk_tree_next(&t);
    CU_ASSERT_EQUAL_FATAL(err, TSK_TREE_OK);
    err = tsk_tree_get_num_tracked_samples(&t, 0, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 1);
    err = tsk_tree_get_num_tracked_samples(&t, 4, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);
    err = tsk_tree_get_num_tracked_samples(&t, t.virtual_root, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 2);

    /* Setting an empty tracked set zeroes the counts. */
    err = tsk_tree_set_tracked_samples(&t, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_tree_get_num_tracked_samples(&t, 0, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 0);
    err = tsk_tree_get_num_tracked_samples(&t, t.virtual_root, &count);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(count, 0);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* Step a tsk_tree_position_t forwards and backwards over the single-tree
 * example, and seek to index 0 from each direction. After every move the
 * test checks the relevant subset of: validity/index, the tree interval, the
 * in/out edge ranges together with the index array they refer to, and the
 * recorded direction. */
static void
test_single_tree_tree_pos(void)
{
    tsk_treeseq_t ts;
    tsk_tree_position_t tree_pos;
    bool valid;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);

    ret = tsk_tree_position_init(&tree_pos, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* next from the initial state: the only tree, with all 6 edges in. */
    valid = tsk_tree_position_next(&tree_pos);
    CU_ASSERT_FATAL(valid);

    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 6);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);

    /* next again: no further tree, index -1, all 6 edges out. */
    valid = tsk_tree_position_next(&tree_pos);
    CU_ASSERT_FATAL(!valid);

    tsk_tree_position_print_state(&tree_pos, _devnull);

    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 6);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);

    /* prev: the tree is reached again, now in the reverse direction, so the
     * roles of the insertion and removal orders are swapped. */
    valid = tsk_tree_position_prev(&tree_pos);
    CU_ASSERT_FATAL(valid);

    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);

    /* prev again: no further tree in the reverse direction. */
    valid = tsk_tree_position_prev(&tree_pos);
    CU_ASSERT_FATAL(!valid);

    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);

    /* seek_forward to index 0: same state as the first next above. */
    ret = tsk_tree_position_seek_forward(&tree_pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 6);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 0);
    /* Terminated with ';' like every other assertion in this test, rather
     * than relying on how the CUnit macro happens to expand. */
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);

    valid = tsk_tree_position_next(&tree_pos);
    CU_ASSERT_FATAL(!valid);
    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 6);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);

    /* seek_backward to index 0: same state as the first prev above. */
    ret = tsk_tree_position_seek_backward(&tree_pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);

    tsk_tree_position_free(&tree_pos);
    tsk_treeseq_free(&ts);
}
/*=======================================================
* Multi tree tests.
*======================================================*/
/* Build the paper example (sites, mutations and individuals included) and
 * check its three trees against the expected parent arrays, then run the
 * edge-array verification on it. */
static void
test_simple_multi_tree(void)
{
    tsk_treeseq_t ts;
    const uint32_t expected_num_trees = 3;
    /* One row of 9 parent entries per tree. */
    // clang-format off
    tsk_id_t expected_parents[] = {
        6, 5, 8, 5, TSK_NULL, 6, 8, TSK_NULL, TSK_NULL,
        6, 5, 4, 4, 5, 6, TSK_NULL, TSK_NULL, TSK_NULL,
        7, 5, 4, 4, 5, 7, TSK_NULL, TSK_NULL, TSK_NULL,
    };
    // clang-format on

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    verify_trees(&ts, expected_num_trees, expected_parents);
    verify_edge_array_trees(&ts);
    tsk_treeseq_free(&ts);
}
/* Drive a tsk_tree_position_t over the three-tree paper example while
 * repeatedly switching between forward and reverse movement (next/prev and
 * seek_forward/seek_backward). After each move the test checks the relevant
 * subset of: index, tree interval, in/out edge ranges with the index array
 * they refer to, and the recorded direction. The expected values depend on
 * the exact sequence of moves, so the steps must stay in this order. */
static void
test_multi_tree_direction_switching_tree_pos(void)
{
    tsk_treeseq_t ts;
    tsk_tree_position_t tree_pos;
    bool valid;
    int ret = 0;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);

    ret = tsk_tree_position_init(&tree_pos, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* next from the initial state: tree 0, interval left 0 / right 2. */
    valid = tsk_tree_position_next(&tree_pos);
    CU_ASSERT_FATAL(valid);

    CU_ASSERT_EQUAL_FATAL(tree_pos.index, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 2);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 6);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);

    /* prev from tree 0: no tree before it, so the position becomes invalid
     * (index -1) and the direction flips to reverse. */
    valid = tsk_tree_position_prev(&tree_pos);
    CU_ASSERT_FATAL(!valid);

    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);

    /* prev from the invalid state: lands on the last tree (index 2,
     * interval left 7 / right 10). */
    valid = tsk_tree_position_prev(&tree_pos);
    CU_ASSERT_FATAL(valid);

    CU_ASSERT_EQUAL_FATAL(tree_pos.index, 2);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 7);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 10);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 10);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 4);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 10);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 10);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);

    /* next from the last tree: no tree after it, so the position becomes
     * invalid again and the direction flips back to forward. */
    valid = tsk_tree_position_next(&tree_pos);
    CU_ASSERT_FATAL(!valid);

    CU_ASSERT_EQUAL_FATAL(tree_pos.index, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 11);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);

    /* seek_forward to index 2 from the invalid state. */
    ret = tsk_tree_position_seek_forward(&tree_pos, 2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 7);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 10);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 11);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);

    /* seek_backward from tree 2 to tree 0. */
    ret = tsk_tree_position_seek_backward(&tree_pos, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.index, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 2);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 4);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, -1);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 10);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_REVERSE);

    /* seek_forward from tree 0 back to tree 2. */
    ret = tsk_tree_position_seek_forward(&tree_pos, 2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.index, 2);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.left, 7);
    CU_ASSERT_EQUAL_FATAL(tree_pos.interval.right, 10);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.start, 6);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.stop, 11);
    CU_ASSERT_EQUAL_FATAL(tree_pos.in.order, ts.tables->indexes.edge_insertion_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.start, 0);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.stop, 5);
    CU_ASSERT_EQUAL_FATAL(tree_pos.out.order, ts.tables->indexes.edge_removal_order);
    CU_ASSERT_EQUAL_FATAL(tree_pos.direction, TSK_DIR_FORWARD);

    tsk_tree_position_free(&tree_pos);
    tsk_treeseq_free(&ts);
}
/* Build the unary-node example (with its sites and mutations) and check its
 * three trees against the expected parent arrays, then run the edge-array
 * verification on it. */
static void
test_unary_multi_tree(void)
{
    const uint32_t expected_num_trees = 3;
    tsk_treeseq_t ts;
    /* One row of 10 parent entries per tree. */
    // clang-format off
    tsk_id_t expected_parents[] = {
        6, 5, 7, 5, TSK_NULL, 6, 8, 8, TSK_NULL, 5,
        6, 5, 4, 4, 5, 6, 8, TSK_NULL, TSK_NULL, 5,
        7, 5, 4, 4, 5, 7, TSK_NULL, TSK_NULL, TSK_NULL, 5,
    };
    // clang-format on

    tsk_treeseq_from_text(&ts, 10, unary_ex_nodes, unary_ex_edges, NULL, unary_ex_sites,
        unary_ex_mutations, NULL, NULL, 0);
    verify_trees(&ts, expected_num_trees, expected_parents);
    verify_edge_array_trees(&ts);
    tsk_treeseq_free(&ts);
}
/* Build the internal-sample example (with its sites and mutations) and check
 * its three trees against the expected parent arrays, then run the
 * edge-array verification on it. */
static void
test_internal_sample_multi_tree(void)
{
    const uint32_t expected_num_trees = 3;
    tsk_treeseq_t ts;
    /* One row of 9 parent entries per tree. */
    // clang-format off
    tsk_id_t expected_parents[] = {
        7, 5, 4, 4, 5, 7, TSK_NULL, TSK_NULL, TSK_NULL,
        4, 5, 4, 8, 5, 8, TSK_NULL, TSK_NULL, TSK_NULL,
        6, 5, 4, 4, 5, 6, TSK_NULL, TSK_NULL, TSK_NULL,
    };
    // clang-format on

    tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges,
        NULL, internal_sample_ex_sites, internal_sample_ex_mutations, NULL, NULL, 0);
    verify_trees(&ts, expected_num_trees, expected_parents);
    verify_edge_array_trees(&ts);
    tsk_treeseq_free(&ts);
}
/* Simplify the internal-sample example down to samples { 2, 3, 5 }, check
 * that the node map sends them to 0, 1 and 2, and check the three simplified
 * trees against the expected parent arrays. */
static void
test_internal_sample_simplified_multi_tree(void)
{
    tsk_treeseq_t ts, simplified;
    tsk_id_t kept_samples[] = { 2, 3, 5 };
    tsk_id_t node_map[9];
    const uint32_t expected_num_trees = 3;
    int err;
    // clang-format off
    tsk_id_t expected_parents[] = {
     /* 0         1         2         3         4 */
        3,        3,        TSK_NULL, 2,        TSK_NULL,
        2,        4,        4,        TSK_NULL, TSK_NULL,
        3,        3,        TSK_NULL, 2,        TSK_NULL,
    };
    // clang-format on

    tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges,
        NULL, internal_sample_ex_sites, internal_sample_ex_mutations, NULL, NULL, 0);

    err = tsk_treeseq_simplify(&ts, kept_samples, 3, 0, &simplified, node_map);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL(node_map[2], 0);
    CU_ASSERT_EQUAL(node_map[3], 1);
    CU_ASSERT_EQUAL(node_map[5], 2);

    verify_trees(&simplified, expected_num_trees, expected_parents);
    /* The edge-array check is run on the input tree sequence. */
    verify_edge_array_trees(&ts);

    tsk_treeseq_free(&simplified);
    tsk_treeseq_free(&ts);
}
/* Build the non-binary example (with its sites and mutations) and check its
 * two trees against the expected parent arrays, then run the edge-array
 * verification on it. */
static void
test_nonbinary_multi_tree(void)
{
    const uint32_t expected_num_trees = 2;
    tsk_treeseq_t ts;
    /* We make one mutation for each tree */
    /* One row of 13 parent entries per tree. */
    // clang-format off
    tsk_id_t expected_parents[] = {
        8, 8, 8, 8, 10, 10, 9, 10, 9, 12, 12, TSK_NULL, TSK_NULL,
        8, 8, 8, 8, 10, 11, 9, 10, 9, 11, 12, 12, TSK_NULL,
    };
    // clang-format on

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL,
        nonbinary_ex_sites, nonbinary_ex_mutations, NULL, NULL, 0);
    verify_trees(&ts, expected_num_trees, expected_parents);
    verify_edge_array_trees(&ts);
    tsk_treeseq_free(&ts);
}
/* Simplify the paper example down to samples { 0, 3 } with
 * TSK_SIMPLIFY_KEEP_INPUT_ROOTS and check the three resulting trees against
 * the expected parent arrays. */
static void
test_simplify_keep_input_roots_multi_tree(void)
{
    /*
    0.25┊     8   ┊         ┊         ┊
        ┊   ┏━┻━┓ ┊         ┊         ┊
    0.20┊   ┃   ┃ ┊         ┊   7     ┊
        ┊   ┃   ┃ ┊         ┊ ┏━┻━┓   ┊
    0.17┊   6   ┃ ┊   6     ┊ ┃   ┃   ┊
        ┊ ┏━┻┓  ┃ ┊ ┏━┻━┓   ┊ ┃   ┃   ┊
    0.09┊ ┃  5  ┃ ┊ ┃   5   ┊ ┃   5   ┊
        ┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓  ┊ ┃ ┏━┻┓  ┊
    0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃  4  ┊ ┃ ┃  4  ┊
        ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊
    0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊
      0.00      2.00      7.00      10.00

    Simplifies to

    0.25┊   4   ┊       ┊       ┊
        ┊   ┃   ┊       ┊       ┊
    0.20┊   ┃   ┊       ┊   3   ┊
        ┊   ┃   ┊       ┊  ┏┻┓  ┊
    0.17┊   2   ┊   2   ┊  ┃ ┃  ┊
        ┊  ┏┻┓  ┊  ┏┻┓  ┊  ┃ ┃  ┊
    0.00┊  0 1  ┊  0 1  ┊  0 1  ┊
      0.00    2.00    7.00    10.00
    */
    tsk_treeseq_t ts, simplified;
    tsk_id_t kept_samples[] = { 0, 3 };
    const uint32_t expected_num_trees = 3;
    int err = 0;
    /* One row of 5 parent entries per simplified tree. */
    // clang-format off
    tsk_id_t expected_parents[] = {
        2, 2, 4, -1, -1,
        2, 2, -1, -1, -1,
        3, 3, -1, -1, -1,
    };
    // clang-format on

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);

    err = tsk_treeseq_simplify(
        &ts, kept_samples, 2, TSK_SIMPLIFY_KEEP_INPUT_ROOTS, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    verify_trees(&simplified, expected_num_trees, expected_parents);
    /* The edge-array check is run on the input tree sequence. */
    verify_edge_array_trees(&ts);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&simplified);
}
/* Build a three-tree sequence from inline node, edge, site and mutation
 * text, then check the trees against the expected parent arrays and run the
 * next/prev and edge-array verifications on it. */
static void
test_left_to_right_multi_tree(void)
{
    const char *node_text = "1 0 0\n"
                            "1 0 0\n"
                            "1 0 0\n"
                            "1 0 0\n"
                            "0 0.090 0\n"
                            "0 0.170 0\n"
                            "0 0.253 0\n"
                            "0 0.071 0\n"
                            "0 0.202 0\n";
    const char *edge_text = "2 10 7 2,3\n"
                            "0 2 4 1\n"
                            "2 10 4 1\n"
                            "0 2 4 3\n"
                            "2 10 4 7\n"
                            "0 7 5 0,4\n"
                            "7 10 8 0,4\n"
                            "0 2 6 2,5\n";
    const char *site_text = "1 0\n"
                            "4.5 0\n"
                            "8.5 0\n";
    const char *mutation_text = "0 2 1\n"
                                "1 0 1\n"
                                "2 4 1\n";
    const uint32_t expected_num_trees = 3;
    tsk_treeseq_t ts;
    /* One row of 9 parent entries per tree. */
    // clang-format off
    tsk_id_t expected_parents[] = {
        5, 4, 6, 4, 5, 6, TSK_NULL, TSK_NULL, TSK_NULL,
        5, 4, 7, 7, 5, TSK_NULL, TSK_NULL, 4, TSK_NULL,
        8, 4, 7, 7, 8, TSK_NULL, TSK_NULL, 4, TSK_NULL,
    };
    // clang-format on

    tsk_treeseq_from_text(&ts, 10, node_text, edge_text, NULL, site_text, mutation_text,
        NULL, NULL, 0);
    verify_trees(&ts, expected_num_trees, expected_parents);
    verify_tree_next_prev(&ts);
    verify_edge_array_trees(&ts);
    tsk_treeseq_free(&ts);
}
/* Build a tree sequence of length 12 whose edges leave parts of the genome
 * uncovered, and check the six expected trees; three of them have no parent
 * set for any node. Also runs the next/prev and edge-array verifications. */
static void
test_gappy_multi_tree(void)
{
    const char *node_text = "1 0 0\n"
                            "1 0 0\n"
                            "1 0 0\n"
                            "1 0 0\n"
                            "0 0.090 0\n"
                            "0 0.170 0\n"
                            "0 0.253 0\n"
                            "0 0.071 0\n"
                            "0 0.202 0\n";
    const char *edge_text = "2 7 7 2\n"
                            "8 10 7 2\n"
                            "2 7 7 3\n"
                            "8 10 7 3\n"
                            "1 2 4 1\n"
                            "2 7 4 1\n"
                            "8 10 4 1\n"
                            "1 2 4 3\n"
                            "2 7 4 7\n"
                            "8 10 4 7\n"
                            "1 7 5 0,4\n"
                            "8 10 8 0,4\n"
                            "1 2 6 2,5\n";
    const uint32_t expected_num_trees = 6;
    tsk_treeseq_t ts;
    /* Short alias so the parent grid stays readable. */
    tsk_id_t nil = TSK_NULL;
    /* One row of 9 parent entries per tree. */
    // clang-format off
    tsk_id_t expected_parents[] = {
        nil, nil, nil, nil, nil, nil, nil, nil, nil,
        5,   4,   6,   4,   5,   6,   nil, nil, nil,
        5,   4,   7,   7,   5,   nil, nil, 4,   nil,
        nil, nil, nil, nil, nil, nil, nil, nil, nil,
        8,   4,   7,   7,   8,   nil, nil, 4,   nil,
        nil, nil, nil, nil, nil, nil, nil, nil, nil,
    };
    // clang-format on

    tsk_treeseq_from_text(
        &ts, 12, node_text, edge_text, NULL, NULL, NULL, NULL, NULL, 0);
    verify_trees(&ts, expected_num_trees, expected_parents);
    verify_tree_next_prev(&ts);
    verify_edge_array_trees(&ts);
    tsk_treeseq_free(&ts);
}
/* Load the paper example into a table collection, confirm it initialises to
 * the expected three trees, then corrupt the left coordinate of edge 0 and
 * check that initialisation reports TSK_ERR_BAD_EDGE_INTERVAL. Restoring the
 * coordinate must make initialisation succeed again. */
static void
test_tsk_treeseq_bad_records(void)
{
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    tsk_flags_t init_flags = TSK_TS_INIT_BUILD_INDEXES;
    const uint32_t expected_num_trees = 3;
    int err = 0;
    // clang-format off
    tsk_id_t expected_parents[] = {
        6, 5, 8, 5, TSK_NULL, 6, 8, TSK_NULL, TSK_NULL,
        6, 5, 4, 4, 5, 6, TSK_NULL, TSK_NULL, TSK_NULL,
        7, 5, 4, 4, 5, 7, TSK_NULL, TSK_NULL, TSK_NULL,
    };
    // clang-format on

    err = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    tables.sequence_length = 10;
    parse_nodes(paper_ex_nodes, &tables.nodes);
    parse_edges(paper_ex_edges, &tables.edges);
    parse_individuals(paper_ex_individuals, &tables.individuals);

    /* Make sure we have a good set of records */
    err = tsk_treeseq_init(&ts, &tables, init_flags);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    CU_ASSERT_EQUAL_FATAL(ts.num_trees, 3);
    verify_trees(&ts, expected_num_trees, expected_parents);
    tsk_treeseq_free(&ts);

    /* Left value greater than right */
    tables.edges.left[0] = 10.0;
    err = tsk_treeseq_init(&ts, &tables, init_flags);
    CU_ASSERT_EQUAL(err, TSK_ERR_BAD_EDGE_INTERVAL);
    tsk_treeseq_free(&ts);

    /* Put the original left coordinate back and re-check. */
    tables.edges.left[0] = 2.0;
    err = tsk_treeseq_init(&ts, &tables, init_flags);
    CU_ASSERT_EQUAL(err, 0);
    verify_trees(&ts, expected_num_trees, expected_parents);
    tsk_treeseq_free(&ts);

    tsk_table_collection_free(&tables);
}
/* Run the edge-array verification on the unary-node example, then step
 * through its trees and check num_children for node 8 on each one. */
static void
test_convenience_arrays_multi_tree(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    int err;

    tsk_treeseq_from_text(
        &ts, 10, unary_ex_nodes, unary_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);
    verify_edge_array_trees(&ts);

    err = tsk_tree_init(&tree, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);

    /* Node 8 is expected to have 2, then 1, then 0 children. */
    CU_ASSERT_TRUE(tsk_tree_next(&tree));
    CU_ASSERT_EQUAL(tree.num_children[8], 2);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));
    CU_ASSERT_EQUAL(tree.num_children[8], 1);
    CU_ASSERT_TRUE(tsk_tree_next(&tree));
    CU_ASSERT_EQUAL(tree.num_children[8], 0);

    tsk_tree_free(&tree);
    tsk_treeseq_free(&ts);
}
/* Check tsk_tree_get_mrca on the first tree of the multi-root example: a
 * node with itself, two nodes with a common ancestor, and two nodes for
 * which the result is TSK_NULL (queried in both argument orders). */
static void
test_multiroot_mrca(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    tsk_id_t ancestor;
    int err;

    tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    err = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(err, TSK_TREE_OK);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&t, 0, 0, &ancestor), 0);
    CU_ASSERT_EQUAL(ancestor, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&t, 0, 1, &ancestor), 0);
    CU_ASSERT_EQUAL(ancestor, 10);

    /* MRCA of two nodes in different subtrees is TSK_NULL */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&t, 0, 2, &ancestor), 0);
    CU_ASSERT_EQUAL(ancestor, TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&t, 2, 0, &ancestor), 0);
    CU_ASSERT_EQUAL(ancestor, TSK_NULL);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/*=======================================================
* Sample sets
*======================================================*/
/* Run the sample-count checks (with flags 0 and with TSK_SEEK_SKIP) and the
 * sample-set verification on the paper example built without sites or
 * mutations. */
static void
test_simple_sample_sets(void)
{
    tsk_treeseq_t ts;
    const uint32_t num_cases = 6;
    // clang-format off
    sample_count_test_t cases[] = {
        {0, 0, 1}, {0, 5, 2}, {0, 6, 3},
        {1, 4, 2}, {1, 5, 3}, {1, 6, 4}};
    // clang-format on

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,
        paper_ex_individuals, NULL, 0);
    verify_sample_counts(&ts, num_cases, cases, 0);
    verify_sample_counts(&ts, num_cases, cases, TSK_SEEK_SKIP);
    verify_sample_sets(&ts);
    tsk_treeseq_free(&ts);
}
/* Run the sample-count checks (with flags 0 and with TSK_SEEK_SKIP) and the
 * sample-set verification on the non-binary example built without sites or
 * mutations. */
static void
test_nonbinary_sample_sets(void)
{
    tsk_treeseq_t ts;
    /* NOTE(review): the table below holds 10 entries but only the first 8
     * are passed on, so the last two are never checked. Confirm whether
     * that is intentional before changing the count. */
    const uint32_t num_cases = 8;
    // clang-format off
    sample_count_test_t cases[] = {
        {0, 0, 1}, {0, 8, 4}, {0, 9, 5}, {0, 10, 3}, {0, 12, 8},
        {1, 5, 1}, {1, 8, 4}, {1, 9, 5}, {0, 10, 2}, {0, 11, 1}};
    // clang-format on

    tsk_treeseq_from_text(&ts, 100, nonbinary_ex_nodes, nonbinary_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    verify_sample_counts(&ts, num_cases, cases, 0);
    verify_sample_counts(&ts, num_cases, cases, TSK_SEEK_SKIP);
    verify_sample_sets(&ts);
    tsk_treeseq_free(&ts);
}
/* Run the sample-count checks (with flags 0 and with TSK_SEEK_SKIP) and the
 * sample-set verification on the internal-sample example built without
 * sites or mutations. */
static void
test_internal_sample_sample_sets(void)
{
    tsk_treeseq_t ts;
    const uint32_t num_cases = 9;
    // clang-format off
    sample_count_test_t cases[] = {
        {0, 0, 1}, {0, 5, 4}, {0, 4, 2}, {0, 7, 5},
        {1, 4, 2}, {1, 5, 4}, {1, 8, 5},
        {2, 5, 4}, {2, 6, 5}};
    // clang-format on

    tsk_treeseq_from_text(&ts, 10, internal_sample_ex_nodes, internal_sample_ex_edges,
        NULL, NULL, NULL, NULL, NULL, 0);
    verify_sample_counts(&ts, num_cases, cases, 0);
    verify_sample_counts(&ts, num_cases, cases, TSK_SEEK_SKIP);
    verify_sample_sets(&ts);
    tsk_treeseq_free(&ts);
}
/* Check the sample-list arrays (left_sample, right_sample, next_sample,
 * samples) and the tree sequence's sample_index_map on a three-node tree in
 * which node 1 is a leaf that is not flagged as a sample. */
static void
test_non_sample_leaf_sample_lists(void)
{
    const char *node_text = "1 0 0\n"
                            "0 0 0\n"
                            "1 2 0\n";
    const char *edge_text = "0 1 2 0,1\n";
    /* Expected values indexed by node ID. */
    const tsk_id_t expected_left[3] = { 0, -1, 1 };
    const tsk_id_t expected_right[3] = { 0, -1, 0 };
    const tsk_id_t expected_index_map[3] = { 0, -1, 1 };
    /* Expected values indexed by sample index. */
    const tsk_id_t expected_next[2] = { -1, 0 };
    const tsk_id_t expected_samples[2] = { 0, 2 };
    tsk_treeseq_t ts;
    tsk_tree_t tree;
    tsk_id_t k;
    int err;

    tsk_treeseq_from_text(
        &ts, 1, node_text, edge_text, NULL, NULL, NULL, NULL, NULL, 0);
    err = tsk_tree_init(&tree, &ts, TSK_SAMPLE_LISTS);
    CU_ASSERT_EQUAL_FATAL(err, 0);
    err = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(err, TSK_TREE_OK);

    for (k = 0; k < 3; k++) {
        CU_ASSERT_EQUAL_FATAL(expected_left[k], tree.left_sample[k]);
        CU_ASSERT_EQUAL_FATAL(expected_right[k], tree.right_sample[k]);
        CU_ASSERT_EQUAL_FATAL(expected_index_map[k], ts.sample_index_map[k]);
    }
    for (k = 0; k < 2; k++) {
        CU_ASSERT_EQUAL_FATAL(expected_next[k], tree.next_sample[k]);
        CU_ASSERT_EQUAL_FATAL(expected_samples[k], tree.samples[k]);
    }

    tsk_treeseq_free(&ts);
    tsk_tree_free(&tree);
}
static void
test_virtual_root_properties(void)
{
int ret;
tsk_treeseq_t ts;
tsk_tree_t t;
int depth;
double time, length;
tsk_id_t node;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
ret = tsk_tree_init(&t, &ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
CU_ASSERT_EQUAL_FATAL(tsk_tree_get_depth(&t, t.virtual_root, &depth), 0)
CU_ASSERT_EQUAL_FATAL(depth, -1);
CU_ASSERT_EQUAL_FATAL(tsk_tree_get_time(&t, t.virtual_root, &time), 0)
/* Workaround problems in IEEE floating point macros. We may want to
* add tsk_isinf (like tsk_isnan) at some point, but not worth it just
* for this test case */
CU_ASSERT_TRUE(isinf((float) time));
CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&t, t.virtual_root, 0, &node), 0)
CU_ASSERT_EQUAL(node, t.virtual_root);
CU_ASSERT_EQUAL_FATAL(tsk_tree_get_mrca(&t, 0, t.virtual_root, &node), 0)
CU_ASSERT_EQUAL(node, t.virtual_root);
CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, t.virtual_root, &node), 0)
CU_ASSERT_EQUAL(node, TSK_NULL);
CU_ASSERT_EQUAL_FATAL(tsk_tree_get_branch_length(&t, t.virtual_root, &length), 0)
CU_ASSERT_EQUAL(length, 0);
/* The definition of "descendant" is that node v is on the path from
* u to a root. Since there is no parent link from roots to the
* virtual_root, it's consistent with this definition to return false
* for every node. */
CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, 0, t.virtual_root));
CU_ASSERT_FALSE(
tsk_tree_is_descendant(&t, t.left_child[t.virtual_root], t.virtual_root));
CU_ASSERT_FALSE(tsk_tree_is_descendant(&t, t.virtual_root, 0));
/* The virtual_root *is* a descendent of itself, though. This is
* consistent with other nodes that are not "in" the tree being
* descendents of themselves, despite not being roots in the tree. */
CU_ASSERT_TRUE(tsk_tree_is_descendant(&t, t.virtual_root, t.virtual_root));
CU_ASSERT_FALSE(tsk_tree_is_sample(&t, t.virtual_root));
CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 1);
tsk_tree_free(&t);
tsk_treeseq_free(&ts);
}
/* With TSK_NO_SAMPLE_COUNTS the tree reports no roots and every traversal
 * entry point is expected to fail with TSK_ERR_UNSUPPORTED_OPERATION. */
static void
test_no_sample_count_semantics(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    /* A single slot is enough here: every traversal call below is expected
     * to return an error. */
    tsk_id_t node_buffer;
    tsk_size_t num_visited;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_tree_init(&t, &ts, TSK_NO_SAMPLE_COUNTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);

    CU_ASSERT_EQUAL(tsk_tree_get_num_roots(&t), 0);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), TSK_NULL);

    /* Whole-tree traversals. */
    ret = tsk_tree_preorder(&t, &node_buffer, &num_visited);
    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);
    ret = tsk_tree_postorder(&t, &node_buffer, &num_visited);
    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);
    ret = tsk_tree_preorder_samples_from(&t, -1, &node_buffer, &num_visited);
    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);

    /* Traversals starting at the virtual root. */
    ret = tsk_tree_preorder_from(&t, t.virtual_root, &node_buffer, &num_visited);
    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);
    ret = tsk_tree_postorder_from(&t, t.virtual_root, &node_buffer, &num_visited);
    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);
    ret = tsk_tree_preorder_samples_from(
        &t, t.virtual_root, &node_buffer, &num_visited);
    CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/*=======================================================
* Tree traversals
*=======================================================*/
/* Asserts (non-fatally) that the first n entries of l1 and l2 are equal,
 * element by element. */
static void
verify_node_lists(tsk_size_t n, tsk_id_t *l1, tsk_id_t *l2)
{
    tsk_size_t k = 0;

    while (k < n) {
        CU_ASSERT_EQUAL(l1[k], l2[k]);
        k++;
    }
}
static void
test_single_tree_traversal(void)
{
int ret;
tsk_treeseq_t ts;
tsk_tree_t t;
tsk_size_t num_nodes = 7;
tsk_id_t preorder[] = { 6, 4, 0, 1, 5, 2, 3 };
tsk_id_t preorder_vr[] = { 7, 6, 4, 0, 1, 5, 2, 3 };
tsk_id_t preorder_samples[] = { 0, 1, 2, 3 };
tsk_id_t postorder[] = { 0, 1, 4, 2, 3, 5, 6 };
tsk_id_t postorder_vr[] = { 0, 1, 4, 2, 3, 5, 6, 7 };
tsk_id_t nodes[num_nodes + 1];
tsk_size_t n;
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
NULL, NULL, NULL, 0);
ret = tsk_tree_init(&t, &ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
ret = tsk_tree_preorder(&t, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, num_nodes);
verify_node_lists(n, nodes, preorder);
ret = tsk_tree_preorder_from(&t, -1, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, num_nodes);
verify_node_lists(n, nodes, preorder);
ret = tsk_tree_preorder_from(&t, t.virtual_root, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, num_nodes + 1);
verify_node_lists(n, nodes, preorder_vr);
ret = tsk_tree_preorder_samples_from(&t, -1, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, 4);
verify_node_lists(n, nodes, preorder_samples);
ret = tsk_tree_preorder_samples_from(&t, t.virtual_root, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, 4);
verify_node_lists(n, nodes, preorder_samples);
ret = tsk_tree_preorder_from(&t, 5, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, 3);
verify_node_lists(n, nodes, preorder + 4);
ret = tsk_tree_preorder_samples_from(&t, 5, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, 2);
verify_node_lists(n, nodes, preorder_samples + 2);
ret = tsk_tree_postorder(&t, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, num_nodes);
verify_node_lists(n, nodes, postorder);
ret = tsk_tree_postorder_from(&t, -1, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, num_nodes);
verify_node_lists(n, nodes, postorder);
ret = tsk_tree_postorder_from(&t, t.virtual_root, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, num_nodes + 1);
verify_node_lists(n, nodes, postorder_vr);
ret = tsk_tree_postorder_from(&t, 4, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(n, 3);
verify_node_lists(n, nodes, postorder);
/* Check errors */
ret = tsk_tree_preorder_from(&t, -2, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_preorder_from(&t, 8, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_preorder_samples_from(&t, -2, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_preorder_samples_from(&t, 8, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_postorder_from(&t, -2, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_postorder_from(&t, 8, nodes, &n);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tsk_tree_free(&t);
tsk_treeseq_free(&ts);
}
/* printed out in tree order.
0.90┊ ┊ 11 ┊ ┊
┊ ┊ ┏┻┓ ┊ ┊
0.80┊ 10 ┊ ┃ ┃ ┊ ┊
┊ ┏┻┓ ┊ ┃ ┃ ┊ ┊
0.40┊ 9 ┃ ┃ ┊ 9 ┃ ┃ ┊ 9 ┊
┊ ┏━┻┓ ┃ ┃ ┊ ┏━┻━┓ ┃ ┃ ┊ ┏━┻━━┓ ┊
0.30┊ ┃ ┃ ┃ ┃ ┊ ┃ 8 ┃ ┃ ┊ ┃ 8 ┊
┊ ┃ ┃ ┃ ┃ ┊ ┃ ┏┻┓ ┃ ┃ ┊ ┃ ┏┻┓ ┊
0.20┊ ┃ 7 ┃ ┃ ┊ 7 ┃ ┃ ┃ ┃ ┊ 7 ┃ ┃ ┊
┊ ┃ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┃ ┃ ┊ ┏━┻┓ ┃ ┃ ┊
0.10┊ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┃ ┃ ┃ ┊ ┃ 6 ┃ ┃ ┊
┊ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┏┻┓ ┃ ┃ ┊
0.00┊ 5 2 3 4 0 1 ┊ 3 4 1 2 0 5 ┊ 4 0 3 1 2 5 ┊
0 4 8 10
*/
/* Traversal checks on the first tree of the multiroot example: whole-tree
 * and virtual-root traversals, subtrees below individual nodes, and a node
 * (11) that is not part of the first tree. */
static void
test_multiroot_tree_traversal(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    tsk_id_t preorder[] = { 5, 9, 2, 7, 3, 4, 10, 0, 1 };
    tsk_id_t preorder_vr[] = { 12, 5, 9, 2, 7, 3, 4, 10, 0, 1 };
    tsk_id_t preorder_samples[] = { 5, 2, 3, 4, 0, 1 };
    tsk_id_t postorder[] = { 5, 2, 3, 4, 7, 9, 0, 1, 10 };
    tsk_id_t postorder_vr[] = { 5, 2, 3, 4, 7, 9, 0, 1, 10, 12 };
    tsk_id_t nodes[13];
    tsk_size_t n;

    tsk_treeseq_from_text(&ts, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, 0), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);

    /* Preorder: whole tree, explicit -1 start, virtual root, node 10. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder(&t, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 9);
    verify_node_lists(n, nodes, preorder);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder_from(&t, -1, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 9);
    verify_node_lists(n, nodes, preorder);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder_from(&t, t.virtual_root, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 10);
    verify_node_lists(n, nodes, preorder_vr);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder_from(&t, 10, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 3);
    verify_node_lists(n, nodes, preorder + 6);

    /* Sample-only preorder from the same starting points, plus the
     * isolated sample 5. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder_samples_from(&t, -1, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 6);
    verify_node_lists(n, nodes, preorder_samples);

    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_preorder_samples_from(&t, t.virtual_root, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 6);
    verify_node_lists(n, nodes, preorder_samples);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder_samples_from(&t, 5, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 1);
    verify_node_lists(n, nodes, preorder_samples);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder_samples_from(&t, 10, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 2);
    verify_node_lists(n, nodes, preorder_samples + 4);

    /* Postorder: whole tree, explicit -1 start, virtual root, node 10. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_postorder(&t, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 9);
    verify_node_lists(n, nodes, postorder);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_postorder_from(&t, -1, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 9);
    verify_node_lists(n, nodes, postorder);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_postorder_from(&t, t.virtual_root, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 10);
    verify_node_lists(n, nodes, postorder_vr);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_postorder_from(&t, 10, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 3);
    verify_node_lists(n, nodes, postorder + 6);

    /* A node that isn't "in" the tree yields a singleton traversal list
     * and is connected to no samples. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder_from(&t, 11, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 1);
    CU_ASSERT_EQUAL_FATAL(nodes[0], 11);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_postorder_from(&t, 11, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 1);
    CU_ASSERT_EQUAL_FATAL(nodes[0], 11);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_preorder_samples_from(&t, 11, nodes, &n), 0);
    CU_ASSERT_EQUAL_FATAL(n, 0);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/* Seeks around the three trees of the paper example by position and by
 * index, passing seek_options through to every seek call, and checks that
 * the tree ends up at the expected index each time. */
static void
verify_seek_multi_tree(tsk_flags_t seek_options)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    /* Left coordinates of the three trees, followed by the sequence end. */
    double breakpoints[] = { 0, 2, 7, 10 };
    tsk_id_t num_trees = 3;
    tsk_id_t from, to;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,
        paper_ex_individuals, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, 0), 0);

    /* Every ordered pair of trees: move to "from", then visit each "to". */
    for (from = 0; from < num_trees; from++) {
        CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, breakpoints[from], seek_options), 0);
        CU_ASSERT_EQUAL_FATAL(t.index, from);
        CU_ASSERT_EQUAL_FATAL(tsk_tree_seek_index(&t, from, seek_options), 0);
        CU_ASSERT_EQUAL_FATAL(t.index, from);

        for (to = 0; to < num_trees; to++) {
            CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, breakpoints[to], seek_options), 0);
            CU_ASSERT_EQUAL_FATAL(t.index, to);
            CU_ASSERT_EQUAL_FATAL(tsk_tree_seek_index(&t, to, seek_options), 0);
            CU_ASSERT_EQUAL_FATAL(t.index, to);
        }
    }

    /* Positions just below each right-hand breakpoint stay in that tree. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, 1.99999, seek_options), 0);
    CU_ASSERT_EQUAL_FATAL(t.index, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, 6.99999, seek_options), 0);
    CU_ASSERT_EQUAL_FATAL(t.index, 1);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, 9.99999, seek_options), 0);
    CU_ASSERT_EQUAL_FATAL(t.index, 2);

    tsk_tree_free(&t);

    /* Seek to all positions from a new tree. */
    for (to = 0; to < num_trees; to++) {
        CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, 0), 0);
        CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, breakpoints[to], seek_options), 0);
        CU_ASSERT_EQUAL_FATAL(t.index, to);
        tsk_tree_free(&t);
    }

    /* Seek to all positions from a non-new tree in the null state: the tree
     * is moved to the first tree and then stepped back off the start so
     * that its index is -1 again. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, 0), 0);
    for (to = 0; to < num_trees; to++) {
        CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, 0, seek_options), 0);
        CU_ASSERT_EQUAL_FATAL(tsk_tree_prev(&t), 0);
        CU_ASSERT_EQUAL_FATAL(t.index, -1);
        CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, breakpoints[to], seek_options), 0);
        CU_ASSERT_EQUAL_FATAL(t.index, to);
    }

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
static void
test_seek_multi_tree(void)
{
verify_seek_multi_tree(0);
verify_seek_multi_tree(TSK_SEEK_SKIP);
}
/* Seeking to a position or tree index outside the tree sequence must fail
 * with TSK_ERR_SEEK_OUT_OF_BOUNDS. */
static void
test_seek_errors(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,
        paper_ex_individuals, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, 0), 0);

    /* Positions: negative, exactly the sequence length (10), and beyond. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, -1, 0), TSK_ERR_SEEK_OUT_OF_BOUNDS);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, 10, 0), TSK_ERR_SEEK_OUT_OF_BOUNDS);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_seek(&t, 11, 0), TSK_ERR_SEEK_OUT_OF_BOUNDS);

    /* Indexes: one past the last tree, and negative. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_seek_index(&t, (tsk_id_t) ts.num_trees, 0),
        TSK_ERR_SEEK_OUT_OF_BOUNDS);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_seek_index(&t, -1, 0), TSK_ERR_SEEK_OUT_OF_BOUNDS);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
}
/*=======================================================
* KC Distance tests.
*=======================================================*/
/* KC distance on tree sequences consisting of a single isolated node: a
 * lone sample gives distance 0, while a lone non-sample node is rejected
 * with TSK_ERR_MULTIPLE_ROOTS. */
static void
test_isolated_node_kc(void)
{
    const char *single_leaf = "1 0 0";
    const char *single_internal = "0 0 0";
    const char *edges = "";
    tsk_treeseq_t ts;
    tsk_tree_t t;
    double result = 0;

    /* One sample node, no edges. */
    tsk_treeseq_from_text(&ts, 1, single_leaf, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &ts, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_kc_distance(&t, &t, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);
    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);

    /* One non-sample node, no edges: the tree has no root at all. */
    tsk_treeseq_from_text(
        &ts, 1, single_internal, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(
        tsk_treeseq_kc_distance(&ts, &ts, 0, &result), TSK_ERR_MULTIPLE_ROOTS);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_kc_distance(&t, &t, 0, &result), TSK_ERR_MULTIPLE_ROOTS);
    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* The KC distance between the single tree example and itself (or a copy
 * of it) is 0 for both third-argument values 0 and 1. */
static void
test_single_tree_kc(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t, other_t;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);

    /* Tree sequence level. */
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &ts, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &ts, 1, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);

    /* Tree level: t is copied over the already initialised other_t. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&other_t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&other_t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_copy(&t, &other_t, TSK_NO_INIT), 0);
    check_trees_identical(&t, &other_t);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_kc_distance(&t, &other_t, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_kc_distance(&t, &other_t, 1, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
    tsk_tree_free(&other_t);
}
/* Two tree sequences with the same edges but different internal node
 * times: the KC distance is 0 with third argument 0 and about 4.243 with
 * third argument 1, at both tree sequence and tree level. */
static void
test_two_trees_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 2 0\n"
                        "0 3 0\n";
    const char *nodes_other = "1 0 0\n"
                              "1 0 0\n"
                              "1 0 0\n"
                              "0 4 0\n"
                              "0 6 0\n";
    const char *edges = "0 1 3 0,1\n"
                        "0 1 4 2,3\n";
    tsk_treeseq_t ts, other_ts;
    tsk_tree_t t, other_t;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);

    tsk_treeseq_from_text(
        &other_ts, 1, nodes_other, edges, NULL, NULL, NULL, NULL, NULL, 0);

    /* Tree sequence level. */
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &other_ts, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &other_ts, 1, &result), 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 4.243, 1e-2);

    /* Tree level. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&other_t, &other_ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&other_t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_kc_distance(&t, &other_t, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_kc_distance(&t, &other_t, 1, &result), 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, 4.243, 1e-2);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&other_ts);
    tsk_tree_free(&t);
    tsk_tree_free(&other_t);
}
/* Builds a tree sequence from an empty table collection. Invalid sequence
 * lengths are rejected first; with a valid length the single empty tree is
 * inspected and KC distance fails with TSK_ERR_MULTIPLE_ROOTS. */
static void
test_empty_tree_kc(void)
{
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    tsk_tree_t t;
    tsk_id_t v;
    double result = 0;

    CU_ASSERT_EQUAL_FATAL(tsk_table_collection_init(&tables, 0), 0);

    /* Sequence length as left by tsk_table_collection_init, then NAN and
     * INFINITY: all rejected. */
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES),
        TSK_ERR_BAD_SEQUENCE_LENGTH);
    tsk_treeseq_free(&ts);

    tables.sequence_length = NAN;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES),
        TSK_ERR_BAD_SEQUENCE_LENGTH);
    tsk_treeseq_free(&ts);

    tables.sequence_length = INFINITY;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES),
        TSK_ERR_BAD_SEQUENCE_LENGTH);
    tsk_treeseq_free(&ts);

    /* A finite, positive length is accepted. */
    tables.sequence_length = 1.0;
    CU_ASSERT_EQUAL_FATAL(
        tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES), 0);
    verify_empty_tree_sequence(&ts, 1.0);

    CU_ASSERT_EQUAL_FATAL(
        tsk_treeseq_kc_distance(&ts, &ts, 0, &result), TSK_ERR_MULTIPLE_ROOTS);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);

    /* The tree spans [0, 1) and has no root; the links stored at index 0
     * are all null. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&t), TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(t.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(t.interval.right, 1);
    CU_ASSERT_EQUAL_FATAL(t.parent[0], TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(t.left_child[0], TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(t.right_child[0], TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(t.left_sib[0], TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(t.right_sib[0], TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_parent(&t, 1, &v), TSK_ERR_NODE_OUT_OF_BOUNDS);

    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_kc_distance(&t, &t, 0, &result), TSK_ERR_MULTIPLE_ROOTS);

    tsk_tree_free(&t);
    tsk_treeseq_free(&ts);
    tsk_table_collection_free(&tables);
}
/* KC distance of a tree whose root has four children, compared against
 * itself: the distance must be 0 at tree sequence and tree level.
 *
 * The return code of each distance call is asserted as well. Because
 * `result` starts at 0, checking the value alone would also pass if the
 * call failed and never wrote to it. */
static void
test_nonbinary_tree_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0";
    const char *edges = "0 1 4 0,1,2,3\n";
    tsk_treeseq_t ts;
    tsk_tree_t t;
    int ret;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);

    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
    ret = tsk_tree_kc_distance(&t, &t, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* KC distance still works when the samples are not the first nodes in the
 * table: node 0 is unused and the samples are nodes 1 and 2. */
static void
test_nonzero_samples_kc(void)
{
    const char *nodes = "0 0 0\n" /* unused node at the start */
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0";
    const char *edges = "0 1 3 1,2\n";
    tsk_treeseq_t ts;
    tsk_tree_t t;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &ts, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_kc_distance(&t, &t, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* KC distance on a tree whose root (node 2) is itself a sample. The call
 * succeeds at tree sequence and tree level, and the distance of the tree
 * to itself is 0 in both cases. */
static void
test_internal_samples_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 1 0";
    const char *edges = "0 1 2 0,1\n";
    tsk_treeseq_t ts;
    tsk_tree_t t;
    int ret;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    /* Permitted in tree sequences */
    ret = tsk_treeseq_kc_distance(&ts, &ts, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(result, 0.0);

    ret = tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);

    /* Reset first so the value assertion below cannot be satisfied by the
     * 0 left behind by the tree sequence level call. */
    result = -1;
    ret = tsk_tree_kc_distance(&t, &t, 0, &result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(result, 0.0);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* KC distance on a tree in which one of the two leaves (node 1) is not a
 * sample: the distance of the tree to itself is still 0. */
static void
test_non_sample_leaf_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "0 0 0\n"
                        "0 1 0\n";
    const char *edges = "0 1 2 0,1\n";
    tsk_treeseq_t ts;
    tsk_tree_t t;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &ts, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0.0);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_kc_distance(&t, &t, 0, &result), 0);
    CU_ASSERT_EQUAL_FATAL(result, 0.0);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* Comparing a three-sample tree with a two-sample tree must fail with
 * TSK_ERR_SAMPLE_SIZE_MISMATCH at both tree sequence and tree level. */
static void
test_unequal_sample_size_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 2 0\n"
                        "0 3 0\n";
    const char *edges = "0 1 3 0,1\n"
                        "0 1 4 2,3\n";
    const char *nodes_other = "1 0 0\n"
                              "1 0 0\n"
                              "0 1 0\n";
    const char *edges_other = "0 1 2 0,1\n";
    tsk_treeseq_t ts, other_ts;
    tsk_tree_t t, other_t;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    tsk_treeseq_from_text(
        &other_ts, 1, nodes_other, edges_other, NULL, NULL, NULL, NULL, NULL, 0);

    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &other_ts, 0, &result),
        TSK_ERR_SAMPLE_SIZE_MISMATCH);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&other_t, &other_ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&other_t), TSK_TREE_OK);

    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_kc_distance(&t, &other_t, 0, &result), TSK_ERR_SAMPLE_SIZE_MISMATCH);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&other_ts);
    tsk_tree_free(&t);
    tsk_tree_free(&other_t);
}
/* Both trees have three samples, but the second table starts with an
 * unused node so the sample IDs differ (0,1,2 versus 1,2,3). KC distance
 * must fail with TSK_ERR_SAMPLES_NOT_EQUAL at both levels. */
static void
test_unequal_samples_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 2 0\n"
                        "0 3 0\n";
    const char *edges = "0 1 3 0,1\n"
                        "0 1 4 2,3\n";
    const char *nodes_other = "0 0 0\n" /* Unused node at the start */
                              "1 0 0\n"
                              "1 0 0\n"
                              "1 0 0\n"
                              "0 2 0\n"
                              "0 3 0\n";
    const char *edges_other = "0 1 4 1,2\n"
                              "0 1 5 3,4\n";
    tsk_treeseq_t ts, other_ts;
    tsk_tree_t t, other_t;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    tsk_treeseq_from_text(
        &other_ts, 1, nodes_other, edges_other, NULL, NULL, NULL, NULL, NULL, 0);

    CU_ASSERT_EQUAL_FATAL(
        tsk_treeseq_kc_distance(&ts, &other_ts, 0, &result), TSK_ERR_SAMPLES_NOT_EQUAL);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&other_t, &other_ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&other_t), TSK_TREE_OK);

    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_kc_distance(&t, &other_t, 0, &result), TSK_ERR_SAMPLES_NOT_EQUAL);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&other_ts);
    tsk_tree_free(&t);
    tsk_tree_free(&other_t);
}
/* A tree in which node 3 has the single child 2 must be rejected by the
 * tree-level KC distance with TSK_ERR_UNARY_NODES. */
static void
test_unary_nodes_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 2 0";
    const char *edges = "0 1 2 0,1\n"
                        "0 1 3 2";
    tsk_treeseq_t ts;
    tsk_tree_t t;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, TSK_SAMPLE_LISTS), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);

    CU_ASSERT_EQUAL_FATAL(tsk_tree_kc_distance(&t, &t, 0, &result), TSK_ERR_UNARY_NODES);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* Tree-level KC distance on a tree that was initialised without
 * TSK_SAMPLE_LISTS must fail with TSK_ERR_NO_SAMPLE_LISTS. */
static void
test_no_sample_lists_kc(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    /* Options 0: no sample lists are requested for this tree. */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_init(&t, &ts, 0), 0);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_first(&t), TSK_TREE_OK);

    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_kc_distance(&t, &t, 9, &result), TSK_ERR_NO_SAMPLE_LISTS);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* Tree sequences with the same topology but sequence lengths 1 and 2
 * cannot be compared: TSK_ERR_SEQUENCE_LENGTH_MISMATCH is expected. */
static void
test_unequal_sequence_lengths_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 2 0\n"
                        "0 3 0\n";
    /* Same parent/child structure, spanning [0, 1) and [0, 2). */
    const char *edges_1 = "0 1 3 0,1\n"
                          "0 1 4 2,3\n";
    const char *edges_2 = "0 2 3 0,1\n"
                          "0 2 4 2,3\n";
    tsk_treeseq_t ts, other;
    double result = 0;

    tsk_treeseq_from_text(&ts, 1, nodes, edges_1, NULL, NULL, NULL, NULL, NULL, 0);
    tsk_treeseq_from_text(&other, 2, nodes, edges_2, NULL, NULL, NULL, NULL, NULL, 0);

    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &other, 0, &result),
        TSK_ERR_SEQUENCE_LENGTH_MISMATCH);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&other);
}
/* KC distance between two tree sequences of length 10 over the same five
 * samples, where the first has edges that change at position 5 and the
 * second has edges spanning the whole sequence. */
static void
test_different_number_trees_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 2 0\n"
                        "0 3 0\n"
                        "0 4 0\n"
                        "0 5 0\n";
    const char *edges = "0 10 5 0,1\n"
                        "0 10 6 3,4\n"
                        "5 10 7 2,5\n"
                        "0 5 8 2\n"
                        "0 10 8 6\n"
                        "5 10 8 7\n"
                        "0 5 9 5,8\n";
    const char *other_nodes = "1 0 0\n"
                              "1 0 0\n"
                              "1 0 0\n"
                              "1 0 0\n"
                              "1 0 0\n"
                              "0 1 0\n"
                              "0 2 0\n"
                              "0 3 0\n"
                              "0 4 0\n";
    const char *other_edges = "0 10 5 0,1\n"
                              "0 10 6 2,3\n"
                              "0 10 7 4,5\n"
                              "0 10 8 6,7\n";
    tsk_treeseq_t ts, other;
    double result, expected;

    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    tsk_treeseq_from_text(
        &other, 10, other_nodes, other_edges, NULL, NULL, NULL, NULL, NULL, 0);

    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_kc_distance(&ts, &other, 0, &result), 0);

    /* Two terms, each weighted by 5 and divided by the total length 10. */
    expected = (sqrt(8.0) * 5.0 + sqrt(6.0) * 5.0) / 10.0;
    CU_ASSERT_DOUBLE_EQUAL_FATAL(result, expected, 1e-2);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&other);
}
/* Comparing the unary example tree sequence against a simple four-sample
 * tree sequence fails with TSK_ERR_UNARY_NODES regardless of the argument
 * order. */
static void
test_offset_trees_with_errors_kc(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 2 0\n"
                        "0 3 0\n"
                        "0 4 0\n";
    const char *edges = "0 10 4 0,1\n"
                        "0 10 5 2,3\n"
                        "0 10 6 4,5\n";
    tsk_treeseq_t ts, other;
    double result;

    tsk_treeseq_from_text(
        &ts, 10, unary_ex_nodes, unary_ex_edges, NULL, NULL, NULL, NULL, NULL, 0);
    tsk_treeseq_from_text(&other, 10, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);

    /* Both inputs cover the same sequence length. */
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 10);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&other), 10);

    CU_ASSERT_EQUAL_FATAL(
        tsk_treeseq_kc_distance(&ts, &other, 0, &result), TSK_ERR_UNARY_NODES);
    CU_ASSERT_EQUAL_FATAL(
        tsk_treeseq_kc_distance(&other, &ts, 0, &result), TSK_ERR_UNARY_NODES);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&other);
}
/*=======================================================
* Miscellaneous tests.
*======================================================*/
/* Error paths of tsk_treeseq_genealogical_nearest_neighbours on the single
 * tree example: bad reference set counts, overlapping or duplicated
 * reference samples, and out-of-bounds node IDs in the reference sets and
 * in the focal list. */
static void
test_genealogical_nearest_neighbours_errors(void)
{
    tsk_treeseq_t ts;
    tsk_id_t focal[] = { 0, 1, 2, 3 };
    tsk_size_t num_focal = 4;
    tsk_id_t reference_set_0[4], reference_set_1[4];
    const tsk_id_t *reference_sets[2];
    tsk_size_t reference_set_size[2];
    /* Output buffer: one value per (focal node, reference set) pair. */
    double *A = tsk_malloc(2 * num_focal * sizeof(*A));

    CU_ASSERT_FATAL(A != NULL);

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);

    /* Number of reference sets: 0 and INT16_MAX are both rejected. */
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 0, 0, A),
        TSK_ERR_BAD_PARAM_VALUE);
    CU_ASSERT_EQUAL_FATAL(
        tsk_treeseq_genealogical_nearest_neighbours(&ts, focal, num_focal,
            reference_sets, reference_set_size, INT16_MAX, 0, A),
        TSK_ERR_BAD_PARAM_VALUE);

    /* Overlapping sample sets */
    reference_sets[0] = focal;
    reference_set_size[0] = 1;
    reference_sets[1] = focal;
    reference_set_size[1] = num_focal;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_DUPLICATE_SAMPLE);

    /* Bad values in the sample sets. The disjoint sets {0, 1} and {2, 3}
     * are accepted first as a baseline. */
    reference_set_0[0] = 0;
    reference_set_0[1] = 1;
    reference_set_1[0] = 2;
    reference_set_1[1] = 3;
    reference_set_size[0] = 2;
    reference_set_size[1] = 2;
    reference_sets[0] = reference_set_0;
    reference_sets[1] = reference_set_1;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        0);

    reference_set_0[0] = -1;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_NODE_OUT_OF_BOUNDS);

    reference_set_0[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_NODE_OUT_OF_BOUNDS);

    reference_set_0[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts) + 1;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_NODE_OUT_OF_BOUNDS);

    /* Duplicate values in the reference sets: first within set 0 ({1, 1}),
     * then across the two sets (3 appears in both). */
    reference_set_0[0] = 1;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_DUPLICATE_SAMPLE);

    reference_set_0[0] = 3;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_DUPLICATE_SAMPLE);

    /* Bad sample ID. The reference sets alias the focal array here, so
     * writing focal[0] also changes the only entry of reference set 0. */
    reference_sets[0] = focal;
    reference_set_size[0] = 1;
    reference_sets[1] = focal + 1;
    reference_set_size[1] = num_focal - 1;

    focal[0] = -1;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_NODE_OUT_OF_BOUNDS);

    focal[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_NODE_OUT_OF_BOUNDS);

    focal[0] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts) + 100;
    CU_ASSERT_EQUAL_FATAL(tsk_treeseq_genealogical_nearest_neighbours(&ts, focal,
                              num_focal, reference_sets, reference_set_size, 2, 0, A),
        TSK_ERR_NODE_OUT_OF_BOUNDS);

    tsk_treeseq_free(&ts);
    free(A);
}
/* Balance indexes (Sackin, Colless, B1, B2) of the single tree example. */
static void
test_single_tree_balance(void)
{
    tsk_treeseq_t ts;
    tsk_tree_t t;
    tsk_size_t sackin, colless;
    double b1, b2;
    int ret;

    tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
        NULL, NULL, NULL, 0);
    ret = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);

    /* Balanced binary tree with 4 leaves */
    ret = tsk_tree_sackin_index(&t, &sackin);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(sackin, 8);

    ret = tsk_tree_colless_index(&t, &colless);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(colless, 0);

    ret = tsk_tree_b1_index(&t, &b1);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL(b1, 2, 1e-8);

    /* B2 is checked to high precision for the bases 10, 2 and 3. */
    ret = tsk_tree_b2_index(&t, 10, &b2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL(b2, 0.6020599913279623, 1e-14);

    ret = tsk_tree_b2_index(&t, 2, &b2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(b2, 2, 1e-16);

    ret = tsk_tree_b2_index(&t, 3, &b2);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(b2, 1.2618595071429148, 1e-14);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* Exercises the balance indexes on the first tree of the multiroot example:
 * Sackin and B1 yield values, while Colless and B2 are expected to report
 * TSK_ERR_UNDEFINED_MULTIROOT. */
static void
test_multiroot_balance(void)
{
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    tsk_size_t sackin_value;
    double b1_value;
    int status;

    tsk_treeseq_from_text(&treeseq, 10, multiroot_ex_nodes, multiroot_ex_edges, NULL,
        NULL, NULL, NULL, NULL, 0);
    status = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(status, TSK_TREE_OK);

    /* 0.80┊ 10 */
    /* ┊ ┏┻┓ */
    /* 0.40┊ 9 ┃ ┃ */
    /* ┊ ┏━┻┓ ┃ ┃ */
    /* 0.30┊ ┃ ┃ ┃ ┃ */
    /* ┊ ┃ ┃ ┃ ┃ */
    /* 0.20┊ ┃ 7 ┃ ┃ */
    /* ┊ ┃ ┏┻┓ ┃ ┃ */
    /* 0.10┊ ┃ ┃ ┃ ┃ ┃ */
    /* ┊ ┃ ┃ ┃ ┃ ┃ */
    /* 0.00┊ 5 2 3 4 0 1 */

    status = tsk_tree_sackin_index(&tree, &sackin_value);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL(sackin_value, 7);

    /* No output pointer is supplied for the calls expected to fail. */
    status = tsk_tree_colless_index(&tree, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_UNDEFINED_MULTIROOT);

    status = tsk_tree_b1_index(&tree, &b1_value);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_DOUBLE_EQUAL(b1_value, 1.0, 1e-8);

    status = tsk_tree_b2_index(&tree, 10, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_UNDEFINED_MULTIROOT);

    tsk_treeseq_free(&treeseq);
    tsk_tree_free(&tree);
}
/* Checks the balance indexes on a star tree: four sample leaves attached
 * directly to a single root. Colless is expected to be undefined for a
 * non-binary tree; the other indexes return values. */
static void
test_nonbinary_balance(void)
{
    int ret;
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0";
    const char *edges = "0 1 4 0,1,2,3\n";
    tsk_treeseq_t ts;
    tsk_tree_t t;
    tsk_size_t sackin, colless;
    double b1, b2;

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    ret = tsk_tree_init(&t, &ts, 0);
    /* Fatal: nothing below is meaningful without an initialised tree. */
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);

    /* Star tree with 4 leaves */
    CU_ASSERT_EQUAL_FATAL(tsk_tree_sackin_index(&t, &sackin), 0);
    CU_ASSERT_EQUAL(sackin, 4);
    CU_ASSERT_EQUAL_FATAL(
        tsk_tree_colless_index(&t, &colless), TSK_ERR_UNDEFINED_NONBINARY);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_b1_index(&t, &b1), 0);
    CU_ASSERT_DOUBLE_EQUAL_FATAL(b1, 0, 1e-8);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_b2_index(&t, 10, &b2), 0);
    /* Each of the 4 leaves is reached from the root with probability 1/4, so
     * the base-10 B2 index is log10(4). Previously b1 was re-checked here and
     * the computed b2 value was never verified. */
    CU_ASSERT_DOUBLE_EQUAL_FATAL(b2, 0.6020599913279623, 1e-14);

    tsk_treeseq_free(&ts);
    tsk_tree_free(&t);
}
/* Runs the balance indexes against the only tree of a tree sequence built
 * from an empty table collection with sequence length 1. */
static void
test_empty_tree_balance(void)
{
    tsk_table_collection_t empty_tables;
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    tsk_size_t sackin_value, colless_value;
    double b1_value, b2_value;
    int status;

    status = tsk_table_collection_init(&empty_tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    empty_tables.sequence_length = 1.0;
    status = tsk_treeseq_init(&treeseq, &empty_tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(status, TSK_TREE_OK);

    status = tsk_tree_sackin_index(&tree, &sackin_value);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL(sackin_value, 0);

    /* Technically wrong here because we have 0 roots, but not worth worrying about */
    status = tsk_tree_colless_index(&tree, &colless_value);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_UNDEFINED_MULTIROOT);

    status = tsk_tree_b1_index(&tree, &b1_value);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL(b1_value, 0);

    status = tsk_tree_b2_index(&tree, 10, &b2_value);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_UNDEFINED_MULTIROOT);

    tsk_table_collection_free(&empty_tables);
    tsk_treeseq_free(&treeseq);
    tsk_tree_free(&tree);
}
/* Calls tsk_tree_b2_index with logarithm bases -2, -1, 1 and 0 on the
 * single-tree example. Every call is expected to return 0; the first three
 * must give a non-finite result and base 0 must give exactly 0. */
static void
test_b2_bad_base(void)
{
    double bad_base[] = { -2, -1, 1 };
    const size_t num_bad_bases = sizeof(bad_base) / sizeof(*bad_base);
    size_t idx;
    double b2_value;
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    int status;

    tsk_treeseq_from_text(&treeseq, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
        NULL, NULL, NULL, NULL, 0);
    status = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(status, TSK_TREE_OK);

    idx = 0;
    while (idx < num_bad_bases) {
        status = tsk_tree_b2_index(&tree, bad_base[idx], &b2_value);
        CU_ASSERT_EQUAL_FATAL(status, 0);
        CU_ASSERT_FALSE(tsk_isfinite(b2_value));
        idx++;
    }
    /* Guard against the loop silently checking nothing. */
    CU_ASSERT_FATAL(idx > 0);

    /* this one is peculiar, in that base 0 seems to give a finite answer */
    status = tsk_tree_b2_index(&tree, 0, &b2_value);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL_FATAL(b2_value, 0);

    tsk_treeseq_free(&treeseq);
    tsk_tree_free(&tree);
}
static void
test_tree_errors(void)
{
int ret;
tsk_size_t j;
tsk_id_t num_nodes = 9;
tsk_id_t u;
tsk_node_t node;
tsk_treeseq_t ts, other_ts;
tsk_tree_t t, other_t;
tsk_id_t bad_nodes[] = { num_nodes + 1, num_nodes + 2, -1 };
tsk_id_t tracked_samples[] = { 0, 0, 0 };
tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,
paper_ex_individuals, NULL, 0);
ret = tsk_tree_init(&t, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_PARAM_VALUE);
ret = tsk_tree_init(&t, &ts, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_tree_first(&t);
CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
/* Out-of-bounds queries */
for (j = 0; j < sizeof(bad_nodes) / sizeof(tsk_id_t); j++) {
u = bad_nodes[j];
ret = tsk_tree_get_parent(&t, u, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_get_time(&t, u, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_get_branch_length(&t, u, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_get_mrca(&t, u, 0, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_get_mrca(&t, 0, u, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_get_num_samples(&t, u, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
ret = tsk_tree_get_num_tracked_samples(&t, u, NULL);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
/* Also check tree sequence methods */
ret = tsk_treeseq_get_node(&ts, (tsk_id_t) u, &node);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
CU_ASSERT(!tsk_treeseq_is_sample(&ts, u));
CU_ASSERT(!tsk_tree_is_sample(&t, u));
}
tracked_samples[0] = 0;
tracked_samples[1] = (tsk_id_t) tsk_treeseq_get_num_samples(&ts);
ret = tsk_tree_set_tracked_samples(&t, 2, tracked_samples);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_SAMPLES);
tracked_samples[1] = (tsk_id_t) tsk_treeseq_get_num_nodes(&ts);
ret = tsk_tree_set_tracked_samples(&t, 2, tracked_samples);
CU_ASSERT_EQUAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
tracked_samples[1] = 0;
ret = tsk_tree_set_tracked_samples(&t, 2, tracked_samples);
CU_ASSERT_EQUAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
tsk_treeseq_from_text(&other_ts, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL,
NULL, paper_ex_individuals, NULL, 0);
ret = tsk_tree_init(&other_t, &other_ts, 0);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_tree_copy(&t, &other_t, TSK_NO_INIT);
CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
tsk_tree_free(&t);
tsk_tree_free(&other_t);
ret = tsk_tree_init(&t, &other_ts, TSK_NO_SAMPLE_COUNTS);
CU_ASSERT_EQUAL(ret, 0);
ret = tsk_tree_copy(&t, &other_t, 0);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);
tsk_tree_free(&other_t);
ret = tsk_tree_copy(&t, &other_t, TSK_SAMPLE_LISTS);
CU_ASSERT_EQUAL(ret, TSK_ERR_UNSUPPORTED_OPERATION);
tsk_tree_free(&other_t);
tsk_tree_free(&t);
tsk_treeseq_free(&other_ts);
tsk_treeseq_free(&ts);
}
/* Requests row 0 of every table through the tree sequence accessors on an
 * empty tree sequence and checks that each reports its own out-of-bounds
 * error code. */
static void
test_treeseq_row_access_errors(void)
{
    tsk_table_collection_t empty_tables;
    tsk_treeseq_t treeseq;
    int status;

    status = tsk_table_collection_init(&empty_tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    empty_tables.sequence_length = 1;
    status = tsk_treeseq_init(&treeseq, &empty_tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    status = tsk_treeseq_get_individual(&treeseq, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);

    status = tsk_treeseq_get_node(&treeseq, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_NODE_OUT_OF_BOUNDS);

    status = tsk_treeseq_get_edge(&treeseq, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_EDGE_OUT_OF_BOUNDS);

    status = tsk_treeseq_get_migration(&treeseq, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_MIGRATION_OUT_OF_BOUNDS);

    status = tsk_treeseq_get_site(&treeseq, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_SITE_OUT_OF_BOUNDS);

    status = tsk_treeseq_get_mutation(&treeseq, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_MUTATION_OUT_OF_BOUNDS);

    status = tsk_treeseq_get_population(&treeseq, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_POPULATION_OUT_OF_BOUNDS);

    status = tsk_treeseq_get_provenance(&treeseq, 0, NULL);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);

    tsk_treeseq_free(&treeseq);
    tsk_table_collection_free(&empty_tables);
}
/* Builds two nodes that both reference individual 0 but carry different
 * populations (0 and TSK_NULL) and checks that
 * tsk_treeseq_get_individuals_population reports
 * TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH. */
static void
test_treeseq_get_individuals_population_errors(void)
{
    tsk_table_collection_t tc;
    tsk_treeseq_t treeseq;
    tsk_id_t populations_out[2];
    tsk_id_t row_id;
    int status;

    status = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tc.sequence_length = 1;

    row_id = tsk_population_table_add_row(&tc.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);

    /* Two individuals; only individual 0 is referenced by nodes. */
    row_id = tsk_individual_table_add_row(&tc.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_individual_table_add_row(&tc.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);

    /* Same individual, conflicting populations. */
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 1.25, 0, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 1.25, TSK_NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);

    status = tsk_treeseq_init(&treeseq, &tc, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    row_id = tsk_treeseq_get_individuals_population(&treeseq, populations_out);
    CU_ASSERT_EQUAL_FATAL(row_id, TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH);

    tsk_treeseq_free(&treeseq);
    tsk_table_collection_free(&tc);
}
static void
test_treeseq_get_individuals_population(void)
{
int ret;
tsk_id_t ret_id;
int j;
tsk_table_collection_t tables;
tsk_treeseq_t ts;
tsk_id_t output[4];
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
for (j = 0; j < 2; j++) {
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, (tsk_id_t) j);
}
for (j = 0; j < 4; j++) {
ret_id = tsk_individual_table_add_row(
&tables.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, (tsk_id_t) j);
}
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.25, 0, 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 3.0, 1, 3, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 2);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0.0, TSK_NULL, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 3);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1.25, 0, 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 4);
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_get_individuals_population(&ts, output);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_EQUAL_FATAL(output[0], TSK_NULL);
CU_ASSERT_EQUAL_FATAL(output[1], 0);
CU_ASSERT_EQUAL_FATAL(output[2], TSK_NULL);
CU_ASSERT_EQUAL_FATAL(output[3], 1);
tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}
/* Builds two nodes that both reference individual 0 but have different times
 * (1.2 and 0.8) and checks that tsk_treeseq_get_individuals_time reports
 * TSK_ERR_INDIVIDUAL_TIME_MISMATCH. */
static void
test_treeseq_get_individuals_time_errors(void)
{
    tsk_table_collection_t tc;
    tsk_treeseq_t treeseq;
    double times_out[2];
    tsk_id_t row_id;
    int status;

    status = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tc.sequence_length = 1;

    row_id = tsk_population_table_add_row(&tc.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);

    row_id = tsk_individual_table_add_row(&tc.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_individual_table_add_row(&tc.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);

    /* Same individual, conflicting node times. */
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 1.2, 0, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 0.8, 0, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);

    status = tsk_treeseq_init(&treeseq, &tc, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    status = tsk_treeseq_get_individuals_time(&treeseq, times_out);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_INDIVIDUAL_TIME_MISMATCH);

    tsk_treeseq_free(&treeseq);
    tsk_table_collection_free(&tc);
}
/* Builds 2 populations, 4 individuals and 5 nodes, then checks the
 * per-individual time array written by tsk_treeseq_get_individuals_time. */
static void
test_treeseq_get_individuals_time(void)
{
    tsk_table_collection_t tc;
    tsk_treeseq_t treeseq;
    double times_out[4];
    tsk_id_t row_id;
    int status;
    int k;

    status = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tc.sequence_length = 1;

    for (k = 0; k < 2; k++) {
        row_id = tsk_population_table_add_row(&tc.populations, NULL, 0);
        CU_ASSERT_EQUAL_FATAL(row_id, k);
    }
    for (k = 0; k < 4; k++) {
        row_id = tsk_individual_table_add_row(
            &tc.individuals, 0, NULL, 0, NULL, 0, NULL, 0);
        CU_ASSERT_EQUAL_FATAL(row_id, k);
    }

    /* Node rows: the third argument is the time and the fifth is the
     * individual. Individual 2 is referenced by no node. */
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 1.25, 0, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 3.25, 0, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 3.0, 1, 3, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 2);
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 3.25, 0, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 3);
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 1.25, 0, 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 4);

    status = tsk_treeseq_init(&treeseq, &tc, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    status = tsk_treeseq_get_individuals_time(&treeseq, times_out);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL_FATAL(times_out[0], 3.25);
    CU_ASSERT_EQUAL_FATAL(times_out[1], 1.25);
    CU_ASSERT_FATAL(tsk_is_unknown_time(times_out[2]));
    CU_ASSERT_EQUAL_FATAL(times_out[3], 3.0);

    tsk_treeseq_free(&treeseq);
    tsk_table_collection_free(&tc);
}
/* For each combination of TSK_NO_SAMPLE_COUNTS and TSK_SAMPLE_LISTS, copies
 * trees with tsk_tree_copy and checks with check_trees_identical that source
 * and copy match, including while both are advanced forwards with
 * tsk_tree_next and backwards with tsk_tree_prev. */
static void
test_tree_copy_flags(void)
{
    tsk_flags_t options[] = { 0, TSK_NO_SAMPLE_COUNTS, TSK_SAMPLE_LISTS,
        TSK_NO_SAMPLE_COUNTS | TSK_SAMPLE_LISTS };
    const tsk_size_t num_options = sizeof(options) / sizeof(*options);
    tsk_treeseq_t treeseq;
    tsk_tree_t src, dst;
    tsk_size_t k;
    int next_status, status;

    tsk_treeseq_from_text(&treeseq, 10, paper_ex_nodes, paper_ex_edges, NULL, NULL, NULL,
        paper_ex_individuals, NULL, 0);

    for (k = 0; k < num_options; k++) {
        /* Copy into a destination that was initialised by the caller. */
        status = tsk_tree_init(&src, &treeseq, options[k]);
        CU_ASSERT_EQUAL_FATAL(status, 0);
        status = tsk_tree_init(&dst, &treeseq, options[k]);
        CU_ASSERT_EQUAL_FATAL(status, 0);
        status = tsk_tree_copy(&src, &dst, TSK_NO_INIT);
        CU_ASSERT_EQUAL_FATAL(status, 0);
        check_trees_identical(&src, &dst);
        tsk_tree_free(&dst);

        /* Take a fresh copy at every tree, freeing it each time. */
        while ((next_status = tsk_tree_next(&src)) == TSK_TREE_OK) {
            status = tsk_tree_copy(&src, &dst, options[k]);
            CU_ASSERT_EQUAL_FATAL(status, 0);
            check_trees_identical(&src, &dst);
            tsk_tree_free(&dst);
        }
        CU_ASSERT_EQUAL_FATAL(next_status, 0);

        /* Copy the first tree, then step both trees forwards together. */
        status = tsk_tree_first(&src);
        CU_ASSERT_EQUAL_FATAL(status, TSK_TREE_OK);
        status = tsk_tree_copy(&src, &dst, options[k]);
        CU_ASSERT_EQUAL_FATAL(status, 0);
        do {
            CU_ASSERT_EQUAL_FATAL(status, 0);
            check_trees_identical(&src, &dst);
            CU_ASSERT_EQUAL_FATAL(tsk_tree_next(&src), tsk_tree_next(&dst));
        } while (src.index != -1);

        /* Copy the last tree into the existing destination, then step both
         * trees backwards together. */
        status = tsk_tree_last(&src);
        CU_ASSERT_EQUAL_FATAL(status, TSK_TREE_OK);
        status = tsk_tree_copy(&src, &dst, TSK_NO_INIT | options[k]);
        CU_ASSERT_EQUAL_FATAL(status, 0);
        do {
            CU_ASSERT_EQUAL_FATAL(status, 0);
            check_trees_identical(&src, &dst);
            CU_ASSERT_EQUAL_FATAL(tsk_tree_prev(&src), tsk_tree_prev(&dst));
        } while (src.index != -1);

        tsk_tree_free(&dst);
        tsk_tree_free(&src);
    }
    tsk_treeseq_free(&treeseq);
}
/* Checks tsk_table_collection_deduplicate_sites in two situations: on tables
 * that have no duplicate sites (the tables must be unchanged), and on tables
 * where every site position appears four times (the result must equal the
 * tidy tables). */
static void
test_deduplicate_sites(void)
{
    /* Modified from paper_ex */
    const char *tidy_sites = "1 0\n"
                             "4.5 0\n"
                             "8.5 0\n";
    const char *tidy_mutations = "0 2 1\n"
                                 "0 1 2\n"
                                 "0 6 3\n"
                                 "0 3 4\n"
                                 "1 0 1\n"
                                 "1 2 2\n"
                                 "1 4 3\n"
                                 "1 5 4\n"
                                 "2 5 1\n"
                                 "2 7 2\n"
                                 "2 1 3\n"
                                 "2 0 4\n";
    const char *messy_sites = "1 0\n"
                              "1 0\n"
                              "1 0\n"
                              "1 0\n"
                              "4.5 0\n"
                              "4.5 0\n"
                              "4.5 0\n"
                              "4.5 0\n"
                              "8.5 0\n"
                              "8.5 0\n"
                              "8.5 0\n"
                              "8.5 0\n";
    const char *messy_mutations = "0 2 1\n"
                                  "1 1 2\n"
                                  "2 6 3\n"
                                  "3 3 4\n"
                                  "4 0 1\n"
                                  "5 2 2\n"
                                  "6 4 3\n"
                                  "7 5 4\n"
                                  "8 5 1\n"
                                  "9 7 2\n"
                                  "10 1 3\n"
                                  "11 0 4\n";
    tsk_table_collection_t expected, actual;
    int status;

    status = tsk_table_collection_init(&expected, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_table_collection_init(&actual, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    actual.sequence_length = 10;
    expected.sequence_length = 10;

    parse_individuals(paper_ex_individuals, &expected.individuals);
    parse_nodes(paper_ex_nodes, &expected.nodes);
    parse_sites(tidy_sites, &expected.sites);
    parse_mutations(tidy_mutations, &expected.mutations);

    /* test cleaning doesn't mess up the tidy one */
    parse_individuals(paper_ex_individuals, &actual.individuals);
    parse_nodes(paper_ex_nodes, &actual.nodes);
    parse_sites(tidy_sites, &actual.sites);
    parse_mutations(tidy_mutations, &actual.mutations);
    status = tsk_table_collection_deduplicate_sites(&actual, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&expected.sites, &actual.sites, 0));
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&expected.mutations, &actual.mutations, 0));

    tsk_site_table_clear(&actual.sites);
    tsk_mutation_table_clear(&actual.mutations);

    /* test with the actual messy one */
    parse_sites(messy_sites, &actual.sites);
    parse_mutations(messy_mutations, &actual.mutations);
    status = tsk_table_collection_deduplicate_sites(&actual, 0);
    CU_ASSERT_EQUAL(status, 0);
    CU_ASSERT_TRUE(tsk_site_table_equals(&expected.sites, &actual.sites, 0));
    CU_ASSERT_TRUE(tsk_mutation_table_equals(&expected.mutations, &actual.mutations, 0));

    tsk_table_collection_free(&expected);
    tsk_table_collection_free(&actual);
}
/* Corrupts one column value at a time in a small table collection, checks the
 * error code returned by tsk_table_collection_deduplicate_sites, and restores
 * the value before the next case. The final call on the restored tables must
 * succeed. */
static void
test_deduplicate_sites_errors(void)
{
    tsk_table_collection_t tc;
    tsk_id_t row_id;
    int status;

    status = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tc.sequence_length = 10;

    /* Two sites at position 2, one mutation at site 0 and a single node. */
    row_id = tsk_site_table_add_row(&tc.sites, 2, "A", 1, "m", 1);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_site_table_add_row(&tc.sites, 2, "TT", 2, "MM", 2);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);
    row_id = tsk_mutation_table_add_row(&tc.mutations, 0, 0, -1, 0, "T", 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_node_table_add_row(&tc.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);

    /* Negative position */
    tc.sites.position[0] = -1;
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, TSK_ERR_BAD_SITE_POSITION);
    tc.sites.position[0] = 2;

    /* unsorted position */
    tc.sites.position[1] = 0.5;
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, TSK_ERR_UNSORTED_SITES);
    tc.sites.position[1] = 2;

    /* negative site ID */
    tc.mutations.site[0] = -1;
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, TSK_ERR_SITE_OUT_OF_BOUNDS);
    tc.mutations.site[0] = 0;

    /* site ID out of bounds */
    tc.mutations.site[0] = 2;
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, TSK_ERR_SITE_OUT_OF_BOUNDS);
    tc.mutations.site[0] = 0;

    /* Bad offset in metadata */
    tc.sites.metadata_offset[0] = 2;
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, TSK_ERR_BAD_OFFSET);
    tc.sites.metadata_offset[0] = 0;

    /* Bad length in metadata */
    tc.sites.metadata_offset[2] = 100;
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, TSK_ERR_BAD_OFFSET);
    tc.sites.metadata_offset[2] = 3;

    /* Bad offset in ancestral_state */
    tc.sites.ancestral_state_offset[0] = 2;
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, TSK_ERR_BAD_OFFSET);
    tc.sites.ancestral_state_offset[0] = 0;

    /* Bad length in ancestral_state */
    tc.sites.ancestral_state_offset[2] = 100;
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, TSK_ERR_BAD_OFFSET);
    tc.sites.ancestral_state_offset[2] = 3;

    /* With every value restored the call succeeds. */
    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL(status, 0);

    tsk_table_collection_free(&tc);
}
/* Checks that tsk_table_collection_deduplicate_sites succeeds on a table
 * collection with no rows and leaves the site table empty. */
static void
test_deduplicate_sites_zero_rows(void)
{
    int ret;
    tsk_table_collection_t tables;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    ret = tsk_table_collection_deduplicate_sites(&tables, 0);
    CU_ASSERT_EQUAL(ret, 0);
    /* Terminated with ';' like every other assertion; the statement used to
     * compile without it only because of how the macro expands. */
    CU_ASSERT_EQUAL(tables.sites.num_rows, 0);

    tsk_table_collection_free(&tables);
}
/* Deduplicates four sites (two at position 0, two at position 1) whose
 * ancestral state and metadata have different lengths, then checks the
 * positions, ragged-column contents and offsets of the two remaining rows. */
static void
test_deduplicate_sites_multichar(void)
{
    tsk_table_collection_t tc;
    tsk_id_t row_id;
    int status;

    status = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tc.sequence_length = 10;

    row_id = tsk_site_table_add_row(&tc.sites, 0, "AA", 1, "M", 1);
    CU_ASSERT_EQUAL_FATAL(row_id, 0);
    row_id = tsk_site_table_add_row(&tc.sites, 0, "0", 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 1);
    row_id = tsk_site_table_add_row(&tc.sites, 1, "BBBBB", 5, "NNNNN", 5);
    CU_ASSERT_EQUAL_FATAL(row_id, 2);
    row_id = tsk_site_table_add_row(&tc.sites, 1, "0", 1, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(row_id, 3);

    status = tsk_table_collection_deduplicate_sites(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    CU_ASSERT_EQUAL_FATAL(tc.sites.num_rows, 2);
    CU_ASSERT_EQUAL_FATAL(tc.sites.position[0], 0);
    CU_ASSERT_EQUAL_FATAL(tc.sites.position[1], 1);

    /* Row 0: one byte of ancestral state and one byte of metadata. */
    CU_ASSERT_EQUAL_FATAL(tc.sites.ancestral_state[0], 'A');
    CU_ASSERT_EQUAL_FATAL(tc.sites.ancestral_state_offset[1], 1);
    CU_ASSERT_EQUAL_FATAL(tc.sites.metadata[0], 'M');
    CU_ASSERT_EQUAL_FATAL(tc.sites.metadata_offset[1], 1);

    /* Row 1: five bytes of each, stored directly after row 0. */
    CU_ASSERT_NSTRING_EQUAL(tc.sites.ancestral_state + 1, "BBBBB", 5);
    CU_ASSERT_EQUAL_FATAL(tc.sites.ancestral_state_offset[2], 6);
    CU_ASSERT_NSTRING_EQUAL(tc.sites.metadata + 1, "NNNNN", 5);
    CU_ASSERT_EQUAL_FATAL(tc.sites.metadata_offset[2], 6);

    tsk_table_collection_free(&tc);
}
/* Checks tree sequence construction from an empty table collection: it must
 * fail until a sequence length is set, and afterwards the single tree must
 * span [0, 1) with no edges and no roots whether reached via tsk_tree_first
 * or tsk_tree_last. */
static void
test_empty_tree_sequence(void)
{
    tsk_table_collection_t empty_tables;
    tsk_treeseq_t treeseq;
    tsk_tree_t tree;
    tsk_id_t parent;
    int status;

    status = tsk_table_collection_init(&empty_tables, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);

    /* The sequence length has not been set yet. */
    status = tsk_treeseq_init(&treeseq, &empty_tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_BAD_SEQUENCE_LENGTH);
    tsk_treeseq_free(&treeseq);

    empty_tables.sequence_length = 1.0;
    status = tsk_treeseq_init(&treeseq, &empty_tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    verify_empty_tree_sequence(&treeseq, 1.0);

    /* Forward direction. */
    status = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_tree_first(&tree);
    CU_ASSERT_EQUAL_FATAL(status, TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&tree), TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(tree.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(tree.interval.right, 1);
    CU_ASSERT_EQUAL_FATAL(tree.num_edges, 0);
    /* Node 0 is accepted by the tree even though the node table is empty,
     * while node 1 is rejected. */
    status = tsk_tree_get_parent(&tree, 0, &parent);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_EQUAL_FATAL(parent, TSK_NULL);
    status = tsk_tree_get_parent(&tree, 1, &parent);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_NODE_OUT_OF_BOUNDS);
    tsk_tree_free(&tree);

    /* Reverse direction. */
    status = tsk_tree_init(&tree, &treeseq, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_tree_last(&tree);
    CU_ASSERT_EQUAL_FATAL(status, TSK_TREE_OK);
    CU_ASSERT_EQUAL_FATAL(tsk_tree_get_left_root(&tree), TSK_NULL);
    CU_ASSERT_EQUAL_FATAL(tree.interval.left, 0);
    CU_ASSERT_EQUAL_FATAL(tree.interval.right, 1);
    status = tsk_tree_get_parent(&tree, 1, &parent);
    CU_ASSERT_EQUAL_FATAL(status, TSK_ERR_NODE_OUT_OF_BOUNDS);
    tsk_tree_free(&tree);

    tsk_treeseq_free(&treeseq);
    tsk_table_collection_free(&empty_tables);
}
/* Checks a tree sequence with two sample nodes, two sites, two mutations and
 * no edges: the tree sequence summary, the single tree reached from either
 * end, and the result of simplifying with an empty sample list. */
static void
test_zero_edges(void)
{
    const char *nodes = "1 0 0\n"
                        "1 0 0\n";
    const char *edges = "";
    const char *sites = "0.1 0\n"
                        "0.2 0\n";
    const char *mutations = "0 0 1\n"
                            "1 1 1\n";
    tsk_treeseq_t ts, tss;
    tsk_tree_t t;
    /* Never read by simplify because num_samples is 0, but must be non-NULL. */
    tsk_id_t samples = TSK_NULL;
    /* Simplify fills in one entry per node of the input tree sequence, so
     * this needs room for both nodes; a single tsk_id_t would be overrun. */
    tsk_id_t node_map[2];
    const tsk_id_t z = TSK_NULL;
    tsk_id_t parents[] = {
        z,
        z,
    };
    int ret;

    tsk_treeseq_from_text(&ts, 2, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&ts), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&ts), 2.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&ts), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&ts), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&ts), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&ts), 1);
    tsk_treeseq_print_state(&ts, _devnull);

    verify_trees(&ts, 1, parents);

    /* Forward direction: both samples are parentless roots. */
    ret = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_first(&t);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(t.interval.left, 0);
    CU_ASSERT_EQUAL(t.interval.right, 2);
    CU_ASSERT_EQUAL(t.num_edges, 0);
    CU_ASSERT_EQUAL(t.parent[0], TSK_NULL);
    CU_ASSERT_EQUAL(t.parent[1], TSK_NULL);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0);
    CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL);
    CU_ASSERT_EQUAL(t.right_sib[0], 1);
    tsk_tree_print_state(&t, _devnull);
    tsk_tree_free(&t);

    /* Reverse direction. */
    ret = tsk_tree_init(&t, &ts, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_tree_last(&t);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_TREE_OK);
    CU_ASSERT_EQUAL(t.interval.left, 0);
    CU_ASSERT_EQUAL(t.interval.right, 2);
    CU_ASSERT_EQUAL(t.parent[0], TSK_NULL);
    CU_ASSERT_EQUAL(t.parent[1], TSK_NULL);
    CU_ASSERT_EQUAL(tsk_tree_get_left_root(&t), 0);
    CU_ASSERT_EQUAL(t.left_sib[0], TSK_NULL);
    CU_ASSERT_EQUAL(t.right_sib[0], 1);
    tsk_tree_print_state(&t, _devnull);
    tsk_tree_free(&t);

    /* We give pointers to samples and node_map here as they must be non null */
    ret = tsk_treeseq_simplify(&ts, &samples, 0, 0, &tss, node_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_samples(&tss), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_sequence_length(&tss), 2.0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&tss), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_sites(&tss), 2);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_mutations(&tss), 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&tss), 1);
    /* No node survives simplification, so neither input node is mapped. */
    CU_ASSERT_EQUAL(node_map[0], TSK_NULL);
    CU_ASSERT_EQUAL(node_map[1], TSK_NULL);
    /* Print the simplified tree sequence; the input was printed above. */
    tsk_treeseq_print_state(&tss, _devnull);

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&tss);
}
/* Sets the top-level metadata, metadata schema and time units on a table
 * collection, then checks that a tree sequence built from it reports the same
 * lengths and bytes through its accessors. */
static void
test_tree_sequence_metadata(void)
{
    int ret;
    tsk_table_collection_t tc;
    tsk_treeseq_t ts;
    char example_metadata[100] = "An example of metadata with unicode 🎄🌳🌴🌲🎋";
    char example_metadata_schema[100]
        = "An example of metadata schema with unicode 🎄🌳🌴🌲🎋";
    char example_time_units[100] = "An example of time units ⏰";
    /* Each length is taken from its own string. The schema and time-units
     * lengths used to be computed from each other's strings, so the test
     * stored and compared the wrong number of bytes for both. */
    tsk_size_t example_metadata_length = (tsk_size_t) strlen(example_metadata);
    tsk_size_t example_metadata_schema_length
        = (tsk_size_t) strlen(example_metadata_schema);
    tsk_size_t example_time_units_length = (tsk_size_t) strlen(example_time_units);

    ret = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tc.sequence_length = 1.0;
    ret = tsk_table_collection_build_index(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_table_collection_set_metadata(
        &tc, example_metadata, example_metadata_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_metadata_schema(
        &tc, example_metadata_schema, example_metadata_schema_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_time_units(
        &tc, example_time_units, example_time_units_length);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    ret = tsk_treeseq_init(&ts, &tc, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    CU_ASSERT_EQUAL(tsk_treeseq_get_metadata_length(&ts), example_metadata_length);
    CU_ASSERT_EQUAL(
        tsk_treeseq_get_metadata_schema_length(&ts), example_metadata_schema_length);
    CU_ASSERT_EQUAL(tsk_memcmp(tsk_treeseq_get_metadata(&ts), example_metadata,
                        example_metadata_length),
        0);
    CU_ASSERT_EQUAL(tsk_memcmp(tsk_treeseq_get_metadata_schema(&ts),
                        example_metadata_schema, example_metadata_schema_length),
        0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_time_units_length(&ts), example_time_units_length);
    CU_ASSERT_EQUAL(tsk_memcmp(tsk_treeseq_get_time_units(&ts), example_time_units,
                        example_time_units_length),
        0);

    tsk_treeseq_free(&ts);
    tsk_table_collection_free(&tc);
}
/* Summary function used as a callback in the tests: copies the K input
 * values in X to Y unchanged. Asserts that M equals K and that no params
 * were supplied. Always returns 0. */
static int
dummy_stat(tsk_size_t K, const double *X, tsk_size_t M, double *Y, void *params)
{
    tsk_size_t idx = 0;

    CU_ASSERT_FATAL(M == K);
    CU_ASSERT_FATAL(params == NULL);
    while (idx < K) {
        Y[idx] = X[idx];
        idx++;
    }
    return 0;
}
/* Checks the time_uncalibrated flag of a tree sequence and its effect on
 * statistics: with uncalibrated time units, branch-mode statistics must fail
 * with TSK_ERR_TIME_UNCALIBRATED unless TSK_STAT_ALLOW_TIME_UNCALIBRATED is
 * passed, while site-mode statistics succeed. */
static void
test_time_uncalibrated(void)
{
    int ret;
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    tsk_treeseq_t ts2;
    tsk_size_t sample_set_sizes[] = { 2, 2 };
    tsk_id_t samples[] = { 0, 1, 2, 3 };
    tsk_size_t num_samples;
    double result[100];
    double *W;
    double *sigma;

    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* The flag is false for a freshly initialised table collection. */
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(ts.time_uncalibrated, false);
    tsk_treeseq_free(&ts);

    /* Setting the uncalibrated time units turns the flag on. */
    ret = tsk_table_collection_set_time_units(
        &tables, TSK_TIME_UNITS_UNCALIBRATED, strlen(TSK_TIME_UNITS_UNCALIBRATED));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(ts.time_uncalibrated, true);
    tsk_treeseq_free(&ts);

    /* Build ts2 from the paper example tables with uncalibrated time units. */
    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    ret = tsk_table_collection_set_time_units(
        ts.tables, TSK_TIME_UNITS_UNCALIBRATED, strlen(TSK_TIME_UNITS_UNCALIBRATED));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_init(&ts2, ts.tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Allele frequency spectrum */
    ret = tsk_treeseq_allele_frequency_spectrum(
        &ts2, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_SITE, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_allele_frequency_spectrum(
        &ts2, 2, sample_set_sizes, samples, 0, NULL, 0, NULL, TSK_STAT_BRANCH, result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_UNCALIBRATED);
    ret = tsk_treeseq_allele_frequency_spectrum(&ts2, 2, sample_set_sizes, samples, 0,
        NULL, 0, NULL, TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* General stat. Check the allocations before they are handed to the
     * library, so that an allocation failure shows up as a test failure
     * rather than a NULL pointer passed into the statistics code. */
    sigma = tsk_calloc(tsk_treeseq_get_num_nodes(&ts2), sizeof(double));
    CU_ASSERT_FATAL(sigma != NULL);
    num_samples = tsk_treeseq_get_num_samples(&ts2);
    W = tsk_calloc(num_samples, sizeof(double));
    CU_ASSERT_FATAL(W != NULL);

    ret = tsk_treeseq_general_stat(&ts2, 1, W, 1, dummy_stat, NULL,
        tsk_treeseq_get_num_trees(&ts2), tsk_treeseq_get_breakpoints(&ts2),
        TSK_STAT_SITE, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_general_stat(&ts2, 1, W, 1, dummy_stat, NULL,
        tsk_treeseq_get_num_trees(&ts2), tsk_treeseq_get_breakpoints(&ts2),
        TSK_STAT_BRANCH, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_UNCALIBRATED);
    ret = tsk_treeseq_general_stat(&ts2, 1, W, 1, dummy_stat, NULL,
        tsk_treeseq_get_num_trees(&ts2), tsk_treeseq_get_breakpoints(&ts2),
        TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, sigma);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Divergence matrix */
    ret = tsk_treeseq_divergence_matrix(
        &ts2, 0, NULL, NULL, 0, NULL, TSK_STAT_BRANCH, result);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_UNCALIBRATED);
    ret = tsk_treeseq_divergence_matrix(&ts2, 0, NULL, NULL, 0, NULL,
        TSK_STAT_BRANCH | TSK_STAT_ALLOW_TIME_UNCALIBRATED, result);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    tsk_safe_free(W);
    tsk_safe_free(sigma);
    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&ts2);
    tsk_table_collection_free(&tables);
}
/* Checks that tsk_treeseq_has_reference_sequence is false for an empty table
 * collection and becomes true once reference sequence data has been set. */
static void
test_reference_sequence(void)
{
    tsk_table_collection_t tc;
    tsk_treeseq_t treeseq;
    int status;

    status = tsk_table_collection_init(&tc, 0);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    tc.sequence_length = 1;

    /* No reference sequence data yet. */
    status = tsk_treeseq_init(&treeseq, &tc, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_FALSE(tsk_treeseq_has_reference_sequence(&treeseq));
    tsk_treeseq_free(&treeseq);

    /* After setting three bytes of reference data. */
    status = tsk_reference_sequence_set_data(&tc.reference_sequence, "abc", 3);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    status = tsk_treeseq_init(&treeseq, &tc, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(status, 0);
    CU_ASSERT_TRUE(tsk_treeseq_has_reference_sequence(&treeseq));
    tsk_treeseq_free(&treeseq);

    tsk_table_collection_free(&tc);
}
/* Splits the edges of the paper example at time 0.09 and checks that the
 * three nodes created by the split (ids 9, 10 and 11) carry the requested
 * time, flags, population and metadata, and have no individual. */
static void
test_split_edges_no_populations(void)
{
    int ret;
    tsk_treeseq_t ts, split_ts;
    tsk_table_collection_t tables;
    tsk_id_t new_nodes[] = { 9, 10, 11 };
    tsk_size_t num_new_nodes = 3;
    const char *metadata = "some metadata";
    tsk_size_t j;
    tsk_node_t node;
    double time = 0.09;
    tsk_id_t ret_id;

    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    ret = tsk_table_collection_copy(ts.tables, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts);

    /* Add one population row so that population id 0, used for the new
     * nodes below, refers to an existing row. */
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    /* Check the status of this call itself, not the stale row id. */
    ret = tsk_table_collection_compute_mutation_times(&tables, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_init(&ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* NOTE: haven't worked out the exact IDs on the branches here, just
     * for illustration.
    0.25┊ 8 ┊ ┊ ┊
    ┊ ┏━┻━┓ ┊ ┊ ┊
    0.20┊ ┃ ┃ ┊ ┊ 7 ┊
    ┊ ┃ ┃ ┊ ┊ ┏━┻━┓ ┊
    0.17┊ 6 ┃ ┊ 6 ┊ ┃ ┃ ┊
    ┊ ┏━┻┓ ┃ ┊ ┏━┻━┓ ┊ ┃ ┃ ┊
    0.09┊ 9 5 10┊ 9 5 ┊ 11 5 ┊
    ┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓ ┊ ┃ ┏━┻┓ ┊
    0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ 4 ┊ ┃ ┃ 4 ┊
    ┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊
    0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊
    0.00 2.00 7.00 10.00
    */
    ret = tsk_treeseq_split_edges(
        &ts, time, 1234, 0, metadata, strlen(metadata), 0, &split_ts);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&split_ts), 3);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&split_ts), 12);

    /* Every new node must carry exactly the values passed to split_edges. */
    for (j = 0; j < num_new_nodes; j++) {
        ret = tsk_treeseq_get_node(&split_ts, new_nodes[j], &node);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(node.time, time);
        CU_ASSERT_EQUAL(node.flags, 1234);
        CU_ASSERT_EQUAL(node.individual, TSK_NULL);
        CU_ASSERT_EQUAL(node.population, 0);
        CU_ASSERT_EQUAL(node.metadata_length, strlen(metadata));
        CU_ASSERT_EQUAL(strncmp(node.metadata, metadata, strlen(metadata)), 0);
    }

    tsk_treeseq_free(&split_ts);
    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
/* Builds a two-node, one-edge tree sequence with two populations and checks
 * that, for each of the population ids -1, 0 and 1, splitting the edge at
 * time 0.5 creates node 2 assigned to that population. */
static void
test_split_edges_populations(void)
{
    tsk_id_t valid_pops[] = { -1, 0, 1 };
    tsk_id_t num_valid_pops = 3;
    tsk_id_t j, population, ret_id;
    tsk_table_collection_t tables;
    tsk_treeseq_t ts, split_ts;
    tsk_node_t node;
    double time = 0.5;
    int ret = tsk_table_collection_init(&tables, 0);

    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = 1;

    /* Two populations: ids 0 and 1. */
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);

    /* Node 0 at time 0 and node 1 at time 1, joined by a single edge. */
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);
    ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1, 1, TSK_NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 1);
    ret_id = tsk_edge_table_add_row(&tables.edges, 0, 1, 1, 0, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);

    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    j = 0;
    while (j < num_valid_pops) {
        population = valid_pops[j];
        ret = tsk_treeseq_split_edges(&ts, time, 0, population, NULL, 0, 0, &split_ts);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(tsk_treeseq_get_num_trees(&split_ts), 1);
        CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&split_ts), 3);
        /* The node added by the split is the third one, id 2. */
        ret = tsk_treeseq_get_node(&split_ts, 2, &node);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        CU_ASSERT_EQUAL(node.population, population);
        tsk_treeseq_free(&split_ts);
        j++;
    }

    tsk_table_collection_free(&tables);
    tsk_treeseq_free(&ts);
}
static void
test_split_edges_errors(void)
{
int ret;
tsk_treeseq_t ts, split_ts;
tsk_table_collection_t tables;
double time = 0.5;
tsk_id_t invalid_pops[] = { -2, 2, 3 };
tsk_id_t num_invalid_pops = 3;
tsk_id_t j, population, ret_id;
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, 0, TSK_NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 1, 1, TSK_NULL, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);
ret_id = tsk_edge_table_add_row(&tables.edges, 0, 1, 1, 0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_split_edges(
&ts, TSK_UNKNOWN_TIME, 0, TSK_NULL, NULL, 0, 0, &split_ts);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);
for (j = 0; j < num_invalid_pops; j++) {
population = invalid_pops[j];
ret = tsk_treeseq_split_edges(&ts, time, 0, population, NULL, 0, 0, &split_ts);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
tsk_treeseq_free(&split_ts);
}
tsk_treeseq_free(&ts);
ret_id
= tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_split_edges(&ts, time, 0, population, NULL, 0, 0, &split_ts);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
tsk_treeseq_free(&split_ts);
tsk_table_collection_free(&tables);
tsk_treeseq_free(&ts);
}
/* Runs tsk_treeseq_extend_haplotypes() on a tree sequence in which nodes 0
 * and 1 are both children of node 2 over the whole [0, 10) interval, and
 * checks that the resulting tables equal the input tables. */
static void
test_extend_haplotypes_simple(void)
{
    const char *nodes = "1 0 -1 -1\n"
                        "1 0 -1 -1\n"
                        "0 2.0 -1 -1\n";
    const char *edges = "0 10 2 0\n"
                        "0 10 2 1\n";
    const char *sites = "0.0 0\n"
                        "1.0 0\n";
    const char *mutations = "0 0 1 -1 0.5\n"
                            "1 1 1 -1 0.5\n";
    tsk_treeseq_t ts, ets;
    int ret;

    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

    ret = tsk_treeseq_extend_haplotypes(&ts, 10, 0, &ets);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    /* The output must be identical to the input. */
    CU_ASSERT_TRUE_FATAL(tsk_table_collection_equals(ts.tables, ets.tables, 0));

    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&ets);
}
static void
test_extend_haplotypes_errors(void)
{
int ret;
tsk_treeseq_t ts, ets;
const char *nodes = "1 0 -1 -1\n"
"1 0 -1 -1\n"
"0 2.0 -1 -1\n";
const char *edges = "0 10 2 0\n"
"0 10 2 1\n";
const char *sites = "0.0 0\n"
"1.0 0\n";
const char *mutations = "0 0 1 -1 0.5\n"
"1 1 1 -1 0.5\n";
const char *mutations_no_time = "0 0 1 -1\n"
"1 1 1 -1\n";
// left, right, node source, dest, time
const char *migrations = "0 10 0 0 1 0.5\n"
"0 10 0 1 0 1.5\n";
tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
ret = tsk_treeseq_extend_haplotypes(&ts, -2, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_EXTEND_EDGES_BAD_MAXITER);
tsk_treeseq_free(&ts);
tsk_treeseq_from_text(
&ts, 10, nodes, edges, migrations, sites, mutations, NULL, NULL, 0);
ret = tsk_treeseq_extend_haplotypes(&ts, 10, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
tsk_treeseq_free(&ts);
tsk_treeseq_from_text(
&ts, 10, nodes, edges, NULL, sites, mutations_no_time, NULL, NULL, 0);
ret = tsk_treeseq_extend_haplotypes(&ts, 10, 0, &ets);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME);
tsk_treeseq_free(&ts);
tsk_treeseq_free(&ets);
}
/* Asserts that the tables of ts1 and ts2 are equal once differences in the
 * edge table and in the mutation node column are masked out. The comparison
 * is made on private copies: each copy has its edge table cleared and its
 * mutation node column zeroed before tsk_table_collection_equals() is
 * called, so neither input is modified. */
static void
assert_equal_except_edges_and_mutation_nodes(
    const tsk_treeseq_t *ts1, const tsk_treeseq_t *ts2)
{
    int ret;
    tsk_table_collection_t t1, t2;

    ret = tsk_table_collection_copy(ts1->tables, &t1, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_copy(ts2->tables, &t2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Mask the columns that are allowed to differ, one copy at a time. */
    tsk_memset(t1.mutations.node, 0, t1.mutations.num_rows * sizeof(*t1.mutations.node));
    tsk_edge_table_clear(&t1.edges);

    tsk_memset(t2.mutations.node, 0, t2.mutations.num_rows * sizeof(*t2.mutations.node));
    tsk_edge_table_clear(&t2.edges);

    CU_ASSERT_TRUE(tsk_table_collection_equals(&t1, &t2, 0));

    tsk_table_collection_free(&t1);
    tsk_table_collection_free(&t2);
}
/* Runs tsk_treeseq_extend_haplotypes() on a four-tree example for max_iter
 * values 1..9, checking each time that only edges and mutation nodes may
 * differ from the input and that at least 12 edges remain. It then checks
 * that with max_iter 10 the result has exactly 9 nodes and 12 edges, and
 * finally that running with TSK_DEBUG writes something to the debug stream. */
static void
test_extend_haplotypes(void)
{
    int ret = 0;
    int max_iter = 10;
    tsk_treeseq_t ts, ets;
    FILE *tmp = fopen(_tmp_file_name, "w");

    /* 7 and 8 should be extended to the whole sequence;
     * also 5 to the second tree (where x's are)
    6 6 6 6
    +-+-+ +-+-+ +-+-+ +-+-+
    | | 7 x x 8 x x
    | | ++-+ | | +-++ | |
    4 5 4 | x 4 | 5 4 5
    +++ +++ +++ | | | | +++ +++ +++
    0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 3
    */
    const char *nodes = "1 0 -1 -1\n"
                        "1 0 -1 -1\n"
                        "1 0 -1 -1\n"
                        "1 0 -1 -1\n"
                        "0 1.0 -1 -1\n"
                        "0 1.0 -1 -1\n"
                        "0 3.0 -1 -1\n"
                        "0 2.0 -1 -1\n"
                        "0 2.0 -1 -1\n";
    // l, r, p, c
    const char *edges = "0 10 4 0\n"
                        "0 5 4 1\n"
                        "7 10 4 1\n"
                        "0 2 5 2\n"
                        "5 10 5 2\n"
                        "0 2 5 3\n"
                        "5 10 5 3\n"
                        "2 5 7 2\n"
                        "2 5 7 4\n"
                        "5 7 8 1\n"
                        "5 7 8 5\n"
                        "2 5 6 3\n"
                        "0 2 6 4\n"
                        "5 10 6 4\n"
                        "0 2 6 5\n"
                        "7 10 6 5\n"
                        "2 5 6 7\n"
                        "5 7 6 8\n";
    const char *sites = "0.0 0\n"
                        "9.0 0\n";
    const char *mutations = "0 4 1 -1 2.5\n"
                            "0 4 2 0 1.5\n"
                            "1 6 3 -1 3.5\n"
                            "1 5 1 2 2.5\n"
                            "1 5 2 3 1.5\n";

    /* Fail here rather than hand a NULL stream to tsk_set_debug_stream(),
     * ftell() and fclose() further down. */
    CU_ASSERT_FATAL(tmp != NULL);

    tsk_treeseq_from_text(&ts, 10, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);

    for (max_iter = 1; max_iter < 10; max_iter++) {
        ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, 0, &ets);
        CU_ASSERT_EQUAL_FATAL(ret, 0);
        assert_equal_except_edges_and_mutation_nodes(&ts, &ets);
        CU_ASSERT_TRUE(ets.tables->edges.num_rows >= 12);
        tsk_treeseq_free(&ets);
    }

    /* max_iter is 10 once the loop above has finished. */
    ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, 0, &ets);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL_FATAL(ets.tables->nodes.num_rows, 9);
    CU_ASSERT_EQUAL_FATAL(ets.tables->edges.num_rows, 12);
    assert_equal_except_edges_and_mutation_nodes(&ts, &ets);
    tsk_treeseq_free(&ets);

    /* Send debug output to the temporary file, then restore stdout and
     * check that something was written. */
    tsk_set_debug_stream(tmp);
    ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, TSK_DEBUG, &ets);
    tsk_set_debug_stream(stdout);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(ftell(tmp) > 0);
    tsk_treeseq_free(&ets);

    fclose(tmp);
    tsk_treeseq_free(&ts);
}
/* Two adjacent single-lineage trees in which sample 0 reaches a different
 * root through a different intermediate node (1 on [0, 2), 2 on [2, 4)).
 * The test expects tsk_treeseq_extend_haplotypes() to leave the tables
 * unchanged for this input. */
static void
test_extend_haplotypes_conflicting_times(void)
{
    /*
    3.00┊ 3 ┊ 4 ┊
    ┊ ┃ ┊ ┃ ┊
    2.00┊ ┃ ┊ 2 ┊
    ┊ ┃ ┊ ┃ ┊
    1.00┊ 1 ┊ ┃ ┊
    ┊ ┃ ┊ ┃ ┊
    0.00┊ 0 ┊ 0 ┊
    0 2 4
    */
    const char *nodes = "1 0.0 -1 -1\n"
                        "0 1.0 -1 -1\n"
                        "0 2.0 -1 -1\n"
                        "0 3.0 -1 -1\n"
                        "0 3.0 -1 -1\n";
    /* Columns: left, right, parent, child */
    const char *edges = "0.0 2.0 1 0\n"
                        "2.0 4.0 2 0\n"
                        "0.0 2.0 3 1\n"
                        "2.0 4.0 4 2\n";
    tsk_treeseq_t ts, ets;
    int max_iter = 10;
    int ret;

    tsk_treeseq_from_text(&ts, 4, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ts.tables->edges.num_rows, 4);

    ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, 0, &ets);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, ets.tables, 0));

    tsk_treeseq_free(&ets);
    tsk_treeseq_free(&ts);
}
/* Compares the output of tsk_treeseq_extend_haplotypes() on a ten-edge
 * input against a hand-written four-edge reference tree sequence (ext_edges
 * and ext_mutations below). */
static void
test_extend_haplotypes_new_edge(void)
{
    /* This is an example where new edges are added
     * on both forwards and back passes
    4.00┊ ┊ 4 ┊ 4 ┊ 4 ┊
    ┊ ┊ ┃ ┊ ┃ ┊ ┃ ┊
    3.00┊ 2 ┊ ┃ ┊ 2 ┊ 2 ┊
    ┊ ┃ ┊ ┃ ┊ ┃ ┊ ┃ ┊
    2.00┊ ┃ ┊ 3 ┊ ┃ ┊ 3 ┊
    ┊ ┃ ┊ ┃ ┊ ┃ ┊ ┃ ┊
    1.00┊ 1 ┊ ┃ ┊ ┃ ┊ ┃ ┊
    ┊ ┃ ┊ ┃ ┊ ┃ ┊ ┃ ┊
    0.00┊ 0 ┊ 0 ┊ 0 ┊ 0 ┊
    0 2 4 6 8
    */
    const char *nodes = "1 0.0 -1 -1\n"
                        "0 1.0 -1 -1\n"
                        "0 3.0 -1 -1\n"
                        "0 2.0 -1 -1\n"
                        "0 4.0 -1 -1\n";
    /* Columns: left, right, parent, child */
    const char *edges = "0.0 2.0 1 0\n"
                        "2.0 4.0 3 0\n"
                        "6.0 8.0 3 0\n"
                        "4.0 5.0 2 0\n"
                        "5.0 6.0 2 0\n"
                        "0.0 2.0 2 1\n"
                        "6.0 7.0 2 3\n"
                        "7.0 8.0 2 3\n"
                        "4.0 8.0 4 2\n"
                        "2.0 4.0 4 3\n";
    /* Edges expected after extension */
    const char *ext_edges = "0.0 8.0 1 0\n"
                            "0.0 8.0 3 1\n"
                            "0.0 8.0 2 3\n"
                            "2.0 8.0 4 2\n";
    const char *sites = "3.0 0\n";
    /* Columns: site, node, derived_state, parent, time */
    const char *mutations = "0 4 5 -1 4.5\n"
                            "0 3 4 0 3.5\n"
                            "0 3 3 1 2.5\n"
                            "0 0 2 2 1.5\n"
                            "0 0 1 3 0.5\n";
    /* Mutations expected after extension: same rows, two node ids differ */
    const char *ext_mutations = "0 4 5 -1 4.5\n"
                                "0 2 4 0 3.5\n"
                                "0 3 3 1 2.5\n"
                                "0 1 2 2 1.5\n"
                                "0 0 1 3 0.5\n";
    tsk_treeseq_t ts, ets, ref_ts;
    int max_iter = 10;
    int ret;

    tsk_treeseq_from_text(&ts, 8, nodes, edges, NULL, sites, mutations, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ts.tables->edges.num_rows, 10);
    tsk_treeseq_from_text(
        &ref_ts, 8, nodes, ext_edges, NULL, sites, ext_mutations, NULL, NULL, 0);
    CU_ASSERT_EQUAL_FATAL(ref_ts.tables->edges.num_rows, 4);

    ret = tsk_treeseq_extend_haplotypes(&ts, max_iter, 0, &ets);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    assert_equal_except_edges_and_mutation_nodes(&ts, &ets);
    /* The extended tables must match the reference exactly. */
    CU_ASSERT_TRUE(tsk_table_collection_equals(ets.tables, ref_ts.tables, 0));

    tsk_treeseq_free(&ets);
    tsk_treeseq_free(&ts);
    tsk_treeseq_free(&ref_ts);
}
/* tsk_treeseq_init() with TSK_TAKE_OWNERSHIP must fail with
 * TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA when the tables were copied
 * with TSK_TC_NO_EDGE_METADATA. */
static void
test_init_take_ownership_no_edge_metadata(void)
{
    tsk_treeseq_t ts;
    int ret;
    tsk_table_collection_t *tables = tsk_malloc(sizeof(*tables));

    CU_ASSERT_NOT_EQUAL_FATAL(tables, NULL);

    /* Build the paper example only to obtain a copy of its tables. */
    tsk_treeseq_from_text(&ts, 10, paper_ex_nodes, paper_ex_edges, NULL, paper_ex_sites,
        paper_ex_mutations, paper_ex_individuals, NULL, 0);
    ret = tsk_treeseq_copy_tables(&ts, tables, TSK_TC_NO_EDGE_METADATA);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts);

    ret = tsk_treeseq_init(&ts, tables, TSK_TAKE_OWNERSHIP);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA);
    /* tables is not freed separately here. NOTE(review): presumably
     * tsk_treeseq_free() releases it because ownership was requested;
     * confirm that this also holds on this error path. */
    tsk_treeseq_free(&ts);
}
/* Checks how tsk_treeseq_init() treats an invalid mutation parent column
 * with and without TSK_TS_INIT_COMPUTE_MUTATION_PARENTS, both when the tables
 * are passed without TSK_TAKE_OWNERSHIP and when ownership is taken. */
static void
test_init_compute_mutation_parents(void)
{
    const char *sites = "0 0\n";
    /* Make a mutation on a parallel branch the parent */
    const char *bad_mutations = "0 0 1 -1\n"
                                "0 1 1 0\n";
    tsk_table_collection_t *tables, *tables2;
    tsk_treeseq_t ts;
    int ret;

    tables = tsk_malloc(sizeof(tsk_table_collection_t));
    CU_ASSERT_NOT_EQUAL_FATAL(tables, NULL);
    tables2 = tsk_malloc(sizeof(tsk_table_collection_t));
    CU_ASSERT_NOT_EQUAL_FATAL(tables2, NULL);
    CU_ASSERT_FATAL(tables != NULL);

    /* Populate the first collection from the single tree example. */
    ret = tsk_table_collection_init(tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables->sequence_length = 1;
    parse_nodes(single_tree_ex_nodes, &tables->nodes);
    CU_ASSERT_EQUAL_FATAL(tables->nodes.num_rows, 7);
    parse_edges(single_tree_ex_edges, &tables->edges);
    CU_ASSERT_EQUAL_FATAL(tables->edges.num_rows, 6);
    parse_sites(sites, &tables->sites);
    CU_ASSERT_EQUAL_FATAL(tables->sites.num_rows, 1);
    parse_mutations(bad_mutations, &tables->mutations);
    CU_ASSERT_EQUAL_FATAL(tables->mutations.num_rows, 2);
    tables->sequence_length = 1.0;

    /* Keep a second, identical collection for the last case below. */
    ret = tsk_table_collection_copy(tables, tables2, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Without the compute flag the bad parent is reported ... */
    ret = tsk_treeseq_init(&ts, tables, TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_MUTATION_PARENT);
    tsk_treeseq_free(&ts);

    /* ... and with it, initialisation succeeds. */
    ret = tsk_treeseq_init(
        &ts, tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_treeseq_free(&ts);

    /* When we use take ownership, the check of parents shouldn't overwrite them */
    ret = tsk_treeseq_init(&ts, tables, TSK_TAKE_OWNERSHIP | TSK_TS_INIT_BUILD_INDEXES);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_BAD_MUTATION_PARENT);
    CU_ASSERT_EQUAL(tables->mutations.parent[0], -1);
    CU_ASSERT_EQUAL(tables->mutations.parent[1], 0);
    tsk_treeseq_free(&ts);

    /* When we use take ownership and compute, the tables are overwritten */
    ret = tsk_treeseq_init(&ts, tables2,
        TSK_TAKE_OWNERSHIP | TSK_TS_INIT_BUILD_INDEXES
            | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tables2->mutations.parent[0], -1);
    CU_ASSERT_EQUAL(tables2->mutations.parent[1], -1);

    /* Don't need to free tables as we took ownership */
    tsk_treeseq_free(&ts);
}
static void
test_init_compute_mutation_parents_errors(void)
{
int ret;
tsk_id_t row_ret;
tsk_table_collection_t tables;
tsk_treeseq_t ts;
const char *sites = "0.5 0\n"
"0 0\n";
ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;
parse_nodes(single_tree_ex_nodes, &tables.nodes);
CU_ASSERT_EQUAL_FATAL(tables.nodes.num_rows, 7);
parse_edges(single_tree_ex_edges, &tables.edges);
CU_ASSERT_EQUAL_FATAL(tables.edges.num_rows, 6);
parse_sites(sites, &tables.sites);
CU_ASSERT_EQUAL_FATAL(tables.sites.num_rows, 2);
tables.sequence_length = 1.0;
ret = tsk_treeseq_init(
&ts, &tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNSORTED_SITES);
tsk_treeseq_free(&ts);
tsk_site_table_clear(&tables.sites);
row_ret = tsk_site_table_add_row(&tables.sites, 0.5, "A", 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(row_ret, 0);
row_ret = tsk_mutation_table_add_row(
&tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, "A", 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(row_ret, 0);
row_ret = tsk_mutation_table_add_row(
&tables.mutations, 0, 4, TSK_NULL, TSK_UNKNOWN_TIME, "A", 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(row_ret, 1);
ret = tsk_treeseq_init(
&ts, &tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TS_INIT_COMPUTE_MUTATION_PARENTS);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
tsk_treeseq_free(&ts);
tsk_table_collection_free(&tables);
}
/* Test-runner entry point: lists every test in this file as a
 * { name, function } pair, ends the list with a { NULL, NULL } sentinel and
 * passes it, together with the command-line arguments, to test_main(),
 * whose return value becomes the process exit status. */
int
main(int argc, char **argv)
{
CU_TestInfo tests[] = {
/* simplest example tests */
{ "test_simplest_discrete_genome", test_simplest_discrete_genome },
{ "test_simplest_discrete_time", test_simplest_discrete_time },
{ "test_simplest_min_time", test_simplest_min_time },
{ "test_simplest_max_time", test_simplest_max_time },
{ "test_simplest_records", test_simplest_records },
{ "test_simplest_nonbinary_records", test_simplest_nonbinary_records },
{ "test_simplest_unary_records", test_simplest_unary_records },
{ "test_simplest_unary_with_individuals", test_simplest_unary_with_individuals },
{ "test_simplest_non_sample_leaf_records",
test_simplest_non_sample_leaf_records },
{ "test_simplest_degenerate_multiple_root_records",
test_simplest_degenerate_multiple_root_records },
{ "test_simplest_multiple_root_records", test_simplest_multiple_root_records },
{ "test_simplest_zero_root_tree", test_simplest_zero_root_tree },
{ "test_simplest_multi_root_tree", test_simplest_multi_root_tree },
{ "test_simplest_tree_mrca", test_simplest_tree_mrca },
{ "test_simplest_root_mutations", test_simplest_root_mutations },
{ "test_simplest_back_mutations", test_simplest_back_mutations },
{ "test_simplest_general_samples", test_simplest_general_samples },
{ "test_simplest_holey_tree_sequence", test_simplest_holey_tree_sequence },
{ "test_simplest_holey_tsk_treeseq_zero_roots",
test_simplest_holey_tsk_treeseq_zero_roots },
{ "test_simplest_holey_tsk_treeseq_mutation_parents",
test_simplest_holey_tsk_treeseq_mutation_parents },
{ "test_simplest_initial_gap_tree_sequence",
test_simplest_initial_gap_tree_sequence },
{ "test_simplest_initial_gap_zero_roots", test_simplest_initial_gap_zero_roots },
{ "test_simplest_initial_gap_tsk_treeseq_mutation_parents",
test_simplest_initial_gap_tsk_treeseq_mutation_parents },
{ "test_simplest_final_gap_tree_sequence",
test_simplest_final_gap_tree_sequence },
{ "test_simplest_final_gap_tsk_treeseq_mutation_parents",
test_simplest_final_gap_tsk_treeseq_mutation_parents },
{ "test_simplest_individuals", test_simplest_individuals },
{ "test_simplest_bad_individuals", test_simplest_bad_individuals },
{ "test_simplest_bad_edges", test_simplest_bad_edges },
{ "test_simplest_bad_indexes", test_simplest_bad_indexes },
{ "test_simplest_bad_migrations", test_simplest_bad_migrations },
{ "test_simplest_migration_simplify", test_simplest_migration_simplify },
{ "test_simplest_overlapping_parents", test_simplest_overlapping_parents },
{ "test_simplest_contradictory_children", test_simplest_contradictory_children },
{ "test_simplest_overlapping_edges_simplify",
test_simplest_overlapping_edges_simplify },
{ "test_simplest_overlapping_unary_edges_simplify",
test_simplest_overlapping_unary_edges_simplify },
{ "test_simplest_overlapping_unary_edges_internal_samples_simplify",
test_simplest_overlapping_unary_edges_internal_samples_simplify },
{ "test_simplest_reduce_site_topology", test_simplest_reduce_site_topology },
{ "test_simplest_simplify_defragment", test_simplest_simplify_defragment },
{ "test_simplest_population_filter", test_simplest_population_filter },
{ "test_simplest_individual_filter", test_simplest_individual_filter },
{ "test_simplest_no_node_filter", test_simplest_no_node_filter },
{ "test_simplest_no_update_flags", test_simplest_no_update_flags },
{ "test_simplest_map_mutations", test_simplest_map_mutations },
{ "test_simplest_nonbinary_map_mutations",
test_simplest_nonbinary_map_mutations },
{ "test_simplest_unary_map_mutations", test_simplest_unary_map_mutations },
{ "test_simplest_non_sample_leaf_map_mutations",
test_simplest_non_sample_leaf_map_mutations },
{ "test_simplest_internal_sample_map_mutations",
test_simplest_internal_sample_map_mutations },
{ "test_simplest_multiple_root_map_mutations",
test_simplest_multiple_root_map_mutations },
{ "test_simplest_chained_map_mutations", test_simplest_chained_map_mutations },
{ "test_simplest_mutation_edges", test_simplest_mutation_edges },
/* Single tree tests */
{ "test_single_tree_good_records", test_single_tree_good_records },
{ "test_single_nonbinary_tree_good_records",
test_single_nonbinary_tree_good_records },
{ "test_single_tree_bad_records", test_single_tree_bad_records },
{ "test_single_tree_good_mutations", test_single_tree_good_mutations },
{ "test_single_tree_bad_mutations", test_single_tree_bad_mutations },
{ "test_single_tree_iter", test_single_tree_iter },
{ "test_single_tree_general_samples_iter",
test_single_tree_general_samples_iter },
{ "test_single_nonbinary_tree_iter", test_single_nonbinary_tree_iter },
{ "test_single_tree_iter_times", test_single_tree_iter_times },
{ "test_single_tree_iter_depths", test_single_tree_iter_depths },
{ "test_single_tree_simplify", test_single_tree_simplify },
{ "test_single_tree_simplify_debug", test_single_tree_simplify_debug },
{ "test_single_tree_simplify_keep_input_roots",
test_single_tree_simplify_keep_input_roots },
{ "test_single_tree_simplify_no_sample_nodes",
test_single_tree_simplify_no_sample_nodes },
{ "test_single_tree_simplify_null_samples",
test_single_tree_simplify_null_samples },
{ "test_single_tree_compute_mutation_parents",
test_single_tree_compute_mutation_parents },
{ "test_single_tree_compute_mutation_times",
test_single_tree_compute_mutation_times },
{ "test_single_tree_mutation_edges", test_single_tree_mutation_edges },
{ "test_single_tree_is_descendant", test_single_tree_is_descendant },
{ "test_single_tree_total_branch_length", test_single_tree_total_branch_length },
{ "test_single_tree_num_lineages", test_single_tree_num_lineages },
{ "test_single_tree_map_mutations", test_single_tree_map_mutations },
{ "test_single_tree_map_mutations_internal_samples",
test_single_tree_map_mutations_internal_samples },
{ "test_single_tree_tracked_samples", test_single_tree_tracked_samples },
{ "test_single_tree_tree_pos", test_single_tree_tree_pos },
/* Multi tree tests */
{ "test_simple_multi_tree", test_simple_multi_tree },
{ "test_multi_tree_direction_switching_tree_pos",
test_multi_tree_direction_switching_tree_pos },
{ "test_nonbinary_multi_tree", test_nonbinary_multi_tree },
{ "test_unary_multi_tree", test_unary_multi_tree },
{ "test_internal_sample_multi_tree", test_internal_sample_multi_tree },
{ "test_internal_sample_simplified_multi_tree",
test_internal_sample_simplified_multi_tree },
{ "test_simplify_keep_input_roots_multi_tree",
test_simplify_keep_input_roots_multi_tree },
{ "test_left_to_right_multi_tree", test_left_to_right_multi_tree },
{ "test_gappy_multi_tree", test_gappy_multi_tree },
{ "test_convenience_arrays_multi_tree", test_convenience_arrays_multi_tree },
{ "test_tsk_treeseq_bad_records", test_tsk_treeseq_bad_records },
/* multiroot tests */
{ "test_multiroot_mrca", test_multiroot_mrca },
/* Sample sets */
{ "test_simple_sample_sets", test_simple_sample_sets },
{ "test_nonbinary_sample_sets", test_nonbinary_sample_sets },
{ "test_internal_sample_sample_sets", test_internal_sample_sample_sets },
{ "test_non_sample_leaf_sample_lists", test_non_sample_leaf_sample_lists },
{ "test_no_sample_count_semantics", test_no_sample_count_semantics },
{ "test_virtual_root_properties", test_virtual_root_properties },
/* tree traversal orders */
{ "test_single_tree_traversal", test_single_tree_traversal },
{ "test_multiroot_tree_traversal", test_multiroot_tree_traversal },
/* Seek */
{ "test_seek_multi_tree", test_seek_multi_tree },
{ "test_seek_errors", test_seek_errors },
/* KC distance tests */
{ "test_single_tree_kc", test_single_tree_kc },
{ "test_isolated_node_kc", test_isolated_node_kc },
{ "test_two_trees_kc", test_two_trees_kc },
{ "test_empty_tree_kc", test_empty_tree_kc },
{ "test_nonbinary_tree_kc", test_nonbinary_tree_kc },
{ "test_nonzero_samples_kc", test_nonzero_samples_kc },
{ "test_internal_samples_kc", test_internal_samples_kc },
{ "test_non_sample_leaf_kc", test_non_sample_leaf_kc },
{ "test_unequal_sample_size_kc", test_unequal_sample_size_kc },
{ "test_unequal_samples_kc", test_unequal_samples_kc },
{ "test_unary_nodes_kc", test_unary_nodes_kc },
{ "test_no_sample_lists_kc", test_no_sample_lists_kc },
{ "test_unequal_sequence_lengths_kc", test_unequal_sequence_lengths_kc },
{ "test_different_number_trees_kc", test_different_number_trees_kc },
{ "test_offset_trees_with_errors_kc", test_offset_trees_with_errors_kc },
/* Tree balance/imbalance index tests */
{ "test_single_tree_balance", test_single_tree_balance },
{ "test_multiroot_balance", test_multiroot_balance },
{ "test_nonbinary_balance", test_nonbinary_balance },
{ "test_empty_tree_balance", test_empty_tree_balance },
{ "test_b2_bad_base", test_b2_bad_base },
/* Misc */
{ "test_tree_errors", test_tree_errors },
{ "test_treeseq_row_access_errors", test_treeseq_row_access_errors },
{ "test_treeseq_get_individuals_population_errors",
test_treeseq_get_individuals_population_errors },
{ "test_treeseq_get_individuals_population",
test_treeseq_get_individuals_population },
{ "test_treeseq_get_individuals_time_errors",
test_treeseq_get_individuals_time_errors },
{ "test_treeseq_get_individuals_time", test_treeseq_get_individuals_time },
{ "test_tree_copy_flags", test_tree_copy_flags },
{ "test_genealogical_nearest_neighbours_errors",
test_genealogical_nearest_neighbours_errors },
{ "test_deduplicate_sites", test_deduplicate_sites },
{ "test_deduplicate_sites_errors", test_deduplicate_sites_errors },
{ "test_deduplicate_sites_zero_rows", test_deduplicate_sites_zero_rows },
{ "test_deduplicate_sites_multichar", test_deduplicate_sites_multichar },
{ "test_empty_tree_sequence", test_empty_tree_sequence },
{ "test_zero_edges", test_zero_edges },
{ "test_tree_sequence_metadata", test_tree_sequence_metadata },
{ "test_time_uncalibrated", test_time_uncalibrated },
{ "test_reference_sequence", test_reference_sequence },
{ "test_split_edges_no_populations", test_split_edges_no_populations },
{ "test_split_edges_populations", test_split_edges_populations },
{ "test_split_edges_errors", test_split_edges_errors },
{ "test_extend_haplotypes_simple", test_extend_haplotypes_simple },
{ "test_extend_haplotypes_errors", test_extend_haplotypes_errors },
{ "test_extend_haplotypes", test_extend_haplotypes },
{ "test_extend_haplotypes_new_edge", test_extend_haplotypes_new_edge },
{ "test_extend_haplotypes_conflicting_times",
test_extend_haplotypes_conflicting_times },
{ "test_init_take_ownership_no_edge_metadata",
test_init_take_ownership_no_edge_metadata },
{ "test_init_compute_mutation_parents", test_init_compute_mutation_parents },
{ "test_init_compute_mutation_parents_errors",
test_init_compute_mutation_parents_errors },
{ NULL, NULL },
};
return test_main(tests, argc, argv);
}
================================================
FILE: c/tests/testlib.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include "testlib.h"
/* Path of a scratch file shared by the test programs.
 * NOTE(review): presumably assigned by the test harness before any test
 * runs; confirm where it is set. */
char *_tmp_file_name;
/* NOTE(review): presumably a stream used to discard output; confirm where
 * it is opened and closed. */
FILE *_devnull;
/* Simple single tree example. */
/* Seven node rows; the tree itself is drawn in the comments to the right of
 * the rows (6 at the top, 4 and 5 below it, 0-3 at the bottom). */
const char *single_tree_ex_nodes = /* 6 */
"1 0 -1 -1\n" /* / \ */
"1 0 -1 -1\n" /* / \ */
"1 0 -1 -1\n" /* / \ */
"1 0 -1 -1\n" /* / 5 */
"0 1 -1 -1\n" /* 4 / \ */
"0 2 -1 -1\n" /* / \ / \ */
"0 3 -1 -1\n"; /* 0 1 2 3 */
/* Each row gives the two children of one parent as a comma-separated list.
 * NOTE(review): columns look like left, right, parent, children; confirm
 * against the edge parser. */
const char *single_tree_ex_edges = "0 1 4 0,1\n"
"0 1 5 2,3\n"
"0 1 6 4,5\n";
/* Three sites, at positions 0.125, 0.25 and 0.5. */
const char *single_tree_ex_sites = "0.125 0\n"
"0.25 0\n"
"0.5 0\n";
/* site, node, derived_state, [parent, time] */
const char *single_tree_ex_mutations
= "0 2 1 -1\n"
"1 4 1 -1\n"
"1 0 0 1\n" /* Back mutation over 0 */
"2 0 1 -1\n" /* recurrent mutations over samples */
"2 1 1 -1\n"
"2 2 1 -1\n"
"2 3 1 -1\n";
/*** Example from the PLOS paper ***/
/*
0.25┊ 8 ┊ ┊ ┊
┊ ┏━┻━┓ ┊ ┊ ┊
0.20┊ ┃ ┃ ┊ ┊ 7 ┊
┊ ┃ ┃ ┊ ┊ ┏━┻━┓ ┊
0.17┊ 6 ┃ ┊ 6 ┊ ┃ ┃ ┊
┊ ┏━┻┓ ┃ ┊ ┏━┻━┓ ┊ ┃ ┃ ┊
0.09┊ ┃ 5 ┃ ┊ ┃ 5 ┊ ┃ 5 ┊
┊ ┃ ┏┻┓ ┃ ┊ ┃ ┏━┻┓ ┊ ┃ ┏━┻┓ ┊
0.07┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ 4 ┊ ┃ ┃ 4 ┊
┊ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊ ┃ ┃ ┏┻┓ ┊
0.00┊ 0 1 3 2 ┊ 0 1 2 3 ┊ 0 1 2 3 ┊
0.00 2.00 7.00 10.00
*/
/* Nine node rows (ids 0-8) for the trees drawn above.
 * NOTE(review): the last column of the first four rows looks like the
 * individual id (0, 0, 1, 1), matching the two individuals defined below;
 * confirm the column order against the node parser. */
const char *paper_ex_nodes = "1 0 -1 0\n"
"1 0 -1 0\n"
"1 0 -1 1\n"
"1 0 -1 1\n"
"0 0.071 -1 -1\n"
"0 0.090 -1 -1\n"
"0 0.170 -1 -1\n"
"0 0.202 -1 -1\n"
"0 0.253 -1 -1\n";
/* Edge rows; a row may list several children separated by commas. */
const char *paper_ex_edges = "2 10 4 2\n"
"2 10 4 3\n"
"0 10 5 1\n"
"0 2 5 3\n"
"2 10 5 4\n"
"0 7 6 0,5\n"
"7 10 7 0,5\n"
"0 2 8 2,6\n";
/* We make one mutation for each tree */
const char *paper_ex_sites = "1 0\n"
"4.5 0\n"
"8.5 0\n";
/* One row per site above; no parent or time columns are given. */
const char *paper_ex_mutations = "0 2 1\n"
"1 0 1\n"
"2 5 1\n";
/* Two (diploid) individuals */
const char *paper_ex_individuals = "0 0.2,1.5 -1,-1\n"
"0 0.0,0.0 -1,-1\n";
/*** An example of a nonbinary tree sequence ***/
/*
0.41┊ 12 ┊ 12 ┊
┊ ┏━━┻━━┓ ┊ ┏━┻━━┓ ┊
0.28┊ ┃ ┃ ┊ 11 ┃ ┊
┊ ┃ ┃ ┊ ┏━┻━┓ ┃ ┊
0.13┊ ┃ 10 ┊ ┃ ┃ 10 ┊
┊ ┃ ┏━╋━┓ ┊ ┃ ┃ ┏┻┓ ┊
0.07┊ 9 ┃ ┃ ┃ ┊ 9 ┃ ┃ ┃ ┊
┊ ┏━━┻━┓ ┃ ┃ ┃ ┊ ┏━━┻━┓ ┃ ┃ ┃ ┊
0.01┊ 8 ┃ ┃ ┃ ┃ ┊ 8 ┃ ┃ ┃ ┃ ┊
┊ ┏━┳┻┳━┓ ┃ ┃ ┃ ┃ ┊ ┏━┳┻┳━┓ ┃ ┃ ┃ ┃ ┊
0.00┊ 0 1 2 3 6 4 5 7 ┊ 0 1 2 3 6 5 4 7 ┊
0 17 100
*/
/* Thirteen node rows (ids 0-12) for the two trees drawn above. */
const char *nonbinary_ex_nodes = "1 0 0 -1\n"
"1 0 0 -1\n"
"1 0 0 -1\n"
"1 0 0 -1\n"
"1 0 0 -1\n"
"1 0 0 -1\n"
"1 0 0 -1\n"
"1 0 0 -1\n"
"0 0.01 0 -1\n"
"0 0.068 0 -1\n"
"0 0.130 0 -1\n"
"0 0.279 0 -1\n"
"0 0.405 0 -1\n";
/* Node 8 has four children. Note that the final row has no trailing
 * newline. */
const char *nonbinary_ex_edges = "0 100 8 0,1,2,3\n"
"0 100 9 6,8\n"
"0 100 10 4\n"
"0 17 10 5\n"
"0 100 10 7\n"
"17 100 11 5,9\n"
"0 17 12 9\n"
"0 100 12 10\n"
"17 100 12 11";
/* One site on each side of the breakpoint at 17. */
const char *nonbinary_ex_sites = "1 0\n"
"18 0\n";
/* One mutation per site; the final row has no trailing newline. */
const char *nonbinary_ex_mutations = "0 2 1\n"
"1 11 1";
/*** An example of a tree sequence with unary nodes
* and also a non-sample leaf (node 9). ***/
/*
0.25┊ 8 ┊ 8 ┊ ┊ ┊
┊ ┏━━┻━━┓ ┊ ┃ ┊ ┊ ┊
0.20┊ ┃ 7 ┊ ┃ ┊ 7 ┊ ┊
┊ ┃ ┃ ┊ ┃ ┊ ┏━┻━━┓ ┊ ┊
0.17┊ 6 ┃ ┊ 6 ┊ ┃ ┃ ┊ ┊
┊ ┏━┻━┓ ┃ ┊ ┏━┻━━┓ ┊ ┃ ┃ ┊ ┊
0.09┊ ┃ 5 ┃ ┊ ┃ 5 ┊ ┃ 5 ┊ ┊
┊ ┃ ┏━╋━┓ ┃ ┊ ┃ ┏━━╋━━┓ ┊ ┃ ┏━━╋━━┓ ┊ ┊
0.07┊ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ 4 ┃ ┊ ┃ ┃ 4 ┃ ┊ ┊
┊ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┃ ┊ ┃ ┃ ┏┻┓ ┃ ┊ ┊
0.00┊ 0 1 3 9 2 ┊ 0 1 2 3 9 ┊ 0 1 2 3 9 ┊ 0 1 2 3 ┊
0 2 7 10 100
*/
/* Ten node rows; the last one (id 9) is the non-sample leaf at time 0. */
const char *unary_ex_nodes = "1 0 0 -1\n"
"1 0 0 -1\n"
"1 0 0 -1\n"
"1 0 0 -1\n"
"0 0.071 0 -1\n"
"0 0.090 0 -1\n"
"0 0.170 0 -1\n"
"0 0.202 0 -1\n"
"0 0.253 0 -1\n"
"0 0 0 -1\n";
/* No edge extends beyond position 10. */
const char *unary_ex_edges = "2 10 4 2,3\n"
"0 10 5 1\n"
"0 2 5 3\n"
"2 10 5 4\n"
"0 10 5 9\n"
"0 7 6 0,5\n"
"7 10 7 0\n"
"0 2 7 2\n"
"7 10 7 5\n"
"0 7 8 6\n"
"0 2 8 7\n";
/* We make one mutation for each tree, over unary nodes if they exist */
const char *unary_ex_sites = "1.0 0\n"
"4.5 0\n"
"8.5 0\n";
/* Four mutation rows; site 1 has two of them (over nodes 6 and 9). */
const char *unary_ex_mutations = "0 2 1\n"
"1 6 1\n"
"1 9 0\n"
"2 5 1\n";
/* An example of a simple tree sequence with multiple marginal trees. */
/* The two marginal trees are drawn in the comments to the right of the node
 * rows, separated by '|'. */
const char *multiple_tree_ex_nodes = /* */
"1 0 -1 -1\n" /* 6 | */
"1 0 -1 -1\n" /* / \ | */
"1 0 -1 -1\n" /* / \ | 5 */
"0 1 -1 -1\n" /* 4 \ | / \ */
"0 2 -1 -1\n" /* / \ \ | / 3 */
"0 3 -1 -1\n" /* / \ \ | / / \ */
"0 4 -1 -1\n"; /* 0 1 2 | 0 1 2 */
/* |----------------|---------------| */
/* 0 1 2 */
/* The edge rows cover [0.0, 0.75) for the first tree and [0.75, 1.0) for the
 * second. */
const char *multiple_tree_ex_edges = "0.75 1.0 3 1,2\n"
"0.0 0.75 4 0,1\n"
"0.75 1.0 5 0,3\n"
"0.0 0.75 6 2,4\n";
/* Odd topology -- different roots. */
/* Six node rows (ids 0-5). The three trees drawn to the right share the
 * 2 -> {0, 1} subtree and have 3, 4 and 5 respectively as the topmost node. */
const char *odd_tree1_ex_nodes = /* | | 5 */
"1 0 -1 -1\n" /* | 4 | | */
"1 0 -1 -1\n" /* 3 | | | | */
"0 1 -1 -1\n" /* | | | | | */
"0 2 -1 -1\n" /* 2 | 2 | 2 */
"0 3 -1 -1\n" /* / \ | / \ | / \ */
"0 4 -1 -1\n"; /* 0 1 | 0 1 | 0 1 */
/* |------|-------|------| */
/* 0.0 0.2 0.7 1.0*/
const char *odd_tree1_ex_edges = "0.0 1.0 2 0,1\n"
"0.0 0.2 3 2\n"
"0.2 0.7 4 2\n"
"0.7 1.0 4 2\n";
/* An example where some samples descend from other samples, and multiple roots */
/* Node rows use the column order read by parse_nodes:
 * "flags time population individual". Nodes 0-3 carry flags 1, nodes 4 and 5
 * carry flags 0. */
const char *multi_root_tree_ex_nodes = "1 0 -1 -1\n" /* 4 5 */
"1 0 -1 -1\n" /* | | */
"1 1 -1 -1\n" /* 2 3 */
"1 1 -1 -1\n" /* | | */
"0 2 -1 -1\n" /* 0 1 */
"0 2 -1 -1\n";
/* Edge rows use the column order read by parse_edges:
 * "left right parent child". Two disjoint chains: 4-2-0 and 5-3-1. */
const char *multi_root_tree_ex_edges = "0 1 2 0\n"
"0 1 3 1\n"
"0 1 4 2\n"
"0 1 5 3\n";
/* Examples of tree sequences where samples have different paths to the same ancestor. */
/* Node rows use the column order read by parse_nodes:
 * "flags time population individual". */
const char *multi_path_tree_ex_nodes = /* 5 | */
"1 0 -1 -1\n" /* / \ | */
"1 0 -1 -1\n" /* / 4 | 4 */
"1 0 -1 -1\n" /* / / \ | / \ */
"0 1 -1 -1\n" /* / / \ | 3 \ */
"0 2 -1 -1\n" /* / / \ | / \ \ */
"0 3 -1 -1\n"; /* 0 2 1 | 0 2 1 */
/*----------------|------------ */
/*0.0 0.2 1.0*/
/* Edge rows use the column order read by parse_edges:
 * "left right parent child". The topology changes at position 0.2. */
const char *multi_path_tree_ex_edges = "0.2 1.0 3 0\n"
"0.2 1.0 3 2\n"
"0.0 1.0 4 1\n"
"0.0 0.2 4 2\n"
"0.2 1.0 4 3\n"
"0.0 0.2 5 0\n"
"0.0 0.2 5 4\n";
/* Second multi-path example: two samples (nodes 0 and 1) and three internal
 * nodes; same column order as above. */
const char *multi_path_tree_ex2_nodes = "1 0 -1 -1\n"
"1 0 -1 -1\n"
"0 1 -1 -1\n"
"0 2 -1 -1\n"
"0 3 -1 -1\n";
/* Node 1 attaches directly to node 4 over [0, 0.6) and via node 2 over
 * [0.6, 1.0); node 0 always reaches node 4 via node 3. */
const char *multi_path_tree_ex2_edges = "0.6 1.0 2 1\n"
"0.0 1.0 3 0\n"
"0.0 0.6 4 1\n"
"0.6 1.0 4 2\n"
"0.0 1.0 4 3\n";
/* An example of a tree sequence with internally sampled nodes. */
/*
1.20┊ ┊ 8 ┊ ┊
┊ ┊ ┏━┻━┓ ┊ ┊
1.00┊ 7 ┊ ┃ ┃ ┊ ┊
┊ ┏━┻━┓ ┊ ┃ ┃ ┊ ┊
0.70┊ ┃ ┃ ┊ ┃ ┃ ┊ 6 ┊
┊ ┃ ┃ ┊ ┃ ┃ ┊ ┏━┻━┓ ┊
0.50┊ ┃ 5 ┊ 5 ┃ ┊ ┃ 5 ┊
┊ ┃ ┏━┻┓ ┊ ┏┻━┓ ┃ ┊ ┃ ┏━┻┓ ┊
0.40┊ ┃ ┃ 4 ┊ 4 ┃ ┃ ┊ ┃ ┃ 4 ┊
┊ ┃ ┃ ┏┻┓ ┊ ┏┻┓ ┃ ┃ ┊ ┃ ┃ ┏┻┓ ┊
0.20┊ ┃ ┃ ┃ 3 ┊ ┃ ┃ ┃ 3 ┊ ┃ ┃ ┃ 3 ┊
┊ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┊
0.10┊ ┃ 1 2 ┊ ┃ 2 1 ┊ ┃ 1 2 ┊
┊ ┃ ┊ ┃ ┊ ┃ ┊
0.00┊ 0 ┊ 0 ┊ 0 ┊
0.00 2.00 8.00 10.00
*/
const char *internal_sample_ex_nodes = "1 0.0 0 -1\n"
"1 0.1 0 -1\n"
"1 0.1 0 -1\n"
"1 0.2 0 -1\n"
"0 0.4 0 -1\n"
"1 0.5 0 -1\n"
"0 0.7 0 -1\n"
"0 1.0 0 -1\n"
"0 1.2 0 -1\n";
const char *internal_sample_ex_edges = "2 8 4 0\n"
"0 10 4 2\n"
"0 2 4 3\n"
"8 10 4 3\n"
"0 10 5 1,4\n"
"8 10 6 0,5\n"
"0 2 7 0,5\n"
"2 8 8 3,5\n";
/* We make one mutation for each tree, some above the internal node */
const char *internal_sample_ex_sites = "1.0 0\n"
"4.5 0\n"
"8.5 0\n";
const char *internal_sample_ex_mutations = "0 2 1\n"
"1 5 1\n"
"2 5 1\n";
/*** An example of a tree sequence with multiple roots. ***/
/*
0.90┊ ┊ 11 ┊ ┊
┊ ┊ ┏┻┓ ┊ ┊
0.80┊ 10 ┊ ┃ ┃ ┊ ┊
┊ ┏┻┓ ┊ ┃ ┃ ┊ ┊
0.40┊ 9 ┃ ┃ ┊ 9 ┃ ┃ ┊ 9 ┊
┊ ┏━┻┓ ┃ ┃ ┊ ┏━┻━┓ ┃ ┃ ┊ ┏━┻━━┓ ┊
0.30┊ ┃ ┃ ┃ ┃ ┊ ┃ 8 ┃ ┃ ┊ ┃ 8 ┊
┊ ┃ ┃ ┃ ┃ ┊ ┃ ┏┻┓ ┃ ┃ ┊ ┃ ┏┻┓ ┊
0.20┊ ┃ 7 ┃ ┃ ┊ 7 ┃ ┃ ┃ ┃ ┊ 7 ┃ ┃ ┊
┊ ┃ ┏┻┓ ┃ ┃ ┊ ┏┻┓ ┃ ┃ ┃ ┃ ┊ ┏━┻┓ ┃ ┃ ┊
0.10┊ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┃ ┃ ┃ ┊ ┃ 6 ┃ ┃ ┊
┊ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┃ ┃ ┃ ┃ ┃ ┊ ┃ ┏┻┓ ┃ ┃ ┊
0.00┊ 5 2 3 4 0 1 ┊ 3 4 1 2 0 5 ┊ 4 0 3 1 2 5 ┊
0 4 8 10
*/
const char *multiroot_ex_nodes = "1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"0 0.1 0 -1\n"
"0 0.2 0 -1\n"
"0 0.3 0 -1\n"
"0 0.4 0 -1\n"
"0 0.8 0 -1\n"
"0 0.9 0 -1\n";
const char *multiroot_ex_edges = "8 10 6 0,3\n"
"0 8 7 3\n"
"0 10 7 4\n"
"8 10 7 6\n"
"4 10 8 1,2\n"
"0 4 9 2\n"
"0 10 9 7\n"
"4 10 9 8\n"
"0 4 10 0,1\n"
"4 8 11 0,5\n";
/* We make one mutation over each root node */
const char *multiroot_ex_sites = "1.0 0\n"
"2.0 0\n"
"3.0 0\n"
"5.0 0\n"
"6.0 0\n"
"8.0 0\n"
"9.0 0\n";
const char *multiroot_ex_mutations = "0 10 1\n"
"1 9 1\n"
"2 5 1\n"
"3 11 1\n"
"4 9 1\n"
"5 9 1\n"
"6 5 1\n";
/*** An example of an empty tree sequence: six nodes and no edges. ***/
/* Node rows use the column order read by parse_nodes:
 * "flags time population individual". */
const char *empty_ex_nodes = "1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n";
/* The empty string makes parse_edges add no rows. */
const char *empty_ex_edges = "";
/*** An example of a tree sequence with missing marginal trees. ***/
/*
| 4 | | 4 |
| / \ | | / \ |
| 3 \ | | / 3 |
| / \ \ | | / / \ |
| 0 1 2 | | 0 1 2 |
|-|-----------|-|-----------|-|
0 1 2 3 4 5
*/
/* Node rows use the column order read by parse_nodes:
 * "flags time population individual". */
const char *missing_ex_nodes = "1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"1 0.0 0 -1\n"
"0 1.0 0 -1\n"
"0 2.0 0 -1\n";
/* Edge rows use the column order read by parse_edges:
 * "left right parent child". Edges only cover [1, 2) and [3, 4); no edge
 * spans any other part of the sequence. */
const char *missing_ex_edges = "1.0 2.0 3 0\n"
"1.0 2.0 3 1\n"
"3.0 4.0 3 1\n"
"3.0 4.0 3 2\n"
"3.0 4.0 4 0\n"
"1.0 2.0 4 2\n"
"1.0 2.0 4 3\n"
"3.0 4.0 4 3\n";
/* Simple utilities to parse text so we can write declarative
 * tests. This is not intended as a robust general input mechanism.
 */
/*
 * Appends one row to node_table for every line of text.
 *
 * Each line holds whitespace (space or tab) separated fields:
 *
 *     flags time population [individual [name]]
 *
 * A missing individual defaults to -1 and a missing name to the empty
 * string; the name is stored as the row's metadata. Lines longer than the
 * internal buffer, lines missing a mandatory field and failed row insertions
 * all raise a fatal CUnit assertion.
 */
void
parse_nodes(const char *text, tsk_node_table_t *node_table)
{
    char line[1024];
    const size_t max_len = sizeof(line) - 1;
    const char *delims = " \t";
    const char *cursor = text;
    const char *name;
    char *token;
    size_t len;
    int flags, population, individual;
    double node_time;
    tsk_id_t row;

    while (*cursor != '\0') {
        /* Copy the next record, without its newline, into the scratch buffer */
        for (len = 0; *cursor != '\n' && *cursor != '\0'; cursor++, len++) {
            CU_ASSERT_FATAL(len < max_len);
            line[len] = *cursor;
        }
        line[len] = '\0';
        if (*cursor == '\n') {
            cursor++;
        }

        token = strtok(line, delims);
        CU_ASSERT_FATAL(token != NULL);
        flags = atoi(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        node_time = atof(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        population = atoi(token);

        /* The two trailing fields are optional */
        individual = -1;
        name = "";
        token = strtok(NULL, delims);
        if (token != NULL) {
            individual = atoi(token);
            token = strtok(NULL, delims);
            if (token != NULL) {
                name = token;
            }
        }

        row = tsk_node_table_add_row(node_table, flags, node_time, population,
            individual, name, strlen(name));
        CU_ASSERT_FATAL(row >= 0);
    }
}
/*
 * Appends rows to edge_table for every line of text.
 *
 * Each line holds whitespace (space or tab) separated fields:
 *
 *     left right parent child[,child...]
 *
 * The last field is a comma-separated list; one edge row (with no metadata)
 * is added per child, all sharing the same left, right and parent. Over-long
 * lines, missing fields and failed insertions raise a fatal CUnit assertion.
 */
void
parse_edges(const char *text, tsk_edge_table_t *edge_table)
{
    char line[1024], children[1024];
    const size_t max_len = sizeof(line) - 1;
    const char *delims = " \t";
    const char *cursor = text;
    char *token, *item, *scan;
    size_t len;
    double left, right;
    tsk_id_t parent, child, row;
    uint32_t num_children, j;

    while (*cursor != '\0') {
        /* Copy the next record, without its newline, into the scratch buffer */
        for (len = 0; *cursor != '\n' && *cursor != '\0'; cursor++, len++) {
            CU_ASSERT_FATAL(len < max_len);
            line[len] = *cursor;
        }
        line[len] = '\0';
        if (*cursor == '\n') {
            cursor++;
        }

        token = strtok(line, delims);
        CU_ASSERT_FATAL(token != NULL);
        left = atof(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        right = atof(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        parent = atoi(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);

        /* One more child than there are commas in the list */
        num_children = 1;
        for (scan = token; *scan != '\0'; scan++) {
            if (*scan == ',') {
                num_children++;
            }
        }
        CU_ASSERT_FATAL(num_children >= 1);

        /* Tokenise a private copy of the list. This resets strtok's state,
         * which is fine because the line itself needs no further splitting. */
        strncpy(children, token, sizeof(children));
        item = strtok(children, ",");
        for (j = 0; j < num_children; j++) {
            CU_ASSERT_FATAL(item != NULL);
            child = atoi(item);
            row = tsk_edge_table_add_row(
                edge_table, left, right, parent, child, NULL, 0);
            CU_ASSERT_FATAL(row >= 0);
            item = strtok(NULL, ",");
        }
        CU_ASSERT_FATAL(item == NULL);
    }
}
/*
 * Appends one row to migration_table for every line of text.
 *
 * Each line holds whitespace (space or tab) separated fields:
 *
 *     left right node source dest time [metadata]
 *
 * A missing metadata field is stored as the empty string. Over-long lines,
 * missing mandatory fields and failed insertions raise a fatal CUnit
 * assertion.
 */
void
parse_migrations(const char *text, tsk_migration_table_t *migration_table)
{
    char line[1024];
    const size_t max_len = sizeof(line) - 1;
    const char *delims = " \t";
    const char *cursor = text;
    const char *metadata;
    char *token;
    size_t len;
    double left, right, migration_time;
    int node, source, dest;
    tsk_id_t row;

    while (*cursor != '\0') {
        /* Copy the next record, without its newline, into the scratch buffer */
        for (len = 0; *cursor != '\n' && *cursor != '\0'; cursor++, len++) {
            CU_ASSERT_FATAL(len < max_len);
            line[len] = *cursor;
        }
        line[len] = '\0';
        if (*cursor == '\n') {
            cursor++;
        }

        token = strtok(line, delims);
        CU_ASSERT_FATAL(token != NULL);
        left = atof(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        right = atof(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        node = atoi(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        source = atoi(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        dest = atoi(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        migration_time = atof(token);

        /* Optional trailing metadata */
        token = strtok(NULL, delims);
        metadata = (token != NULL) ? token : "";

        row = tsk_migration_table_add_row(migration_table, left, right, node, source,
            dest, migration_time, metadata, strlen(metadata));
        CU_ASSERT_FATAL(row >= 0);
    }
}
/*
 * Appends one row to site_table for every line of text.
 *
 * Each line holds whitespace (space or tab) separated fields:
 *
 *     position ancestral_state
 *
 * Rows are added without metadata. Over-long lines, missing fields and
 * failed insertions raise a fatal CUnit assertion.
 */
void
parse_sites(const char *text, tsk_site_table_t *site_table)
{
    char line[1024];
    char ancestral_state[1024];
    const size_t max_len = sizeof(line) - 1;
    const char *delims = " \t";
    const char *cursor = text;
    char *token;
    size_t len;
    double position;
    tsk_id_t row;

    while (*cursor != '\0') {
        /* Copy the next record, without its newline, into the scratch buffer */
        for (len = 0; *cursor != '\n' && *cursor != '\0'; cursor++, len++) {
            CU_ASSERT_FATAL(len < max_len);
            line[len] = *cursor;
        }
        line[len] = '\0';
        if (*cursor == '\n') {
            cursor++;
        }

        token = strtok(line, delims);
        CU_ASSERT_FATAL(token != NULL);
        position = atof(token);

        token = strtok(NULL, delims);
        CU_ASSERT_FATAL(token != NULL);
        /* The token is shorter than the line buffer, so the copy is always
         * NUL terminated. */
        strncpy(ancestral_state, token, sizeof(ancestral_state));

        row = tsk_site_table_add_row(
            site_table, position, ancestral_state, strlen(ancestral_state), NULL, 0);
        CU_ASSERT_FATAL(row >= 0);
    }
}
/*
 * Appends one row to mutation_table for every line of text.
 *
 * Each line holds whitespace (space or tab) separated fields:
 *
 *     site node derived_state [parent [time]]
 *
 * A missing parent defaults to TSK_NULL and a missing time to
 * TSK_UNKNOWN_TIME. Rows are added without metadata. Over-long lines, missing
 * mandatory fields (including completely blank lines) and failed insertions
 * raise a fatal CUnit assertion.
 */
void
parse_mutations(const char *text, tsk_mutation_table_t *mutation_table)
{
    tsk_id_t ret_id;
    size_t c, k;
    size_t MAX_LINE = 1024;
    char line[MAX_LINE];
    const char *whitespace = " \t";
    char *p;
    tsk_id_t node, site, parent;
    double time;
    char derived_state[MAX_LINE];

    c = 0;
    while (text[c] != '\0') {
        /* Fill in the line */
        k = 0;
        while (text[c] != '\n' && text[c] != '\0') {
            CU_ASSERT_FATAL(k < MAX_LINE - 1);
            line[k] = text[c];
            c++;
            k++;
        }
        if (text[c] == '\n') {
            c++;
        }
        line[k] = '\0';

        /* Check each token before converting it: strtok returns NULL for a
         * blank line and atoi(NULL) is undefined behaviour. */
        p = strtok(line, whitespace);
        CU_ASSERT_FATAL(p != NULL);
        site = atoi(p);
        p = strtok(NULL, whitespace);
        CU_ASSERT_FATAL(p != NULL);
        node = atoi(p);
        p = strtok(NULL, whitespace);
        CU_ASSERT_FATAL(p != NULL);
        /* The token is shorter than MAX_LINE, so the copy is NUL terminated */
        strncpy(derived_state, p, MAX_LINE);

        /* Optional trailing fields. strtok keeps returning NULL once the line
         * is exhausted, so the second call is safe even if parent is absent. */
        parent = TSK_NULL;
        p = strtok(NULL, whitespace);
        if (p != NULL) {
            parent = atoi(p);
        }
        time = TSK_UNKNOWN_TIME;
        p = strtok(NULL, whitespace);
        if (p != NULL) {
            time = atof(p);
        }

        ret_id = tsk_mutation_table_add_row(mutation_table, site, node, parent, time,
            derived_state, strlen(derived_state), NULL, 0);
        CU_ASSERT_FATAL(ret_id >= 0);
    }
}
/*
 * Appends one row to individual_table for every line of text.
 *
 * Each line holds whitespace (space or tab) separated fields:
 *
 *     flags location[,location...] [parent[,parent...] [name]]
 *
 * location and parent are comma-separated lists. When the parents field is
 * absent the row gets no parents; a missing name is stored as the empty
 * string. The name is passed as the row's metadata. Over-long lines, missing
 * mandatory fields and failed insertions raise a fatal CUnit assertion.
 */
void
parse_individuals(const char *text, tsk_individual_table_t *individual_table)
{
    char line[1024];
    char list_buf[1024];
    double location[1024];
    tsk_id_t parents[1024];
    const size_t max_len = sizeof(line) - 1;
    const char *delims = " \t";
    const char *cursor = text;
    const char *name;
    char *field, *item, *scan;
    /* Separate strtok_r states so list splitting does not disturb the
     * position within the line. */
    char *field_state, *item_state;
    size_t len;
    int flags, location_len, parents_len, j;
    tsk_id_t row;

    while (*cursor != '\0') {
        /* Copy the next record, without its newline, into the scratch buffer */
        for (len = 0; *cursor != '\n' && *cursor != '\0'; cursor++, len++) {
            CU_ASSERT_FATAL(len < max_len);
            line[len] = *cursor;
        }
        line[len] = '\0';
        if (*cursor == '\n') {
            cursor++;
        }

        field = strtok_r(line, delims, &field_state);
        CU_ASSERT_FATAL(field != NULL);
        flags = atoi(field);

        /* Mandatory comma-separated location list */
        field = strtok_r(NULL, delims, &field_state);
        CU_ASSERT_FATAL(field != NULL);
        location_len = 1;
        for (scan = field; *scan != '\0'; scan++) {
            if (*scan == ',') {
                location_len++;
            }
        }
        CU_ASSERT_FATAL(location_len >= 1);
        strncpy(list_buf, field, sizeof(list_buf));
        item = strtok_r(list_buf, ",", &item_state);
        for (j = 0; j < location_len; j++) {
            CU_ASSERT_FATAL(item != NULL);
            location[j] = atof(item);
            item = strtok_r(NULL, ",", &item_state);
        }
        CU_ASSERT_FATAL(item == NULL);

        /* parents and name are optional */
        parents_len = 0;
        name = "";
        field = strtok_r(NULL, delims, &field_state);
        if (field != NULL) {
            /* Comma-separated parent list */
            parents_len = 1;
            for (scan = field; *scan != '\0'; scan++) {
                if (*scan == ',') {
                    parents_len++;
                }
            }
            CU_ASSERT_FATAL(parents_len >= 1);
            strncpy(list_buf, field, sizeof(list_buf));
            item = strtok_r(list_buf, ",", &item_state);
            for (j = 0; j < parents_len; j++) {
                CU_ASSERT_FATAL(item != NULL);
                parents[j] = atoi(item);
                item = strtok_r(NULL, ",", &item_state);
            }
            CU_ASSERT_FATAL(item == NULL);

            field = strtok_r(NULL, delims, &field_state);
            if (field != NULL) {
                name = field;
            }
        }

        row = tsk_individual_table_add_row(individual_table, flags, location,
            location_len, parents, parents_len, name, strlen(name));
        CU_ASSERT_FATAL(row >= 0);
    }
}
/*
 * Initialises ts from textual table descriptions.
 *
 * nodes and edges are mandatory; migrations, sites, mutations and individuals
 * are parsed only when non-NULL. provenance must be NULL (not supported).
 * tc_options is passed to tsk_table_collection_init.
 *
 * Empty population rows are added so that every population ID referenced by a
 * node or by a migration source/dest exists. The tree sequence is built with
 * TSK_TS_INIT_BUILD_INDEXES, plus TSK_TS_INIT_COMPUTE_MUTATION_PARENTS when no
 * mutation specifies a parent. Any failure raises a fatal CUnit assertion.
 */
void
tsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nodes,
    const char *edges, const char *migrations, const char *sites, const char *mutations,
    const char *individuals, const char *provenance, tsk_flags_t tc_options)
{
    int ret;
    tsk_id_t population_id;
    tsk_id_t max_population_id = -1;
    tsk_size_t row, num_populations;
    tsk_flags_t init_flags = TSK_TS_INIT_BUILD_INDEXES;
    tsk_table_collection_t tables;

    CU_ASSERT_FATAL(ts != NULL);
    CU_ASSERT_FATAL(nodes != NULL);
    CU_ASSERT_FATAL(edges != NULL);
    /* Not supporting provenance here for now */
    CU_ASSERT_FATAL(provenance == NULL);

    ret = tsk_table_collection_init(&tables, tc_options);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tables.sequence_length = sequence_length;

    parse_nodes(nodes, &tables.nodes);
    parse_edges(edges, &tables.edges);
    if (sites != NULL) {
        parse_sites(sites, &tables.sites);
    }
    if (mutations != NULL) {
        parse_mutations(mutations, &tables.mutations);
    }
    if (individuals != NULL) {
        parse_individuals(individuals, &tables.individuals);
    }
    if (migrations != NULL) {
        parse_migrations(migrations, &tables.migrations);
    }

    /* Find the largest population ID referenced anywhere ... */
    for (row = 0; row < tables.nodes.num_rows; row++) {
        if (tables.nodes.population[row] > max_population_id) {
            max_population_id = tables.nodes.population[row];
        }
    }
    for (row = 0; row < tables.migrations.num_rows; row++) {
        if (tables.migrations.source[row] > max_population_id) {
            max_population_id = tables.migrations.source[row];
        }
        if (tables.migrations.dest[row] > max_population_id) {
            max_population_id = tables.migrations.dest[row];
        }
    }
    /* ... and add enough empty populations to cover it */
    num_populations = 0;
    if (max_population_id >= 0) {
        num_populations = (tsk_size_t) max_population_id + 1;
    }
    for (row = 0; row < num_populations; row++) {
        population_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
        CU_ASSERT_EQUAL_FATAL(population_id, row);
    }

    /* Only ask for mutation parents to be computed when none were supplied:
     * scan for the first mutation with an explicit parent. */
    row = 0;
    while (row < tables.mutations.num_rows
           && tables.mutations.parent[row] == TSK_NULL) {
        row++;
    }
    if (row == tables.mutations.num_rows) {
        init_flags |= TSK_TS_INIT_COMPUTE_MUTATION_PARENTS;
    }

    ret = tsk_treeseq_init(ts, &tables, init_flags);
    /* tsk_treeseq_print_state(ts, stdout); */
    if (ret != 0) {
        printf("\nret = %s\n", tsk_strerror(ret));
    }
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_table_collection_free(&tables);
}
/* Returns a tree sequence consisting of a single tree with n samples. This
 * is a full example of the data model, with values included for all fields.
 *
 * Layout, as built below:
 *  - nodes 0..n-1 are samples at time 0, each with its own population and
 *    individual;
 *  - nodes n..2n-2 are internal nodes at times 1..n-1; internal node j joins
 *    the previously created internal node (sample 0 for the first one) with
 *    sample j + 1;
 *  - num_sites sites at positions (j + 1) / n, each carrying num_mutations
 *    mutations on nodes 2n-3, 2n-4, ... with derived states alternating
 *    "1", "0", ...;
 *  - one provenance row and n - 1 migration rows.
 *
 * Requires n > 0, num_sites > 0 and num_mutations < n - 1 (so in practice
 * n >= 2); violations raise a fatal CUnit assertion. The explicit n > 0 test
 * matters because n is unsigned: n - 1 would otherwise wrap for n == 0 and
 * the check would pass, leading to a modulo-by-zero further down.
 *
 * The returned object is allocated with tsk_malloc and initialised with
 * tsk_treeseq_init; this function never releases it.
 */
tsk_treeseq_t *
caterpillar_tree(tsk_size_t n, tsk_size_t num_sites, tsk_size_t num_mutations)
{
    int ret;
    tsk_id_t ret_id;
    tsk_treeseq_t *ts = tsk_malloc(sizeof(tsk_treeseq_t));
    tsk_table_collection_t tables;
    tsk_id_t j, k, last_node, u;
    int state, m;
    double position[2];
    tsk_id_t parents[2] = { -1, -1 };
    const char *states[] = { "0", "1" };
    const char *metadata[] = { "This", "is", "some", "metadata" };
    const int num_metadatas = sizeof(metadata) / sizeof(*metadata);
    const char *metadata_schema = "mock metadata schema";
    const char *ts_metadata = "This is a caterpillar tree";
    const char *ts_metadata_schema = "The metadata is an example";
    const char *prov_timestamp = "a timestamp, should be ISO8601";
    const char *prov_record = "Produced by caterpillar_tree for testing purposes";

    CU_ASSERT_FATAL(ts != NULL);
    ret = tsk_table_collection_init(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_FATAL(n > 0 && num_sites > 0 && num_mutations < n - 1);

    tables.sequence_length = 1.0;

    /* Top-level and reference sequence metadata */
    ret = tsk_table_collection_set_metadata(&tables, ts_metadata, strlen(ts_metadata));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_set_metadata_schema(
        &tables, ts_metadata_schema, strlen(ts_metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_set_metadata_schema(
        &tables.reference_sequence, ts_metadata_schema, strlen(ts_metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_set_metadata(
        &tables.reference_sequence, ts_metadata, strlen(ts_metadata));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_set_data(&tables.reference_sequence, "A", 1);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_reference_sequence_set_url(&tables.reference_sequence, "B", 1);
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* Every table gets the same mock metadata schema */
    ret = tsk_population_table_set_metadata_schema(
        &tables.populations, metadata_schema, strlen(metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_individual_table_set_metadata_schema(
        &tables.individuals, metadata_schema, strlen(metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_node_table_set_metadata_schema(
        &tables.nodes, metadata_schema, strlen(metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_edge_table_set_metadata_schema(
        &tables.edges, metadata_schema, strlen(metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_site_table_set_metadata_schema(
        &tables.sites, metadata_schema, strlen(metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_mutation_table_set_metadata_schema(
        &tables.mutations, metadata_schema, strlen(metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_migration_table_set_metadata_schema(
        &tables.migrations, metadata_schema, strlen(metadata_schema));
    CU_ASSERT_EQUAL_FATAL(ret, 0);

    /* One population, individual and sample node per sample */
    for (j = 0; j < (tsk_id_t) n; j++) {
        position[0] = j;
        position[1] = j;
        m = j % num_metadatas;
        ret_id = tsk_population_table_add_row(
            &tables.populations, metadata[m], strlen(metadata[m]));
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
        ret_id = tsk_individual_table_add_row(&tables.individuals, 0, position, 2,
            parents, 2, metadata[m], strlen(metadata[m]));
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
        ret_id = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0, j, j,
            metadata[m], strlen(metadata[m]));
        CU_ASSERT_EQUAL_FATAL(ret_id, j);
    }

    /* Build the caterpillar: each new internal node joins the previous
     * subtree (last_node) with the next sample. */
    last_node = 0;
    for (j = 0; j < n - 1; j++) {
        m = j % num_metadatas;
        ret_id = tsk_node_table_add_row(
            &tables.nodes, 0, j + 1, j % n, TSK_NULL, metadata[m], strlen(metadata[m]));
        CU_ASSERT_FATAL(ret_id >= 0);
        u = ret_id;
        ret_id = tsk_edge_table_add_row(
            &tables.edges, 0, 1, u, last_node, metadata[m], strlen(metadata[m]));
        CU_ASSERT_FATAL(ret_id >= 0);
        ret_id = tsk_edge_table_add_row(
            &tables.edges, 0, 1, u, j + 1, metadata[m], strlen(metadata[m]));
        CU_ASSERT_FATAL(ret_id >= 0);
        last_node = u;
    }

    for (j = 0; j < num_sites; j++) {
        m = j % num_metadatas;
        ret_id = tsk_site_table_add_row(&tables.sites, (j + 1) / (double) n, states[0],
            strlen(states[0]), metadata[m], strlen(metadata[m]));
        CU_ASSERT_FATAL(ret_id >= 0);
        /* Mutations walk down from node 2n - 3, flipping the state each time;
         * each mutation is timed at its node's time. */
        u = 2 * n - 3;
        state = 0;
        for (k = 0; k < num_mutations; k++) {
            m = k % num_metadatas;
            state = (state + 1) % 2;
            ret_id = tsk_mutation_table_add_row(&tables.mutations, j, u, TSK_NULL,
                tables.nodes.time[u], states[state], strlen(states[state]), metadata[m],
                strlen(metadata[m]));
            CU_ASSERT_FATAL(ret_id >= 0);
            u--;
        }
    }

    ret_id = tsk_provenance_table_add_row(&tables.provenances, prov_timestamp,
        strlen(prov_timestamp), prov_record, strlen(prov_record));
    CU_ASSERT_EQUAL_FATAL(ret_id, 0);

    /* TODO make these consistent with the caterpillar tree topology. */
    for (j = 0; j < n - 1; j++) {
        m = j % num_metadatas;
        ret_id = tsk_migration_table_add_row(&tables.migrations, 0, 1, j, j, j + 1,
            j + 1.5, metadata[m], strlen(metadata[m]));
        CU_ASSERT_FATAL(ret_id >= 0);
    }

    ret = tsk_table_collection_sort(&tables, 0, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_build_index(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_table_collection_compute_mutation_parents(&tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    ret = tsk_treeseq_init(ts, &tables, 0);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    tsk_table_collection_free(&tables);
    return ts;
}
/*
 * Reverses, in place, the order of the edge rows from index start to the end
 * of the table. Only the left, right, parent and child columns are moved;
 * metadata columns are not touched.
 *
 * start must not exceed edges->num_rows; this is asserted so that the
 * unsigned subtraction below cannot wrap. Ranges of fewer than two rows are
 * left unchanged.
 */
void
unsort_edges(tsk_edge_table_t *edges, size_t start)
{
    size_t lo, hi;
    double left, right;
    tsk_id_t parent, child;

    CU_ASSERT_FATAL(start <= edges->num_rows);
    if (edges->num_rows - start < 2) {
        return;
    }

    /* Swap rows pairwise, walking inwards from both ends of the range */
    lo = start;
    hi = edges->num_rows - 1;
    while (lo < hi) {
        left = edges->left[lo];
        right = edges->right[lo];
        parent = edges->parent[lo];
        child = edges->child[lo];

        edges->left[lo] = edges->left[hi];
        edges->right[lo] = edges->right[hi];
        edges->parent[lo] = edges->parent[hi];
        edges->child[lo] = edges->child[hi];

        edges->left[hi] = left;
        edges->right[hi] = right;
        edges->parent[hi] = parent;
        edges->child[hi] = child;

        lo++;
        hi--;
    }
}
/*
 * CUnit suite initialiser: creates a unique temporary file (its name is kept
 * in _tmp_file_name) and opens /dev/null for writing as _devnull.
 *
 * Returns CUE_SUCCESS, CUE_NOMEMORY if the name buffer cannot be allocated,
 * or CUE_SINIT_FAILED if the temporary file or /dev/null cannot be opened.
 * On failure nothing is left behind: the name buffer is freed, any temporary
 * file already created is removed and both globals are NULL.
 */
static int
tskit_suite_init(void)
{
    int fd = -1;
    static char template[] = "/tmp/tsk_c_test_XXXXXX";

    _tmp_file_name = NULL;
    _devnull = NULL;

    _tmp_file_name = tsk_malloc(sizeof(template));
    if (_tmp_file_name == NULL) {
        return CUE_NOMEMORY;
    }
    strcpy(_tmp_file_name, template);
    /* mkstemp replaces the XXXXXX suffix in place and creates the file */
    fd = mkstemp(_tmp_file_name);
    if (fd == -1) {
        free(_tmp_file_name);
        _tmp_file_name = NULL;
        return CUE_SINIT_FAILED;
    }
    close(fd);

    _devnull = fopen("/dev/null", "w");
    if (_devnull == NULL) {
        /* Do not leave the temporary file or its name behind */
        unlink(_tmp_file_name);
        free(_tmp_file_name);
        _tmp_file_name = NULL;
        return CUE_SINIT_FAILED;
    }
    return CUE_SUCCESS;
}
/*
 * CUnit suite cleanup: closes the /dev/null stream and removes the temporary
 * file, releasing its name buffer. Either step is skipped when the
 * corresponding global is NULL. Always reports CUE_SUCCESS.
 */
static int
tskit_suite_cleanup(void)
{
    const int status = CUE_SUCCESS;

    if (_devnull != NULL) {
        fclose(_devnull);
    }
    if (_tmp_file_name != NULL) {
        unlink(_tmp_file_name);
        free(_tmp_file_name);
    }
    return status;
}
/*
 * Reports the current CUnit framework error (code and message) on stderr and
 * terminates the process with EXIT_FAILURE. Does not return.
 */
static void
handle_cunit_error(void)
{
    const int code = CU_get_error();
    const char *message = CU_get_error_msg();

    fprintf(stderr, "CUnit error occured: %d: %s\n", code, message);
    exit(EXIT_FAILURE);
}
/*
 * Entry point shared by the test programs.
 *
 * Registers tests in a single suite named "tskit" and runs them in verbose
 * mode. With no extra argument every test is run; with one argument only the
 * test of that name is run. Any other argument count prints a usage message.
 *
 * Returns EXIT_SUCCESS only if everything ran and neither a test nor the
 * suite's init/cleanup function failed; otherwise EXIT_FAILURE. A failure of
 * the CUnit registry itself terminates the process via handle_cunit_error.
 * The CUnit registry is released on every path that created it.
 */
int
test_main(CU_TestInfo *tests, int argc, char **argv)
{
    int ret = EXIT_FAILURE;
    CU_pTest test;
    CU_pSuite suite;
    CU_SuiteInfo suites[] = {
        {
            .pName = "tskit",
            .pInitFunc = tskit_suite_init,
            .pCleanupFunc = tskit_suite_cleanup,
            .pTests = tests,
        },
        CU_SUITE_INFO_NULL,
    };

    /* Validate the command line before allocating any CUnit state */
    if (argc != 1 && argc != 2) {
        printf("usage: %s [test_name]\n", argc > 0 ? argv[0] : "test");
        return EXIT_FAILURE;
    }

    if (CUE_SUCCESS != CU_initialize_registry()) {
        handle_cunit_error();
    }
    if (CUE_SUCCESS != CU_register_suites(suites)) {
        handle_cunit_error();
    }
    CU_basic_set_mode(CU_BRM_VERBOSE);

    if (argc == 1) {
        CU_basic_run_tests();
    } else {
        suite = CU_get_suite_by_name("tskit", CU_get_registry());
        if (suite == NULL) {
            printf("Suite not found\n");
            goto out;
        }
        test = CU_get_test_by_name(argv[1], suite);
        if (test == NULL) {
            printf("Test '%s' not found\n", argv[1]);
            goto out;
        }
        CU_basic_run_test(suite, test);
    }

    /* A suite whose init function fails runs no tests at all, so the failed
     * test count alone would report success in that case. */
    if (CU_get_number_of_tests_failed() != 0 || CU_get_number_of_suites_failed() != 0) {
        printf("Test failed!\n");
    } else {
        ret = EXIT_SUCCESS;
    }
out:
    CU_cleanup_registry();
    return ret;
}
================================================
FILE: c/tests/testlib.h
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef __TESTLIB_H__
#define __TESTLIB_H__
#define _GNU_SOURCE
#include
#include
#include
#include
#include
/* Global variables used in the test suite */
extern char *_tmp_file_name;
extern FILE *_devnull;
int test_main(CU_TestInfo *tests, int argc, char **argv);
void tsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nodes,
const char *edges, const char *migrations, const char *sites, const char *mutations,
const char *individuals, const char *provenance, tsk_flags_t tc_options);
tsk_treeseq_t *caterpillar_tree(
tsk_size_t num_samples, tsk_size_t num_sites, tsk_size_t num_mutations);
void parse_nodes(const char *text, tsk_node_table_t *node_table);
void parse_edges(const char *text, tsk_edge_table_t *edge_table);
void parse_sites(const char *text, tsk_site_table_t *site_table);
void parse_mutations(const char *text, tsk_mutation_table_t *mutation_table);
void parse_individuals(const char *text, tsk_individual_table_t *individual_table);
void unsort_edges(tsk_edge_table_t *edges, size_t start);
/* Use a macro so we can get line numbers at roughly the right place.
 *
 * All three macros compare the first len elements of a and b element by
 * element. The arguments are parenthesised in the expansions so that
 * expressions such as "n + 1" or "ptr + offset" can be passed safely; note
 * that a and b are still evaluated once per element, so they must not have
 * side effects. The outer braces are kept so existing call sites continue to
 * compile unchanged. */

/* Asserts |a[j] - b[j]| <= 1e-9 for every j < len */
#define assert_arrays_almost_equal(len, a, b)                                           \
    {                                                                                   \
        do {                                                                            \
            tsk_size_t _j;                                                              \
            for (_j = 0; _j < (len); _j++) {                                            \
                CU_ASSERT_DOUBLE_EQUAL((a)[_j], (b)[_j], 1e-9);                         \
            }                                                                           \
        } while (0);                                                                    \
    }

/* Asserts a[j] == b[j] for every j < len */
#define assert_arrays_equal(len, a, b)                                                  \
    {                                                                                   \
        do {                                                                            \
            tsk_size_t _j;                                                              \
            for (_j = 0; _j < (len); _j++) {                                            \
                CU_ASSERT_EQUAL((a)[_j], (b)[_j]);                                      \
            }                                                                           \
        } while (0);                                                                    \
    }

/* Array equality if the arrays contain NaN values: a NaN must be matched by
   a NaN at the same index (fatal otherwise); other elements must agree to
   within 1e-9.
   NB: the float cast for NaNs is for mingw, which complains without */
#define assert_arrays_almost_equal_nan(len, a, b)                                       \
    {                                                                                   \
        do {                                                                            \
            tsk_size_t _j;                                                              \
            for (_j = 0; _j < (len); _j++) {                                            \
                if (isnan((float) (a)[_j]) || isnan((float) (b)[_j])) {                 \
                    CU_ASSERT_EQUAL_FATAL(                                              \
                        isnan((float) (a)[_j]), isnan((float) (b)[_j]));                \
                } else {                                                                \
                    CU_ASSERT_DOUBLE_EQUAL((a)[_j], (b)[_j], 1e-9);                     \
                }                                                                       \
            }                                                                           \
        } while (0);                                                                    \
    }
/* Textual table fixtures defined in testlib.c. Each string holds one table
 * row per line, in the format understood by the matching parse_* function. */
extern const char *single_tree_ex_nodes;
extern const char *single_tree_ex_edges;
extern const char *single_tree_ex_sites;
extern const char *single_tree_ex_mutations;
extern const char *multiple_tree_ex_nodes;
extern const char *multiple_tree_ex_edges;
extern const char *odd_tree1_ex_nodes;
extern const char *odd_tree1_ex_edges;
extern const char *multi_root_tree_ex_nodes;
extern const char *multi_root_tree_ex_edges;
extern const char *multi_path_tree_ex_nodes;
extern const char *multi_path_tree_ex_edges;
extern const char *multi_path_tree_ex2_nodes;
extern const char *multi_path_tree_ex2_edges;
extern const char *nonbinary_ex_nodes;
extern const char *nonbinary_ex_edges;
extern const char *nonbinary_ex_sites;
extern const char *nonbinary_ex_mutations;
extern const char *unary_ex_nodes;
extern const char *unary_ex_edges;
extern const char *unary_ex_sites;
extern const char *unary_ex_mutations;
extern const char *internal_sample_ex_nodes;
extern const char *internal_sample_ex_edges;
extern const char *internal_sample_ex_sites;
extern const char *internal_sample_ex_mutations;
extern const char *multiroot_ex_nodes;
extern const char *multiroot_ex_edges;
extern const char *multiroot_ex_sites;
extern const char *multiroot_ex_mutations;
extern const char *empty_ex_nodes;
extern const char *empty_ex_edges;
extern const char *paper_ex_nodes;
extern const char *paper_ex_edges;
extern const char *paper_ex_sites;
extern const char *paper_ex_mutations;
extern const char *paper_ex_individuals;
extern const char *missing_ex_nodes;
extern const char *missing_ex_edges;
#endif
================================================
FILE: c/tskit/convert.c
================================================
/*
* MIT License
*
* Copyright (c) 2018-2025 Tskit Developers
* Copyright (c) 2015-2017 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include
#include
#include
#include
#include
#include
/* ======================================================== *
* Newick output.
* ======================================================== */
/* This infrastructure is left-over from an earlier more complex version
* of this algorithm that worked over a tree sequence and cached the newick
* subtrees, updating according to diffs. It's unclear whether this complexity
* was of any real-world use, since newick output for large trees is pretty
* pointless. */
/* State for a single newick conversion. The unused "newick" string member
 * was dropped: nothing in this file read, wrote, allocated or freed it. */
typedef struct {
    /* Number of digits printed after the decimal point of branch lengths */
    unsigned int precision;
    /* Bitwise flags; TSK_NEWICK_LEGACY_MS_LABELS is the one examined here */
    tsk_flags_t options;
    /* Scratch stack of node IDs used by the traversal; owned by this struct */
    tsk_id_t *traversal_stack;
    /* The tree being converted; borrowed, never freed here */
    const tsk_tree_t *tree;
} tsk_newick_converter_t;
/*
 * Writes the newick representation of the subtree rooted at root into buffer
 * as a NUL-terminated string ending in ';'.
 *
 * Labels: with TSK_NEWICK_LEGACY_MS_LABELS set, nodes without children are
 * labelled with their ID plus one; otherwise nodes flagged TSK_NODE_IS_SAMPLE
 * are labelled "n<ID>". Every node except root is followed by ":<length>",
 * the difference between its parent's time and its own, printed with
 * self->precision decimals.
 *
 * Returns 0 on success, TSK_ERR_NODE_OUT_OF_BOUNDS if root is not a valid
 * node ID, TSK_ERR_BAD_PARAM_VALUE if buffer is NULL, TSK_ERR_BUFFER_OVERFLOW
 * if the output does not fit in buffer_size bytes, or TSK_ERR_IO if snprintf
 * reports an error.
 */
static int
tsk_newick_converter_run(
    tsk_newick_converter_t *self, tsk_id_t root, size_t buffer_size, char *buffer)
{
    int ret = TSK_ERR_GENERIC;
    const tsk_tree_t *tree = self->tree;
    const double *node_time = tree->tree_sequence->tables->nodes.time;
    const tsk_flags_t *node_flags = tree->tree_sequence->tables->nodes.flags;
    const bool ms_labels = self->options & TSK_NEWICK_LEGACY_MS_LABELS;
    const char *label_format = ms_labels ? "%d" : "n%d";
    tsk_id_t *stack = self->traversal_stack;
    int top = 0;
    size_t offset = 0;
    int written, label;
    tsk_id_t node, child, last_parent, root_parent;

    if (root < 0 || root >= (tsk_id_t) tree->num_nodes) {
        ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
        goto out;
    }
    if (buffer == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }

    root_parent = tree->parent[root];
    /* last_parent is the parent of the node emitted most recently. Finding an
     * internal node on top of the stack that equals last_parent means all of
     * its children have just been written and it is now being closed. */
    last_parent = root_parent;
    stack[0] = root;

    while (top >= 0) {
        node = stack[top];

        if (tree->left_child[node] != TSK_NULL && node != last_parent) {
            /* First visit to an internal node: open a group and push the
             * children right-to-left so the leftmost is processed first. */
            if (offset >= buffer_size) {
                ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);
                goto out;
            }
            buffer[offset] = '(';
            offset++;
            for (child = tree->right_child[node]; child != TSK_NULL;
                 child = tree->left_sib[child]) {
                top++;
                stack[top] = child;
            }
            continue;
        }

        /* Leaf, or internal node whose children are complete: emit it */
        last_parent = tree->parent[node];
        top--;

        label = -1;
        if (ms_labels) {
            if (tree->left_child[node] == TSK_NULL) {
                label = (int) node + 1;
            }
        } else if (node_flags[node] & TSK_NODE_IS_SAMPLE) {
            label = (int) node;
        }
        if (label != -1) {
            if (offset >= buffer_size) {
                ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);
                goto out;
            }
            written = snprintf(buffer + offset, buffer_size - offset, label_format, label);
            if (written < 0) {
                ret = tsk_trace_error(TSK_ERR_IO);
                goto out;
            }
            offset += (size_t) written;
            if (offset >= buffer_size) {
                ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);
                goto out;
            }
        }

        if (last_parent == root_parent) {
            /* The requested root carries no branch length or separator */
            continue;
        }
        written = snprintf(buffer + offset, buffer_size - offset, ":%.*f",
            (int) self->precision, node_time[last_parent] - node_time[node]);
        if (written < 0) {
            ret = tsk_trace_error(TSK_ERR_IO);
            goto out;
        }
        offset += (size_t) written;
        if (offset >= buffer_size) {
            ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);
            goto out;
        }
        /* The rightmost child closes the group; others are followed by ',' */
        buffer[offset] = (node == tree->right_child[last_parent]) ? ')' : ',';
        offset++;
    }

    /* Room is needed for both the terminating ';' and the NUL byte */
    if ((offset + 1) >= buffer_size) {
        ret = tsk_trace_error(TSK_ERR_BUFFER_OVERFLOW);
        goto out;
    }
    buffer[offset] = ';';
    buffer[offset + 1] = '\0';
    ret = 0;
out:
    return ret;
}
/*
 * Zeroes the converter, records the tree, precision and options, and
 * allocates a traversal stack with one slot per entry of the tree's size
 * bound. Returns 0 on success or TSK_ERR_NO_MEMORY if the allocation fails,
 * in which case traversal_stack is left NULL.
 */
static int
tsk_newick_converter_init(tsk_newick_converter_t *self, const tsk_tree_t *tree,
    unsigned int precision, tsk_flags_t options)
{
    tsk_memset(self, 0, sizeof(*self));
    self->tree = tree;
    self->options = options;
    self->precision = precision;

    self->traversal_stack
        = tsk_malloc(tsk_tree_get_size_bound(tree) * sizeof(*self->traversal_stack));
    if (self->traversal_stack == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    return 0;
}
/*
 * Releases the traversal stack held by the converter. The tree itself is not
 * touched. Always returns 0.
 */
static int
tsk_newick_converter_free(tsk_newick_converter_t *self)
{
    const int ret = 0;

    tsk_safe_free(self->traversal_stack);
    return ret;
}
/*
 * Writes the newick string for the subtree of tree rooted at root into
 * buffer (at most buffer_size bytes, including the NUL terminator), printing
 * branch lengths with precision decimals. options may contain
 * TSK_NEWICK_LEGACY_MS_LABELS. Returns 0 on success or a negative tskit
 * error code from setting up or running the converter.
 */
int
tsk_convert_newick(const tsk_tree_t *tree, tsk_id_t root, unsigned int precision,
    tsk_flags_t options, size_t buffer_size, char *buffer)
{
    tsk_newick_converter_t converter;
    int ret = tsk_newick_converter_init(&converter, tree, precision, options);

    if (ret == 0) {
        ret = tsk_newick_converter_run(&converter, root, buffer_size, buffer);
    }
    /* Safe after a failed init too: init zeroes the struct before allocating */
    tsk_newick_converter_free(&converter);
    return ret;
}
================================================
FILE: c/tskit/convert.h
================================================
/*
* MIT License
*
* Copyright (c) 2018-2021 Tskit Developers
* Copyright (c) 2015-2017 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSK_CONVERT_H
#define TSK_CONVERT_H
#ifdef __cplusplus
extern "C" {
#endif
#include
/* Option for tsk_convert_newick: label only nodes without children, using the
 * bare number "ID + 1", instead of labelling sample nodes as "n<ID>". */
#define TSK_NEWICK_LEGACY_MS_LABELS (1 << 0)
/* Writes the newick representation of the subtree of tree rooted at root into
 * buffer as a NUL-terminated string of at most buffer_size bytes. Branch
 * lengths are printed with precision digits after the decimal point.
 * Returns 0 on success and a negative error code otherwise, including
 * TSK_ERR_NODE_OUT_OF_BOUNDS for an invalid root, TSK_ERR_BAD_PARAM_VALUE for
 * a NULL buffer and TSK_ERR_BUFFER_OVERFLOW when the output does not fit. */
int tsk_convert_newick(const tsk_tree_t *tree, tsk_id_t root, unsigned int precision,
tsk_flags_t options, size_t buffer_size, char *buffer);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: c/tskit/core.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2025 Tskit Developers
* Copyright (c) 2015-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include
#include
#include
#include
#include
#include
#include
#define UUID_NUM_BYTES 16
#if defined(_WIN32)
#include
#include
/* Fill `buf` with UUID_NUM_BYTES random bytes using the Windows CryptoAPI.
 *
 * Returns 0 on success, or the TSK_ERR_GENERATE_UUID error if acquiring the
 * provider, generating the bytes or releasing the provider fails.
 * Based on CPython's code in bootstrap_hash.c.
 */
static int TSK_WARN_UNUSED
get_random_bytes(uint8_t *buf)
{
    int ret = 0;
    HCRYPTPROV provider = (HCRYPTPROV) NULL;

    if (!CryptAcquireContext(
            &provider, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) {
        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);
        goto out;
    }
    if (!CryptGenRandom(provider, (DWORD) UUID_NUM_BYTES, buf)) {
        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);
        goto out;
    }
    /* Release on the success path so that a failed release is reported.
     * Whatever the outcome, the handle must not be released a second time. */
    if (!CryptReleaseContext(provider, 0)) {
        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);
    }
    provider = (HCRYPTPROV) NULL;
out:
    /* Only reached with a live handle when an earlier step failed. */
    if (provider != (HCRYPTPROV) NULL) {
        CryptReleaseContext(provider, 0);
    }
    return ret;
}
#else
/* Assuming the existence of /dev/urandom on Unix platforms.
 *
 * Fill `buf` with UUID_NUM_BYTES random bytes read from /dev/urandom.
 * Returns 0 on success, or the TSK_ERR_GENERATE_UUID error if the device
 * cannot be opened, read or closed. The stream is closed on every path.
 */
static int TSK_WARN_UNUSED
get_random_bytes(uint8_t *buf)
{
    int ret = 0;
    FILE *f = fopen("/dev/urandom", "r");

    if (f == NULL) {
        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);
        goto out;
    }
    if (fread(buf, UUID_NUM_BYTES, 1, f) != 1) {
        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);
        goto out;
    }
    if (fclose(f) != 0) {
        /* The stream is gone even when fclose reports failure, so it must
         * not be closed again in the cleanup code below. */
        f = NULL;
        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);
        goto out;
    }
    f = NULL;
out:
    /* Still open only if the read failed: close it to avoid leaking the
     * stream. The primary error is already recorded in ret. */
    if (f != NULL) {
        fclose(f);
    }
    return ret;
}
#endif
/* Generate a new UUID4 string from a system-provided source of randomness.
 *
 * UUID_NUM_BYTES random bytes are formatted as lower-case hex digits in the
 * usual 8-4-4-4-12 grouping. The output is NUL terminated, so `dest` must
 * have room for TSK_UUID_SIZE + 1 bytes. The `flags` argument is unused.
 * Returns 0 on success, or an error code if the random bytes cannot be
 * obtained or formatting fails.
 */
int
tsk_generate_uuid(char *dest, int TSK_UNUSED(flags))
{
    const char *fmt
        = "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x";
    uint8_t b[UUID_NUM_BYTES];
    int written;
    int ret = get_random_bytes(b);

    if (ret != 0) {
        return ret;
    }
    written = snprintf(dest, TSK_UUID_SIZE + 1, fmt, b[0], b[1], b[2], b[3], b[4],
        b[5], b[6], b[7], b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
    if (written < 0) {
        ret = tsk_trace_error(TSK_ERR_GENERATE_UUID);
    }
    return ret;
}
static const char *
tsk_strerror_internal(int err)
{
const char *ret = "Unknown error";
switch (err) {
case 0:
ret = "Normal exit condition. This is not an error!";
break;
/* General errors */
case TSK_ERR_GENERIC:
ret = "Generic error; please file a bug report. (TSK_ERR_GENERIC)";
break;
case TSK_ERR_NO_MEMORY:
ret = "Out of memory. (TSK_ERR_NO_MEMORY)";
break;
case TSK_ERR_IO:
if (errno != 0) {
ret = strerror(errno);
} else {
ret = "Unspecified IO error";
}
break;
case TSK_ERR_BAD_PARAM_VALUE:
ret = "Bad parameter value provided. (TSK_ERR_BAD_PARAM_VALUE)";
break;
case TSK_ERR_BUFFER_OVERFLOW:
ret = "Supplied buffer is too small. (TSK_ERR_BUFFER_OVERFLOW)";
break;
case TSK_ERR_UNSUPPORTED_OPERATION:
ret = "Operation cannot be performed in current configuration. "
"(TSK_ERR_UNSUPPORTED_OPERATION)";
break;
case TSK_ERR_GENERATE_UUID:
ret = "Error generating UUID. (TSK_ERR_GENERATE_UUID)";
break;
case TSK_ERR_EOF:
ret = "End of file. (TSK_ERR_EOF)";
break;
/* File format errors */
case TSK_ERR_FILE_FORMAT:
ret = "File format error. (TSK_ERR_FILE_FORMAT)";
break;
case TSK_ERR_FILE_VERSION_TOO_OLD:
ret = "tskit file version too old. Please upgrade using the "
"'tskit upgrade' command from tskit version<0.6.2. "
"(TSK_ERR_FILE_VERSION_TOO_OLD)";
break;
case TSK_ERR_FILE_VERSION_TOO_NEW:
ret = "tskit file version is too new for this instance. "
"Please upgrade tskit to the latest version. "
"(TSK_ERR_FILE_VERSION_TOO_NEW)";
break;
case TSK_ERR_REQUIRED_COL_NOT_FOUND:
ret = "A required column was not found in the file. "
"(TSK_ERR_REQUIRED_COL_NOT_FOUND)";
break;
case TSK_ERR_BOTH_COLUMNS_REQUIRED:
ret = "Both columns in a related pair must be provided. "
"(TSK_ERR_BOTH_COLUMNS_REQUIRED)";
break;
case TSK_ERR_BAD_COLUMN_TYPE:
ret = "An incompatible type for a column was found in the file. "
"(TSK_ERR_BAD_COLUMN_TYPE)";
break;
/* Out of bounds errors */
case TSK_ERR_BAD_OFFSET:
ret = "Bad offset provided in input array. (TSK_ERR_BAD_OFFSET)";
break;
case TSK_ERR_NODE_OUT_OF_BOUNDS:
ret = "Node out of bounds. (TSK_ERR_NODE_OUT_OF_BOUNDS)";
break;
case TSK_ERR_EDGE_OUT_OF_BOUNDS:
ret = "Edge out of bounds. (TSK_ERR_EDGE_OUT_OF_BOUNDS)";
break;
case TSK_ERR_POPULATION_OUT_OF_BOUNDS:
ret = "Population out of bounds. (TSK_ERR_POPULATION_OUT_OF_BOUNDS)";
break;
case TSK_ERR_SITE_OUT_OF_BOUNDS:
ret = "Site out of bounds. (TSK_ERR_SITE_OUT_OF_BOUNDS)";
break;
case TSK_ERR_MUTATION_OUT_OF_BOUNDS:
ret = "Mutation out of bounds. (TSK_ERR_MUTATION_OUT_OF_BOUNDS)";
break;
case TSK_ERR_MIGRATION_OUT_OF_BOUNDS:
ret = "Migration out of bounds. (TSK_ERR_MIGRATION_OUT_OF_BOUNDS)";
break;
case TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS:
ret = "Individual out of bounds. (TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS)";
break;
case TSK_ERR_PROVENANCE_OUT_OF_BOUNDS:
ret = "Provenance out of bounds. (TSK_ERR_PROVENANCE_OUT_OF_BOUNDS)";
break;
case TSK_ERR_TIME_NONFINITE:
ret = "Times must be finite. (TSK_ERR_TIME_NONFINITE)";
break;
case TSK_ERR_GENOME_COORDS_NONFINITE:
ret = "Genome coordinates must be finite numbers. "
"(TSK_ERR_GENOME_COORDS_NONFINITE)";
break;
case TSK_ERR_SEEK_OUT_OF_BOUNDS:
ret = "Tree seek position out of bounds. (TSK_ERR_SEEK_OUT_OF_BOUNDS)";
break;
case TSK_ERR_KEEP_ROWS_MAP_TO_DELETED:
ret = "One of the kept rows in the table refers to a deleted row. "
"(TSK_ERR_KEEP_ROWS_MAP_TO_DELETED)";
break;
case TSK_ERR_POSITION_OUT_OF_BOUNDS:
ret = "Position out of bounds. (TSK_ERR_POSITION_OUT_OF_BOUNDS)";
break;
/* Edge errors */
case TSK_ERR_NULL_PARENT:
ret = "Edge parent is null. (TSK_ERR_NULL_PARENT)";
break;
case TSK_ERR_NULL_CHILD:
ret = "Edge child is null. (TSK_ERR_NULL_CHILD)";
break;
case TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME:
ret = "Edges must be listed in (time[parent], child, left) order;"
" time[parent] order violated. (TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME)";
break;
case TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS:
ret = "All edges for a given parent must be contiguous. "
"(TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS)";
break;
case TSK_ERR_EDGES_NOT_SORTED_CHILD:
ret = "Edges must be listed in (time[parent], child, left) order;"
" child order violated. (TSK_ERR_EDGES_NOT_SORTED_CHILD)";
break;
case TSK_ERR_EDGES_NOT_SORTED_LEFT:
ret = "Edges must be listed in (time[parent], child, left) order;"
" left order violated. (TSK_ERR_EDGES_NOT_SORTED_LEFT)";
break;
case TSK_ERR_BAD_NODE_TIME_ORDERING:
ret = "time[parent] must be greater than time[child]. "
"(TSK_ERR_BAD_NODE_TIME_ORDERING)";
break;
case TSK_ERR_BAD_EDGE_INTERVAL:
ret = "Bad edge interval where right <= left. (TSK_ERR_BAD_EDGE_INTERVAL)";
break;
case TSK_ERR_DUPLICATE_EDGES:
ret = "Duplicate edges provided. (TSK_ERR_DUPLICATE_EDGES)";
break;
case TSK_ERR_RIGHT_GREATER_SEQ_LENGTH:
ret = "Right coordinate > sequence length. "
"(TSK_ERR_RIGHT_GREATER_SEQ_LENGTH)";
break;
case TSK_ERR_LEFT_LESS_ZERO:
ret = "Left coordinate must be >= 0. (TSK_ERR_LEFT_LESS_ZERO)";
break;
case TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN:
ret = "Bad edges: contradictory children for a given parent over "
"an interval, or indexes need to be rebuilt. "
"(TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN)";
break;
case TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA:
ret = "Can't squash, flush, simplify or link ancestors with edges that have "
"non-empty metadata. Removing the metadata from the edges will allow "
"these operations to proceed. For example using "
"tables.edges.drop_metadata() in the tskit Python API. "
"(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA)";
break;
/* Site errors */
case TSK_ERR_UNSORTED_SITES:
ret = "Sites must be provided in strictly increasing position order. "
"(TSK_ERR_UNSORTED_SITES)";
break;
case TSK_ERR_DUPLICATE_SITE_POSITION:
ret = "Duplicate site positions. (TSK_ERR_DUPLICATE_SITE_POSITION)";
break;
case TSK_ERR_BAD_SITE_POSITION:
ret = "Site positions must be between 0 and sequence_length. "
"(TSK_ERR_BAD_SITE_POSITION)";
break;
/* Mutation errors */
case TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE:
ret = "Specified parent mutation is at a different site. "
"(TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE)";
break;
case TSK_ERR_MUTATION_PARENT_EQUAL:
ret = "Parent mutation refers to itself. (TSK_ERR_MUTATION_PARENT_EQUAL)";
break;
case TSK_ERR_MUTATION_PARENT_AFTER_CHILD:
ret = "Parent mutation ID must be < current ID. "
"(TSK_ERR_MUTATION_PARENT_AFTER_CHILD)";
break;
case TSK_ERR_MUTATION_PARENT_INCONSISTENT:
ret = "Mutation parent references form a loop. "
"(TSK_ERR_MUTATION_PARENT_INCONSISTENT)";
break;
case TSK_ERR_UNSORTED_MUTATIONS:
ret = "Mutations must be provided in non-decreasing site order and "
"non-increasing time order within each site. "
"(TSK_ERR_UNSORTED_MUTATIONS)";
break;
case TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE:
ret = "A mutation's time must be >= the node time, or be marked as "
"'unknown'. (TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE)";
break;
case TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION:
ret = "A mutation's time must be <= the parent mutation time (if known), or "
"be marked as 'unknown'. "
"(TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION)";
break;
case TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE:
ret = "A mutation's time must be < the parent node of the edge on which it "
"occurs, or be marked as 'unknown'. "
"(TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE)";
break;
case TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN:
ret = "Mutation times must either be all marked 'unknown', or all be known "
"values for any single site. "
"(TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN)";
break;
case TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME:
ret = "Some mutation times are marked 'unknown' for a method that requires "
"no unknown times. (Use compute_mutation_times to add times?) "
"(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME)";
break;
case TSK_ERR_BAD_MUTATION_PARENT:
ret = "A mutation's parent is not consistent with the topology of the tree. "
"Use compute_mutation_parents to set the parents correctly."
"(TSK_ERR_BAD_MUTATION_PARENT)";
break;
/* Migration errors */
case TSK_ERR_UNSORTED_MIGRATIONS:
ret = "Migrations must be sorted by time. (TSK_ERR_UNSORTED_MIGRATIONS)";
break;
/* Sample errors */
case TSK_ERR_DUPLICATE_SAMPLE:
ret = "Duplicate sample value. (TSK_ERR_DUPLICATE_SAMPLE)";
break;
case TSK_ERR_BAD_SAMPLES:
ret = "The nodes provided are not samples. (TSK_ERR_BAD_SAMPLES)";
break;
/* Table errors */
case TSK_ERR_BAD_TABLE_POSITION:
ret = "Bad table position provided to truncate/reset. "
"(TSK_ERR_BAD_TABLE_POSITION)";
break;
case TSK_ERR_BAD_SEQUENCE_LENGTH:
ret = "Sequence length must be > 0. (TSK_ERR_BAD_SEQUENCE_LENGTH)";
break;
case TSK_ERR_TABLES_NOT_INDEXED:
ret = "Table collection must be indexed. (TSK_ERR_TABLES_NOT_INDEXED)";
break;
case TSK_ERR_TABLES_BAD_INDEXES:
ret = "Table collection indexes inconsistent: do they need to be rebuilt? "
"(TSK_ERR_TABLES_BAD_INDEXES)";
break;
case TSK_ERR_TABLE_OVERFLOW:
ret = "Table too large; cannot allocate more than 2**31 rows. This error "
"is often caused by a lack of simplification when simulating. "
"(TSK_ERR_TABLE_OVERFLOW)";
break;
case TSK_ERR_COLUMN_OVERFLOW:
ret = "Table column too large; cannot be more than 2**64 bytes. "
"(TSK_ERR_COLUMN_OVERFLOW)";
break;
case TSK_ERR_TREE_OVERFLOW:
ret = "Too many trees; cannot be more than 2**31. (TSK_ERR_TREE_OVERFLOW)";
break;
case TSK_ERR_METADATA_DISABLED:
ret = "Metadata is disabled for this table, so cannot be set. "
"(TSK_ERR_METADATA_DISABLED)";
break;
/* Limitations */
case TSK_ERR_ONLY_INFINITE_SITES:
ret = "Only infinite sites mutations are supported for this operation, "
"i.e. at most a single mutation per site. "
"(TSK_ERR_ONLY_INFINITE_SITES)";
break;
case TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED:
ret = "Migrations not currently supported by simplify. "
"(TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED)";
break;
case TSK_ERR_SORT_MIGRATIONS_NOT_SUPPORTED:
ret = "Migrations not currently supported by sort. "
"(TSK_ERR_SORT_MIGRATIONS_NOT_SUPPORTED)";
break;
case TSK_ERR_SORT_OFFSET_NOT_SUPPORTED:
ret = "Sort offsets for sites and mutations must be either 0 "
"or the length of the respective tables. Intermediate values "
"are not supported. (TSK_ERR_SORT_OFFSET_NOT_SUPPORTED)";
break;
case TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED:
ret = "Only binary mutations are supported for this operation. "
"(TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED)";
break;
case TSK_ERR_MIGRATIONS_NOT_SUPPORTED:
ret = "Migrations not currently supported by this operation. "
"(TSK_ERR_MIGRATIONS_NOT_SUPPORTED)";
break;
case TSK_ERR_CANNOT_EXTEND_FROM_SELF:
ret = "Tables can only be extended using rows from a different table. "
"(TSK_ERR_CANNOT_EXTEND_FROM_SELF)";
break;
case TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED:
ret = "Silent mutations not supported by this operation. "
"(TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED)";
break;
case TSK_ERR_VARIANT_CANT_DECODE_COPY:
ret = "Can't decode a copy of a variant. (TSK_ERR_VARIANT_CANT_DECODE_COPY)";
break;
case TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA:
ret = "A tree sequence can't take ownership of tables with "
"TSK_NO_EDGE_METADATA. (TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA)";
break;
case TSK_ERR_UNDEFINED_NONBINARY:
ret = "Operation undefined for nonbinary trees. "
"(TSK_ERR_UNDEFINED_NONBINARY)";
break;
case TSK_ERR_UNDEFINED_MULTIROOT:
ret = "Operation undefined for trees that are not singly-rooted. "
"(TSK_ERR_UNDEFINED_MULTIROOT)";
break;
/* Stats errors */
case TSK_ERR_BAD_NUM_WINDOWS:
ret = "Must have at least one window, [0, L]. (TSK_ERR_BAD_NUM_WINDOWS)";
break;
case TSK_ERR_BAD_WINDOWS:
ret = "Windows must be increasing list [0, ..., L]. (TSK_ERR_BAD_WINDOWS)";
break;
case TSK_ERR_MULTIPLE_STAT_MODES:
ret = "Cannot specify more than one stats mode. "
"(TSK_ERR_MULTIPLE_STAT_MODES)";
break;
case TSK_ERR_BAD_STATE_DIMS:
ret = "Must have state dimension >= 1. (TSK_ERR_BAD_STATE_DIMS)";
break;
case TSK_ERR_BAD_RESULT_DIMS:
ret = "Must have result dimension >= 1. (TSK_ERR_BAD_RESULT_DIMS)";
break;
case TSK_ERR_INSUFFICIENT_SAMPLE_SETS:
ret = "Insufficient sample sets provided. "
"(TSK_ERR_INSUFFICIENT_SAMPLE_SETS)";
break;
case TSK_ERR_INSUFFICIENT_INDEX_TUPLES:
ret = "Insufficient sample set index tuples provided. "
"(TSK_ERR_INSUFFICIENT_INDEX_TUPLES)";
break;
case TSK_ERR_BAD_SAMPLE_SET_INDEX:
ret = "Sample set index out of bounds. (TSK_ERR_BAD_SAMPLE_SET_INDEX)";
break;
case TSK_ERR_EMPTY_SAMPLE_SET:
ret = "Samples cannot be empty. (TSK_ERR_EMPTY_SAMPLE_SET)";
break;
case TSK_ERR_UNSUPPORTED_STAT_MODE:
ret = "Requested statistics mode not supported for this method. "
"(TSK_ERR_UNSUPPORTED_STAT_MODE)";
break;
case TSK_ERR_TIME_UNCALIBRATED:
ret = "Statistics using branch lengths cannot be calculated when time_units "
"is 'uncalibrated'. (TSK_ERR_TIME_UNCALIBRATED)";
break;
case TSK_ERR_STAT_POLARISED_UNSUPPORTED:
ret = "The TSK_STAT_POLARISED option is not supported by this statistic. "
"(TSK_ERR_STAT_POLARISED_UNSUPPORTED)";
break;
case TSK_ERR_STAT_SPAN_NORMALISE_UNSUPPORTED:
ret = "The TSK_STAT_SPAN_NORMALISE option is not supported by this "
"statistic. "
"(TSK_ERR_STAT_SPAN_NORMALISE_UNSUPPORTED)";
break;
case TSK_ERR_INSUFFICIENT_WEIGHTS:
ret = "Insufficient weights provided (at least 1 required). "
"(TSK_ERR_INSUFFICIENT_WEIGHTS)";
break;
/* Pair coalescence errors */
case TSK_ERR_BAD_NODE_BIN_MAP:
ret = "Node-to-bin map contains values less than TSK_NULL. "
"(TSK_ERR_BAD_NODE_BIN_MAP)";
break;
case TSK_ERR_BAD_NODE_BIN_MAP_DIM:
ret = "Maximum index in node-to-bin map is greater than the "
"output dimension. (TSK_ERR_BAD_NODE_BIN_MAP_DIM)";
break;
case TSK_ERR_BAD_QUANTILES:
ret = "Quantiles must be between 0 and 1 (inclusive) "
"and strictly increasing. (TSK_ERR_BAD_QUANTILES)";
break;
case TSK_ERR_UNSORTED_TIMES:
ret = "Times must be strictly increasing. (TSK_ERR_UNSORTED_TIMES)";
break;
case TSK_ERR_BAD_TIME_WINDOWS_DIM:
ret = "Must have at least one time window. (TSK_ERR_BAD_TIME_WINDOWS_DIM)";
break;
case TSK_ERR_BAD_SAMPLE_PAIR_TIMES:
ret = "All sample times must be equal to the start of first time window. "
"(TSK_ERR_BAD_SAMPLE_PAIR_TIMES)";
break;
case TSK_ERR_BAD_TIME_WINDOWS:
ret = "Time windows must start at zero and be strictly increasing. "
"(TSK_ERR_BAD_TIME_WINDOWS)";
break;
case TSK_ERR_BAD_TIME_WINDOWS_END:
ret = "Time windows must end at infinity for this method. "
"(TSK_ERR_BAD_TIME_WINDOWS_END)";
break;
case TSK_ERR_BAD_NODE_TIME_WINDOW:
ret = "Node time does not fall within assigned time window. "
"(TSK_ERR_BAD_NODE_TIME_WINDOW)";
break;
/* Two locus errors */
case TSK_ERR_STAT_UNSORTED_POSITIONS:
ret = "The provided positions are not sorted in strictly increasing "
"order. (TSK_ERR_STAT_UNSORTED_POSITIONS)";
break;
case TSK_ERR_STAT_DUPLICATE_POSITIONS:
ret = "The provided positions contain duplicates. "
"(TSK_ERR_STAT_DUPLICATE_POSITIONS)";
break;
case TSK_ERR_STAT_UNSORTED_SITES:
ret = "The provided sites are not sorted in strictly increasing position "
"order. (TSK_ERR_STAT_UNSORTED_SITES)";
break;
case TSK_ERR_STAT_DUPLICATE_SITES:
ret = "The provided sites contain duplicated entries. "
"(TSK_ERR_STAT_DUPLICATE_SITES)";
break;
/* Mutation mapping errors */
case TSK_ERR_GENOTYPES_ALL_MISSING:
ret = "Must provide at least one non-missing genotype. "
"(TSK_ERR_GENOTYPES_ALL_MISSING)";
break;
case TSK_ERR_BAD_GENOTYPE:
ret = "Bad genotype value provided. (TSK_ERR_BAD_GENOTYPE)";
break;
case TSK_ERR_BAD_ANCESTRAL_STATE:
ret = "Bad ancestral state specified. (TSK_ERR_BAD_ANCESTRAL_STATE)";
break;
/* Genotype decoding errors */
case TSK_ERR_MUST_IMPUTE_NON_SAMPLES:
ret = "Cannot generate genotypes for non-samples when isolated nodes are "
"considered as missing. (TSK_ERR_MUST_IMPUTE_NON_SAMPLES)";
break;
case TSK_ERR_ALLELE_NOT_FOUND:
ret = "An allele was not found in the user-specified allele map. "
"(TSK_ERR_ALLELE_NOT_FOUND)";
break;
case TSK_ERR_TOO_MANY_ALLELES:
ret = "Cannot have more than 2147483647 alleles (TSK_ERR_TOO_MANY_ALLELES)";
break;
case TSK_ERR_ZERO_ALLELES:
ret = "Must have at least one allele when specifying an allele map. "
"(TSK_ERR_ZERO_ALLELES)";
break;
case TSK_ERR_BAD_ALLELE_LENGTH:
ret = "Alleles used when decoding alignments must have length one. "
"(TSK_ERR_BAD_ALLELE_LENGTH)";
break;
case TSK_ERR_MISSING_CHAR_COLLISION:
ret = "Alleles used when decoding alignments must not match the missing "
"data character. (TSK_ERR_MISSING_CHAR_COLLISION)";
break;
/* Distance metric errors */
case TSK_ERR_SAMPLE_SIZE_MISMATCH:
ret = "Cannot compare trees with different numbers of samples. "
"(TSK_ERR_SAMPLE_SIZE_MISMATCH)";
break;
case TSK_ERR_SAMPLES_NOT_EQUAL:
ret = "Samples must be identical in trees to compare. "
"(TSK_ERR_SAMPLES_NOT_EQUAL)";
break;
case TSK_ERR_MULTIPLE_ROOTS:
ret = "Trees with multiple roots not supported. (TSK_ERR_MULTIPLE_ROOTS)";
break;
case TSK_ERR_UNARY_NODES:
ret = "Unsimplified trees with unary nodes are not supported. "
"(TSK_ERR_UNARY_NODES)";
break;
case TSK_ERR_SEQUENCE_LENGTH_MISMATCH:
ret = "Sequence lengths must be identical to compare. "
"(TSK_ERR_SEQUENCE_LENGTH_MISMATCH)";
break;
case TSK_ERR_NO_SAMPLE_LISTS:
ret = "The sample_lists option must be enabled on the tree to perform this "
"operation. Pass the option to the constructor or method that created "
"the tree. (TSK_ERR_NO_SAMPLE_LISTS)";
break;
/* Haplotype matching errors */
case TSK_ERR_NULL_VITERBI_MATRIX:
ret = "Viterbi matrix has not filled. (TSK_ERR_NULL_VITERBI_MATRIX)";
break;
case TSK_ERR_MATCH_IMPOSSIBLE:
ret = "No matching haplotype exists with current parameters. "
"(TSK_ERR_MATCH_IMPOSSIBLE)";
break;
case TSK_ERR_BAD_COMPRESSED_MATRIX_NODE:
ret = "The compressed matrix contains a node that subtends no samples. "
"(TSK_ERR_BAD_COMPRESSED_MATRIX_NODE)";
break;
case TSK_ERR_TOO_MANY_VALUES:
ret = "Too many values to compress. (TSK_ERR_TOO_MANY_VALUES)";
break;
/* Union errors */
case TSK_ERR_UNION_BAD_MAP:
ret = "Node map contains an entry of a node not present in this table "
"collection. (TSK_ERR_UNION_BAD_MAP)";
break;
case TSK_ERR_UNION_DIFF_HISTORIES:
// histories could be equivalent, because subset does not reorder
// edges (if not sorted) or mutations.
ret = "Shared portions of the tree sequences are not equal. "
"(TSK_ERR_UNION_DIFF_HISTORIES)";
break;
/* IBD errors */
case TSK_ERR_SAME_NODES_IN_PAIR:
ret = "Both nodes in the sample pair are the same. "
"(TSK_ERR_SAME_NODES_IN_PAIR)";
break;
case TSK_ERR_IBD_PAIRS_NOT_STORED:
ret = "The sample pairs are not stored by default in ibd_segments. Please "
"add the TSK_IBD_STORE_PAIRS option flag if per-pair statistics are "
"required. (TSK_ERR_IBD_PAIRS_NOT_STORED)";
break;
case TSK_ERR_IBD_SEGMENTS_NOT_STORED:
ret = "All segments are not stored by default in ibd_segments. Please "
"add the TSK_IBD_STORE_SEGMENTS option flag if they are required. "
"(TSK_ERR_IBD_SEGMENTS_NOT_STORED)";
break;
/* Simplify errors */
case TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE:
ret = "You cannot specify both TSK_SIMPLIFY_KEEP_UNARY and "
"TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVDUALS. "
"(TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE)";
break;
/* Individual errors */
case TSK_ERR_UNSORTED_INDIVIDUALS:
ret = "Individuals must be provided in an order where children are after "
"their parent individuals (TSK_ERR_UNSORTED_INDIVIDUALS)";
break;
case TSK_ERR_INDIVIDUAL_SELF_PARENT:
ret = "Individuals cannot be their own parents. "
"(TSK_ERR_INDIVIDUAL_SELF_PARENT)";
break;
case TSK_ERR_INDIVIDUAL_PARENT_CYCLE:
ret = "Individuals cannot be their own ancestor. "
"(TSK_ERR_INDIVIDUAL_PARENT_CYCLE)";
break;
case TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH:
ret = "Individual populations cannot be returned "
"if an individual has nodes from more than one population. "
"(TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH)";
break;
case TSK_ERR_INDIVIDUAL_TIME_MISMATCH:
ret = "Individual times cannot be returned "
"if an individual has nodes from more than one time. "
"(TSK_ERR_INDIVIDUAL_TIME_MISMATCH)";
break;
case TSK_ERR_EXTEND_EDGES_BAD_MAXITER:
ret = "Maximum number of iterations must be positive. "
"(TSK_ERR_EXTEND_EDGES_BAD_MAXITER)";
break;
}
return ret;
}
/* Translate a kastore error code into the tskit error code space.
 *
 * KAS_ERR_IO is reported as TSK_ERR_IO so that IO failures share a single
 * code. Every other code has the TSK_KAS_ERR_BIT bit flipped; as the input
 * is negative this clears the bit, marking the value as a kastore error.
 */
int
tsk_set_kas_error(int err)
{
    int ret;

    if (err != KAS_ERR_IO) {
        ret = err ^ (1 << TSK_KAS_ERR_BIT);
    } else {
        ret = TSK_ERR_IO;
    }
    return ret;
}
/* Return true when the TSK_KAS_ERR_BIT bit of `err` is clear, which is how
 * tsk_set_kas_error marks codes that originated in kastore. */
bool
tsk_is_kas_error(int err)
{
    return (err & (1 << TSK_KAS_ERR_BIT)) == 0;
}
/* Recover the original kastore error code by flipping the TSK_KAS_ERR_BIT
 * bit back, undoing the transformation applied by tsk_set_kas_error. */
int
tsk_get_kas_error(int err)
{
    const int kas_bit = 1 << TSK_KAS_ERR_BIT;

    return err ^ kas_bit;
}
/* Return a description of the error code `err`.
 *
 * Zero and native tskit codes are described by tsk_strerror_internal;
 * non-zero codes flagged as kastore errors are decoded and passed to
 * kas_strerror.
 */
const char *
tsk_strerror(int err)
{
    if (err == 0 || !tsk_is_kas_error(err)) {
        return tsk_strerror_internal(err);
    }
    return kas_strerror(tsk_get_kas_error(err));
}
/* Free the memory referred to by *ptr and reset *ptr to NULL.
 * Does nothing when ptr itself is NULL or when *ptr is already NULL. */
void
__tsk_safe_free(void **ptr)
{
    if (ptr == NULL || *ptr == NULL) {
        return;
    }
    free(*ptr);
    *ptr = NULL;
}
/* Block allocator. Simple allocator for when we need lots of chunks of
 * memory and don't need to free them individually.
 */
/* Write the allocator's counters to `out`, one per line, for debugging. */
void
tsk_blkalloc_print_state(tsk_blkalloc_t *self, FILE *out)
{
    const long long top = (long long) self->top;
    const long long chunk_size = (long long) self->chunk_size;
    const long long num_chunks = (long long) self->num_chunks;
    const long long total_allocated = (long long) self->total_allocated;
    const long long total_size = (long long) self->total_size;

    fprintf(out, "Block allocator%p::\n", (void *) self);
    fprintf(out, "\ttop = %lld\n", top);
    fprintf(out, "\tchunk_size = %lld\n", chunk_size);
    fprintf(out, "\tnum_chunks = %lld\n", num_chunks);
    fprintf(out, "\ttotal_allocated = %lld\n", total_allocated);
    fprintf(out, "\ttotal_size = %lld\n", total_size);
}
/* Rewind the allocator to the start of its first chunk. No memory is
 * released: existing chunks stay allocated and are reused by later calls
 * to tsk_blkalloc_get. Always returns 0. */
int TSK_WARN_UNUSED
tsk_blkalloc_reset(tsk_blkalloc_t *self)
{
    self->total_allocated = 0;
    self->current_chunk = 0;
    self->top = 0;
    return 0;
}
/* Initialise a block allocator that hands out memory from chunks of
 * `chunk_size` bytes, allocating the first chunk immediately.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE if chunk_size is zero, or
 * TSK_ERR_NO_MEMORY if an allocation fails. The structure is zeroed first,
 * so it is in a state tsk_blkalloc_free accepts even on error.
 */
int TSK_WARN_UNUSED
tsk_blkalloc_init(tsk_blkalloc_t *self, size_t chunk_size)
{
    int ret = 0;

    /* Zeroes top, current_chunk, num_chunks and both totals. */
    tsk_memset(self, 0, sizeof(tsk_blkalloc_t));
    if (chunk_size == 0) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    self->chunk_size = chunk_size;

    /* The chunk table starts with room for a single chunk pointer. */
    self->mem_chunks = malloc(sizeof(char *));
    if (self->mem_chunks == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    self->mem_chunks[0] = malloc(chunk_size);
    if (self->mem_chunks[0] == NULL) {
        /* num_chunks stays 0, so only the table itself is freed later. */
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    self->total_size = chunk_size + sizeof(void *);
    self->num_chunks = 1;
out:
    return ret;
}
/* Return a pointer to `size` bytes carved out of the current chunk.
 *
 * When the current chunk cannot hold the request the allocator moves on to
 * the next chunk, allocating a new one if the current chunk is the last.
 * Returns NULL if `size` exceeds the chunk size or if memory cannot be
 * obtained.
 */
void *TSK_WARN_UNUSED
tsk_blkalloc_get(tsk_blkalloc_t *self, size_t size)
{
    void *block;
    void *fresh;

    if (size > self->chunk_size) {
        return NULL;
    }
    if ((self->top + size) > self->chunk_size) {
        if (self->current_chunk == (self->num_chunks - 1)) {
            /* Out of chunks: grow the table by one slot, then fill it. */
            fresh = realloc(self->mem_chunks, (self->num_chunks + 1) * sizeof(void *));
            if (fresh == NULL) {
                return NULL;
            }
            self->mem_chunks = fresh;
            fresh = malloc(self->chunk_size);
            if (fresh == NULL) {
                return NULL;
            }
            self->mem_chunks[self->num_chunks] = fresh;
            self->num_chunks++;
            self->total_size += self->chunk_size + sizeof(void *);
        }
        /* Start handing out memory from the beginning of the next chunk. */
        self->current_chunk++;
        self->top = 0;
    }
    block = self->mem_chunks[self->current_chunk] + self->top;
    self->top += size;
    self->total_allocated += size;
    return block;
}
/* Release every chunk owned by the allocator and then the chunk table. */
void
tsk_blkalloc_free(tsk_blkalloc_t *self)
{
    size_t chunk;

    /* free(NULL) is a no-op, so unset entries need no special handling. */
    for (chunk = 0; chunk < self->num_chunks; chunk++) {
        free(self->mem_chunks[chunk]);
    }
    free(self->mem_chunks);
}
/* Binary search over a sorted array, modelled on numpy's searchsorted.
 *
 * Returns 0 for an empty array. Otherwise the search settles on a position
 * `lo` (the last probed element that is <= value, or 0 if there is none)
 * and returns `lo` when array[lo] >= value, or `lo + 1` when
 * array[lo] < value.
 */
tsk_size_t
tsk_search_sorted(const double *restrict array, tsk_size_t size, double value)
{
    int64_t lo = 0;
    int64_t hi = (int64_t) size;
    int64_t probe;

    if (hi == 0) {
        return 0;
    }
    while (hi - lo > 1) {
        probe = (hi + lo) / 2;
        if (array[probe] <= value) {
            lo = probe;
        } else {
            hi = probe;
        }
    }
    if (array[lo] < value) {
        lo++;
    }
    return (tsk_size_t) lo;
}
/* Rounds the specified double to the closest multiple of 10**-num_digits. If
* num_digits > 22, return value without changes. This is intended for use with
* small positive numbers; behaviour with large inputs has not been considered.
*
* Based on double_round from the Python standard library
* https://github.com/python/cpython/blob/master/Objects/floatobject.c#L985
*/
double
tsk_round(double x, unsigned int ndigits)
{
double pow1, y, z;
z = x;
if (ndigits < 22) {
pow1 = pow(10.0, (double) ndigits);
y = x * pow1;
z = round(y);
if (fabs(y - z) == 0.5) {
/* halfway between two integers; use round-half-even */
z = 2.0 * round(y / 2.0);
}
z = z / pow1;
}
return z;
}
/* NaNs never compare equal, so the unknown-time marker is detected by
 * comparing the raw 64-bit pattern of `val` with TSK_UNKNOWN_TIME_HEX. */
bool
tsk_is_unknown_time(double val)
{
    union {
        double as_double;
        uint64_t as_bits;
    } pun;

    pun.as_double = val;
    return pun.as_bits == TSK_UNKNOWN_TIME_HEX;
}
/* Work around a bug which seems to show up on various mixtures of
 * compiler and libc versions, where isfinite and isnan result in
 * spurious warnings about casting down to float. The original issue
 * is here:
 * https://github.com/tskit-dev/tskit/issues/721
 *
 * The approach taken is to use the compiler builtins where __GNUC__ is
 * defined (gcc and clang), and the library macros otherwise.
 */

/* Return true if `val` is a NaN. */
bool
tsk_isnan(double val)
{
    bool is_nan;

#ifdef __GNUC__
    is_nan = __builtin_isnan(val) != 0;
#else
    is_nan = isnan(val) != 0;
#endif
    return is_nan;
}
/* Return true if `val` is neither infinite nor a NaN. Uses the compiler
 * builtin when __GNUC__ is defined and the library macro otherwise. */
bool
tsk_isfinite(double val)
{
    bool is_finite;

#ifdef __GNUC__
    is_finite = __builtin_isfinite(val) != 0;
#else
    is_finite = isfinite(val) != 0;
#endif
    return is_finite;
}
/* malloc wrapper taking a tsk_size_t.
 *
 * A request for zero bytes is turned into a request for one byte, because
 * malloc(0) is not portable. Where tsk_size_t can hold values larger than
 * SIZE_MAX, such requests fail with NULL rather than being truncated.
 */
void *
tsk_malloc(tsk_size_t size)
{
    const tsk_size_t nbytes = size == 0 ? 1 : size;

#if TSK_MAX_SIZE > SIZE_MAX
    if (nbytes > SIZE_MAX) {
        return NULL;
    }
#endif
    return malloc((size_t) nbytes);
}
/* realloc wrapper taking a tsk_size_t.
 *
 * Reallocating to a zero size is treated as a bug. Where tsk_size_t can
 * hold values larger than SIZE_MAX, such requests fail with NULL (leaving
 * `ptr` untouched) rather than being silently truncated by the cast, in
 * line with tsk_malloc and tsk_calloc.
 */
void *
tsk_realloc(void *ptr, tsk_size_t size)
{
    /* We shouldn't ever realloc to a zero size in tskit */
    tsk_bug_assert(size > 0);
#if TSK_MAX_SIZE > SIZE_MAX
    if (size > SIZE_MAX) {
        return NULL;
    }
#endif
    return realloc(ptr, (size_t) size);
}
/* calloc wrapper. The element size stays a size_t because callers pass
 * sizeof() results directly and casting those would hurt readability; code
 * in the library must therefore use calloc in a way that cannot overflow
 * `size` on 32 bit.
 *
 * A count of zero is turned into one, because calloc(0) is not portable.
 * Where tsk_size_t can hold values larger than SIZE_MAX, such counts fail
 * with NULL.
 */
void *
tsk_calloc(tsk_size_t n, size_t size)
{
    const tsk_size_t count = n == 0 ? 1 : n;

#if TSK_MAX_SIZE > SIZE_MAX
    if (count > SIZE_MAX) {
        return NULL;
    }
#endif
    return calloc((size_t) count, size);
}
/* memset wrapper that accepts the byte count as a tsk_size_t. */
void *
tsk_memset(void *ptr, int fill, tsk_size_t size)
{
    const size_t nbytes = (size_t) size;

    return memset(ptr, fill, nbytes);
}
/* memcpy wrapper that accepts the byte count as a tsk_size_t. */
void *
tsk_memcpy(void *dest, const void *src, tsk_size_t size)
{
    const size_t nbytes = (size_t) size;

    return memcpy(dest, src, nbytes);
}
/* memmove wrapper that accepts the byte count as a tsk_size_t. */
void *
tsk_memmove(void *dest, const void *src, tsk_size_t size)
{
    const size_t nbytes = (size_t) size;

    return memmove(dest, src, nbytes);
}
/* memcmp wrapper that accepts the byte count as a tsk_size_t. */
int
tsk_memcmp(const void *s1, const void *s2, tsk_size_t size)
{
    const size_t nbytes = (size_t) size;

    return memcmp(s1, s2, nbytes);
}
/* Stream that debug output is written to.
 *
 * We can't initialise the stream to its real default value because
 * of limitations on static initialisers. To work around this, we initialise
 * it to NULL and then set the value to the required standard stream
 * when called: tsk_get_debug_stream replaces a NULL value with
 * TSK_DEFAULT_DEBUG_STREAM on first use. */
FILE *_tsk_debug_stream = NULL;
/* Set the stream returned by tsk_get_debug_stream. Passing NULL makes the
 * next call to tsk_get_debug_stream fall back to the default stream. */
void
tsk_set_debug_stream(FILE *f)
{
    _tsk_debug_stream = f;
}
/* Return the current debug stream. If none has been set, the stream is
 * first set to TSK_DEFAULT_DEBUG_STREAM and that value is returned. */
FILE *
tsk_get_debug_stream(void)
{
    FILE *stream = _tsk_debug_stream;

    if (stream == NULL) {
        stream = TSK_DEFAULT_DEBUG_STREAM;
        _tsk_debug_stream = stream;
    }
    return stream;
}
/* AVL Tree implementation. This is based directly on Knuth's implementation
* in TAOCP. See the python/tests/test_avl_tree.py for more information,
* and equivalent code annotated with the original algorithm listing.
*/
/* Recursively print `node` and its subtrees to `out`: the node itself
 * first, then the left subtree, then the right. Each line is prefixed with
 * one indent string per level of `depth`. A NULL node prints nothing. */
static void
tsk_avl_tree_int_print_node(tsk_avl_node_int_t *node, int depth, FILE *out)
{
    int level = 0;

    if (node != NULL) {
        while (level < depth) {
            fprintf(out, " ");
            level++;
        }
        fprintf(out, "key=%d balance=%d\n", (int) node->key, node->balance);
        tsk_avl_tree_int_print_node(node->llink, depth + 1, out);
        tsk_avl_tree_int_print_node(node->rlink, depth + 1, out);
    }
}
/* Print the size and height of the tree followed by a listing of all of
 * its nodes, starting from the root stored in head.rlink. */
void
tsk_avl_tree_int_print_state(tsk_avl_tree_int_t *self, FILE *out)
{
    const int size = (int) self->size;
    const int height = (int) self->height;

    fprintf(out, "AVL tree: size=%d height=%d\n", size, height);
    tsk_avl_tree_int_print_node(self->head.rlink, 0, out);
}
/* Initialise an empty tree by zeroing the whole structure. Always
 * returns 0. */
int
tsk_avl_tree_int_init(tsk_avl_tree_int_t *self)
{
    int ret = 0;

    memset(self, 0, sizeof(tsk_avl_tree_int_t));
    return ret;
}
/* Counterpart of tsk_avl_tree_int_init. The argument is unused and nothing
 * is released here. Always returns 0. */
int
tsk_avl_tree_int_free(tsk_avl_tree_int_t *TSK_UNUSED(self))
{
    int ret = 0;

    return ret;
}
/* Return the root node of the tree, which is stored in the right link of
 * the head node. The result is whatever head.rlink currently holds. */
tsk_avl_node_int_t *
tsk_avl_tree_int_get_root(const tsk_avl_tree_int_t *self)
{
    tsk_avl_node_int_t *root = self->head.rlink;

    return root;
}
/* Look up `key` in the tree. Returns the node holding the key, or NULL if
 * no node has that key. */
tsk_avl_node_int_t *
tsk_avl_tree_int_search(const tsk_avl_tree_int_t *self, int64_t key)
{
    tsk_avl_node_int_t *cur = self->head.rlink;

    /* Standard binary search tree descent: smaller keys are to the left. */
    while (cur != NULL && cur->key != key) {
        cur = key < cur->key ? cur->llink : cur->rlink;
    }
    return cur;
}
/* Insert `node` as the root of an empty tree. The node becomes a leaf with
 * a balance of zero and the tree gets size 1 and height 1. Always
 * returns 0. */
static int
tsk_avl_tree_int_insert_empty(tsk_avl_tree_int_t *self, tsk_avl_node_int_t *node)
{
    node->balance = 0;
    node->rlink = NULL;
    node->llink = NULL;

    self->height = 1;
    self->size = 1;
    self->head.rlink = node;
    return 0;
}
/* Direction-parameterised link accessors used by the AVL insertion code.
 * The direction a is -1 for the left link (llink); any other value selects
 * the right link (rlink).
 *
 * get_link(a, P) evaluates to the selected child pointer of P.
 * set_link(a, P, val) assigns val to the selected child pointer of P.
 *
 * set_link expands to a single statement WITHOUT a trailing semicolon, so
 * the caller's own ';' terminates it and it can be used safely as the body
 * of an unbraced if/else.
 */
#define get_link(a, P) ((a) == -1 ? (P)->llink : (P)->rlink)
#define set_link(a, P, val)                                                             \
    do {                                                                                \
        if ((a) == -1) {                                                                \
            (P)->llink = (val);                                                         \
        } else {                                                                        \
            (P)->rlink = (val);                                                         \
        }                                                                               \
    } while (0)
/* Insert node into a tree that already contains at least one node, keeping
 * the tree balanced.
 *
 * The variable names follow Knuth's balanced tree insertion algorithm
 * (TAOCP vol. 3, section 6.2.3, Algorithm A), and the step labels in the
 * comments below refer to that listing:
 *   K - the key being inserted
 *   P - the node currently being examined
 *   Q - the child of P in the search direction; ends up as the new node
 *   S - the lowest node on the search path with a non-zero balance factor
 *       (or the root), i.e. where rebalancing may be needed
 *   T - the parent of S (initially the header node)
 *   R - the child of S on the side where the insertion happened
 *   a - the side of S where the insertion happened: -1 left, +1 right
 *
 * Returns 0 on success. Returns 1, leaving the tree unchanged, if a node
 * with the same key is already present.
 */
static int
tsk_avl_tree_int_insert_non_empty(tsk_avl_tree_int_t *self, tsk_avl_node_int_t *node)
{
const int64_t K = node->key;
/* A1: start at the root, with the header acting as the root's parent. */
tsk_avl_node_int_t *T = &self->head;
tsk_avl_node_int_t *S = T->rlink;
tsk_avl_node_int_t *P = T->rlink;
tsk_avl_node_int_t *Q, *R;
int a;
/* A2-A4: descend to the insertion point, remembering in S (and its parent
 * T) the last node passed whose balance factor is non-zero. */
while (true) {
if (K == P->key) {
/* TODO figure out what the most useful semantics are here. Just
* returning 1 as a non-zero value for now. */
return 1;
} else if (K < P->key) {
Q = P->llink;
if (Q == NULL) {
/* Empty left slot: attach the new node here. */
Q = node;
P->llink = Q;
break;
}
} else {
Q = P->rlink;
if (Q == NULL) {
/* Empty right slot: attach the new node here. */
Q = node;
P->rlink = Q;
break;
}
}
if (Q->balance != 0) {
T = P;
S = Q;
}
P = Q;
}
/* A5: the new node is a balanced leaf. */
self->size++;
Q->llink = NULL;
Q->rlink = NULL;
Q->balance = 0;
/* A6: every node strictly between S and the new node had balance 0, so
 * each now leans towards the side on which the insertion happened. */
if (K < S->key) {
a = -1;
} else {
a = 1;
}
P = get_link(a, S);
R = P;
while (P != Q) {
if (K < P->key) {
P->balance = -1;
P = P->llink;
} else if (K > P->key) {
P->balance = 1;
P = P->rlink;
}
}
/* A7: decide whether the subtree rooted at S needs rebalancing. */
if (S->balance == 0) {
/* S was balanced and now leans towards a: the whole tree grew taller. */
S->balance = a;
self->height++;
} else if (S->balance == -a) {
/* S leaned the other way, so the insertion has evened it out. */
S->balance = 0;
} else {
/* S already leaned towards a and is now out of balance: rotate. */
if (R->balance == a) {
/* A8: single rotation; R replaces S as the subtree root. */
P = R;
set_link(a, S, get_link(-a, R));
set_link(-a, R, S);
S->balance = 0;
R->balance = 0;
} else if (R->balance == -a) {
/* A9: double rotation; R's inner child P becomes the subtree root. */
P = get_link(-a, R);
set_link(-a, R, get_link(a, P));
set_link(a, P, R);
set_link(a, S, get_link(-a, P));
set_link(-a, P, S);
/* The new balance factors of S and R depend on which way P leaned. */
if (P->balance == a) {
S->balance = -a;
R->balance = 0;
} else if (P->balance == 0) {
S->balance = 0;
R->balance = 0;
} else {
S->balance = 0;
R->balance = a;
}
P->balance = 0;
}
/* A10: hook the new subtree root P into S's former parent T. */
if (S == T->rlink) {
T->rlink = P;
} else {
T->llink = P;
}
}
return 0;
}
/* Insert the caller-supplied node, whose key must already be set, into the
 * tree. The node is linked in directly; its link and balance fields are
 * overwritten when it is inserted.
 *
 * Returns 0 on success, or 1 if a node with the same key already exists
 * (in which case the tree is left unchanged).
 */
int
tsk_avl_tree_int_insert(tsk_avl_tree_int_t *self, tsk_avl_node_int_t *node)
{
    if (self->size == 0) {
        return tsk_avl_tree_int_insert_empty(self, node);
    }
    return tsk_avl_tree_int_insert_non_empty(self, node);
}
/* Write the nodes of the subtree rooted at node into out, in sorted key
 * order, starting at position index. Returns the next free position in out.
 *
 * An inorder traversal of the nodes in an AVL tree (or any binary search
 * tree) yields the keys in sorted order. The recursive implementation is
 * safe here because this is an AVL tree: it is strictly balanced, so the
 * depth is very limited. Using GCC's __builtin_frame_address it looks like
 * the size of a stack frame for this function is 48 bytes. Assuming a stack
 * size of 1MiB, this would give us a maximum tree depth of 21845 - so, we're
 * pretty safe.
 */
static int
ordered_nodes_traverse(tsk_avl_node_int_t *node, int index, tsk_avl_node_int_t **out)
{
    int next = index;

    if (node != NULL) {
        /* Left subtree first, then this node, then the right subtree. */
        next = ordered_nodes_traverse(node->llink, next, out);
        out[next] = node;
        next = ordered_nodes_traverse(node->rlink, next + 1, out);
    }
    return next;
}
/* Fill out with pointers to every node in the tree, in increasing key order
 * (an inorder traversal starting from the root).
 *
 * One entry is written per node reached from the root, starting at out[0];
 * no bound on out is checked here, so the caller must supply an array large
 * enough for every node in the tree. Always returns 0.
 */
int
tsk_avl_tree_int_ordered_nodes(const tsk_avl_tree_int_t *self, tsk_avl_node_int_t **out)
{
ordered_nodes_traverse(self->head.rlink, 0, out);
return 0;
}
// Bit Array implementation. Allows us to store unsigned integers in a compact manner.
// Currently implemented as an array of 32-bit unsigned integers.
/* Initialise a bitset holding length rows, each able to store num_bits bits.
 *
 * Each row occupies row_len words, which is num_bits / TSK_BITSET_BITS
 * rounded up. The storage for all rows is allocated in one zero-initialised
 * block, so every bit starts cleared.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if the allocation fails.
 */
int
tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length)
{
    const tsk_size_t full_words = num_bits / TSK_BITSET_BITS;
    /* One extra word is needed when num_bits does not fill a whole word. */
    const tsk_size_t partial_word = (num_bits % TSK_BITSET_BITS) ? 1 : 0;

    self->row_len = full_words + partial_word;
    self->len = length;
    self->data = tsk_calloc(self->row_len * length, sizeof(*self->data));
    if (self->data == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    return 0;
}
/* Pointer to the first word of the given row: rows are stored back to back
 * in bs->data, each occupying bs->row_len words. */
#define BITSET_DATA_ROW(bs, row) ((bs)->data + (row) * (bs)->row_len)
/* Compute the bitwise AND of row self_row of self and row other_row of
 * other, writing the result to the start of out's data (i.e. out's first
 * row). The number of words processed is self's row length. The restrict
 * qualifiers mean the three word ranges must not overlap.
 */
void
tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
    const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out)
{
    const tsk_bitset_val_t *restrict lhs = BITSET_DATA_ROW(self, self_row);
    const tsk_bitset_val_t *restrict rhs = BITSET_DATA_ROW(other, other_row);
    tsk_bitset_val_t *restrict dest = out->data;
    tsk_size_t word;

    for (word = 0; word < self->row_len; word++) {
        dest[word] = lhs[word] & rhs[word];
    }
}
/* Remove from row self_row of self every bit that is set in row other_row
 * of other (set difference, in place). The number of words processed is
 * self's row length. The restrict qualifiers mean the two rows must not
 * overlap.
 */
void
tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
    tsk_size_t other_row)
{
    tsk_bitset_val_t *restrict target = BITSET_DATA_ROW(self, self_row);
    const tsk_bitset_val_t *restrict removed = BITSET_DATA_ROW(other, other_row);
    tsk_size_t word;

    for (word = 0; word < self->row_len; word++) {
        target[word] &= ~(removed[word]);
    }
}
/* Add to row self_row of self every bit that is set in row other_row of
 * other (set union, in place). The number of words processed is self's row
 * length. The restrict qualifiers mean the two rows must not overlap.
 */
void
tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
    tsk_size_t other_row)
{
    tsk_bitset_val_t *restrict target = BITSET_DATA_ROW(self, self_row);
    const tsk_bitset_val_t *restrict added = BITSET_DATA_ROW(other, other_row);
    tsk_size_t word;

    for (word = 0; word < self->row_len; word++) {
        target[word] |= added[word];
    }
}
/* Set the bit with index bit in the given row. The bit index is split into
 * the word that holds it and the bit's position inside that word. No bounds
 * checking is performed on row or bit.
 */
void
tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
{
    const tsk_bitset_val_t word = (bit / TSK_BITSET_BITS);
    const tsk_bitset_val_t shift = (bit - (TSK_BITSET_BITS * word));
    tsk_bitset_val_t *row_data = BITSET_DATA_ROW(self, row);

    row_data[word] |= (tsk_bitset_val_t) 1 << shift;
}
/* Return true if the bit with index bit is set in the given row, and false
 * otherwise. No bounds checking is performed on row or bit.
 */
bool
tsk_bitset_contains(const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit)
{
    const tsk_bitset_val_t word = (bit / TSK_BITSET_BITS);
    const tsk_bitset_val_t shift = (bit - (TSK_BITSET_BITS * word));
    const tsk_bitset_val_t mask = (tsk_bitset_val_t) 1 << shift;
    const tsk_bitset_val_t *row_data = BITSET_DATA_ROW(self, row);

    return (row_data[word] & mask) != 0;
}
/* Count the number of set bits in v.
 *
 * Utilizes 12 operations per chunk. NB this only works on 32 bit integers.
 * Taken from:
 * https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
 * There's a nice breakdown of this algorithm here:
 * https://stackoverflow.com/a/109025
 *
 * The gcc/clang compiler flag -mpopcnt will convert this code to a popcnt
 * instruction (most if not all modern CPUs will support this). The popcnt
 * instruction will yield some speed improvements, which depend on the tree
 * sequence.
 *
 * NB: 32bit counting is typically faster than 64bit counting for this task.
 * (at least on x86-64)
 */
static inline uint32_t
popcount(tsk_bitset_val_t v)
{
    tsk_bitset_val_t pairs, nibbles, bytes;

    /* Each 2-bit field now holds the number of bits set in that field. */
    pairs = v - ((v >> 1) & 0x55555555);
    /* Sum adjacent 2-bit counts into 4-bit fields. */
    nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
    /* Sum adjacent 4-bit counts into 8-bit fields. */
    bytes = (nibbles + (nibbles >> 4)) & 0xF0F0F0F;
    /* The multiply accumulates all four byte counts into the top byte. */
    return (bytes * 0x1010101) >> 24;
}
/* Return the number of bits set in the given row, obtained by summing the
 * population count of each word in the row.
 */
tsk_size_t
tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row)
{
    const tsk_bitset_val_t *restrict words = BITSET_DATA_ROW(self, row);
    tsk_size_t total = 0;
    tsk_size_t word;

    for (word = 0; word < self->row_len; word++) {
        total += popcount(words[word]);
    }
    return total;
}
/* Get the items stored in the row of a bitset.
 *
 * The indexes of the set bits are written to items in increasing order and
 * the number of indexes written is stored in *n_items. No bound on items is
 * checked here, so the caller must supply room for every set bit in the row.
 *
 * Uses a de Bruijn sequence lookup table to determine the lowest bit set.
 * See the wikipedia article for more info: https://w.wiki/BYiF
 */
void
tsk_bitset_get_items(
    const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items)
{
    static const tsk_id_t lookup[32] = { 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25,
        17, 4, 8, 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 };
    const tsk_bitset_val_t *restrict words = BITSET_DATA_ROW(self, row);
    tsk_bitset_val_t remaining, lowest;
    tsk_size_t word, base;
    tsk_size_t found = 0;

    for (word = 0; word < self->row_len; word++) {
        /* Index of the first bit held by this word. */
        base = word * TSK_BITSET_BITS;
        /* Peel off the least significant set bit until the word is empty. */
        for (remaining = words[word]; remaining != 0; remaining ^= lowest) {
            lowest = remaining & -remaining;
            items[found] = lookup[(lowest * 0x077cb531U) >> 27] + (tsk_id_t) base;
            found++;
        }
    }
    *n_items = found;
}
/* Release the word storage owned by the bitset, via tsk_safe_free on
 * self->data. The tsk_bitset_t structure itself is not freed.
 */
void
tsk_bitset_free(tsk_bitset_t *self)
{
tsk_safe_free(self->data);
}
================================================
FILE: c/tskit/core.h
================================================
/*
* MIT License
*
* Copyright (c) 2019-2025 Tskit Developers
* Copyright (c) 2015-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* @file core.h
* @brief Core utilities used in all of tskit.
*/
#ifndef __TSK_CORE_H__
#define __TSK_CORE_H__
#ifdef __cplusplus
extern "C" {
#endif
#include
#include
#include
#include
#include
/* Compiler-specific annotations.
 *
 * TSK_WARN_UNUSED: on compilers defining __GNUC__ this expands to the
 * warn_unused_result attribute; elsewhere it expands to nothing.
 *
 * TSK_UNUSED(x): renames the parameter x to TSK_UNUSED_x and, on compilers
 * defining __GNUC__, also tags it with the unused attribute.
 */
#ifdef __GNUC__
#define TSK_WARN_UNUSED __attribute__((warn_unused_result))
#define TSK_UNUSED(x) TSK_UNUSED_##x __attribute__((__unused__))
#else
#define TSK_WARN_UNUSED
#define TSK_UNUSED(x) TSK_UNUSED_##x
/* Don't bother with restrict for MSVC */
/* Note that this branch applies to every compiler that does not define
 * __GNUC__: the restrict keyword is defined away to nothing. */
#define restrict
#endif
/* We assume CHAR_BIT == 8 when loading strings from 8-bit byte arrays */
#if CHAR_BIT != 8
#error CHAR_BIT MUST EQUAL 8
#endif
/* This sets up TSK_DBL_DECIMAL_DIG, which can then be used as a
* precision specifier when writing out doubles, if you want sufficient
* decimal digits to be written to guarantee a lossless round-trip
* after being read back in. Usage:
*
* printf("%.*g", TSK_DBL_DECIMAL_DIG, foo);
*
* See https://stackoverflow.com/a/19897395/2752221
*/
#ifdef DBL_DECIMAL_DIG
#define TSK_DBL_DECIMAL_DIG (DBL_DECIMAL_DIG)
#else
#define TSK_DBL_DECIMAL_DIG (DBL_DIG + 3)
#endif
/**
@brief Tskit Object IDs.
@rst
All objects in tskit are referred to by integer IDs corresponding to the
row they occupy in the relevant table. The ``tsk_id_t`` type should be used
when manipulating these ID values. The reserved value :c:macro:`TSK_NULL` (-1) defines
missing data.
@endrst
*/
#ifdef _TSK_BIG_TABLES
/* Allow tables to have more than 2^31 rows. This is an EXPERIMENTAL feature
 * and is not supported in any way. This typedef is only included for
 * future-proofing purposes, so that we can be sure that we don't make any
 * design decisions that are incompatible with big tables by building the
 * library in 64 bit mode in CI. See the discussion here for more background:
 * https://github.com/tskit-dev/tskit/issues/343
 *
 * If you need big tables, please open an issue on GitHub to discuss, or comment
 * on the thread above.
 */
typedef int64_t tsk_id_t;
/* The largest valid ID. The expansion is parenthesised so that the macro
 * behaves as a single value inside larger expressions such as
 * TSK_MAX_ID / 2 or -TSK_MAX_ID. */
#define TSK_MAX_ID (INT64_MAX - 1)
#define TSK_ID_STORAGE_TYPE KAS_INT64
#else
typedef int32_t tsk_id_t;
/* The largest valid ID. The expansion is parenthesised so that the macro
 * behaves as a single value inside larger expressions such as
 * TSK_MAX_ID / 2 or -TSK_MAX_ID. */
#define TSK_MAX_ID (INT32_MAX - 1)
#define TSK_ID_STORAGE_TYPE KAS_INT32
#endif
/**
@brief Tskit sizes.
@rst
The ``tsk_size_t`` type is an unsigned integer used for any size or count value.
@endrst
*/
typedef uint64_t tsk_size_t;
#define TSK_MAX_SIZE UINT64_MAX
#define TSK_SIZE_STORAGE_TYPE KAS_UINT64
/**
@brief Container for bitwise flags.
@rst
Bitwise flags are used in tskit as a column type and also as a way to
specify options to API functions.
@endrst
*/
typedef uint32_t tsk_flags_t;
#define TSK_FLAGS_STORAGE_TYPE KAS_UINT32
/**
@brief Boolean type.
@rst
Fixed-size (1 byte) boolean values.
@endrst
*/
typedef uint8_t tsk_bool_t;
// clang-format off
/**
@defgroup API_VERSION_GROUP API version macros.
@{
*/
/**
The library major version. Incremented when breaking changes to the API or ABI are
introduced. This includes any changes to the signatures of functions and the
sizes and types of externally visible structs.
*/
#define TSK_VERSION_MAJOR 1
/**
The library minor version. Incremented when non-breaking backward-compatible changes
to the API or ABI are introduced, i.e., the addition of a new function.
*/
#define TSK_VERSION_MINOR 3
/**
The library patch version. Incremented when any changes not relevant to the
API or ABI are introduced, i.e., internal refactors or bugfixes.
*/
#define TSK_VERSION_PATCH 1
/** @} */
/*
We define a specific NAN value for default mutation time which indicates
the time is unknown. We use a specific value so that if mutation time is set to
a NAN from a computation we can reject it. This specific value is a non-signalling
NAN with the last six fraction bytes set to the ascii of "tskit!"
*/
#define TSK_UNKNOWN_TIME_HEX 0x7FF874736B697421ULL
/* Return the double whose bit pattern is TSK_UNKNOWN_TIME_HEX. The 64-bit
 * pattern is stored through the integer member of a union and read back
 * through the double member, so the bits are reinterpreted rather than
 * numerically converted. */
static inline double
__tsk_nan_f(void)
{
    union {
        uint64_t i;
        double f;
    } bits;

    bits.i = TSK_UNKNOWN_TIME_HEX;
    return bits.f;
}
/**
@defgroup GENERIC_CONSTANTS General constants used throughout the library.
@{
*/
/**
Used in node flags to indicate that a node is a sample node.
*/
#define TSK_NODE_IS_SAMPLE 1u
/**
Null value used for cases such as absent id references.
*/
#define TSK_NULL ((tsk_id_t) -1)
/**
Value used for missing data in genotype arrays.
*/
#define TSK_MISSING_DATA (-1)
/**
Value to indicate that a time is unknown. Note that this value is a non-signalling NAN
whose representation differs from the NAN generated by computations such as divide by zeros.
*/
#define TSK_UNKNOWN_TIME __tsk_nan_f()
/** @} */
#define TSK_TIME_UNITS_UNKNOWN "unknown"
#define TSK_TIME_UNITS_UNCALIBRATED "uncalibrated"
#define TSK_FILE_FORMAT_NAME "tskit.trees"
#define TSK_FILE_FORMAT_NAME_LENGTH 11
#define TSK_FILE_FORMAT_VERSION_MAJOR 12
#define TSK_FILE_FORMAT_VERSION_MINOR 7
/**
@defgroup GENERIC_FUNCTION_OPTIONS General options flags used in some functions.
@{
*/
/* Place the common options at the top of the space; this way we can start
options for individual functions at the bottom without worrying about
clashing with the common options
*/
/** Turn on debugging output. Not supported by all functions. */
#define TSK_DEBUG (1u << 31)
/** Do not initialise the parameter object. */
#define TSK_NO_INIT (1u << 30)
/**
Do not run integrity checks before performing an operation.
This performance optimisation should not be used unless the calling code can
guarantee reference integrity within the table collection. References
to rows not in the table or bad offsets will result in undefined
behaviour.
*/
#define TSK_NO_CHECK_INTEGRITY (1u << 29)
/**
Instead of taking a copy of input objects, the function should take ownership
of them and manage their lifecycle. The caller specifying this flag should no
longer modify or free the object or objects passed. See individual functions
using this flag for what object it applies to.
*/
#define TSK_TAKE_OWNERSHIP (1u << 28)
/** @} */
/**
@defgroup GENERAL_ERROR_GROUP General errors.
@{
*/
/**
Generic error thrown when no other message can be generated.
*/
#define TSK_ERR_GENERIC -1
/**
Memory could not be allocated.
*/
#define TSK_ERR_NO_MEMORY -2
/**
An IO error occurred.
*/
#define TSK_ERR_IO -3
#define TSK_ERR_BAD_PARAM_VALUE -4
#define TSK_ERR_BUFFER_OVERFLOW -5
#define TSK_ERR_UNSUPPORTED_OPERATION -6
#define TSK_ERR_GENERATE_UUID -7
/**
The file stream ended after reading zero bytes.
*/
#define TSK_ERR_EOF -8
/** @} */
/**
@defgroup FILE_FORMAT_ERROR_GROUP File format errors.
@{
*/
/**
A file could not be read because it is in the wrong format
*/
#define TSK_ERR_FILE_FORMAT -100
/**
The file is in tskit format, but the version is too old for the
library to read. The file should be upgraded to the latest version
using the ``tskit upgrade`` command line utility from tskit version<0.6.2.
*/
#define TSK_ERR_FILE_VERSION_TOO_OLD -101
/**
The file is in tskit format, but the version is too new for the
library to read. To read the file you must upgrade the version
of tskit.
*/
#define TSK_ERR_FILE_VERSION_TOO_NEW -102
/**
A column that is a required member of a table was not found in
the file.
*/
#define TSK_ERR_REQUIRED_COL_NOT_FOUND -103
/**
One of a pair of columns that must be specified together was
not found in the file.
*/
#define TSK_ERR_BOTH_COLUMNS_REQUIRED -104
/**
An unsupported type was provided for a column in the file.
*/
#define TSK_ERR_BAD_COLUMN_TYPE -105
/** @} */
/**
@defgroup OOB_ERROR_GROUP Out of bounds errors.
@{
*/
/**
A bad value was provided for a ragged column offset, values should
start at zero and be monotonically increasing.
*/
#define TSK_ERR_BAD_OFFSET -200
/**
A position to seek to was less than zero or greater than the length
of the genome
*/
#define TSK_ERR_SEEK_OUT_OF_BOUNDS -201
/**
A node id was less than zero or greater than the final index
*/
#define TSK_ERR_NODE_OUT_OF_BOUNDS -202
/**
An edge id was less than zero or greater than the final index
*/
#define TSK_ERR_EDGE_OUT_OF_BOUNDS -203
/**
A population id was less than zero or greater than the final index
*/
#define TSK_ERR_POPULATION_OUT_OF_BOUNDS -204
/**
A site id was less than zero or greater than the final index
*/
#define TSK_ERR_SITE_OUT_OF_BOUNDS -205
/**
A mutation id was less than zero or greater than the final index
*/
#define TSK_ERR_MUTATION_OUT_OF_BOUNDS -206
/**
An individual id was less than zero or greater than the final index
*/
#define TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS -207
/**
A migration id was less than zero or greater than the final index
*/
#define TSK_ERR_MIGRATION_OUT_OF_BOUNDS -208
/**
A provenance id was less than zero or greater than the final index
*/
#define TSK_ERR_PROVENANCE_OUT_OF_BOUNDS -209
/**
A time value was non-finite (NaN counts as finite)
*/
#define TSK_ERR_TIME_NONFINITE -210
/**
A genomic position was non-finite
*/
#define TSK_ERR_GENOME_COORDS_NONFINITE -211
/**
One of the rows in the retained table refers to a row that has been
deleted.
*/
#define TSK_ERR_KEEP_ROWS_MAP_TO_DELETED -212
/**
A genomic position was less than zero or greater than or equal to the
sequence length
*/
#define TSK_ERR_POSITION_OUT_OF_BOUNDS -213
/** @} */
/**
@defgroup EDGE_ERROR_GROUP Edge errors.
@{
*/
/**
A parent node of an edge was TSK_NULL.
*/
#define TSK_ERR_NULL_PARENT -300
/**
A child node of an edge was TSK_NULL.
*/
#define TSK_ERR_NULL_CHILD -301
/**
The edge table was not sorted by the time of each edge's parent
nodes. Sort order is (time[parent], child, left).
*/
#define TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME -302
/**
A parent node had edges that were non-contiguous.
*/
#define TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS -303
/**
The edge table was not sorted by the id of the child node of each edge.
Sort order is (time[parent], child, left).
*/
#define TSK_ERR_EDGES_NOT_SORTED_CHILD -304
/**
The edge table was not sorted by the left coordinate of each edge.
Sort order is (time[parent], child, left).
*/
#define TSK_ERR_EDGES_NOT_SORTED_LEFT -305
/**
An edge had a child node that was older than the parent. Parent times must
be greater than the child time.
*/
#define TSK_ERR_BAD_NODE_TIME_ORDERING -306
/**
An edge had a genomic interval where right was less than or equal to left.
*/
#define TSK_ERR_BAD_EDGE_INTERVAL -307
/**
An edge was duplicated.
*/
#define TSK_ERR_DUPLICATE_EDGES -308
/**
An edge had a right coord greater than the genomic length.
*/
#define TSK_ERR_RIGHT_GREATER_SEQ_LENGTH -309
/**
An edge had a left coord less than zero.
*/
#define TSK_ERR_LEFT_LESS_ZERO -310
/**
A parent node had edges that were contradictory over an interval.
*/
#define TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN -311
/**
A method that doesn't support edge metadata was attempted on an edge
table containing metadata.
*/
#define TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA -312
/** @} */
/**
@defgroup SITE_ERROR_GROUP Site errors.
@{
*/
/**
The site table was not in order of increasing genomic position.
*/
#define TSK_ERR_UNSORTED_SITES -400
/**
The site table had more than one site at a single genomic position.
*/
#define TSK_ERR_DUPLICATE_SITE_POSITION -401
/**
A site had a position that was less than zero or greater than the sequence
length.
*/
#define TSK_ERR_BAD_SITE_POSITION -402
/** @} */
/**
@defgroup MUTATION_ERROR_GROUP Mutation errors.
@{
*/
/**
A mutation had a parent mutation that was at a different site.
*/
#define TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE -500
/**
A mutation had a parent mutation that was itself.
*/
#define TSK_ERR_MUTATION_PARENT_EQUAL -501
/**
A mutation had a parent mutation that had a greater id.
*/
#define TSK_ERR_MUTATION_PARENT_AFTER_CHILD -502
/**
Two or more mutation parent references formed a loop
*/
#define TSK_ERR_MUTATION_PARENT_INCONSISTENT -503
/**
The mutation table was not in the order of non-decreasing site id and
non-increasing time within each site.
*/
#define TSK_ERR_UNSORTED_MUTATIONS -504
/* 505 was the now unused TSK_ERR_NON_SINGLE_CHAR_MUTATION */
/**
A mutation's time was younger than (not >=) the time of its node
and wasn't TSK_UNKNOWN_TIME.
*/
#define TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE -506
/**
A mutation's time was older (not <=) than the time of its parent
mutation, and wasn't TSK_UNKNOWN_TIME.
*/
#define TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION -507
/**
A mutation's time was older (not <) than the time of the parent node of
the edge on which it occurs, and wasn't TSK_UNKNOWN_TIME.
*/
#define TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE -508
/**
A single site had a mixture of known mutation times and TSK_UNKNOWN_TIME
*/
#define TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN -509
/**
Some mutations have TSK_UNKNOWN_TIME in an algorithm where that's
disallowed (use compute_mutation_times?).
*/
#define TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME -510
/**
A mutation's parent was not consistent with the topology of the tree.
*/
#define TSK_ERR_BAD_MUTATION_PARENT -511
/** @} */
/**
@defgroup MIGRATION_ERROR_GROUP Migration errors.
@{
*/
/**
The migration table was not sorted by time.
*/
#define TSK_ERR_UNSORTED_MIGRATIONS -550
/** @} */
/**
@defgroup SAMPLE_ERROR_GROUP Sample errors.
@{
*/
/**
A duplicate sample was specified.
*/
#define TSK_ERR_DUPLICATE_SAMPLE -600
/**
A sample id that was not valid was specified.
*/
#define TSK_ERR_BAD_SAMPLES -601
/** @} */
/**
@defgroup TABLE_ERROR_GROUP Table errors.
@{
*/
/**
An invalid table position was specified.
*/
#define TSK_ERR_BAD_TABLE_POSITION -700
/**
A sequence length equal to or less than zero was specified.
*/
#define TSK_ERR_BAD_SEQUENCE_LENGTH -701
/**
The table collection was not indexed.
*/
#define TSK_ERR_TABLES_NOT_INDEXED -702
/**
Tables cannot be larger than 2**31 rows.
*/
#define TSK_ERR_TABLE_OVERFLOW -703
/**
Ragged array columns cannot be larger than 2**64 bytes.
*/
#define TSK_ERR_COLUMN_OVERFLOW -704
/**
The table collection contains more than 2**31 trees.
*/
#define TSK_ERR_TREE_OVERFLOW -705
/**
Metadata was attempted to be set on a table where it is disabled.
*/
#define TSK_ERR_METADATA_DISABLED -706
/**
There was an error with the table's indexes.
*/
#define TSK_ERR_TABLES_BAD_INDEXES -707
/** @} */
/**
@defgroup LIMITATION_ERROR_GROUP Limitation errors.
@{
*/
/**
An operation was attempted that only supports infinite sites, i.e.
at most a single mutation per site.
*/
#define TSK_ERR_ONLY_INFINITE_SITES -800
/**
Simplification was attempted with migrations present, which are not
supported.
*/
#define TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED -801
/**
Sorting was attempted on migrations, which is not supported.
*/
#define TSK_ERR_SORT_MIGRATIONS_NOT_SUPPORTED -802
/**
An invalid sort offset was specified, for sites and mutations this must
be either 0 or the table length.
*/
#define TSK_ERR_SORT_OFFSET_NOT_SUPPORTED -803
/**
An operation was attempted that only supports binary mutations.
*/
#define TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED -804
/**
An operation was attempted that doesn't support migrations, with a
non-empty migration table.
*/
#define TSK_ERR_MIGRATIONS_NOT_SUPPORTED -805
/**
A table attempted to extend from itself.
*/
#define TSK_ERR_CANNOT_EXTEND_FROM_SELF -806
/**
An operation was attempted that doesn't support silent mutations, i.e.
a mutation that doesn't change the allelic state.
*/
#define TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED -807
/**
A copy of a variant cannot be decoded.
*/
#define TSK_ERR_VARIANT_CANT_DECODE_COPY -808
/**
A tree sequence cannot take ownership of a table collection where
TSK_NO_EDGE_METADATA.
*/
#define TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA -809
/**
Operation is undefined for nonbinary trees
*/
#define TSK_ERR_UNDEFINED_NONBINARY -810
/**
Operation is undefined for trees with multiple roots.
*/
#define TSK_ERR_UNDEFINED_MULTIROOT -811
/** @} */
/**
@defgroup STATS_ERROR_GROUP Stats errors.
@{
*/
/**
Zero windows were specified, at least one window must be specified.
*/
#define TSK_ERR_BAD_NUM_WINDOWS -900
/**
The window specification was not an increasing list of positions between
0 and the sequence length.
*/
#define TSK_ERR_BAD_WINDOWS -901
/**
More than one stat mode was specified.
*/
#define TSK_ERR_MULTIPLE_STAT_MODES -902
/**
The state dimension was not >=1.
*/
#define TSK_ERR_BAD_STATE_DIMS -903
/**
The result dimension was not >=1.
*/
#define TSK_ERR_BAD_RESULT_DIMS -904
/**
Insufficient sample sets were provided.
*/
#define TSK_ERR_INSUFFICIENT_SAMPLE_SETS -905
/**
Insufficient sample set index tuples were provided.
*/
#define TSK_ERR_INSUFFICIENT_INDEX_TUPLES -906
/**
The sample set index was out of bounds.
*/
#define TSK_ERR_BAD_SAMPLE_SET_INDEX -907
/**
The sample set index was empty.
*/
#define TSK_ERR_EMPTY_SAMPLE_SET -908
/**
A stat mode was attempted that is not supported by the operation.
*/
#define TSK_ERR_UNSUPPORTED_STAT_MODE -909
/**
Statistics based on branch lengths were attempted when the ``time_units``
were ``uncalibrated``.
*/
#define TSK_ERR_TIME_UNCALIBRATED -910
/**
The TSK_STAT_POLARISED option was passed to a statistic that does
not support it.
*/
#define TSK_ERR_STAT_POLARISED_UNSUPPORTED -911
/**
The TSK_STAT_SPAN_NORMALISE option was passed to a statistic that does
not support it.
*/
#define TSK_ERR_STAT_SPAN_NORMALISE_UNSUPPORTED -912
/**
Insufficient weights were provided.
*/
#define TSK_ERR_INSUFFICIENT_WEIGHTS -913
/**
The node bin map contains a value less than TSK_NULL.
*/
#define TSK_ERR_BAD_NODE_BIN_MAP -914
/**
Maximum index in node bin map is greater than output dimension.
*/
#define TSK_ERR_BAD_NODE_BIN_MAP_DIM -915
/**
The vector of quantiles is out of bounds or in nonascending order.
*/
#define TSK_ERR_BAD_QUANTILES -916
/**
Times are not in ascending order
*/
#define TSK_ERR_UNSORTED_TIMES -917
/**
The provided positions are not provided in strictly increasing order
*/
#define TSK_ERR_STAT_UNSORTED_POSITIONS -918
/**
The provided positions are not unique
*/
#define TSK_ERR_STAT_DUPLICATE_POSITIONS -919
/**
The provided sites are not provided in strictly increasing position order
*/
#define TSK_ERR_STAT_UNSORTED_SITES -920
/**
The provided sites are not unique
*/
#define TSK_ERR_STAT_DUPLICATE_SITES -921
/**
The number of time windows is zero
*/
#define TSK_ERR_BAD_TIME_WINDOWS_DIM -922
/**
Sample times do not all equal the start of first time window
*/
#define TSK_ERR_BAD_SAMPLE_PAIR_TIMES -923
/**
Time windows are not strictly increasing
*/
#define TSK_ERR_BAD_TIME_WINDOWS -924
/**
Time windows do not end at infinity
*/
#define TSK_ERR_BAD_TIME_WINDOWS_END -925
/**
Node time does not fall within assigned time window
*/
#define TSK_ERR_BAD_NODE_TIME_WINDOW -926
/** @} */
/**
@defgroup MAPPING_ERROR_GROUP Mutation mapping errors.
@{
*/
/**
Only missing genotypes were specified, at least one non-missing is
required.
*/
#define TSK_ERR_GENOTYPES_ALL_MISSING -1000
/**
A genotype value was greater than the maximum allowed (64) or less
than TSK_MISSING_DATA (-1).
*/
#define TSK_ERR_BAD_GENOTYPE -1001
/**
An ancestral genotype value was greater than the maximum allowed (64) or less
than 0.
*/
#define TSK_ERR_BAD_ANCESTRAL_STATE -1002
/** @} */
/**
@defgroup GENOTYPE_ERROR_GROUP Genotype decoding errors.
@{
*/
/**
Genotypes were requested for non-samples at the same time
as asking that isolated nodes be marked as missing. This is not
supported.
*/
#define TSK_ERR_MUST_IMPUTE_NON_SAMPLES -1100
/**
A user-specified allele map was used, but didn't contain an allele
found in the tree sequence.
*/
#define TSK_ERR_ALLELE_NOT_FOUND -1101
/**
More than 2147483647 alleles were specified.
*/
#define TSK_ERR_TOO_MANY_ALLELES -1102
/**
A user-specified allele map was used, but it contained zero alleles.
*/
#define TSK_ERR_ZERO_ALLELES -1103
/**
An allele used when decoding alignments had length other than one.
*/
#define TSK_ERR_BAD_ALLELE_LENGTH -1104
/**
An allele used when decoding alignments matched the missing data character.
*/
#define TSK_ERR_MISSING_CHAR_COLLISION -1105
/** @} */
/**
@defgroup DISTANCE_ERROR_GROUP Distance metric errors.
@{
*/
/**
Trees with different numbers of samples were specified.
*/
#define TSK_ERR_SAMPLE_SIZE_MISMATCH -1200
/**
Trees with nonidentical samples were specified.
*/
#define TSK_ERR_SAMPLES_NOT_EQUAL -1201
/**
A tree with multiple roots was specified.
*/
#define TSK_ERR_MULTIPLE_ROOTS -1202
/**
A tree with unary nodes was specified.
*/
#define TSK_ERR_UNARY_NODES -1203
/**
Trees were specified that had unequal sequence lengths.
*/
#define TSK_ERR_SEQUENCE_LENGTH_MISMATCH -1204
/**
A tree was specified that did not have the sample lists option
enabled (TSK_SAMPLE_LISTS).
*/
#define TSK_ERR_NO_SAMPLE_LISTS -1205
/** @} */
/**
@defgroup HAPLOTYPE_ERROR_GROUP Haplotype matching errors.
@{
*/
/**
The Viterbi matrix has not filled (it has zero transitions).
*/
#define TSK_ERR_NULL_VITERBI_MATRIX -1300
/**
There was no matching haplotype.
*/
#define TSK_ERR_MATCH_IMPOSSIBLE -1301
/**
The compressed matrix has a node that has no samples in its descendants.
*/
#define TSK_ERR_BAD_COMPRESSED_MATRIX_NODE -1302
/**
There are too many values to compress.
*/
#define TSK_ERR_TOO_MANY_VALUES -1303
/** @} */
/**
@defgroup UNION_ERROR_GROUP Union errors.
@{
*/
/**
A node map was specified that contained a node not present in the
specified table collection.
*/
#define TSK_ERR_UNION_BAD_MAP -1400
/**
The shared portions of the specified tree sequences are not equal.
Note that this may be the case if the table collections were not
fully sorted before union was called.
*/
#define TSK_ERR_UNION_DIFF_HISTORIES -1401
/** @} */
/**
@defgroup IBD_ERROR_GROUP IBD errors.
@{
*/
/**
Both nodes in a sample pair are the same node.
*/
#define TSK_ERR_SAME_NODES_IN_PAIR -1500
/**
Per-pair statistics were requested without TSK_IBD_STORE_PAIRS being
specified.
*/
#define TSK_ERR_IBD_PAIRS_NOT_STORED -1501
/**
Segments were requested without TSK_IBD_STORE_SEGMENTS being specified.
*/
#define TSK_ERR_IBD_SEGMENTS_NOT_STORED -1502
/** @} */
/**
@defgroup SIMPLIFY_ERROR_GROUP Simplify errors.
@{
*/
/**
Both TSK_SIMPLIFY_KEEP_UNARY and TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS
were specified. Only one can be used.
*/
#define TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE -1600
/** @} */
/**
@defgroup INDIVIDUAL_ERROR_GROUP Individual errors.
@{
*/
/**
Individuals were provided in an order where parents were after their
children.
*/
#define TSK_ERR_UNSORTED_INDIVIDUALS -1700
/**
An individual was its own parent.
*/
#define TSK_ERR_INDIVIDUAL_SELF_PARENT -1701
/**
An individual was its own ancestor in a cycle of references.
*/
#define TSK_ERR_INDIVIDUAL_PARENT_CYCLE -1702
/**
An individual had nodes from more than one population
(and only one was requested).
*/
#define TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH -1703
/**
An individual had nodes from more than one time
(and only one was requested).
*/
#define TSK_ERR_INDIVIDUAL_TIME_MISMATCH -1704
/** @} */
/**
@defgroup EXTEND_EDGES_ERROR_GROUP Extend edges errors.
@{
*/
/**
Maximum iteration number (max_iter) must be positive.
*/
#define TSK_ERR_EXTEND_EDGES_BAD_MAXITER -1800
/** @} */
// clang-format on
/* This bit is 0 for any errors originating from kastore */
#define TSK_KAS_ERR_BIT 14
int tsk_set_kas_error(int err);
bool tsk_is_kas_error(int err);
int tsk_get_kas_error(int err);
/**
@brief Return a description of the specified error.
The memory for the returned string is handled by the library and should
not be freed by client code.
@param err A tskit error code.
@return A description of the error.
*/
const char *tsk_strerror(int err);
/* Redefine this macro in downstream builds if stdout is not the
 * appropriate stream to emit debug information when the TSK_DEBUG
 * flag is passed to supporting functions (e.g. in R).
 */
#define TSK_DEFAULT_DEBUG_STREAM stdout
#ifdef TSK_TRACE_ERRORS
/* Report an error code on stderr together with its description and the
 * source location that raised it, then hand the code back unchanged so the
 * call can be used inline where the error value is produced. */
static inline int
_tsk_trace_error(int err, int line, const char *file)
{
    const char *description = tsk_strerror(err);

    fprintf(stderr, "tskit-trace-error: %d='%s' at line %d in %s\n", err, description,
        line, file);
    return err;
}
/*
Developer note: this macro may be redefined as part of compilation for
an R package, and should be treated as part of the documented API
(no changes).
*/
#define tsk_trace_error(err) (_tsk_trace_error(err, __LINE__, __FILE__))
#else
#define tsk_trace_error(err) (err)
#endif
#ifndef TSK_BUG_ASSERT_MESSAGE
#define TSK_BUG_ASSERT_MESSAGE \
"If you are using tskit directly please open an issue on" \
" GitHub, ideally with a reproducible example." \
" (https://github.com/tskit-dev/tskit/issues) If you are" \
" using software that uses tskit, please report an issue" \
" to that software's issue tracker, at least initially."
#endif
/**
We often wish to assert a condition that is unexpected, but using the normal `assert`
means compiling without NDEBUG. This macro still asserts when NDEBUG is defined.
If you are using this macro in your own software then please set TSK_BUG_ASSERT_MESSAGE
to point users to your issue tracker.
*/
/*
Developer note: this macro may be redefined as part of compilation for
an R package, and should be treated as part of the documented API
(no changes).
*/
/* When `condition` is false, print the source location and
 * TSK_BUG_ASSERT_MESSAGE to stderr and call abort(). The do/while (0) wrapper
 * makes the expansion a single statement, so it is safe in unbraced if/else
 * bodies. `condition` is evaluated exactly once. */
#define tsk_bug_assert(condition) \
do { \
if (!(condition)) { \
fprintf(stderr, "Bug detected in %s at line %d. %s\n", __FILE__, __LINE__, \
TSK_BUG_ASSERT_MESSAGE); \
abort(); \
} \
} while (0)
/* Implementation behind tsk_safe_free; receives the address of the pointer
 * variable rather than the pointer itself. */
void __tsk_safe_free(void **ptr);
/* Convenience wrapper that passes the address of `pointer` to
 * __tsk_safe_free, so the argument must be an lvalue of pointer type.
 * NOTE(review): presumably this frees the memory and resets the pointer to
 * NULL -- confirm against the definition in core.c. */
#define tsk_safe_free(pointer) __tsk_safe_free((void **) &(pointer))
/* Maximum / minimum of two values. Each argument appears twice in the
 * expansion, so do not pass expressions with side effects
 * (e.g. TSK_MAX(i++, j)). */
#define TSK_MAX(a, b) ((a) > (b) ? (a) : (b))
#define TSK_MIN(a, b) ((a) < (b) ? (a) : (b))
/* This is a simple allocator that is optimised to efficiently allocate a
* large number of small objects without large numbers of calls to malloc.
* The allocator mallocs memory in chunks of a configurable size. When
* responding to calls to get(), it will return a chunk of this memory.
* This memory cannot be subsequently handed back to the allocator. However,
* all memory allocated by the allocator can be returned at once by calling
* reset.
*/
typedef struct {
size_t chunk_size; /* number of bytes per chunk */
size_t top; /* the offset of the next available byte in the current chunk */
size_t current_chunk; /* the index of the chunk currently being used */
size_t total_size; /* the total number of bytes allocated + overhead. */
size_t total_allocated; /* the total number of bytes allocated. */
size_t num_chunks; /* the number of memory chunks. */
char **mem_chunks; /* the memory chunks */
} tsk_blkalloc_t;
extern void tsk_blkalloc_print_state(tsk_blkalloc_t *self, FILE *out);
extern int tsk_blkalloc_reset(tsk_blkalloc_t *self);
extern int tsk_blkalloc_init(tsk_blkalloc_t *self, size_t chunk_size);
extern void *tsk_blkalloc_get(tsk_blkalloc_t *self, size_t size);
extern void tsk_blkalloc_free(tsk_blkalloc_t *self);
/* A node of the integer-keyed AVL tree. Nodes are passed in by the caller
 * (see tsk_avl_tree_int_insert), and carry an opaque `value` payload. */
typedef struct _tsk_avl_node_int_t {
/* Key that nodes are searched by (see tsk_avl_tree_int_search). */
int64_t key;
/* Opaque payload associated with the key. */
void *value;
/* NOTE(review): presumably the left and right child links -- confirm
 * against the implementation in core.c. */
struct _tsk_avl_node_int_t *llink;
struct _tsk_avl_node_int_t *rlink;
/* This can only contain -1, 0, 1. We could set it to a smaller type,
* but there's no point because of struct padding and alignment so
* it's simplest to keep it as a plain int. */
int balance;
} tsk_avl_node_int_t;
/* An AVL tree of tsk_avl_node_int_t nodes. */
typedef struct {
/* NOTE(review): embedded by value; looks like a header/sentinel node rather
 * than the root itself, since the root is obtained via
 * tsk_avl_tree_int_get_root -- confirm. */
tsk_avl_node_int_t head;
/* NOTE(review): presumably the number of nodes stored and the current tree
 * height -- confirm. */
tsk_size_t size;
tsk_size_t height;
} tsk_avl_tree_int_t;
int tsk_avl_tree_int_init(tsk_avl_tree_int_t *self);
int tsk_avl_tree_int_free(tsk_avl_tree_int_t *self);
void tsk_avl_tree_int_print_state(tsk_avl_tree_int_t *self, FILE *out);
int tsk_avl_tree_int_insert(tsk_avl_tree_int_t *self, tsk_avl_node_int_t *node);
tsk_avl_node_int_t *tsk_avl_tree_int_search(const tsk_avl_tree_int_t *self, int64_t key);
int tsk_avl_tree_int_ordered_nodes(
const tsk_avl_tree_int_t *self, tsk_avl_node_int_t **out);
tsk_avl_node_int_t *tsk_avl_tree_int_get_root(const tsk_avl_tree_int_t *self);
tsk_size_t tsk_search_sorted(const double *array, tsk_size_t size, double value);
double tsk_round(double x, unsigned int ndigits);
/**
@brief Check if a number is ``TSK_UNKNOWN_TIME``
@rst
Unknown time values in tskit are represented by a particular NaN value. Since NaN values
are not equal to each other by definition, a simple comparison like
``mutation.time == TSK_UNKNOWN_TIME`` will fail even if the mutation's time is
TSK_UNKNOWN_TIME. This function compares the underlying bit representation of a double
value and returns true iff it is equal to the specific NaN value
:c:macro:`TSK_UNKNOWN_TIME`.
@endrst
@param val The number to check
@return true if the number is ``TSK_UNKNOWN_TIME`` else false
*/
bool tsk_is_unknown_time(double val);
/* We define local versions of isnan and isfinite to workaround some portability
* issues. */
bool tsk_isnan(double val);
bool tsk_isfinite(double val);
#define TSK_UUID_SIZE 36
int tsk_generate_uuid(char *dest, int flags);
/* TODO most of these can probably be macros so they compile out as no-ops.
* Let's do the 64 bit tsk_size_t switch first though. */
void *tsk_malloc(tsk_size_t size);
void *tsk_realloc(void *ptr, tsk_size_t size);
void *tsk_calloc(tsk_size_t n, size_t size);
void *tsk_memset(void *ptr, int fill, tsk_size_t size);
void *tsk_memcpy(void *dest, const void *src, tsk_size_t size);
void *tsk_memmove(void *dest, const void *src, tsk_size_t size);
int tsk_memcmp(const void *s1, const void *s2, tsk_size_t size);
/* Developer debug utilities. These are **not** threadsafe */
void tsk_set_debug_stream(FILE *f);
FILE *tsk_get_debug_stream(void);
/* Bit Array functionality */
// Bitsets are stored in 32-bit chunks, so each tsk_bitset_val_t can record
// the presence or absence of 32 distinct items.
#define TSK_BITSET_BITS ((tsk_size_t) 32)
typedef uint32_t tsk_bitset_val_t;
// A collection of `len` bitsets ("rows"), each made up of `row_len` chunks.
typedef struct {
tsk_size_t row_len; // Number of size TSK_BITSET_BITS chunks per row
tsk_size_t len; // Number of rows
// NOTE(review): presumably len * row_len chunks stored row by row --
// confirm against tsk_bitset_init in core.c.
tsk_bitset_val_t *data;
} tsk_bitset_t;
int tsk_bitset_init(tsk_bitset_t *self, tsk_size_t num_bits, tsk_size_t length);
void tsk_bitset_free(tsk_bitset_t *self);
void tsk_bitset_intersect(const tsk_bitset_t *self, tsk_size_t self_row,
const tsk_bitset_t *other, tsk_size_t other_row, tsk_bitset_t *out);
void tsk_bitset_subtract(tsk_bitset_t *self, tsk_size_t self_row,
const tsk_bitset_t *other, tsk_size_t other_row);
void tsk_bitset_union(tsk_bitset_t *self, tsk_size_t self_row, const tsk_bitset_t *other,
tsk_size_t other_row);
void tsk_bitset_set_bit(tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
bool tsk_bitset_contains(
const tsk_bitset_t *self, tsk_size_t row, const tsk_bitset_val_t bit);
tsk_size_t tsk_bitset_count(const tsk_bitset_t *self, tsk_size_t row);
void tsk_bitset_get_items(
const tsk_bitset_t *self, tsk_size_t row, tsk_id_t *items, tsk_size_t *n_items);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: c/tskit/genotypes.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2025 Tskit Developers
* Copyright (c) 2016-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include
#include
#include
#include
#include
#include
/* ======================================================== *
* Variant generator
* ======================================================== */
/* Write a human-readable summary of the variant (allele table and sample
 * count) to `out`. Intended for debugging. */
void
tsk_variant_print_state(const tsk_variant_t *self, FILE *out)
{
    tsk_size_t k;
    long long allele_len;

    fprintf(out, "tsk_variant state\n");
    fprintf(out, "user_alleles = %lld\n", (long long) self->user_alleles);
    fprintf(out, "num_alleles = %lld\n", (long long) self->num_alleles);
    k = 0;
    while (k < self->num_alleles) {
        /* Allele strings are printed with an explicit precision taken from
         * allele_lengths rather than relying on a terminator. */
        allele_len = (long long) self->allele_lengths[k];
        fprintf(out, "\tlen = %lld, '%.*s'\n", allele_len,
            (int) self->allele_lengths[k], self->alleles[k]);
        k++;
    }
    fprintf(out, "num_samples = %lld\n", (long long) self->num_samples);
}
void
tsk_vargen_print_state(const tsk_vargen_t *self, FILE *out)
{
tsk_variant_print_state(&self->variant, out);
}
/* Copy the fixed allele mapping specified by the user into local memory.
 *
 * The `self->max_alleles` NUL-terminated strings in `alleles` are packed
 * back-to-back into one buffer owned by the variant (`user_alleles_mem`).
 * `self->alleles[j]` is pointed at the j-th packed allele and
 * `self->allele_lengths[j]` is set to its length. Individual alleles are not
 * separated by NUL bytes; a single terminator is written after the last one.
 *
 * Returns 0 on success, or TSK_ERR_NO_MEMORY if the buffer cannot be
 * allocated.
 */
static int
tsk_variant_copy_alleles(tsk_variant_t *self, const char **alleles)
{
    int ret = 0;
    tsk_size_t j;
    size_t total_len, offset;

    self->num_alleles = self->max_alleles;

    total_len = 0;
    for (j = 0; j < self->num_alleles; j++) {
        self->allele_lengths[j] = (tsk_size_t) strlen(alleles[j]);
        total_len += (size_t) self->allele_lengths[j];
    }
    /* One byte per character plus the trailing NUL. (Sizing this by
     * sizeof(char *) over-allocates by the pointer width and hides the
     * missing terminator byte.) */
    self->user_alleles_mem
        = tsk_malloc((total_len + 1) * sizeof(*self->user_alleles_mem));
    if (self->user_alleles_mem == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    offset = 0;
    for (j = 0; j < self->num_alleles; j++) {
        /* Lengths are already known, so copy exactly that many bytes. */
        memcpy(self->user_alleles_mem + offset, alleles[j],
            (size_t) self->allele_lengths[j]);
        self->alleles[j] = self->user_alleles_mem + offset;
        offset += (size_t) self->allele_lengths[j];
    }
    self->user_alleles_mem[offset] = '\0';
out:
    return ret;
}
/* Set up the state needed to decode genotypes for a caller-specified list of
 * nodes: a private copy of the list (`alt_samples`) and a node-id -> list
 * position lookup table (`alt_sample_index_map`).
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY on allocation failure,
 * TSK_ERR_NODE_OUT_OF_BOUNDS if an id is not a valid node, or
 * TSK_ERR_DUPLICATE_SAMPLE if an id is listed more than once. Memory
 * allocated before a failure stays attached to `self` and is released by
 * tsk_variant_free.
 */
static int
variant_init_samples_and_index_map(tsk_variant_t *self,
    const tsk_treeseq_t *tree_sequence, const tsk_id_t *samples, tsk_size_t num_samples,
    size_t num_samples_alloc, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t node;
    const tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(tree_sequence);
    const tsk_size_t map_bytes = num_nodes * sizeof(*self->alt_sample_index_map);

    self->alt_samples = tsk_malloc(num_samples_alloc * sizeof(*samples));
    self->alt_sample_index_map = tsk_malloc(map_bytes);
    if (self->alt_sample_index_map == NULL || self->alt_samples == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    tsk_memcpy(self->alt_samples, samples, num_samples * sizeof(*samples));
    /* Every byte set to 0xff: each entry starts out as "not requested",
     * which the loop below tests for as TSK_NULL. */
    tsk_memset(self->alt_sample_index_map, 0xff, map_bytes);

    /* Build the reverse mapping, validating each id as we go */
    for (k = 0; k < num_samples; k++) {
        node = samples[k];
        if (!(node >= 0 && node < (tsk_id_t) num_nodes)) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }
        if (self->alt_sample_index_map[node] != TSK_NULL) {
            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
            goto out;
        }
        self->alt_sample_index_map[node] = (tsk_id_t) k;
    }
out:
    return ret;
}
/* Initialise `self` so that it can decode genotypes from `tree_sequence`.
 *
 * samples/num_samples: optional list of node ids to decode for. When NULL,
 *     the tree sequence's own samples are used and the tree is created with
 *     TSK_SAMPLE_LISTS; otherwise a private copy of the list is taken and
 *     genotypes are found by tree traversal.
 * alleles: optional NULL-terminated array of allele strings fixing the allele
 *     mapping. When NULL, alleles are discovered while decoding.
 * options: stored on the variant and consulted when decoding.
 *
 * Returns 0 on success or a negative error code. On failure the variant may
 * hold partial allocations, which tsk_variant_free releases.
 */
int
tsk_variant_init(tsk_variant_t *self, const tsk_treeseq_t *tree_sequence,
    const tsk_id_t *samples, tsk_size_t num_samples, const char **alleles,
    tsk_flags_t options)
{
    int ret = 0;
    const tsk_size_t allele_limit = INT32_MAX;
    tsk_size_t allele_capacity;
    tsk_size_t genotype_slots;
    tsk_flags_t tree_options;

    tsk_memset(self, 0, sizeof(tsk_variant_t));
    /* A NULL site id indicates that nothing has been decoded yet */
    self->site.id = TSK_NULL;
    self->tree_sequence = tree_sequence;

    /* Sample lists are only requested when decoding the default samples */
    tree_options = 0;
    if (samples == NULL) {
        tree_options = TSK_SAMPLE_LISTS;
    }
    ret = tsk_tree_init(&self->tree, tree_sequence, tree_options);
    if (ret != 0) {
        goto out;
    }

    if (samples != NULL) {
        /* Keep our own copy so the caller's array need not outlive us */
        self->samples = tsk_malloc(num_samples * sizeof(*samples));
        if (self->samples == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        tsk_memcpy(self->samples, samples, num_samples * sizeof(*samples));
        self->num_samples = num_samples;
    }

    self->options = options;
    self->user_alleles = (alleles != NULL);
    if (!self->user_alleles) {
        /* Arbitrary starting capacity --- we'll rarely have more than this */
        allele_capacity = 4;
    } else {
        /* The user's allele list is terminated by a NULL sentinel */
        allele_capacity = 0;
        while (alleles[allele_capacity] != NULL) {
            allele_capacity++;
        }
        if (allele_capacity > allele_limit) {
            ret = tsk_trace_error(TSK_ERR_TOO_MANY_ALLELES);
            goto out;
        }
        if (allele_capacity == 0) {
            ret = tsk_trace_error(TSK_ERR_ZERO_ALLELES);
            goto out;
        }
    }
    self->max_alleles = allele_capacity;
    self->alleles = tsk_calloc(allele_capacity, sizeof(*self->alleles));
    self->allele_lengths = tsk_malloc(allele_capacity * sizeof(*self->allele_lengths));
    if (self->allele_lengths == NULL || self->alleles == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (self->user_alleles) {
        ret = tsk_variant_copy_alleles(self, alleles);
        if (ret != 0) {
            goto out;
        }
    }

    if (self->samples != NULL) {
        /* Caller-specified nodes: build the private index map */
        genotype_slots = self->num_samples;
        ret = variant_init_samples_and_index_map(self, tree_sequence, self->samples,
            self->num_samples, (size_t) genotype_slots, self->options);
        if (ret != 0) {
            goto out;
        }
        self->sample_index_map = self->alt_sample_index_map;
    } else {
        /* Default: every sample in the tree sequence, using its index map */
        self->num_samples = tsk_treeseq_get_num_samples(tree_sequence);
        self->samples = tsk_malloc(self->num_samples * sizeof(*self->samples));
        if (self->samples == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        tsk_memcpy(self->samples, tsk_treeseq_get_samples(tree_sequence),
            self->num_samples * sizeof(*self->samples));
        self->sample_index_map = tsk_treeseq_get_sample_index_map(tree_sequence);
        genotype_slots = self->num_samples;
    }

    /* The traversal based algorithm, used when a list of samples is given,
     * doesn't rely on sample list tracking in the tree but needs a stack */
    if (self->alt_samples != NULL) {
        self->traversal_stack = tsk_malloc(
            tsk_treeseq_get_num_nodes(tree_sequence) * sizeof(*self->traversal_stack));
        if (self->traversal_stack == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
    }

    self->genotypes = tsk_malloc(genotype_slots * sizeof(*self->genotypes));
    if (self->genotypes == NULL || self->alleles == NULL
        || self->allele_lengths == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
out:
    return ret;
}
/* Initialise a variant generator over `tree_sequence`. The remaining
 * arguments are forwarded to tsk_variant_init for the embedded variant, and
 * its return value (0 on success, negative on error) is returned. */
int
tsk_vargen_init(tsk_vargen_t *self, const tsk_treeseq_t *tree_sequence,
    const tsk_id_t *samples, tsk_size_t num_samples, const char **alleles,
    tsk_flags_t options)
{
    tsk_bug_assert(tree_sequence != NULL);

    tsk_memset(self, 0, sizeof(tsk_vargen_t));
    self->tree_sequence = tree_sequence;
    return tsk_variant_init(
        &self->variant, tree_sequence, samples, num_samples, alleles, options);
}
/* Release all memory owned by the variant. The embedded tree is only freed
 * when the variant still references a tree sequence. Always returns 0. */
int
tsk_variant_free(tsk_variant_t *self)
{
    const bool owns_tree = (self->tree_sequence != NULL);

    if (owns_tree) {
        tsk_tree_free(&self->tree);
    }
    /* Sample bookkeeping */
    tsk_safe_free(self->samples);
    tsk_safe_free(self->alt_samples);
    tsk_safe_free(self->alt_sample_index_map);
    tsk_safe_free(self->traversal_stack);
    /* Genotype and allele storage */
    tsk_safe_free(self->genotypes);
    tsk_safe_free(self->alleles);
    tsk_safe_free(self->allele_lengths);
    tsk_safe_free(self->user_alleles_mem);
    return 0;
}
/* Release the resources held by the generator's embedded variant. Always
 * returns 0. */
int
tsk_vargen_free(tsk_vargen_t *self)
{
    tsk_variant_t *embedded = &self->variant;

    (void) tsk_variant_free(embedded);
    return 0;
}
/* Double the capacity of the `alleles` and `allele_lengths` arrays, capped at
 * INT32_MAX entries.
 *
 * Returns 0 on success, TSK_ERR_TOO_MANY_ALLELES if the arrays are already at
 * the cap, or TSK_ERR_NO_MEMORY if a reallocation fails. `max_alleles` is
 * only updated once both arrays have been grown, so after a failure it still
 * describes a capacity that both arrays really have.
 */
static int
tsk_variant_expand_alleles(tsk_variant_t *self)
{
    int ret = 0;
    void *p;
    const tsk_size_t hard_limit = INT32_MAX;
    tsk_size_t new_max;

    if (self->max_alleles == hard_limit) {
        ret = tsk_trace_error(TSK_ERR_TOO_MANY_ALLELES);
        goto out;
    }
    new_max = TSK_MIN(hard_limit, self->max_alleles * 2);

    p = tsk_realloc(self->alleles, new_max * sizeof(*self->alleles));
    if (p == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    self->alleles = p;

    p = tsk_realloc(self->allele_lengths, new_max * sizeof(*self->allele_lengths));
    if (p == NULL) {
        /* `alleles` may now be larger than recorded, which is harmless */
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    self->allele_lengths = p;

    /* Commit the new capacity only now that both arrays can hold it */
    self->max_alleles = new_max;
out:
    return ret;
}
/* Set the genotype of every sample below `node` to `derived`, using the
 * tree's sample lists (left_sample / right_sample / next_sample) to visit
 * them. Returns the number of genotypes that were TSK_MISSING_DATA before
 * being overwritten.
 *
 * This is a key function for performance: for common alleles it can entail
 * iterating over millions of samples, which is why the loop is kept minimal
 * and the arrays are restrict-qualified.
 */
static int TSK_WARN_UNUSED
tsk_variant_update_genotypes_sample_list(
    tsk_variant_t *self, tsk_id_t node, tsk_id_t derived)
{
    int32_t *restrict genotypes = self->genotypes;
    const tsk_id_t *restrict list_left = self->tree.left_sample;
    const tsk_id_t *restrict list_right = self->tree.right_sample;
    const tsk_id_t *restrict list_next = self->tree.next_sample;
    tsk_id_t cursor, last;
    int num_unmissed = 0;

    tsk_bug_assert(derived < INT32_MAX);

    cursor = list_left[node];
    if (cursor == TSK_NULL) {
        /* No samples below this node */
        return num_unmissed;
    }
    last = list_right[node];
    for (;; cursor = list_next[cursor]) {
        if (genotypes[cursor] == TSK_MISSING_DATA) {
            num_unmissed++;
        }
        genotypes[cursor] = (int32_t) derived;
        if (cursor == last) {
            break;
        }
    }
    return num_unmissed;
}
/* The functions below set genotypes by walking down the tree from a node to
 * the requested samples beneath it. Performance is less of a concern here
 * because this path should only be used with a very small number of samples,
 * so a visit callback is used to avoid duplicating code.
 */
typedef int (*visit_func_t)(tsk_variant_t *, tsk_id_t, tsk_id_t);

/* Visit `node` and all of its descendants, calling `visit` with the genotype
 * index of every node that has an entry in the sample index map.
 *
 * Returns the sum of the values returned by `visit`, or the first negative
 * value returned by `visit`, at which point the traversal stops.
 */
static int TSK_WARN_UNUSED
tsk_variant_traverse(
    tsk_variant_t *self, tsk_id_t node, tsk_id_t derived, visit_func_t visit)
{
    int ret = 0;
    tsk_id_t *restrict stack = self->traversal_stack;
    const tsk_id_t *restrict left_child = self->tree.left_child;
    const tsk_id_t *restrict right_sib = self->tree.right_sib;
    const tsk_id_t *restrict sample_index_map = self->sample_index_map;
    tsk_id_t current, child, genotype_index;
    int depth;
    int total = 0;

    stack[0] = node;
    depth = 1;
    while (depth > 0) {
        /* Pop the next node to process */
        depth--;
        current = stack[depth];
        genotype_index = sample_index_map[current];
        if (genotype_index != TSK_NULL) {
            ret = visit(self, genotype_index, derived);
            if (ret < 0) {
                goto out;
            }
            total += ret;
        }
        /* Push all of its children */
        child = left_child[current];
        while (child != TSK_NULL) {
            stack[depth] = child;
            depth++;
            child = right_sib[child];
        }
    }
    ret = total;
out:
    return ret;
}
/* Visit callback: store `derived` as the genotype at `sample_index`. Returns
 * 1 if the previous genotype there was TSK_MISSING_DATA and 0 otherwise. */
static int
tsk_variant_visit(tsk_variant_t *self, tsk_id_t sample_index, tsk_id_t derived)
{
    int32_t *slot;
    int was_missing;

    tsk_bug_assert(derived < INT32_MAX);
    tsk_bug_assert(sample_index != -1);

    slot = &self->genotypes[sample_index];
    was_missing = (*slot == TSK_MISSING_DATA);
    *slot = (int32_t) derived;
    return was_missing;
}
/* Traversal-based counterpart of tsk_variant_update_genotypes_sample_list:
 * walks the subtree below `node` and sets the genotype of each requested
 * node found there to `derived`. Returns what tsk_variant_traverse returns. */
static int TSK_WARN_UNUSED
tsk_variant_update_genotypes_traversal(
    tsk_variant_t *self, tsk_id_t node, tsk_id_t derived)
{
    const visit_func_t set_genotype = tsk_variant_visit;

    return tsk_variant_traverse(self, node, derived, set_genotype);
}
/* Mark as missing the genotype of every root of the current tree that has no
 * children and has an entry in the sample index map. Returns the number of
 * genotypes marked. */
static tsk_size_t
tsk_variant_mark_missing(tsk_variant_t *self)
{
    const tsk_id_t *restrict left_child = self->tree.left_child;
    const tsk_id_t *restrict right_sib = self->tree.right_sib;
    const tsk_id_t *restrict sample_index_map = self->sample_index_map;
    const tsk_id_t virtual_root = self->tree.virtual_root;
    int32_t *restrict genotypes = self->genotypes;
    tsk_size_t count = 0;
    tsk_id_t candidate, genotype_index;

    /* The roots are the children of the virtual root */
    candidate = left_child[virtual_root];
    while (candidate != TSK_NULL) {
        if (left_child[candidate] == TSK_NULL) {
            genotype_index = sample_index_map[candidate];
            if (genotype_index != TSK_NULL) {
                genotypes[genotype_index] = TSK_MISSING_DATA;
                count++;
            }
        }
        candidate = right_sib[candidate];
    }
    return count;
}
/* Mark as missing the genotype of every requested node (sample or not) that
 * is isolated in the current tree, i.e. has neither a parent nor any children
 * at this position. Returns the number of genotypes marked. */
static tsk_size_t
tsk_variant_mark_missing_any(tsk_variant_t *self)
{
    const tsk_id_t *restrict parent = self->tree.parent;
    const tsk_id_t *restrict left_child = self->tree.left_child;
    int32_t *restrict genotypes = self->genotypes;
    tsk_size_t count = 0;
    tsk_size_t k;
    tsk_id_t node;

    for (k = 0; k < self->num_samples; k++) {
        node = self->samples[k];
        if (parent[node] != TSK_NULL || left_child[node] != TSK_NULL) {
            /* Connected to the tree, so not missing */
            continue;
        }
        genotypes[k] = TSK_MISSING_DATA;
        count++;
    }
    return count;
}
/* Look up an allele in the variant's current allele table. `allele` need not
 * be NUL terminated; `length` gives its size in bytes. Returns the index of
 * the first matching allele, or -1 if there is none. */
static tsk_id_t
tsk_variant_get_allele_index(tsk_variant_t *self, const char *allele, tsk_size_t length)
{
    tsk_size_t k;

    for (k = 0; k < self->num_alleles; k++) {
        if (self->allele_lengths[k] != length) {
            continue;
        }
        if (tsk_memcmp(allele, self->alleles[k], length) == 0) {
            return (tsk_id_t) k;
        }
    }
    return -1;
}
/* Decode the genotypes at site `site_id` into `self->genotypes`, updating
 * `self->site`, the allele table and `self->has_missing_data`.
 *
 * Returns 0 on success or a negative error code; in particular
 * TSK_ERR_VARIANT_CANT_DECODE_COPY when the variant has no tree sequence
 * attached, and TSK_ERR_ALLELE_NOT_FOUND when a fixed allele list was given
 * and the site uses a state that is not in it.
 */
int
tsk_variant_decode(
    tsk_variant_t *self, tsk_id_t site_id, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    int recovered;
    tsk_id_t allele;
    tsk_size_t k, missing_count;
    const tsk_mutation_t *mut;
    const bool skip_missing = (self->options & TSK_ISOLATED_NOT_MISSING) != 0;
    const bool use_traversal = (self->alt_samples != NULL);
    int (*set_genotypes)(tsk_variant_t *, tsk_id_t, tsk_id_t);
    tsk_size_t (*flag_missing)(tsk_variant_t *);

    if (self->tree_sequence == NULL) {
        ret = tsk_trace_error(TSK_ERR_VARIANT_CANT_DECODE_COPY);
        goto out;
    }
    ret = tsk_treeseq_get_site(self->tree_sequence, site_id, &self->site);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_tree_seek(&self->tree, self->site.position, 0);
    if (ret != 0) {
        goto out;
    }

    /* Without a specified node list we rely on sample lists being active on
     * the tree, which is indicated by the presence of left_sample */
    if (self->tree.left_sample == NULL && !use_traversal) {
        ret = tsk_trace_error(TSK_ERR_NO_SAMPLE_LISTS);
        goto out;
    }

    /* For now a traversal is used to find genotypes whenever a set of nodes
     * was specified, but we should provide the option to do it via
     * tracked_samples in the tree also. There will be a tradeoff: with only
     * a small number of samples traversal is probably better, while for
     * large sets the sample list infrastructure may win. */
    if (use_traversal) {
        set_genotypes = tsk_variant_update_genotypes_traversal;
        /* A user-provided list may include non-samples, so isolation is
         * checked directly for each requested node. */
        flag_missing = tsk_variant_mark_missing_any;
    } else {
        set_genotypes = tsk_variant_update_genotypes_sample_list;
        flag_missing = tsk_variant_mark_missing;
    }

    if (!self->user_alleles) {
        /* Ancestral state is always allele 0 */
        self->alleles[0] = self->site.ancestral_state;
        self->allele_lengths[0] = self->site.ancestral_state_length;
        self->num_alleles = 1;
        allele = 0;
    } else {
        allele = tsk_variant_get_allele_index(
            self, self->site.ancestral_state, self->site.ancestral_state_length);
        if (allele == -1) {
            ret = tsk_trace_error(TSK_ERR_ALLELE_NOT_FOUND);
            goto out;
        }
    }

    /* The allelic state of every sample is generated by examining each
     * mutation in order and setting the state for all the samples under the
     * mutation's node. For sites with more than one mutation this depends on
     * the mutations being correctly ordered: any mutation that is above
     * another in the tree must be visited first. This is enforced via the
     * mutation.parent field, where a mutation's parent is required to appear
     * before it in the list of mutations.
     */
    k = 0;
    while (k < self->num_samples) {
        self->genotypes[k] = (int32_t) allele;
        k++;
    }

    /* Missing data is marked *before* updating the genotypes because
     * mutations directly over samples should not be missing */
    missing_count = 0;
    if (!skip_missing) {
        missing_count = flag_missing(self);
    }

    for (k = 0; k < self->site.mutations_length; k++) {
        mut = &self->site.mutations[k];
        /* Find, or create, the allele index for this derived state */
        allele = tsk_variant_get_allele_index(
            self, mut->derived_state, mut->derived_state_length);
        if (allele == -1) {
            if (self->user_alleles) {
                ret = tsk_trace_error(TSK_ERR_ALLELE_NOT_FOUND);
                goto out;
            }
            if (self->num_alleles == self->max_alleles) {
                ret = tsk_variant_expand_alleles(self);
                if (ret != 0) {
                    goto out;
                }
            }
            allele = (tsk_id_t) self->num_alleles;
            self->alleles[allele] = mut->derived_state;
            self->allele_lengths[allele] = mut->derived_state_length;
            self->num_alleles++;
        }

        /* The update returns how many missing values became not-missing */
        recovered = set_genotypes(self, mut->node, allele);
        if (recovered < 0) {
            ret = recovered;
            goto out;
        }
        missing_count -= (tsk_size_t) recovered;
    }
    self->has_missing_data = missing_count > 0;
out:
    return ret;
}
/* Make a self-contained snapshot of a decoded variant.
 *
 * `other` receives its own copies of the samples, genotypes, allele lengths
 * and allele strings of `self`, with the allele strings packed into
 * `other->user_alleles_mem`. The copy carries no tree sequence and a zeroed
 * tree, which marks it as fixed: tsk_variant_decode refuses to decode it.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY. On failure `other` may hold
 * partial allocations, which tsk_variant_free releases.
 */
int
tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other)
{
    int ret = 0;
    tsk_size_t total_len, offset, j;

    /* Copy everything, then replace what must not be shared with `self` */
    tsk_memcpy(other, self, sizeof(*other));
    /* Tree sequence left as NULL and zero'd tree is a way of indicating this
     * variant is fixed and cannot be further decoded. Note the argument
     * order: tsk_memset takes (ptr, fill, size). */
    other->tree_sequence = NULL;
    tsk_memset(&other->tree, 0, sizeof(other->tree));
    other->traversal_stack = NULL;
    other->samples = NULL;
    other->sample_index_map = NULL;
    other->alt_samples = NULL;
    other->alt_sample_index_map = NULL;
    other->user_alleles_mem = NULL;

    total_len = 0;
    for (j = 0; j < self->num_alleles; j++) {
        total_len += self->allele_lengths[j];
    }
    other->samples = tsk_malloc(other->num_samples * sizeof(*other->samples));
    other->genotypes = tsk_malloc(other->num_samples * sizeof(*other->genotypes));
    other->user_alleles_mem = tsk_malloc(total_len * sizeof(*other->user_alleles_mem));
    other->allele_lengths
        = tsk_malloc(other->num_alleles * sizeof(*other->allele_lengths));
    other->alleles = tsk_malloc(other->num_alleles * sizeof(*other->alleles));
    if (other->samples == NULL || other->genotypes == NULL
        || other->user_alleles_mem == NULL || other->allele_lengths == NULL
        || other->alleles == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    tsk_memcpy(
        other->samples, self->samples, other->num_samples * sizeof(*other->samples));
    tsk_memcpy(other->genotypes, self->genotypes,
        other->num_samples * sizeof(*other->genotypes));
    tsk_memcpy(other->allele_lengths, self->allele_lengths,
        other->num_alleles * sizeof(*other->allele_lengths));
    /* Pack the allele strings back-to-back (no terminators) and point each
     * entry of `alleles` at its slice of the buffer */
    offset = 0;
    for (j = 0; j < other->num_alleles; j++) {
        tsk_memcpy(other->user_alleles_mem + offset, self->alleles[j],
            other->allele_lengths[j] * sizeof(*other->user_alleles_mem));
        other->alleles[j] = other->user_alleles_mem + offset;
        offset += other->allele_lengths[j];
    }
out:
    return ret;
}
/* Decode the next site, if any, and expose it through `*variant`.
 *
 * Returns 1 when a variant was decoded, 0 when all sites have been visited
 * (`*variant` is then left untouched), or a negative error code from
 * tsk_variant_decode.
 */
int
tsk_vargen_next(tsk_vargen_t *self, tsk_variant_t **variant)
{
    int ret = 0;
    const tsk_size_t num_sites = tsk_treeseq_get_num_sites(self->tree_sequence);

    if ((tsk_size_t) self->site_index >= num_sites) {
        /* Exhausted */
        goto out;
    }
    ret = tsk_variant_decode(&self->variant, self->site_index, 0);
    if (ret != 0) {
        goto out;
    }
    self->site_index++;
    *variant = &self->variant;
    ret = 1;
out:
    return ret;
}
/* Overwrite with `missing_data_character` every stretch of each row of
 * `alignments_out` over which the corresponding node is isolated (no parent
 * and no children) in the local tree. Row i is L bytes long, belongs to
 * nodes[i], and covers positions [left, right).
 *
 * Returns 0 on success or a negative error code from the tree API.
 */
static int
tsk_treeseq_decode_alignments_overlay_missing(const tsk_treeseq_t *self,
    const tsk_id_t *nodes, tsk_size_t num_nodes, double left, double right,
    char missing_data_character, tsk_size_t L, char *alignments_out)
{
    int ret = 0;
    tsk_tree_t tree;
    tsk_size_t k, start, stop;
    const tsk_size_t window_start = (tsk_size_t) left;
    const tsk_size_t window_stop = (tsk_size_t) right;
    /* memset takes an `int`, while `missing_data_character` is a `char`
     * which can be signed or unsigned depending on the platform. Some
     * tools/compilers warn if we only cast to `unsigned char` and leave the
     * conversion to `int` implicit, hence the double cast. */
    const int fill = (int) (unsigned char) missing_data_character;
    tsk_id_t node;

    tsk_memset(&tree, 0, sizeof(tree));
    ret = tsk_tree_init(&tree, self, 0);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_tree_seek(&tree, left, 0);
    if (ret != 0) {
        goto out;
    }
    while (tree.index != -1 && tree.interval.left < right) {
        /* Clip this tree's interval to the requested window */
        start = TSK_MAX((tsk_size_t) tree.interval.left, window_start);
        stop = TSK_MIN((tsk_size_t) tree.interval.right, window_stop);
        if (stop > start) {
            for (k = 0; k < num_nodes; k++) {
                node = nodes[k];
                if (tree.parent[node] != TSK_NULL
                    || tree.left_child[node] != TSK_NULL) {
                    continue;
                }
                tsk_memset(alignments_out + k * L + (start - window_start), fill,
                    stop - start);
            }
        }
        ret = tsk_tree_next(&tree);
        if (ret < 0) {
            goto out;
        }
    }
    /* On success we should return 0, not TSK_TREE_OK from the last
     * tsk_tree_next */
    ret = 0;
out:
    tsk_tree_free(&tree);
    return ret;
}
/* Write the decoded allele of each node at every site in [left, right) into
 * `alignments_out`, where row i is L bytes long and belongs to nodes[i].
 * Genotypes decoded as missing are written as `missing_data_character`.
 *
 * Returns 0 on success or a negative error code; in particular
 * TSK_ERR_BAD_ALLELE_LENGTH if an allele is not exactly one byte long and
 * TSK_ERR_MISSING_CHAR_COLLISION if an allele equals the missing character.
 */
static int
tsk_treeseq_decode_alignments_overlay_sites(const tsk_treeseq_t *self,
    const tsk_id_t *nodes, tsk_size_t num_nodes, double left, double right,
    char missing_data_character, tsk_size_t L, char *alignments_out, tsk_flags_t options)
{
    int ret = 0;
    tsk_variant_t var;
    tsk_site_t site;
    tsk_id_t site_id, num_sites;
    char *allele_byte = NULL;
    char *grown = NULL;
    tsk_size_t allele_cap = 0;
    tsk_size_t row_index, a, column;
    int32_t genotype;
    char symbol;

    tsk_memset(&var, 0, sizeof(var));
    ret = tsk_variant_init(&var, self, nodes, num_nodes, NULL, options);
    if (ret != 0) {
        goto out;
    }
    num_sites = (tsk_id_t) tsk_treeseq_get_num_sites(self);
    for (site_id = 0; site_id < num_sites; site_id++) {
        ret = tsk_treeseq_get_site(self, site_id, &site);
        if (ret != 0) {
            goto out;
        }
        if (site.position < left) {
            continue;
        }
        if (site.position >= right) {
            break;
        }
        ret = tsk_variant_decode(&var, site_id, 0);
        if (ret != 0) {
            goto out;
        }
        if (var.num_alleles == 0) {
            continue;
        }

        /* Grow the per-allele byte table on demand; it is reused across sites */
        if (var.num_alleles > allele_cap) {
            grown = tsk_realloc(allele_byte, var.num_alleles * sizeof(*allele_byte));
            if (grown == NULL) {
                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
                goto out;
            }
            allele_byte = grown;
            allele_cap = var.num_alleles;
        }
        /* Each allele must be a single byte distinct from the missing marker */
        for (a = 0; a < var.num_alleles; a++) {
            if (var.allele_lengths[a] != 1) {
                ret = tsk_trace_error(TSK_ERR_BAD_ALLELE_LENGTH);
                goto out;
            }
            allele_byte[a] = var.alleles[a][0];
            if (allele_byte[a] == missing_data_character) {
                ret = tsk_trace_error(TSK_ERR_MISSING_CHAR_COLLISION);
                goto out;
            }
        }

        column = ((tsk_size_t) site.position) - (tsk_size_t) left;
        for (row_index = 0; row_index < num_nodes; row_index++) {
            genotype = var.genotypes[row_index];
            if (genotype == TSK_MISSING_DATA) {
                symbol = missing_data_character;
            } else {
                tsk_bug_assert(genotype >= 0);
                tsk_bug_assert((tsk_size_t) genotype < var.num_alleles);
                symbol = allele_byte[genotype];
            }
            alignments_out[row_index * L + column] = (char) symbol;
        }
    }
out:
    tsk_safe_free(allele_byte);
    tsk_variant_free(&var);
    return ret;
}
/* NOTE: We usually keep functions with a tsk_treeseq_t signature in trees.c.
 * tsk_treeseq_decode_alignments is implemented here instead because it
 * depends directly on tsk_variant_t and the genotype/allele machinery in
 * this file (and thus on genotypes.h). This slightly breaks that layering
 * convention but keeps the implementation close to the variant code.
 *
 * Fills `alignments_out` with one row of (right - left) bytes per entry of
 * `nodes`: each row starts as the [left, right) slice of `ref_seq`, isolated
 * stretches are then overwritten with `missing_data_character` (unless
 * TSK_ISOLATED_NOT_MISSING is set in `options`), and finally the decoded
 * allele at each site is written in.
 *
 * Returns 0 on success (including when num_nodes is 0),
 * TSK_ERR_BAD_PARAM_VALUE for an invalid argument, TSK_ERR_NODE_OUT_OF_BOUNDS
 * for an invalid node id, or another negative error code from decoding.
 */
int
tsk_treeseq_decode_alignments(const tsk_treeseq_t *self, const char *ref_seq,
    tsk_size_t ref_seq_length, const tsk_id_t *nodes, tsk_size_t num_nodes, double left,
    double right, char missing_data_character, char *alignments_out, tsk_flags_t options)
{
    int ret = 0;
    tsk_size_t k, L;
    tsk_id_t node_limit;
    double sequence_length;

    if (!tsk_treeseq_get_discrete_genome(self)) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if (ref_seq == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    sequence_length = tsk_treeseq_get_sequence_length(self);
    if (ref_seq_length != (tsk_size_t) sequence_length) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    /* The window must have integer coordinates ... */
    if (trunc(left) != left || trunc(right) != right) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    /* ... and be a non-empty interval within the sequence */
    if (left < 0 || right > sequence_length
        || (tsk_size_t) left >= (tsk_size_t) right) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    L = (tsk_size_t) right - (tsk_size_t) left;

    if (num_nodes == 0) {
        /* Nothing to write */
        return 0;
    }
    if (nodes == NULL || alignments_out == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    node_limit = (tsk_id_t) tsk_treeseq_get_num_nodes(self);
    for (k = 0; k < num_nodes; k++) {
        if (nodes[k] < 0 || nodes[k] >= node_limit) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }
    }

    /* Start every row off as the reference slice */
    for (k = 0; k < num_nodes; k++) {
        tsk_memcpy(alignments_out + k * L, ref_seq + (tsk_size_t) left, L);
    }
    if ((options & TSK_ISOLATED_NOT_MISSING) == 0) {
        ret = tsk_treeseq_decode_alignments_overlay_missing(self, nodes, num_nodes, left,
            right, missing_data_character, L, alignments_out);
        if (ret != 0) {
            goto out;
        }
    }
    ret = tsk_treeseq_decode_alignments_overlay_sites(self, nodes, num_nodes, left,
        right, missing_data_character, L, alignments_out, options);
    if (ret != 0) {
        goto out;
    }
out:
    return ret;
}
================================================
FILE: c/tskit/genotypes.h
================================================
/*
* MIT License
*
* Copyright (c) 2019-2022 Tskit Developers
* Copyright (c) 2016-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSK_GENOTYPES_H
#define TSK_GENOTYPES_H
#ifdef __cplusplus
extern "C" {
#endif
#include
#define TSK_ISOLATED_NOT_MISSING (1 << 1)
/**
@brief A variant at a specific site.
@rst
Used to generate the genotypes for a given set of samples at a given
site.
@endrst
*/
typedef struct {
/** @brief Unowned reference to the tree sequence of the variant */
const tsk_treeseq_t *tree_sequence;
/** @brief The site this variant is currently decoded at */
tsk_site_t site;
/* Tree object embedded in the variant. NOTE(review): presumably the tree
* covering `site`, used while decoding; confirm against genotypes.c. */
tsk_tree_t tree;
/** @brief Array of allele strings that the genotypes of the variant refer to.
* These are not NULL terminated - use `allele_lengths`, for example:
* `printf("%.*s", (int) var->allele_lengths[j], var->alleles[j]);`
*/
const char **alleles;
/** @brief Lengths of the allele strings */
tsk_size_t *allele_lengths;
/** @brief Length of the allele array */
tsk_size_t num_alleles;
/* NOTE(review): presumably the allocated capacity of `alleles` and
* `allele_lengths`; confirm against genotypes.c. */
tsk_size_t max_alleles;
/** @brief If True the genotypes of isolated nodes have been decoded to the "missing"
* genotype. If False they are set to the ancestral state (in the absence of
* mutations above them) */
bool has_missing_data;
/** @brief Array of genotypes for the current site */
int32_t *genotypes;
/** @brief Number of samples */
tsk_size_t num_samples;
/** @brief Array of sample ids used */
tsk_id_t *samples;
/* The remaining members are not documented in this header.
* NOTE(review): they look like internal working state of the decoder (sample
* index lookup, user-supplied allele storage, traversal scratch space and the
* options passed at init); confirm before relying on any of them. */
const tsk_id_t *sample_index_map;
bool user_alleles;
char *user_alleles_mem;
tsk_id_t *traversal_stack;
tsk_flags_t options;
tsk_id_t *alt_samples;
tsk_id_t *alt_sample_index_map;
} tsk_variant_t;
/* All vargen related structs and methods were deprecated in C API v1.0.
* A tsk_vargen_t bundles a tree sequence reference, a site index and an
* embedded tsk_variant_t. NOTE(review): site_index is presumably the next
* site to be decoded by tsk_vargen_next; confirm against genotypes.c. */
typedef struct {
const tsk_treeseq_t *tree_sequence;
tsk_id_t site_index;
tsk_variant_t variant;
} tsk_vargen_t;
/**
@defgroup VARIANT_API_GROUP Variant API for obtaining genotypes.
@{
*/
/**
@brief Initialises the variant by allocating the internal memory
@rst
This must be called before any operations are performed on the variant.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
@endrst
@param self A pointer to an uninitialised tsk_variant_t object.
@param tree_sequence A pointer to the tree sequence from which this variant
will decode genotypes. No copy is taken, so this tree sequence must persist
for the lifetime of the variant.
@param samples Optional. Either `NULL` or an array of node ids of the samples that are to
have their genotypes decoded. A copy of this array will be taken by the variant. If
`NULL` then the samples from the tree sequence will be used.
@param num_samples The number of ids in the samples array, ignored if `samples` is `NULL`
@param alleles Optional. Either ``NULL`` or an array of string alleles with a terminal
``NULL`` sentinel value.
If specified, the genotypes will be decoded to match the index in this allele array.
If ``NULL`` then alleles will be automatically determined from the mutations encountered.
@param options Variant options. Either ``0`` or ``TSK_ISOLATED_NOT_MISSING`` which
if specified indicates that isolated sample nodes should not be decoded as the "missing"
state but as the ancestral state (or the state of any mutation above them).
@return Return 0 on success or a negative value on failure.
*/
int tsk_variant_init(tsk_variant_t *self, const tsk_treeseq_t *tree_sequence,
const tsk_id_t *samples, tsk_size_t num_samples, const char **alleles,
tsk_flags_t options);
/**
@brief Copies the state of this variant to another variant
@rst
Copies the site, genotypes and alleles from this variant to another. Note that
the other variant should be uninitialised as this method does not free any
memory that the other variant owns. After copying `other` is frozen and
this restricts it from being further decoded at any site. `self` remains unchanged.
@endrst
@param self A pointer to an initialised and decoded tsk_variant_t object.
@param other A pointer to an uninitialised tsk_variant_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_variant_restricted_copy(const tsk_variant_t *self, tsk_variant_t *other);
/**
@brief Decode the genotypes at the given site, storing them in this variant.
@rst
Decodes the genotypes for this variant's samples, indexed to this variant's alleles,
at the specified site.
This method is most efficient at decoding sites in-order, either forwards or backwards
along the tree sequence. Resulting genotypes are stored in the ``genotypes`` member of
this variant.
@endrst
@param self A pointer to an initialised tsk_variant_t object.
@param site_id A valid site id for the tree sequence of this variant.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of `tskit`.
@return Return 0 on success or a negative value on failure.
*/
int tsk_variant_decode(tsk_variant_t *self, tsk_id_t site_id, tsk_flags_t options);
/**
@brief Free the internal memory for the specified variant.
@param self A pointer to an initialised tsk_variant_t object.
@return Always returns 0.
*/
int tsk_variant_free(tsk_variant_t *self);
/**
@brief Print out the state of this variant to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_variant_t object.
@param out The stream to write the summary to.
*/
void tsk_variant_print_state(const tsk_variant_t *self, FILE *out);
/** @} */
/* Deprecated vargen methods (since C API v1.0) */
int tsk_vargen_init(tsk_vargen_t *self, const tsk_treeseq_t *tree_sequence,
const tsk_id_t *samples, tsk_size_t num_samples, const char **alleles,
tsk_flags_t options);
int tsk_vargen_next(tsk_vargen_t *self, tsk_variant_t **variant);
int tsk_vargen_free(tsk_vargen_t *self);
void tsk_vargen_print_state(const tsk_vargen_t *self, FILE *out);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: c/tskit/haplotype_matching.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2025 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include
#include
#include
#include
#include
#include
#include
/* Upper bound on the number of 64-bit words in one optimal value set. It sizes
* the fixed-size arrays in this file (zero_block and the scratch arrays in
* compute_optimal_value_general) and is the value tsk_ls_hmm_init stores in
* max_parsimony_words. */
#define MAX_PARSIMONY_WORDS 256
/* NULL-terminated allele lists installed per site by tsk_ls_hmm_init: the 0/1
* list is the default, the ACGT list is used when TSK_ALLELES_ACGT is set. */
const char *_zero_one_alleles[] = { "0", "1", NULL };
const char *_acgt_alleles[] = { "A", "C", "G", "T", NULL };
/* qsort()-compatible comparator that orders doubles ascending.
 * Returns -1, 0 or 1; operands that compare neither less nor greater
 * (including NaN) are reported as equal. */
static int
cmp_double(const void *a, const void *b)
{
    const double lhs = *(const double *) a;
    const double rhs = *(const double *) b;

    if (lhs < rhs) {
        return -1;
    }
    if (lhs > rhs) {
        return 1;
    }
    return 0;
}
/* qsort()-compatible comparator for tsk_argsort_t records: ascending by
 * `value`, with ties (or unordered values) resolved by ascending `index` so
 * that the resulting order is deterministic. Returns -1, 0 or 1. */
static int
cmp_argsort(const void *a, const void *b)
{
    const tsk_argsort_t *lhs = (const tsk_argsort_t *) a;
    const tsk_argsort_t *rhs = (const tsk_argsort_t *) b;

    if (lhs->value < rhs->value) {
        return -1;
    }
    if (lhs->value > rhs->value) {
        return 1;
    }
    /* Values tie: fall back to the original position. */
    if (lhs->index < rhs->index) {
        return -1;
    }
    if (lhs->index > rhs->index) {
        return 1;
    }
    return 0;
}
/* Consistency checks on the HMM state, enforced with tsk_bug_assert:
 *  - every transition with a live node is the one its node's index slot names;
 *  - when any transitions exist, every node's index slot names a transition
 *    for that node, and the HMM's parent array agrees with the tree's. */
static void
tsk_ls_hmm_check_state(tsk_ls_hmm_t *self)
{
    tsk_id_t *index = self->transition_index;
    tsk_value_transition_t *transitions = self->transitions;
    tsk_id_t k, node;

    for (k = 0; k < (tsk_id_t) self->num_transitions; k++) {
        node = transitions[k].tree_node;
        if (node == TSK_NULL) {
            continue;
        }
        tsk_bug_assert(index[node] == k);
    }

    /* tsk_bug_assert(self->num_transitions <= self->num_samples); */
    if (self->num_transitions == 0) {
        return;
    }
    for (node = 0; node < (tsk_id_t) self->num_nodes; node++) {
        if (index[node] != TSK_NULL) {
            tsk_bug_assert(transitions[index[node]].tree_node == node);
        }
        tsk_bug_assert(self->tree.parent[node] == self->parent[node]);
    }
}
/* Write a human-readable dump of the HMM state to `out`: the scalar counters,
 * the per-site allele lists, every transition slot and (when transitions
 * exist) the per-node parent / transition index table. Finishes by running
 * the internal consistency checks. */
void
tsk_ls_hmm_print_state(tsk_ls_hmm_t *self, FILE *out)
{
    tsk_size_t site, k;
    const tsk_value_transition_t *tr;

    fprintf(out, "tree_sequence = %p\n", (void *) self->tree_sequence);
    fprintf(out, "num_sites = %lld\n", (long long) self->num_sites);
    fprintf(out, "num_samples = %lld\n", (long long) self->num_samples);
    fprintf(out, "num_values = %lld\n", (long long) self->num_values);
    fprintf(out, "max_values = %lld\n", (long long) self->max_values);
    fprintf(out, "num_optimal_value_set_words = %lld\n",
        (long long) self->num_optimal_value_set_words);

    fprintf(out, "sites::\n");
    for (site = 0; site < self->num_sites; site++) {
        fprintf(out, "%lld\t%lld\t[", (long long) site,
            (long long) self->num_alleles[site]);
        for (k = 0; k < self->num_alleles[site]; k++) {
            fprintf(out, "%s,", self->alleles[site][k]);
        }
        fprintf(out, "]\n");
    }

    fprintf(out, "transitions::%lld\n", (long long) self->num_transitions);
    for (k = 0; k < self->num_transitions; k++) {
        tr = &self->transitions[k];
        fprintf(out, "tree_node=%lld\tvalue=%.14f\tvalue_index=%lld\n",
            (long long) tr->tree_node, tr->value, (long long) tr->value_index);
    }

    if (self->num_transitions > 0) {
        fprintf(out, "tree::%lld\n", (long long) self->num_nodes);
        for (k = 0; k < self->num_nodes; k++) {
            fprintf(out, "%lld\tparent=%lld\ttransition=%lld\n", (long long) k,
                (long long) self->parent[k], (long long) self->transition_index[k]);
        }
    }
    tsk_ls_hmm_check_state(self);
}
/*
 * Initialise an LS HMM over `tree_sequence`.
 *
 * The struct is zeroed first, then all per-node, per-site and per-transition
 * working arrays are allocated and the embedded tree is initialised.
 *
 * self:               uninitialised object to set up.
 * tree_sequence:      tree sequence the HMM runs over; only the pointer is
 *                     stored, no copy is taken.
 * recombination_rate: array with one entry per site; copied.
 * mutation_rate:      array with one entry per site; copied.
 * options:            if TSK_ALLELES_ACGT is set every site uses the
 *                     A/C/G/T allele list, otherwise the 0/1 list.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY if any allocation fails, or the
 * error returned by tsk_tree_init. Nothing is released here on failure; the
 * members that were allocated are left in place.
 */
int TSK_WARN_UNUSED
tsk_ls_hmm_init(tsk_ls_hmm_t *self, tsk_treeseq_t *tree_sequence,
    double *recombination_rate, double *mutation_rate, tsk_flags_t options)
{
    int ret = TSK_ERR_GENERIC;
    tsk_size_t l;

    tsk_memset(self, 0, sizeof(tsk_ls_hmm_t));
    self->tree_sequence = tree_sequence;
    self->precision = 6; /* Seems like a safe value, but probably not ideal for perf */
    self->num_sites = tsk_treeseq_get_num_sites(tree_sequence);
    self->num_samples = tsk_treeseq_get_num_samples(tree_sequence);
    self->num_alleles = tsk_malloc(self->num_sites * sizeof(*self->num_alleles));
    self->num_nodes = tsk_treeseq_get_num_nodes(tree_sequence);
    self->parent = tsk_malloc(self->num_nodes * sizeof(*self->parent));
    self->allelic_state = tsk_malloc(self->num_nodes * sizeof(*self->allelic_state));
    self->transition_index
        = tsk_malloc(self->num_nodes * sizeof(*self->transition_index));
    self->transition_stack
        = tsk_malloc(self->num_nodes * sizeof(*self->transition_stack));
    /* We can't have more than 2 * num_samples transitions, so we use this as the
     * upper bound. Because of the implementation, we'll also have to worry about
     * the extra mutations at the first site, which in worst case involves all
     * mutations. We can definitely save some memory here if we want to. */
    self->max_transitions
        = 2 * self->num_samples + tsk_treeseq_get_num_mutations(tree_sequence);
    /* FIXME Arbitrarily doubling this after hitting problems */
    self->max_transitions *= 2;
    self->transitions = tsk_malloc(self->max_transitions * sizeof(*self->transitions));
    /* Size the copy buffer from its own element type rather than relying on it
     * matching that of `transitions`. */
    self->transitions_copy
        = tsk_malloc(self->max_transitions * sizeof(*self->transitions_copy));
    self->num_transition_samples
        = tsk_malloc(self->max_transitions * sizeof(*self->num_transition_samples));
    self->transition_parent
        = tsk_malloc(self->max_transitions * sizeof(*self->transition_parent));
    self->transition_time_order
        = tsk_malloc(self->max_transitions * sizeof(*self->transition_time_order));
    self->values = tsk_malloc(self->max_transitions * sizeof(*self->values));
    self->recombination_rate
        = tsk_malloc(self->num_sites * sizeof(*self->recombination_rate));
    self->mutation_rate = tsk_malloc(self->num_sites * sizeof(*self->mutation_rate));
    self->alleles = tsk_calloc(self->num_sites, sizeof(*self->alleles));
    if (self->num_alleles == NULL || self->parent == NULL || self->allelic_state == NULL
        || self->transition_index == NULL || self->transition_stack == NULL
        || self->transitions == NULL || self->transitions_copy == NULL
        || self->num_transition_samples == NULL || self->transition_parent == NULL
        || self->transition_time_order == NULL || self->values == NULL
        || self->recombination_rate == NULL || self->mutation_rate == NULL
        || self->alleles == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    for (l = 0; l < self->num_sites; l++) {
        /* TODO check these inputs */
        self->recombination_rate[l] = recombination_rate[l];
        self->mutation_rate[l] = mutation_rate[l];
        if (options & TSK_ALLELES_ACGT) {
            self->num_alleles[l] = 4;
            self->alleles[l] = _acgt_alleles;
        } else {
            /* Default to the 0/1 alleles */
            self->num_alleles[l] = 2;
            self->alleles[l] = _zero_one_alleles;
        }
    }
    ret = tsk_tree_init(&self->tree, self->tree_sequence, 0);
    if (ret != 0) {
        goto out;
    }
    self->num_values = 0;
    self->max_values = 0;
    /* Keep this as a struct variable so that we can test overflow, but this
     * should never be set to more than MAX_PARSIMONY_WORDS as we're doing
     * a bunch of stack allocations based on this. */
    self->max_parsimony_words = MAX_PARSIMONY_WORDS;
    ret = 0;
out:
    return ret;
}
/* Store `precision` in the HMM's `precision` field, replacing the default of 6
* assigned by tsk_ls_hmm_init. No validation is performed. Always returns 0. */
int
tsk_ls_hmm_set_precision(tsk_ls_hmm_t *self, unsigned int precision)
{
self->precision = precision;
return 0;
}
/*
 * Release everything owned by the HMM: the embedded tree and every working
 * array allocated by tsk_ls_hmm_init, plus the optimal value sets allocated
 * later on demand. Each member is released exactly once. The tree sequence is
 * not owned and is left untouched. Always returns 0.
 */
int
tsk_ls_hmm_free(tsk_ls_hmm_t *self)
{
    tsk_tree_free(&self->tree);
    tsk_safe_free(self->recombination_rate);
    tsk_safe_free(self->mutation_rate);
    tsk_safe_free(self->alleles);
    tsk_safe_free(self->num_alleles);
    tsk_safe_free(self->parent);
    tsk_safe_free(self->allelic_state);
    tsk_safe_free(self->transition_index);
    tsk_safe_free(self->transition_stack);
    tsk_safe_free(self->transitions);
    tsk_safe_free(self->transitions_copy);
    tsk_safe_free(self->transition_time_order);
    tsk_safe_free(self->values);
    tsk_safe_free(self->num_transition_samples);
    tsk_safe_free(self->transition_parent);
    tsk_safe_free(self->optimal_value_sets);
    return 0;
}
/* Put the HMM back into its starting state: every per-node slot (parent,
 * transition index, allelic state) and every transition-parent slot is filled
 * with 0xff bytes, the transition and transition-sample arrays are zeroed,
 * and then one transition carrying `value` is created for each sample node
 * of the tree sequence. Always returns 0. */
static int
tsk_ls_hmm_reset(tsk_ls_hmm_t *self, double value)
{
    const tsk_size_t num_nodes = self->num_nodes;
    const tsk_size_t capacity = self->max_transitions;
    const tsk_id_t *sample_nodes;
    tsk_id_t node;
    tsk_size_t k;

    /* Per-node arrays. */
    tsk_memset(self->parent, 0xff, num_nodes * sizeof(*self->parent));
    tsk_memset(
        self->transition_index, 0xff, num_nodes * sizeof(*self->transition_index));
    tsk_memset(self->allelic_state, 0xff, num_nodes * sizeof(*self->allelic_state));

    /* Per-transition arrays. */
    tsk_memset(self->transitions, 0, capacity * sizeof(*self->transitions));
    tsk_memset(self->num_transition_samples, 0,
        capacity * sizeof(*self->num_transition_samples));
    tsk_memset(
        self->transition_parent, 0xff, capacity * sizeof(*self->transition_parent));

    /* Seed one transition per sample, in sample order. */
    sample_nodes = tsk_treeseq_get_samples(self->tree_sequence);
    for (k = 0; k < self->num_samples; k++) {
        node = sample_nodes[k];
        self->transitions[k].tree_node = node;
        self->transitions[k].value = value;
        self->transition_index[node] = (tsk_id_t) k;
    }
    self->num_transitions = self->num_samples;
    return 0;
}
/* Moving to a new tree can leave transitions attached to nodes that were
 * roots of the previous tree and are now disconnected. Drop those: any
 * transition whose node has no parent and is not a root of the current tree
 * is detached. Always returns 0. */
static int
tsk_ls_hmm_remove_dead_roots(tsk_ls_hmm_t *self)
{
    const tsk_id_t root_marker = -2;
    tsk_id_t *restrict T_index = self->transition_index;
    tsk_value_transition_t *restrict T = self->transitions;
    const tsk_id_t *restrict right_sib = self->tree.right_sib;
    const tsk_id_t *restrict parent = self->parent;
    tsk_id_t root = tsk_tree_get_left_root(&self->tree);
    tsk_id_t node;
    tsk_size_t k;

    /* Tag the transitions sitting on current roots. The value_index slot is
     * not used between iterations, so it is free to borrow as the tag. */
    while (root != TSK_NULL) {
        if (T_index[root] != TSK_NULL) {
            T[T_index[root]].value_index = root_marker;
        }
        root = right_sib[root];
    }

    for (k = 0; k < self->num_transitions; k++) {
        node = T[k].tree_node;
        if (node == TSK_NULL) {
            continue;
        }
        if (parent[node] == TSK_NULL && T[k].value_index != root_marker) {
            /* Parentless but untagged: a root of the old tree only. */
            T_index[node] = TSK_NULL;
            T[k].tree_node = TSK_NULL;
        }
        /* Clear the borrowed tag slot again. */
        T[k].value_index = -1;
    }
    return 0;
}
/*
 * Apply the edge differences held in self->tree.tree_pos to the HMM's own
 * parent array and transition set.
 *
 * Edges going out are processed first: the child of each removed edge is
 * given its own transition (copying the value of its nearest ancestor that
 * has one) before it is detached. Edges coming in are then grafted, and a
 * child whose value equals that of the transition it now sits beneath has its
 * own transition retired. Finally transitions left on dead roots are removed.
 *
 * direction: step added to the index when walking the `out` and `in` ranges
 *            of the tree position (the loops run from .start until .stop).
 *
 * Returns the result of tsk_ls_hmm_remove_dead_roots.
 */
static int
tsk_ls_hmm_update_tree(tsk_ls_hmm_t *self, int direction)
{
    int ret = 0;
    tsk_id_t *restrict parent = self->parent;
    tsk_id_t *restrict T_index = self->transition_index;
    const tsk_id_t *restrict edges_child = self->tree_sequence->tables->edges.child;
    const tsk_id_t *restrict edges_parent = self->tree_sequence->tables->edges.parent;
    tsk_value_transition_t *restrict T = self->transitions;
    tsk_id_t u, c, p, j, e;
    tsk_value_transition_t *vt;
    tsk_tree_position_t tree_pos;

    tree_pos = self->tree.tree_pos;
    for (j = tree_pos.out.start; j != tree_pos.out.stop; j += direction) {
        e = tree_pos.out.order[j];
        c = edges_child[e];
        u = c;
        if (T_index[u] == TSK_NULL) {
            /* Ensure the subtree we're detaching has a transition at the root */
            while (T_index[u] == TSK_NULL) {
                u = parent[u];
                tsk_bug_assert(u != TSK_NULL);
            }
            tsk_bug_assert(self->num_transitions < self->max_transitions);
            T_index[c] = (tsk_id_t) self->num_transitions;
            T[self->num_transitions].tree_node = c;
            T[self->num_transitions].value = T[T_index[u]].value;
            self->num_transitions++;
        }
        parent[c] = TSK_NULL;
    }

    for (j = tree_pos.in.start; j != tree_pos.in.stop; j += direction) {
        e = tree_pos.in.order[j];
        c = edges_child[e];
        p = edges_parent[e];
        parent[c] = p;
        u = p;
        if (parent[p] == TSK_NULL) {
            /* Grafting onto a new root. */
            if (T_index[p] == TSK_NULL) {
                /* Check capacity before the new slot index is published. */
                tsk_bug_assert(self->num_transitions < self->max_transitions);
                T_index[p] = (tsk_id_t) self->num_transitions;
                T[self->num_transitions].tree_node = p;
                T[self->num_transitions].value = T[T_index[c]].value;
                self->num_transitions++;
            }
        } else {
            /* Grafting into an existing subtree. Verify each step up the tree
             * before it is used to index T_index, so that running off the
             * root is caught rather than read out of bounds. */
            while (T_index[u] == TSK_NULL) {
                u = parent[u];
                tsk_bug_assert(u != TSK_NULL);
            }
        }
        tsk_bug_assert(T_index[u] != -1 && T_index[c] != -1);
        if (T[T_index[u]].value == T[T_index[c]].value) {
            vt = &T[T_index[c]];
            /* Mark the value transition as unused */
            vt->value = -1;
            vt->tree_node = TSK_NULL;
            T_index[c] = TSK_NULL;
        }
    }

    ret = tsk_ls_hmm_remove_dead_roots(self);
    return ret;
}
/*
 * Look up an allele in the allele list configured for `site`.
 *
 * self:          the HMM whose per-site allele lists are searched.
 * site:          index of the site.
 * allele_state:  allele text; only the first `allele_length` bytes are read,
 *                so it does not need to be NUL terminated.
 * allele_length: number of bytes in `allele_state`.
 *
 * Returns the zero-based index of the first allele whose length and contents
 * both match, or TSK_ERR_ALLELE_NOT_FOUND if there is none. Alleles of a
 * different length are skipped rather than ending the search, so a match is
 * found wherever it sits in the list.
 */
static int
tsk_ls_hmm_get_allele_index(tsk_ls_hmm_t *self, tsk_id_t site, const char *allele_state,
    const tsk_size_t allele_length)
{
    /* Note we're not doing tsk_trace_error here because it would require changing
     * the logic of the function. Could be done easily enough, though */
    int ret = TSK_ERR_ALLELE_NOT_FOUND;
    const char **alleles = self->alleles[site];
    const tsk_id_t num_alleles = (tsk_id_t) self->num_alleles[site];
    tsk_id_t j;

    for (j = 0; j < num_alleles; j++) {
        if (strlen(alleles[j]) != allele_length) {
            /* Cannot be this allele; keep looking at the rest. */
            continue;
        }
        if (strncmp(alleles[j], allele_state, (size_t) allele_length) == 0) {
            ret = (int) j;
            break;
        }
    }
    return ret;
}
/*
* Update the value of every live transition for `site`, given the allele
* index `haplotype_state` of the query haplotype at that site.
*
* The allelic_state array is used as scratch: it is filled in for the roots
* and the mutation nodes, consulted, and then set back to TSK_MISSING_DATA
* for those same nodes before returning successfully.
*
* Returns 0 on success, a negative value if the ancestral or a derived state
* is not in the site's allele list, or the non-zero value returned by the
* next_probability callback.
*/
static int
tsk_ls_hmm_update_probabilities(
tsk_ls_hmm_t *self, const tsk_site_t *site, int32_t haplotype_state)
{
int ret = 0;
tsk_id_t root;
tsk_tree_t *tree = &self->tree;
tsk_id_t *restrict parent = self->parent;
tsk_id_t *restrict T_index = self->transition_index;
tsk_value_transition_t *restrict T = self->transitions;
int32_t *restrict allelic_state = self->allelic_state;
const tsk_id_t left_root = tsk_tree_get_left_root(tree);
tsk_mutation_t mut;
tsk_id_t j, u, v;
double x;
bool match;
/* Set the allelic states */
/* Every root of the current tree starts with the ancestral allele index. */
ret = tsk_ls_hmm_get_allele_index(
self, site->id, site->ancestral_state, site->ancestral_state_length);
if (ret < 0) {
goto out;
}
for (root = left_root; root != TSK_NULL; root = tree->right_sib[root]) {
allelic_state[root] = (int32_t) ret;
}
/* Each mutation node gets its derived allele index. A mutation node without
* a transition is given a new one, copying the value of the nearest
* ancestor that has a transition. */
for (j = 0; j < (tsk_id_t) site->mutations_length; j++) {
mut = site->mutations[j];
ret = tsk_ls_hmm_get_allele_index(
self, site->id, mut.derived_state, mut.derived_state_length);
if (ret < 0) {
goto out;
}
u = mut.node;
allelic_state[u] = (int32_t) ret;
if (T_index[u] == TSK_NULL) {
while (T_index[u] == TSK_NULL) {
u = parent[u];
}
tsk_bug_assert(self->num_transitions < self->max_transitions);
T_index[mut.node] = (tsk_id_t) self->num_transitions;
T[self->num_transitions].tree_node = mut.node;
T[self->num_transitions].value = T[T_index[u]].value;
self->num_transitions++;
}
}
/* Recompute the value of each live transition via the next_probability
* callback. A missing query state counts as a match. */
for (j = 0; j < (tsk_id_t) self->num_transitions; j++) {
u = T[j].tree_node;
if (u != TSK_NULL) {
/* Get the allelic_state at u. */
/* Walk up from u to the first node that has a state recorded. */
v = u;
while (allelic_state[v] == TSK_MISSING_DATA) {
v = parent[v];
tsk_bug_assert(v != -1);
}
match = haplotype_state == TSK_MISSING_DATA
|| haplotype_state == allelic_state[v];
ret = self->next_probability(self, site->id, T[j].value, match, u, &x);
if (ret != 0) {
goto out;
}
T[j].value = x;
}
}
/* Unset the allelic states */
for (root = left_root; root != TSK_NULL; root = tree->right_sib[root]) {
allelic_state[root] = TSK_MISSING_DATA;
}
for (j = 0; j < (tsk_id_t) site->mutations_length; j++) {
mut = site->mutations[j];
allelic_state[mut.node] = TSK_MISSING_DATA;
}
ret = 0;
out:
return ret;
}
/* Collapse the values of the live transitions into a sorted table of distinct
 * values (self->values, self->num_values) and record in each live
 * transition's value_index the position of its value in that table. At least
 * one live transition must exist. Always returns 0. */
static int
tsk_ls_hmm_discretise_values(tsk_ls_hmm_t *self)
{
    tsk_value_transition_t *T = self->transitions;
    double *values = self->values;
    tsk_size_t k, num_collected, num_unique;

    /* Gather the values of all live transitions. */
    num_collected = 0;
    for (k = 0; k < self->num_transitions; k++) {
        if (T[k].tree_node == TSK_NULL) {
            continue;
        }
        values[num_collected] = T[k].value;
        num_collected++;
    }
    tsk_bug_assert(num_collected > 0);

    /* Sort, then compact runs of equal values in place. */
    qsort(values, (size_t) num_collected, sizeof(*values), cmp_double);
    num_unique = 1;
    for (k = 1; k < num_collected; k++) {
        if (values[k] != values[num_unique - 1]) {
            values[num_unique] = values[k];
            num_unique++;
        }
    }
    self->num_values = num_unique;

    /* Map every live transition onto its slot in the distinct-value table. */
    for (k = 0; k < self->num_transitions; k++) {
        if (T[k].tree_node == TSK_NULL) {
            continue;
        }
        T[k].value_index = (tsk_id_t) tsk_search_sorted(values, num_unique, T[k].value);
        tsk_bug_assert(T[k].value == self->values[T[k].value_index]);
    }
    return 0;
}
/*
 * TODO These bit helpers also exist in tree.c, where they are used in the
 * (slightly different) parsimony calculations. It would be good to bring the
 * two copies together, or at least avoid having the same function in two
 * files. They are kept here for now so that they can be inlined, since this
 * code is perf-sensitive.
 */
/* Return the position (0 = least significant) of the lowest set bit of `v`,
 * which must be non-zero. */
static inline tsk_id_t
get_smallest_set_bit(uint64_t v)
{
    /* This is an inefficient implementation, there are several better
     * approaches. On GCC we can use
     * return (uint8_t) (__builtin_ffsll((long long) v) - 1);
     */
    uint64_t probe;
    tsk_id_t position = 0;

    assert(v != 0);
    for (probe = 1; (v & probe) == 0; probe <<= 1) {
        position++;
    }
    return position;
}
/* Return `value` with bit number `bit` (0 = least significant) switched on. */
static inline uint64_t
set_bit(uint64_t value, uint8_t bit)
{
    const uint64_t mask = 1ULL << bit;

    return value | mask;
}
/* Report whether bit number `bit` (0 = least significant) of `value` is on. */
static inline bool
bit_is_set(uint64_t value, uint8_t bit)
{
    const uint64_t mask = 1ULL << bit;

    return (value & mask) != 0;
}
/* Return the smallest member of the bit set stored for node `u`, where each
 * node owns `num_words` consecutive 64-bit words of `A`. The set must not be
 * empty; scanning past its last word trips tsk_bug_assert. */
static inline tsk_id_t
get_smallest_element(const uint64_t *restrict A, tsk_size_t u, tsk_size_t num_words)
{
    const uint64_t *restrict words = A + u * num_words;
    tsk_id_t word;

    for (word = 0; words[word] == 0;) {
        word++;
        tsk_bug_assert(word < (tsk_id_t) num_words);
    }
    return word * 64 + get_smallest_set_bit(words[word]);
}
/* All-zero reference block for all_zero(); objects with static storage
 * duration are zero-initialised, so no initialiser is needed. */
static const uint64_t zero_block[MAX_PARSIMONY_WORDS];

/* Report whether the bit set stored for node `u` is empty. Each node owns
 * `num_words` consecutive words of `A`; the single-word case is tested
 * directly and the general case is compared against zero_block. */
static inline bool
all_zero(const uint64_t *restrict A, tsk_id_t u, tsk_size_t num_words)
{
    if (num_words == 1) {
        return A[u] == 0;
    }
    return tsk_memcmp(zero_block, A + (tsk_size_t) u * num_words, num_words * sizeof(*A))
           == 0;
}
/* Report whether `state` is a member of the bit set stored for node `u`,
 * where each node owns `num_words` consecutive 64-bit words of `A`. */
static inline bool
element_in(
    const uint64_t *restrict A, tsk_id_t u, const tsk_id_t state, tsk_size_t num_words)
{
    const tsk_size_t word = ((tsk_size_t) u) * num_words + (tsk_size_t) (state / 64);
    const uint64_t mask = 1ULL << (state % 64);

    return (A[word] & mask) != 0;
}
/* Add `state` to the bit set stored for node `u`, where each node owns
 * `num_words` consecutive 64-bit words of `A`. The word that `state` falls
 * in must lie within the node's `num_words` words. */
static inline void
set_optimal_value(
    uint64_t *restrict A, tsk_id_t u, const tsk_size_t num_words, tsk_id_t state)
{
    const tsk_size_t word = (tsk_size_t) (state / 64);

    tsk_bug_assert(((tsk_size_t) state) / 64 < num_words);
    A[((tsk_size_t) u) * num_words + word] |= 1ULL << (state % 64);
}
/* TODO the implementation here isn't particularly optimal and the way things
 * were organised was really driven by the old Fitch parsimony algorithm
 * (which only worked on binary trees). In particular, we should be working
 * word-by-word where possible rather than iterating by values like we do here.
 * Needs to be reworked when we're documenting/writing up this algorithm.
 */
/* Single-word case: set A[u] to the set of values that occur in the largest
 * number of u's children's sets. A child whose set is empty is counted as
 * the singleton { parent_state }. Requires num_values < 64. */
static void
compute_optimal_value_1(uint64_t *restrict A, const tsk_id_t *restrict left_child,
    const tsk_id_t *restrict right_sib, const tsk_id_t u, const tsk_id_t parent_state,
    const tsk_size_t num_values)
{
    tsk_size_t votes[64];
    tsk_size_t most_votes = 0;
    uint64_t child_set;
    uint64_t winners = 0;
    tsk_id_t child;
    uint8_t k;

    assert(num_values < 64);
    for (k = 0; k < num_values; k++) {
        votes[k] = 0;
    }

    /* Tally, for each value, how many children contain it. */
    child = left_child[u];
    while (child != TSK_NULL) {
        child_set = A[child];
        /* If the set for a given child is empty, then we know it inherits
         * directly from the parent state and must be a singleton set. */
        if (child_set == 0) {
            child_set = 1ULL << parent_state;
        }
        for (k = 0; k < num_values; k++) {
            if (bit_is_set(child_set, k)) {
                votes[k]++;
            }
        }
        child = right_sib[child];
    }

    for (k = 0; k < num_values; k++) {
        if (votes[k] > most_votes) {
            most_votes = votes[k];
        }
    }

    /* Keep every value that reaches the maximum tally. */
    for (k = 0; k < num_values; k++) {
        if (votes[k] == most_votes) {
            winners = set_bit(winners, k);
        }
    }
    A[u] = winners;
}
/*
* Multi-word case: replace the set stored for node `u` with the set of values
* that occur in the largest number of u's children's sets. Each node owns
* `num_words` consecutive 64-bit words of `A`. A child whose set is entirely
* zero is counted as the singleton { parent_state }.
*
* Both scratch arrays live on the stack and are sized by MAX_PARSIMONY_WORDS,
* which is why num_values and num_words are asserted against it on entry.
*/
static void
compute_optimal_value_general(uint64_t *restrict A, const tsk_id_t *restrict left_child,
const tsk_id_t *restrict right_sib, const tsk_id_t u, const tsk_id_t parent_state,
const tsk_size_t num_values, const tsk_size_t num_words)
{
tsk_id_t v;
uint64_t child[MAX_PARSIMONY_WORDS];
uint64_t *Au;
tsk_size_t base, word, bit;
bool child_all_zero;
/* Word and single-bit mask that together encode parent_state. */
const tsk_id_t state_index = parent_state / 64;
const uint64_t state_word = 1ULL << (parent_state % 64);
tsk_size_t value_count[64 * MAX_PARSIMONY_WORDS], max_value_count;
tsk_size_t j;
tsk_bug_assert(num_values < 64 * MAX_PARSIMONY_WORDS);
tsk_bug_assert(num_words <= MAX_PARSIMONY_WORDS);
for (j = 0; j < num_values; j++) {
value_count[j] = 0;
}
/* Tally, for each value, how many children contain it. */
for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {
child_all_zero = true;
base = ((tsk_size_t) v) * num_words;
/* Work on a local copy of the child's set so A is left untouched. */
for (word = 0; word < num_words; word++) {
child[word] = A[base + word];
child_all_zero = child_all_zero && (child[word] == 0);
}
/* If the set for a given child is empty, then we know it inherits
* directly from the parent state and must be a singleton set. */
if (child_all_zero) {
child[state_index] = state_word;
}
for (j = 0; j < num_values; j++) {
word = j / 64;
bit = j % 64;
assert(word < num_words);
value_count[j] += bit_is_set(child[word], (uint8_t) bit);
}
}
max_value_count = 0;
for (j = 0; j < num_values; j++) {
max_value_count = TSK_MAX(max_value_count, value_count[j]);
}
/* Rebuild u's set from scratch: keep every value that reaches the maximum. */
Au = A + ((size_t) u * num_words);
for (word = 0; word < num_words; word++) {
Au[word] = 0;
}
for (j = 0; j < num_values; j++) {
if (value_count[j] == max_value_count) {
word = j / 64;
bit = j % 64;
Au[word] = set_bit(Au[word], (uint8_t) bit);
}
}
}
/* Recompute the optimal value set of node `u` from its children, using the
 * single-word routine when each set fits in one word and the general
 * multi-word routine otherwise. */
static void
compute_optimal_value(uint64_t *restrict A, const tsk_id_t *restrict left_child,
    const tsk_id_t *restrict right_sib, const tsk_id_t u, const tsk_id_t parent_state,
    const tsk_size_t num_values, const tsk_size_t num_words)
{
    if (num_words != 1) {
        compute_optimal_value_general(
            A, left_child, right_sib, u, parent_state, num_values, num_words);
        return;
    }
    compute_optimal_value_1(A, left_child, right_sib, u, parent_state, num_values);
}
/* Choose how many 64-bit words each optimal value set needs for the current
 * number of values and, when the existing storage is too small, replace it
 * with a larger zero-filled allocation.
 *
 * Returns 0 on success, TSK_ERR_TOO_MANY_VALUES if more words would be needed
 * than max_parsimony_words allows, or TSK_ERR_NO_MEMORY if allocation fails. */
static int
tsk_ls_hmm_setup_optimal_value_sets(tsk_ls_hmm_t *self)
{
    int ret = 0;
    /* We expect that most of the time there will be one word per optimal_value set,
     * but there will be times when we need more than one word. This approach
     * lets us expand the memory if we need to, but when the number of
     * values goes back below 64 we revert to using one word per set. We
     * could in principle release back the memory as well, but it doesn't seem
     * worth the bother. */
    const tsk_size_t words_needed = (self->num_values / 64) + 1;

    self->num_optimal_value_set_words = words_needed;
    if (words_needed > self->max_parsimony_words) {
        ret = tsk_trace_error(TSK_ERR_TOO_MANY_VALUES);
        goto out;
    }
    if (self->num_values < self->max_values) {
        /* Existing storage is already large enough. */
        goto out;
    }
    self->max_values = words_needed * 64;
    tsk_safe_free(self->optimal_value_sets);
    self->optimal_value_sets = tsk_calloc(
        self->num_nodes * words_needed, sizeof(*self->optimal_value_sets));
    if (self->optimal_value_sets == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
out:
    return ret;
}
/*
* Fill in the optimal value sets (self->optimal_value_sets) for the current
* transitions. Transitions are visited in order of increasing node time; for
* each live transition the set of its own node is computed, and then the sets
* of the transition-free ancestors directly above it are recomputed.
* Always returns 0.
*/
static int
tsk_ls_hmm_build_optimal_value_sets(tsk_ls_hmm_t *self)
{
int ret = 0;
const double *restrict node_time = self->tree_sequence->tables->nodes.time;
const tsk_id_t *restrict left_child = self->tree.left_child;
const tsk_id_t *restrict right_sib = self->tree.right_sib;
const tsk_id_t *restrict parent = self->parent;
const tsk_value_transition_t *restrict T = self->transitions;
const tsk_id_t *restrict T_index = self->transition_index;
tsk_argsort_t *restrict order = self->transition_time_order;
const tsk_size_t num_optimal_value_set_words = self->num_optimal_value_set_words;
uint64_t *restrict A = self->optimal_value_sets;
tsk_size_t j;
tsk_id_t u, v, state, parent_state;
/* argsort the transitions by node time so we can visit them in the
* correct order */
/* Unused transition slots keep DBL_MAX as their key, so they sort last. */
for (j = 0; j < self->num_transitions; j++) {
order[j].index = j;
order[j].value = DBL_MAX;
if (T[j].tree_node != TSK_NULL) {
order[j].value = node_time[T[j].tree_node];
}
}
qsort(order, (size_t) self->num_transitions, sizeof(*order), cmp_argsort);
for (j = 0; j < self->num_transitions; j++) {
u = T[order[j].index].tree_node;
if (u != TSK_NULL) {
state = T[order[j].index].value_index;
if (left_child[u] == TSK_NULL) {
/* leaf node */
set_optimal_value(A, u, num_optimal_value_set_words, state);
} else {
compute_optimal_value(A, left_child, right_sib, u, state,
self->num_values, num_optimal_value_set_words);
}
v = parent[u];
if (v != TSK_NULL) {
/* First pass up the tree: find the nearest ancestor that has a
* transition and take its value index as the parent state. */
while (T_index[v] == TSK_NULL) {
v = parent[v];
tsk_bug_assert(v != TSK_NULL);
}
parent_state = T[T_index[v]].value_index;
/* Second pass: recompute the set of every transition-free node
* between u and that ancestor, using that parent state. */
v = parent[u];
while (T_index[v] == TSK_NULL) {
compute_optimal_value(A, left_child, right_sib, v, parent_state,
self->num_values, num_optimal_value_set_words);
v = parent[v];
tsk_bug_assert(v != TSK_NULL);
}
}
}
}
return ret;
}
/*
 * Rebuild the list of value transitions (self->transitions) from the
 * per-node optimal value sets held in self->optimal_value_sets.
 *
 * The current transitions are first copied into self->transitions_copy and
 * the live list is emptied. Each root of the current tree is then traversed
 * top-down using self->transition_stack: a node receives a new transition
 * whenever the state chosen for its parent is not in its own optimal value
 * set, or (for nodes with an empty set) when the state inherited from above
 * under the old transitions differs from the newly chosen one. Finally the
 * old transition_index entries and the optimal value sets are cleared, and
 * transition_index / value are filled in for the new transitions.
 *
 * Always returns 0; capacity violations trip tsk_bug_assert.
 */
static int
tsk_ls_hmm_redistribute_transitions(tsk_ls_hmm_t *self)
{
int ret = 0;
const tsk_id_t *restrict left_child = self->tree.left_child;
const tsk_id_t *restrict right_sib = self->tree.right_sib;
const tsk_id_t *restrict parent = self->parent;
tsk_id_t *restrict T_index = self->transition_index;
tsk_id_t *restrict T_parent = self->transition_parent;
tsk_value_transition_t *restrict T = self->transitions;
tsk_value_transition_t *restrict T_old = self->transitions_copy;
tsk_transition_stack_t *stack = self->transition_stack;
uint64_t *restrict A = self->optimal_value_sets;
const tsk_size_t num_optimal_value_set_words = self->num_optimal_value_set_words;
tsk_transition_stack_t s, child_s;
tsk_id_t root, u, v;
int stack_top = 0;
tsk_size_t j, old_num_transitions;
/* Keep a copy of the current transitions; T is rebuilt from scratch below. */
tsk_memcpy(T_old, T, self->num_transitions * sizeof(*T));
old_num_transitions = self->num_transitions;
self->num_transitions = 0;
/* TODO refactor this to push the virtual root onto the stack rather than
 * iterating over the roots. See the existing parsimony implementations
 * for an example. */
for (root = tsk_tree_get_left_root(&self->tree); root != TSK_NULL;
root = right_sib[root]) {
stack[0].tree_node = root;
stack[0].old_state = T_old[T_index[root]].value_index;
stack[0].new_state
= get_smallest_element(A, (tsk_size_t) root, num_optimal_value_set_words);
/* NOTE(review): transition_parent is 0 for every root, but the root's own
 * transition is written at index self->num_transitions, which is only 0
 * for the first root. Confirm this is intended for multi-root trees. */
stack[0].transition_parent = 0;
stack_top = 0;
/* Every root always gets a transition, with no parent transition. */
tsk_bug_assert(self->num_transitions < self->max_transitions);
T_parent[self->num_transitions] = TSK_NULL;
T[self->num_transitions].tree_node = stack[0].tree_node;
T[self->num_transitions].value_index = stack[0].new_state;
self->num_transitions++;
while (stack_top >= 0) {
s = stack[stack_top];
stack_top--;
for (v = left_child[s.tree_node]; v != TSK_NULL; v = right_sib[v]) {
/* The child inherits old_state, new_state and transition_parent from
 * its parent unless they are overridden below. */
child_s = s;
child_s.tree_node = v;
if (T_index[v] != TSK_NULL) {
child_s.old_state = T_old[T_index[v]].value_index;
}
if (!all_zero(A, v, num_optimal_value_set_words)) {
/* v has a non-empty optimal value set. It only needs its own
 * transition if the parent's chosen state is not in that set. */
if (!element_in(A, v, s.new_state, num_optimal_value_set_words)) {
child_s.new_state = get_smallest_element(
A, (tsk_size_t) v, num_optimal_value_set_words);
child_s.transition_parent = (tsk_id_t) self->num_transitions;
/* Add a new transition */
tsk_bug_assert(self->num_transitions < self->max_transitions);
T_parent[self->num_transitions] = s.transition_parent;
T[self->num_transitions].tree_node = v;
T[self->num_transitions].value_index = child_s.new_state;
self->num_transitions++;
}
/* Continue the traversal below v. */
stack_top++;
stack[stack_top] = child_s;
} else {
/* Node that we didn't visit when moving up the tree */
/* Its subtree is not traversed; a transition restoring the old state
 * is added if the state chosen above differs from it. */
if (s.old_state != s.new_state) {
tsk_bug_assert(self->num_transitions < self->max_transitions);
T_parent[self->num_transitions] = s.transition_parent;
T[self->num_transitions].tree_node = v;
T[self->num_transitions].value_index = s.old_state;
self->num_transitions++;
}
}
}
}
}
/* Unset the old T_index pointers and optimal_value sets. */
for (j = 0; j < old_num_transitions; j++) {
u = T_old[j].tree_node;
if (u != TSK_NULL) {
T_index[u] = TSK_NULL;
/* Walk towards the root zeroing sets, stopping at the first node whose
 * set is already all zero. */
while (u != TSK_NULL && !all_zero(A, u, num_optimal_value_set_words)) {
tsk_memset(A + ((tsk_size_t) u) * num_optimal_value_set_words, 0,
num_optimal_value_set_words * sizeof(uint64_t));
u = parent[u];
}
}
}
/* Set the new pointers for transition nodes and the values.*/
for (j = 0; j < self->num_transitions; j++) {
T_index[T[j].tree_node] = (tsk_id_t) j;
T[j].value = self->values[T[j].value_index];
}
return ret;
}
/*
 * Run the four compression stages in order: discretise the values, set up
 * the optimal value sets, build them, and redistribute the transitions.
 * Stops at the first stage that fails and returns its error code;
 * returns 0 when every stage succeeds.
 */
static int
tsk_ls_hmm_compress(tsk_ls_hmm_t *self)
{
    int ret = tsk_ls_hmm_discretise_values(self);

    if (ret == 0) {
        ret = tsk_ls_hmm_setup_optimal_value_sets(self);
    }
    if (ret == 0) {
        ret = tsk_ls_hmm_build_optimal_value_sets(self);
    }
    if (ret == 0) {
        ret = tsk_ls_hmm_redistribute_transitions(self);
    }
    return ret;
}
/*
 * Process one site in the forward direction: update the probabilities for
 * the observed haplotype state, compress the transitions, normalise and
 * round each transition value, then store the result for the site in the
 * output matrix.
 *
 * Returns TSK_ERR_MATCH_IMPOSSIBLE if the normalisation factor is zero,
 * otherwise the first non-zero error code of a failing step, or 0.
 */
static int
tsk_ls_hmm_process_site_forward(
    tsk_ls_hmm_t *self, const tsk_site_t *site, int32_t haplotype_state)
{
    int ret;
    double norm;
    tsk_size_t k;
    tsk_compressed_matrix_t *matrix = (tsk_compressed_matrix_t *) self->output;
    tsk_value_transition_t *restrict T = self->transitions;
    const unsigned int precision = (unsigned int) self->precision;

    ret = tsk_ls_hmm_update_probabilities(self, site, haplotype_state);
    if (ret != 0) {
        return ret;
    }
    /* See notes in the Python implementation on why we don't want to compress
     * here, but rather should be doing it after rounding. */
    ret = tsk_ls_hmm_compress(self);
    if (ret != 0) {
        return ret;
    }
    tsk_bug_assert(self->num_transitions <= self->num_samples);

    norm = self->compute_normalisation_factor(self);
    if (norm == 0) {
        return tsk_trace_error(TSK_ERR_MATCH_IMPOSSIBLE);
    }
    /* Scale every transition by the normalisation factor and round it. */
    for (k = 0; k < self->num_transitions; k++) {
        tsk_bug_assert(T[k].tree_node != TSK_NULL);
        T[k].value = tsk_round(T[k].value / norm, precision);
    }
    return tsk_compressed_matrix_store_site(
        matrix, site->id, norm, (tsk_size_t) self->num_transitions, T);
}
/*
 * Drive a left-to-right pass over the trees: reset the HMM with an initial
 * value of 1/num_samples, then for each tree update the HMM state and
 * process the tree's sites in order, using haplotype[site id] as the
 * observed state.
 *
 * Returns 0 on success, or the first error from the HMM steps or from the
 * tree iteration.
 */
static int
tsk_ls_hmm_run_forward(tsk_ls_hmm_t *self, int32_t *haplotype)
{
    int ret;
    int t_ret;
    const tsk_site_t *sites;
    tsk_size_t k, num_sites;
    const double n = (double) self->num_samples;

    ret = tsk_ls_hmm_reset(self, 1 / n);
    if (ret != 0) {
        goto out;
    }
    t_ret = tsk_tree_first(&self->tree);
    while (t_ret == TSK_TREE_OK) {
        ret = tsk_ls_hmm_update_tree(self, TSK_DIR_FORWARD);
        if (ret != 0) {
            goto out;
        }
        /* tsk_ls_hmm_check_state(self); */
        ret = tsk_tree_get_sites(&self->tree, &sites, &num_sites);
        if (ret != 0) {
            goto out;
        }
        for (k = 0; k < num_sites; k++) {
            ret = tsk_ls_hmm_process_site_forward(
                self, &sites[k], haplotype[sites[k].id]);
            if (ret != 0) {
                goto out;
            }
        }
        t_ret = tsk_tree_next(&self->tree);
    }
    /* Set to zero so we can print and check the state OK. */
    self->num_transitions = 0;
    /* A non-zero value here means the tree iteration itself failed. */
    if (t_ret != 0) {
        ret = t_ret;
    }
out:
    return ret;
}
/****************************************************************
* Forward Algorithm
****************************************************************/
/*
 * Compute the normalisation factor as the sum over transitions of
 * (samples directly inheriting from the transition) * (transition value).
 *
 * The per-transition sample counts are written to
 * self->num_transition_samples: each starts as the number of samples below
 * the transition's node, and each transition's count is then subtracted
 * from its parent transition's count.
 */
static double
tsk_ls_hmm_compute_normalisation_factor_forward(tsk_ls_hmm_t *self)
{
    tsk_size_t *restrict N = self->num_transition_samples;
    tsk_value_transition_t *restrict T = self->transitions;
    const tsk_id_t *restrict T_parent = self->transition_parent;
    const tsk_size_t *restrict num_samples = self->tree.num_samples;
    const tsk_id_t num_transitions = (tsk_id_t) self->num_transitions;
    double total = 0;
    tsk_id_t k, p;

    /* Start from the full sample count beneath each transition node. */
    k = 0;
    while (k < num_transitions) {
        tsk_bug_assert(T[k].tree_node != TSK_NULL);
        N[k] = num_samples[T[k].tree_node];
        k++;
    }
    /* Remove from each parent the samples claimed by its child transitions. */
    k = 0;
    while (k < num_transitions) {
        p = T_parent[k];
        if (p != TSK_NULL) {
            N[p] -= N[k];
        }
        k++;
    }
    /* Compute the normalising constant used to avoid underflow */
    k = 0;
    while (k < num_transitions) {
        total += (double) N[k] * T[k].value;
        k++;
    }
    return total;
}
/*
 * Forward-algorithm probability update for one node at one site.
 *
 * Writes to *result the product of the transition term
 * p_last * (1 - rho) + rho / n and the emission term, which is
 * 1 - (num_alleles - 1) * mu on a match and mu otherwise. rho, mu and
 * num_alleles are looked up by site_id; n is the number of samples.
 * Always returns 0.
 */
static int
tsk_ls_hmm_next_probability_forward(tsk_ls_hmm_t *self, tsk_id_t site_id, double p_last,
    bool is_match, tsk_id_t TSK_UNUSED(node), double *result)
{
    const double rho = self->recombination_rate[site_id];
    const double mu = self->mutation_rate[site_id];
    const double n = (double) self->num_samples;
    const double num_alleles = self->num_alleles[site_id];
    const double p_t = p_last * (1 - rho) + rho / n;
    const double p_e = is_match ? 1 - (num_alleles - 1) * mu : mu;

    *result = p_t * p_e;
    return 0;
}
/*
 * Run the forward algorithm for the given haplotype, storing the result in
 * output.
 *
 * Unless TSK_NO_INIT is set in options, output is initialised here. With
 * TSK_NO_INIT, output must already refer to the same tree sequence as the
 * HMM (otherwise TSK_ERR_BAD_PARAM_VALUE is returned) and is cleared before
 * use.
 */
int
tsk_ls_hmm_forward(tsk_ls_hmm_t *self, int32_t *haplotype,
    tsk_compressed_matrix_t *output, tsk_flags_t options)
{
    int ret;

    if (options & TSK_NO_INIT) {
        if (output->tree_sequence != self->tree_sequence) {
            return tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        }
        ret = tsk_compressed_matrix_clear(output);
    } else {
        ret = tsk_compressed_matrix_init(output, self->tree_sequence, 0, 0);
    }
    if (ret != 0) {
        return ret;
    }
    /* Select the forward-specific callbacks before running. */
    self->next_probability = tsk_ls_hmm_next_probability_forward;
    self->compute_normalisation_factor = tsk_ls_hmm_compute_normalisation_factor_forward;
    self->output = output;
    return tsk_ls_hmm_run_forward(self, haplotype);
}
/****************************************************************
* Backward Algorithm
****************************************************************/
/*
 * Backward-algorithm probability update for one node at one site.
 *
 * Writes p_last multiplied by the emission term to *result; the emission
 * term is 1 - (num_alleles - 1) * mu on a match and mu otherwise.
 * Always returns 0.
 */
static int
tsk_ls_hmm_next_probability_backward(tsk_ls_hmm_t *self, tsk_id_t site_id, double p_last,
    bool is_match, tsk_id_t TSK_UNUSED(node), double *result)
{
    const double mu = self->mutation_rate[site_id];
    const double num_alleles = self->num_alleles[site_id];
    const double p_e = is_match ? 1 - (num_alleles - 1) * mu : mu;

    *result = p_last * p_e;
    return 0;
}
/*
 * Process one site in the backward direction.
 *
 * The current (compressed) transitions are stored for the site together
 * with the supplied normalisation factor. The probabilities are then
 * updated for the observed haplotype state, compressed again, and each
 * transition value is replaced by
 *   (rho * b_last_sum / n + (1 - rho) * value) / normalisation_factor
 * rounded to the configured precision.
 */
static int
tsk_ls_hmm_process_site_backward(tsk_ls_hmm_t *self, const tsk_site_t *site,
    const int32_t haplotype_state, const double normalisation_factor)
{
    int ret;
    double x, b_last_sum;
    tsk_size_t k;
    tsk_compressed_matrix_t *matrix = (tsk_compressed_matrix_t *) self->output;
    tsk_value_transition_t *restrict T = self->transitions;
    const unsigned int precision = (unsigned int) self->precision;
    const double rho = self->recombination_rate[site->id];
    const double n = (double) self->num_samples;

    /* FIXME!!! We are calling compress twice here because we need to compress
     * immediately before calling store_site in order to filter out -1 nodes,
     * and also (crucially) to ensure that the value transitions are listed
     * in preorder, which we rely on later for decoding.
     *
     * https://github.com/tskit-dev/tskit/issues/2803
     */
    ret = tsk_ls_hmm_compress(self);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_compressed_matrix_store_site(
        matrix, site->id, normalisation_factor, (tsk_size_t) self->num_transitions, T);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_ls_hmm_update_probabilities(self, site, haplotype_state);
    if (ret != 0) {
        return ret;
    }
    /* DO WE NEED THIS compress?? See above */
    ret = tsk_ls_hmm_compress(self);
    if (ret != 0) {
        return ret;
    }
    tsk_bug_assert(self->num_transitions <= self->num_samples);
    b_last_sum = self->compute_normalisation_factor(self);
    for (k = 0; k < self->num_transitions; k++) {
        tsk_bug_assert(T[k].tree_node != TSK_NULL);
        x = rho * b_last_sum / n + (1 - rho) * T[k].value;
        x = x / normalisation_factor;
        T[k].value = tsk_round(x, precision);
    }
    return 0;
}
/*
 * Drive a right-to-left pass over the trees: reset the HMM with an initial
 * value of 1, then for each tree (last to first) update the HMM state and
 * process the tree's sites in reverse order, passing each site's entry of
 * forward_norm as its normalisation factor.
 *
 * Returns TSK_ERR_MATCH_IMPOSSIBLE if a forward_norm entry is <= 0,
 * otherwise the first error encountered, or 0 on success.
 */
static int
tsk_ls_hmm_run_backward(
    tsk_ls_hmm_t *self, int32_t *haplotype, const double *forward_norm)
{
    int ret;
    int t_ret;
    const tsk_site_t *sites;
    const tsk_site_t *site;
    double s;
    tsk_size_t num_sites;
    tsk_id_t k;

    ret = tsk_ls_hmm_reset(self, 1);
    if (ret != 0) {
        goto out;
    }
    t_ret = tsk_tree_last(&self->tree);
    while (t_ret == TSK_TREE_OK) {
        ret = tsk_ls_hmm_update_tree(self, TSK_DIR_REVERSE);
        if (ret != 0) {
            goto out;
        }
        /* tsk_ls_hmm_check_state(self); */
        ret = tsk_tree_get_sites(&self->tree, &sites, &num_sites);
        if (ret != 0) {
            goto out;
        }
        /* Visit the sites of this tree from last to first. */
        k = (tsk_id_t) num_sites;
        while (k > 0) {
            k--;
            site = &sites[k];
            s = forward_norm[site->id];
            if (s <= 0) {
                /* NOTE: I'm not sure if this is the correct interpretation,
                 * but norm values of 0 do lead to problems, and this seems
                 * like a simple way of guarding against it. We do seem to
                 * get norm values of 0 with impossible matches from the fwd
                 * matrix.
                 */
                ret = tsk_trace_error(TSK_ERR_MATCH_IMPOSSIBLE);
                goto out;
            }
            ret = tsk_ls_hmm_process_site_backward(self, site, haplotype[site->id], s);
            if (ret != 0) {
                goto out;
            }
        }
        t_ret = tsk_tree_prev(&self->tree);
    }
    /* Set to zero so we can print and check the state OK. */
    self->num_transitions = 0;
    /* A non-zero value here means the tree iteration itself failed. */
    if (t_ret != 0) {
        ret = t_ret;
    }
out:
    return ret;
}
/*
 * Run the backward algorithm for the given haplotype, using forward_norm
 * (indexed by site id) as the per-site normalisation factors, and store the
 * result in output.
 *
 * Unless TSK_NO_INIT is set in options, output is initialised here. With
 * TSK_NO_INIT, output must already refer to the same tree sequence as the
 * HMM (otherwise TSK_ERR_BAD_PARAM_VALUE is returned) and is cleared before
 * use.
 */
int
tsk_ls_hmm_backward(tsk_ls_hmm_t *self, int32_t *haplotype, const double *forward_norm,
    tsk_compressed_matrix_t *output, tsk_flags_t options)
{
    int ret;

    if (options & TSK_NO_INIT) {
        if (output->tree_sequence != self->tree_sequence) {
            return tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        }
        ret = tsk_compressed_matrix_clear(output);
    } else {
        ret = tsk_compressed_matrix_init(output, self->tree_sequence, 0, 0);
    }
    if (ret != 0) {
        return ret;
    }
    /* The backward pass reuses the forward normalisation-factor routine. */
    self->next_probability = tsk_ls_hmm_next_probability_backward;
    self->compute_normalisation_factor = tsk_ls_hmm_compute_normalisation_factor_forward;
    self->output = output;
    return tsk_ls_hmm_run_backward(self, haplotype, forward_norm);
}
/****************************************************************
* Viterbi Algorithm
****************************************************************/
/*
 * Return the largest value among the current transitions. The running
 * maximum starts at -1, and there must be at least one transition.
 */
static double
tsk_ls_hmm_compute_normalisation_factor_viterbi(tsk_ls_hmm_t *self)
{
    tsk_value_transition_t *restrict T = self->transitions;
    const tsk_id_t num_transitions = (tsk_id_t) self->num_transitions;
    double largest = -1;
    tsk_id_t k;

    tsk_bug_assert(num_transitions > 0);
    for (k = 0; k < num_transitions; k++) {
        tsk_bug_assert(T[k].tree_node != TSK_NULL);
        if (T[k].value > largest) {
            largest = T[k].value;
        }
    }
    return largest;
}
/*
 * Viterbi probability update for one node at one site.
 *
 * The transition term is the larger of the no-recombination probability
 * p_last * (1 - rho + rho / n) and the recombination probability rho / n;
 * recombination is flagged as required unless the former is strictly
 * larger. *result receives the transition term times the emission term
 * (1 - (num_alleles - 1) * mu on a match, mu otherwise).
 *
 * The (site, node, recombination required) record is appended to the
 * Viterbi matrix in self->output; the return value is that call's status.
 */
static int
tsk_ls_hmm_next_probability_viterbi(tsk_ls_hmm_t *self, tsk_id_t site, double p_last,
    bool is_match, tsk_id_t node, double *result)
{
    const double rho = self->recombination_rate[site];
    const double mu = self->mutation_rate[site];
    const double num_alleles = self->num_alleles[site];
    const double n = (double) self->num_samples;
    const double p_no_recomb = p_last * (1 - rho + rho / n);
    const double p_recomb = rho / n;
    const bool recombination_required = !(p_no_recomb > p_recomb);
    const double p_t = recombination_required ? p_recomb : p_no_recomb;
    const double p_e = is_match ? 1 - (num_alleles - 1) * mu : mu;

    *result = p_t * p_e;
    return tsk_viterbi_matrix_add_recombination_required(
        self->output, site, node, recombination_required);
}
/*
 * Run the Viterbi algorithm for the given haplotype, storing the result in
 * output.
 *
 * Unless TSK_NO_INIT is set in options, output is initialised here. With
 * TSK_NO_INIT, output must already refer to the same tree sequence as the
 * HMM (otherwise TSK_ERR_BAD_PARAM_VALUE is returned) and is cleared before
 * use. The computation itself reuses the forward driver with the
 * Viterbi-specific callbacks.
 */
int
tsk_ls_hmm_viterbi(tsk_ls_hmm_t *self, int32_t *haplotype, tsk_viterbi_matrix_t *output,
    tsk_flags_t options)
{
    int ret;

    if (options & TSK_NO_INIT) {
        if (output->matrix.tree_sequence != self->tree_sequence) {
            return tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        }
        ret = tsk_viterbi_matrix_clear(output);
    } else {
        ret = tsk_viterbi_matrix_init(output, self->tree_sequence, 0, 0);
    }
    if (ret != 0) {
        return ret;
    }
    self->next_probability = tsk_ls_hmm_next_probability_viterbi;
    self->compute_normalisation_factor = tsk_ls_hmm_compute_normalisation_factor_viterbi;
    self->output = output;
    return tsk_ls_hmm_run_forward(self, haplotype);
}
/****************************************************************
* Compressed matrix
****************************************************************/
/*
 * Initialise a compressed matrix for the given tree sequence.
 *
 * The structure is zeroed, the site and sample counts are read from
 * tree_sequence, and one slot per site is allocated in each of the
 * num_transitions, normalisation_factor, values and nodes arrays. The block
 * allocator backing the per-site node/value storage is created with
 * block_size bytes per block (1 << 20 if block_size is 0), and the matrix
 * is then cleared.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY if any per-site array cannot be
 * allocated, or the error from the block allocator. Memory acquired here is
 * released by tsk_compressed_matrix_free.
 */
int
tsk_compressed_matrix_init(tsk_compressed_matrix_t *self, tsk_treeseq_t *tree_sequence,
    tsk_size_t block_size, tsk_flags_t options)
{
    int ret = 0;

    tsk_memset(self, 0, sizeof(*self));
    self->tree_sequence = tree_sequence;
    self->options = options;
    self->num_sites = tsk_treeseq_get_num_sites(tree_sequence);
    self->num_samples = tsk_treeseq_get_num_samples(tree_sequence);
    self->num_transitions = tsk_malloc(self->num_sites * sizeof(*self->num_transitions));
    self->normalisation_factor
        = tsk_malloc(self->num_sites * sizeof(*self->normalisation_factor));
    self->values = tsk_malloc(self->num_sites * sizeof(*self->values));
    self->nodes = tsk_malloc(self->num_sites * sizeof(*self->nodes));
    /* All four arrays must be checked: normalisation_factor is written by
     * tsk_compressed_matrix_clear below, so a failed allocation must not
     * slip through. */
    if (self->num_transitions == NULL || self->normalisation_factor == NULL
        || self->values == NULL || self->nodes == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (block_size == 0) {
        block_size = 1 << 20; /* 1MiB */
    }
    ret = tsk_blkalloc_init(&self->memory, (size_t) block_size);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_compressed_matrix_clear(self);
out:
    return ret;
}
/*
 * Release the per-site arrays and the block allocator owned by the matrix.
 * Always returns 0.
 */
int
tsk_compressed_matrix_free(tsk_compressed_matrix_t *self)
{
    tsk_safe_free(self->nodes);
    tsk_safe_free(self->values);
    tsk_safe_free(self->normalisation_factor);
    tsk_safe_free(self->num_transitions);
    tsk_blkalloc_free(&self->memory);
    return 0;
}
/*
 * Reset the matrix to the empty state: zero the per-site transition counts
 * and normalisation factors and reset the block allocator. Always returns 0.
 */
int
tsk_compressed_matrix_clear(tsk_compressed_matrix_t *self)
{
    tsk_memset(self->normalisation_factor, 0,
        self->num_sites * sizeof(*self->normalisation_factor));
    tsk_memset(
        self->num_transitions, 0, self->num_sites * sizeof(*self->num_transitions));
    tsk_blkalloc_reset(&self->memory);
    return 0;
}
/*
 * Write a human-readable dump of the matrix to out: the site and sample
 * counts, then one line per site giving its normalisation factor, its
 * number of transitions and the (node, value) pairs, followed by the state
 * of the block allocator.
 */
void
tsk_compressed_matrix_print_state(tsk_compressed_matrix_t *self, FILE *out)
{
    tsk_size_t l, j;

    fprintf(out, "Compressed matrix for %p\n", (void *) self->tree_sequence);
    fprintf(out, "num_sites = %lld\n", (long long) self->num_sites);
    fprintf(out, "num_samples = %lld\n", (long long) self->num_samples);
    for (l = 0; l < self->num_sites; l++) {
        fprintf(out, "%lld\ts=%f\tv=%lld [", (long long) l,
            self->normalisation_factor[l], (long long) self->num_transitions[l]);
        for (j = 0; j < self->num_transitions[l]; j++) {
            fprintf(
                out, "(%lld, %f)", (long long) self->nodes[l][j], self->values[l][j]);
            if (j + 1 < self->num_transitions[l]) {
                fprintf(out, ",");
            }
        }
        /* Close the list outside the inner loop so that sites with no
         * transitions still get a terminated line. */
        fprintf(out, "]\n");
    }
    fprintf(out, "Memory:\n");
    tsk_blkalloc_print_state(&self->memory, out);
}
/*
 * Record the transitions for one site.
 *
 * Stores num_transitions and normalisation_factor for the site, obtains
 * storage for the node and value arrays from the matrix's block allocator,
 * and copies tree_node / value out of each entry of transitions. Every
 * tree_node must be non-negative.
 *
 * Returns TSK_ERR_SITE_OUT_OF_BOUNDS if site is not in [0, num_sites),
 * TSK_ERR_NO_MEMORY if storage cannot be obtained, and 0 otherwise.
 */
int
tsk_compressed_matrix_store_site(tsk_compressed_matrix_t *self, tsk_id_t site,
    double normalisation_factor, tsk_size_t num_transitions,
    const tsk_value_transition_t *transitions)
{
    tsk_size_t k;
    tsk_id_t *site_nodes;
    double *site_values;

    if (site < 0 || site >= (tsk_id_t) self->num_sites) {
        return tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);
    }
    self->num_transitions[site] = num_transitions;
    self->normalisation_factor[site] = normalisation_factor;

    site_nodes
        = tsk_blkalloc_get(&self->memory, (size_t) num_transitions * sizeof(tsk_id_t));
    self->nodes[site] = site_nodes;
    site_values
        = tsk_blkalloc_get(&self->memory, (size_t) num_transitions * sizeof(double));
    self->values[site] = site_values;
    if (site_nodes == NULL || site_values == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    for (k = 0; k < num_transitions; k++) {
        tsk_bug_assert(transitions[k].tree_node >= 0);
        site_values[k] = transitions[k].value;
        site_nodes[k] = transitions[k].tree_node;
    }
    return 0;
}
/*
 * Expand the stored transitions for one site into values.
 *
 * For each stored (node, value) pair, in order, the value is written to
 * every entry of the tree's sample list for that node, walking from
 * left_sample[node] to right_sample[node] via next_sample. Later pairs
 * overwrite earlier ones.
 *
 * Returns TSK_ERR_NODE_OUT_OF_BOUNDS for a node outside [0, num_nodes),
 * TSK_ERR_BAD_COMPRESSED_MATRIX_NODE for a node with an empty sample list,
 * and 0 otherwise.
 */
static int
tsk_compressed_matrix_decode_site(tsk_compressed_matrix_t *self, const tsk_tree_t *tree,
    const tsk_id_t site, double *values)
{
    const tsk_id_t *restrict list_left = tree->left_sample;
    const tsk_id_t *restrict list_right = tree->right_sample;
    const tsk_id_t *restrict list_next = tree->next_sample;
    const tsk_id_t num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(self->tree_sequence);
    tsk_size_t k;
    tsk_id_t node, index, stop;
    double value;

    for (k = 0; k < self->num_transitions[site]; k++) {
        node = self->nodes[site][k];
        if (node < 0 || node >= num_nodes) {
            return tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
        }
        value = self->values[site][k];
        index = list_left[node];
        if (index == TSK_NULL) {
            /* It's an error if there are nodes that don't subtend any samples */
            return tsk_trace_error(TSK_ERR_BAD_COMPRESSED_MATRIX_NODE);
        }
        stop = list_right[node];
        /* Fill every list entry before the last, then the last one. */
        while (index != stop) {
            values[index] = value;
            index = list_next[index];
        }
        values[stop] = value;
    }
    return 0;
}
/*
 * Decode the whole matrix into values, laid out with num_samples
 * consecutive entries per site (the row for site s starts at
 * values + s * num_samples).
 *
 * A tree with sample lists is iterated over the tree sequence; for each
 * site on each tree, the row is zeroed if the site has no stored
 * transitions and otherwise filled by decoding the site.
 *
 * Returns 0 on success or the first error from tree initialisation, tree
 * iteration, site lookup or site decoding.
 */
int
tsk_compressed_matrix_decode(tsk_compressed_matrix_t *self, double *values)
{
    int ret;
    int t_ret;
    tsk_tree_t tree;
    tsk_size_t k, num_tree_sites;
    const tsk_site_t *sites = NULL;
    tsk_id_t site_id;
    double *row;

    ret = tsk_tree_init(&tree, self->tree_sequence, TSK_SAMPLE_LISTS);
    if (ret != 0) {
        goto out;
    }
    t_ret = tsk_tree_first(&tree);
    while (t_ret == TSK_TREE_OK) {
        ret = tsk_tree_get_sites(&tree, &sites, &num_tree_sites);
        if (ret != 0) {
            goto out;
        }
        for (k = 0; k < num_tree_sites; k++) {
            site_id = sites[k].id;
            row = values + ((tsk_size_t) site_id) * self->num_samples;
            if (self->num_transitions[site_id] != 0) {
                ret = tsk_compressed_matrix_decode_site(self, &tree, site_id, row);
                if (ret != 0) {
                    goto out;
                }
            } else {
                tsk_memset(row, 0, self->num_samples * sizeof(*row));
            }
        }
        t_ret = tsk_tree_next(&tree);
    }
    /* A negative value means the tree iteration itself failed. */
    if (t_ret < 0) {
        ret = t_ret;
    }
out:
    tsk_tree_free(&tree);
    return ret;
}
/****************************************************************
* Viterbi matrix
****************************************************************/
/*
 * Resize the recombination_required array to hold max_recomb_records
 * entries. On failure the existing array is left in place and
 * TSK_ERR_NO_MEMORY is returned; on success returns 0.
 */
static int
tsk_viterbi_matrix_expand_recomb_records(tsk_viterbi_matrix_t *self)
{
    tsk_recomb_required_record *resized = tsk_realloc(
        self->recombination_required, self->max_recomb_records * sizeof(*resized));

    if (resized == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    self->recombination_required = resized;
    return 0;
}
/*
 * Initialise a Viterbi matrix for the given tree sequence.
 *
 * The structure is zeroed, the embedded compressed matrix is initialised
 * with block_size (1 << 20 if block_size is 0), and the recombination
 * record array is allocated with room for
 * max(1, block_size / sizeof(record)) entries. Entry 0 is a sentinel with
 * site -1, and the matrix is then cleared.
 *
 * Returns 0 on success or the first error encountered.
 */
int
tsk_viterbi_matrix_init(tsk_viterbi_matrix_t *self, tsk_treeseq_t *tree_sequence,
    tsk_size_t block_size, tsk_flags_t options)
{
    int ret;

    tsk_memset(self, 0, sizeof(*self));
    if (block_size == 0) {
        block_size = 1 << 20; /* 1MiB */
    }
    ret = tsk_compressed_matrix_init(&self->matrix, tree_sequence, block_size, options);
    if (ret == 0) {
        self->max_recomb_records
            = TSK_MAX(1, block_size / sizeof(tsk_recomb_required_record));
        ret = tsk_viterbi_matrix_expand_recomb_records(self);
    }
    if (ret == 0) {
        /* Add the sentinel at the start to simplify traceback */
        self->recombination_required[0].site = -1;
        ret = tsk_viterbi_matrix_clear(self);
    }
    return ret;
}
/*
 * Release the recombination record array and the embedded compressed
 * matrix. Always returns 0.
 */
int
tsk_viterbi_matrix_free(tsk_viterbi_matrix_t *self)
{
    tsk_safe_free(self->recombination_required);
    tsk_compressed_matrix_free(&self->matrix);
    return 0;
}
/*
 * Reset the Viterbi matrix: clear the embedded compressed matrix and set
 * the record count back to 1, leaving only the entry at index 0.
 * Always returns 0.
 */
int
tsk_viterbi_matrix_clear(tsk_viterbi_matrix_t *self)
{
    tsk_compressed_matrix_clear(&self->matrix);
    self->num_recomb_records = 1;
    return 0;
}
/*
 * Write a human-readable dump of the Viterbi matrix to out: the record
 * counts, then for each site the (node, required) pairs of the consecutive
 * records for that site (starting after the entry at index 0), followed by
 * the state of the embedded compressed matrix.
 */
void
tsk_viterbi_matrix_print_state(tsk_viterbi_matrix_t *self, FILE *out)
{
    tsk_id_t site_id;
    tsk_id_t k = 1;

    fprintf(out, "viterbi_matrix\n");
    fprintf(out, "num_recomb_records = %lld\n", (long long) self->num_recomb_records);
    fprintf(out, "max_recomb_records = %lld\n", (long long) self->max_recomb_records);
    for (site_id = 0; site_id < (tsk_id_t) self->matrix.num_sites; site_id++) {
        fprintf(out, "%lld\t[", (long long) site_id);
        /* k carries over between sites: records are consumed in order. */
        for (; k < (tsk_id_t) self->num_recomb_records
               && self->recombination_required[k].site == site_id;
             k++) {
            fprintf(out, "(%lld, %d) ", (long long) self->recombination_required[k].node,
                self->recombination_required[k].required);
        }
        fprintf(out, "]\n");
    }
    tsk_compressed_matrix_print_state(&self->matrix, out);
}
/*
 * Append a (site, node, required) record to the Viterbi matrix.
 *
 * When the record array is full its capacity is doubled first. If that
 * reallocation fails, the capacity is restored to its previous value so
 * that max_recomb_records keeps matching the real size of the array, and
 * the error is returned without adding a record.
 *
 * Returns 0 on success or the error from growing the array.
 */
TSK_WARN_UNUSED int
tsk_viterbi_matrix_add_recombination_required(
    tsk_viterbi_matrix_t *self, tsk_id_t site, tsk_id_t node, bool required)
{
    int ret = 0;
    tsk_recomb_required_record *record;

    if (self->num_recomb_records == self->max_recomb_records) {
        self->max_recomb_records *= 2;
        ret = tsk_viterbi_matrix_expand_recomb_records(self);
        if (ret != 0) {
            /* The buffer was not grown: undo the doubling, otherwise a later
             * call would skip the growth check and write past the end. */
            self->max_recomb_records /= 2;
            goto out;
        }
    }
    record = self->recombination_required + self->num_recomb_records;
    record->site = site;
    record->node = node;
    record->required = required;
    self->num_recomb_records++;
out:
    return ret;
}
/*
 * Pick a sample node for the given site.
 *
 * The transition with the largest stored value is located (the first one
 * wins on ties) and its node is the starting point. While the current node
 * is not flagged as a sample, the search descends to its first child that
 * is not itself one of the site's transition nodes.
 *
 * Returns the chosen node id, or TSK_ERR_NULL_VITERBI_MATRIX if the site
 * has no stored transitions.
 */
static tsk_id_t
tsk_viterbi_matrix_choose_sample(
    tsk_viterbi_matrix_t *self, tsk_id_t site, tsk_tree_t *tree)
{
    const tsk_flags_t *node_flags = self->matrix.tree_sequence->tables->nodes.flags;
    const tsk_size_t num_transitions = self->matrix.num_transitions[site];
    const tsk_id_t *transition_nodes = self->matrix.nodes[site];
    const double *transition_values = self->matrix.values[site];
    double best_value = -1;
    tsk_id_t current = TSK_NULL;
    tsk_id_t child;
    tsk_size_t k;
    bool found;

    if (num_transitions == 0) {
        return tsk_trace_error(TSK_ERR_NULL_VITERBI_MATRIX);
    }
    for (k = 0; k < num_transitions; k++) {
        if (transition_values[k] > best_value) {
            best_value = transition_values[k];
            current = transition_nodes[k];
        }
    }
    tsk_bug_assert(current != TSK_NULL);

    while (!(node_flags[current] & TSK_NODE_IS_SAMPLE)) {
        found = false;
        child = tree->left_child[current];
        while (child != TSK_NULL) {
            /* Choose the first child that is not in the list of transition nodes */
            k = 0;
            while (k < num_transitions && transition_nodes[k] != child) {
                k++;
            }
            if (k == num_transitions) {
                found = true;
                break;
            }
            child = tree->right_sib[child];
        }
        /* TODO: should remove this once we're sure this is robust */
        tsk_bug_assert(found);
        if (found) {
            current = child;
        }
    }
    return current;
}
/*
 * Trace back through the Viterbi matrix, writing one node id per site to
 * path (which must have room for num_sites entries).
 *
 * Sites are visited from last to first, moving a tree backwards so that it
 * covers each site's position. For each site the recombination records for
 * that site are marked on a per-node scratch array. If no node is currently
 * selected, one is chosen with tsk_viterbi_matrix_choose_sample. The chosen
 * node is recorded in path, and the first marked node on the way up from it
 * decides whether a new node must be chosen at the next site. The marks are
 * then removed again.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY if the scratch array cannot be
 * allocated, or the first error from the tree, site lookup or sample
 * selection.
 */
int
tsk_viterbi_matrix_traceback(
    tsk_viterbi_matrix_t *self, tsk_id_t *path, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    tsk_site_t site;
    tsk_id_t u, site_id, current_node;
    tsk_recomb_required_record *rr_record, *rr_record_tmp;
    const tsk_id_t num_sites = (tsk_id_t) self->matrix.num_sites;
    const tsk_id_t num_nodes
        = (tsk_id_t) tsk_treeseq_get_num_nodes(self->matrix.tree_sequence);
    tsk_tree_t tree;
    tsk_id_t *recombination_tree
        = tsk_malloc((size_t) num_nodes * sizeof(*recombination_tree));

    ret = tsk_tree_init(&tree, self->matrix.tree_sequence, 0);
    if (ret != 0) {
        goto out;
    }
    if (recombination_tree == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* Initialise the path and recombination_tree to contain TSK_NULL */
    tsk_memset(path, 0xff, ((size_t) num_sites) * sizeof(*path));
    /* Size the fill by the array's own element type rather than path's. */
    tsk_memset(recombination_tree, 0xff,
        ((size_t) num_nodes) * sizeof(*recombination_tree));
    current_node = TSK_NULL;
    /* Start from the most recently added record and walk backwards. */
    rr_record = &self->recombination_required[self->num_recomb_records - 1];
    ret = tsk_tree_last(&tree);
    if (ret < 0) {
        goto out;
    }
    for (site_id = num_sites - 1; site_id >= 0; site_id--) {
        ret = tsk_treeseq_get_site(self->matrix.tree_sequence, site_id, &site);
        if (ret != 0) {
            goto out;
        }
        /* Move the tree back until its interval contains the site. */
        while (tree.interval.left > site.position) {
            ret = tsk_tree_prev(&tree);
            if (ret < 0) {
                goto out;
            }
        }
        tsk_bug_assert(tree.interval.left <= site.position);
        tsk_bug_assert(site.position < tree.interval.right);
        /* Fill in the recombination tree */
        rr_record_tmp = rr_record;
        while (rr_record->site == site.id) {
            recombination_tree[rr_record->node] = rr_record->required;
            rr_record--;
        }
        if (current_node == TSK_NULL) {
            current_node = tsk_viterbi_matrix_choose_sample(self, site.id, &tree);
            if (current_node < 0) {
                ret = (int) current_node;
                goto out;
            }
        }
        path[site.id] = current_node;
        /* Now traverse up the tree from the current node. The
         * first marked node tells us whether we need to recombine */
        u = current_node;
        while (u != TSK_NULL && recombination_tree[u] == TSK_NULL) {
            u = tree.parent[u];
        }
        tsk_bug_assert(u != TSK_NULL);
        if (recombination_tree[u] == 1) {
            /* Switch at the next site */
            current_node = TSK_NULL;
        }
        /* Reset in the recombination tree */
        rr_record = rr_record_tmp;
        while (rr_record->site == site.id) {
            recombination_tree[rr_record->node] = TSK_NULL;
            rr_record--;
        }
    }
    ret = 0;
out:
    tsk_tree_free(&tree);
    tsk_safe_free(recombination_tree);
    return ret;
}
================================================
FILE: c/tskit/haplotype_matching.h
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSK_HAPLOTYPE_MATCHING_H
#define TSK_HAPLOTYPE_MATCHING_H
#ifdef __cplusplus
extern "C" {
#endif
#include
/* Seems like we might use this somewhere else as well, so putting it into the middle
 * of the flags space */
/* Option flag occupying bit 16. NOTE(review): presumably selects a fixed
 * A/C/G/T allele alphabet; its consumer is not visible here, so confirm. */
#define TSK_ALLELES_ACGT (1 << 16)
/* A probability value attached to a node of the current tree. */
typedef struct {
/* ID of the tree node the value is attached to (TSK_NULL if unused). */
tsk_id_t tree_node;
/* Index of this value in the HMM's array of distinct values. */
tsk_id_t value_index;
/* The probability value itself. */
double value;
} tsk_value_transition_t;
/* Element of the buffer used to argsort the transitions by node time. */
typedef struct {
/* Position of the element in the array being sorted. */
tsk_size_t index;
/* Sort key (the node time, or DBL_MAX for transitions with no node). */
double value;
} tsk_argsort_t;
/* Stack entry used when distributing transitions down the tree. */
typedef struct {
/* Node being visited. */
tsk_id_t tree_node;
/* Value index in force at this node under the previous transitions. */
tsk_id_t old_state;
/* Value index chosen for this node in the new transitions. */
tsk_id_t new_state;
/* Index in the new transition list used as the parent of any transition
 * created below this node. */
tsk_id_t transition_parent;
} tsk_transition_stack_t;
/* Per-site (node, value) arrays with a normalisation factor.
 * NOTE(review): not referenced by the visible part of haplotype_matching.c;
 * confirm whether this type is still in use. */
typedef struct {
double normalisation_factor;
double *value;
tsk_id_t *node;
tsk_size_t num_values;
} tsk_site_probability_t;
/* Per-site storage of (node, value) transitions plus a normalisation factor.
 * All per-site arrays have num_sites entries and are indexed by site ID. */
typedef struct {
/* Tree sequence the matrix was initialised for. */
tsk_treeseq_t *tree_sequence;
/* Options passed to tsk_compressed_matrix_init. */
tsk_flags_t options;
/* Number of sites in tree_sequence. */
tsk_size_t num_sites;
/* Number of samples in tree_sequence. */
tsk_size_t num_samples;
/* Normalisation factor stored for each site. */
double *normalisation_factor;
/* Number of transitions stored for each site (0 if none stored). */
tsk_size_t *num_transitions;
/* values[site][j] is the value of the j-th transition at the site. */
double **values;
/* nodes[site][j] is the node of the j-th transition at the site. */
tsk_id_t **nodes;
/* Block allocator providing the per-site values/nodes arrays. */
tsk_blkalloc_t memory;
} tsk_compressed_matrix_t;
/* Records, for one node at one site, whether the Viterbi update chose the
 * recombination branch. */
typedef struct {
/* Site ID (-1 for the sentinel record at index 0). */
tsk_id_t site;
/* Node ID the record applies to. */
tsk_id_t node;
/* True if recombination was chosen for this node at this site. */
bool required;
} tsk_recomb_required_record;
/* A compressed matrix together with the recombination records needed for
 * traceback. */
typedef struct {
/* The compressed matrix of Viterbi values. */
tsk_compressed_matrix_t matrix;
/* Records in the order they were added; index 0 is a sentinel. */
tsk_recomb_required_record *recombination_required;
/* Number of records in use, including the sentinel. */
tsk_size_t num_recomb_records;
/* Allocated capacity of recombination_required, in records. */
tsk_size_t max_recomb_records;
} tsk_viterbi_matrix_t;
/* State for the Li and Stephens HMM algorithms (forward, backward and
 * Viterbi) run over a tree sequence. */
typedef struct _tsk_ls_hmm_t {
/* input */
tsk_treeseq_t *tree_sequence;
/* Per-site recombination probability, indexed by site ID. */
double *recombination_rate;
/* Per-site mutation probability, indexed by site ID. */
double *mutation_rate;
const char ***alleles;
/* Precision passed to tsk_round when rounding transition values. */
unsigned int precision;
/* Number of alleles at each site, indexed by site ID. */
uint32_t *num_alleles;
tsk_size_t num_samples;
tsk_size_t num_sites;
tsk_size_t num_nodes;
/* state */
tsk_tree_t tree;
tsk_id_t *parent;
/* The probability value transitions on the tree */
tsk_value_transition_t *transitions;
/* Copy of the transitions taken before they are redistributed */
tsk_value_transition_t *transitions_copy;
/* Stack used when distributing transitions on the tree */
tsk_transition_stack_t *transition_stack;
/* Map of node_id to index in the transitions list */
tsk_id_t *transition_index;
/* Buffer used to argsort the transitions by node time */
tsk_argsort_t *transition_time_order;
tsk_size_t num_transitions;
tsk_size_t max_transitions;
/* The distinct values in the transitions */
double *values;
tsk_size_t num_values;
tsk_size_t max_values;
tsk_size_t max_parsimony_words;
/* Number of machine words per node optimal value set. */
tsk_size_t num_optimal_value_set_words;
/* Optimal value sets; the set for node u starts at word
 * u * num_optimal_value_set_words. */
uint64_t *optimal_value_sets;
/* The parent transition; used during compression */
tsk_id_t *transition_parent;
/* The number of samples directly subtended by a transition */
tsk_size_t *num_transition_samples;
int32_t *allelic_state;
/* Algorithms set these values before they are run */
/* Arguments: (self, site ID, previous probability, is_match, node, result). */
int (*next_probability)(
struct _tsk_ls_hmm_t *, tsk_id_t, double, bool, tsk_id_t, double *);
double (*compute_normalisation_factor)(struct _tsk_ls_hmm_t *);
/* Output matrix for the algorithm being run (a tsk_compressed_matrix_t or
 * tsk_viterbi_matrix_t). */
void *output;
} tsk_ls_hmm_t;
/* TODO constify these APIs */
/* Li and Stephens HMM lifecycle and configuration. */
int tsk_ls_hmm_init(tsk_ls_hmm_t *self, tsk_treeseq_t *tree_sequence,
double *recombination_rate, double *mutation_rate, tsk_flags_t options);
int tsk_ls_hmm_set_precision(tsk_ls_hmm_t *self, unsigned int precision);
int tsk_ls_hmm_free(tsk_ls_hmm_t *self);
void tsk_ls_hmm_print_state(tsk_ls_hmm_t *self, FILE *out);
/* Algorithms. Each initialises output unless TSK_NO_INIT is set in options,
 * in which case output must already belong to the same tree sequence and is
 * cleared before use. */
int tsk_ls_hmm_forward(tsk_ls_hmm_t *self, int32_t *haplotype,
tsk_compressed_matrix_t *output, tsk_flags_t options);
int tsk_ls_hmm_backward(tsk_ls_hmm_t *self, int32_t *haplotype,
const double *forward_norm, tsk_compressed_matrix_t *output, tsk_flags_t options);
int tsk_ls_hmm_viterbi(tsk_ls_hmm_t *self, int32_t *haplotype,
tsk_viterbi_matrix_t *output, tsk_flags_t options);
/* Compressed matrix. A block_size of 0 selects the default block size. */
int tsk_compressed_matrix_init(tsk_compressed_matrix_t *self,
tsk_treeseq_t *tree_sequence, tsk_size_t block_size, tsk_flags_t options);
int tsk_compressed_matrix_free(tsk_compressed_matrix_t *self);
int tsk_compressed_matrix_clear(tsk_compressed_matrix_t *self);
void tsk_compressed_matrix_print_state(tsk_compressed_matrix_t *self, FILE *out);
int tsk_compressed_matrix_store_site(tsk_compressed_matrix_t *self, tsk_id_t site,
double normalisation_factor, tsk_size_t num_transitions,
const tsk_value_transition_t *transitions);
/* values must have room for num_sites * num_samples doubles. */
int tsk_compressed_matrix_decode(tsk_compressed_matrix_t *self, double *values);
/* Viterbi matrix. */
int tsk_viterbi_matrix_init(tsk_viterbi_matrix_t *self, tsk_treeseq_t *tree_sequence,
tsk_size_t block_size, tsk_flags_t options);
int tsk_viterbi_matrix_free(tsk_viterbi_matrix_t *self);
int tsk_viterbi_matrix_clear(tsk_viterbi_matrix_t *self);
void tsk_viterbi_matrix_print_state(tsk_viterbi_matrix_t *self, FILE *out);
int tsk_viterbi_matrix_add_recombination_required(
tsk_viterbi_matrix_t *self, tsk_id_t site, tsk_id_t node, bool required);
/* path must have room for one node ID per site. */
int tsk_viterbi_matrix_traceback(
tsk_viterbi_matrix_t *self, tsk_id_t *path, tsk_flags_t options);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: c/tskit/stats.c
================================================
/*
* MIT License
*
* Copyright (c) 2018-2025 Tskit Developers
* Copyright (c) 2016-2017 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include
#include
#include
#include
#include
/* Writes a short, human-readable summary of the LD calculator to `out`:
 * the address of the embedded tree, and the site-count and distance limits
 * most recently stored on the calculator. */
void
tsk_ld_calc_print_state(const tsk_ld_calc_t *self, FILE *out)
{
    const void *tree_address = (const void *) &self->tree;
    const int site_limit = (int) self->max_sites;

    fprintf(out, "tree = %p\n", tree_address);
    fprintf(out, "max_sites = %d\n", site_limit);
    fprintf(out, "max_distance = %f\n", self->max_distance);
}
/* Initialises an LD calculator for the specified tree sequence.
 *
 * The structure is zeroed, the embedded tree is initialised, and a buffer
 * with one entry per sample is allocated. Returns 0 on success; otherwise
 * the error from tsk_tree_init, or TSK_ERR_NO_MEMORY if the buffer cannot
 * be allocated. */
int TSK_WARN_UNUSED
tsk_ld_calc_init(tsk_ld_calc_t *self, const tsk_treeseq_t *tree_sequence)
{
    int ret;

    tsk_memset(self, 0, sizeof(*self));
    ret = tsk_tree_init(&self->tree, tree_sequence, 0);
    if (ret == 0) {
        self->tree_sequence = tree_sequence;
        self->total_samples = tsk_treeseq_get_num_samples(self->tree_sequence);
        self->sample_buffer
            = tsk_malloc(self->total_samples * sizeof(*self->sample_buffer));
        if (self->sample_buffer == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        }
    }
    return ret;
}
/* Releases the resources held by the calculator: the sample buffer and the
 * embedded tree. Always returns 0. */
int
tsk_ld_calc_free(tsk_ld_calc_t *self)
{
    tsk_safe_free(self->sample_buffer);
    tsk_tree_free(&self->tree);
    return 0;
}
/* Verifies that the specified site is one the LD calculator can handle.
 *
 * A site must carry exactly one mutation (TSK_ERR_ONLY_INFINITE_SITES
 * otherwise), and that mutation's derived state must differ from the
 * ancestral state (TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED otherwise).
 * Returns 0 when the site is acceptable. */
static int
tsk_ld_calc_check_site(tsk_ld_calc_t *TSK_UNUSED(self), const tsk_site_t *site)
{
    int ret = 0;

    /* Both restrictions are limitations of the current implementation;
     * there is no fundamental reason they could not be lifted. */
    if (site->mutations_length != 1) {
        ret = tsk_trace_error(TSK_ERR_ONLY_INFINITE_SITES);
    } else if (site->mutations[0].derived_state_length == site->ancestral_state_length
               && 0
                      == tsk_memcmp(site->ancestral_state,
                          site->mutations[0].derived_state,
                          site->ancestral_state_length)) {
        ret = tsk_trace_error(TSK_ERR_SILENT_MUTATIONS_NOT_SUPPORTED);
    }
    return ret;
}
/* Marks the samples descending from the focal site's mutation node as
 * tracked in the current tree, and records how many there are in
 * self->focal_samples. Returns 0 on success or the error from
 * tsk_tree_track_descendant_samples. */
static int
tsk_ld_calc_set_focal_samples(tsk_ld_calc_t *self)
{
    const tsk_id_t mutation_node = self->focal_site.mutations[0].node;
    int ret = tsk_tree_track_descendant_samples(&self->tree, mutation_node);

    if (ret == 0) {
        self->focal_samples = self->tree.num_tracked_samples[mutation_node];
    }
    return ret;
}
/* Prepares the calculator for computations relative to focal site `a`:
 * loads the site, validates it, seeks the tree to the site's position and
 * tracks the samples below its mutation. Stops at the first failing step
 * and returns that step's error code; returns 0 on success. */
static int
tsk_ld_calc_initialise(tsk_ld_calc_t *self, tsk_id_t a)
{
    int ret = tsk_treeseq_get_site(self->tree_sequence, a, &self->focal_site);

    if (ret == 0) {
        ret = tsk_ld_calc_check_site(self, &self->focal_site);
    }
    if (ret == 0) {
        ret = tsk_tree_seek(&self->tree, self->focal_site.position, 0);
    }
    if (ret == 0) {
        ret = tsk_ld_calc_set_focal_samples(self);
    }
    return ret;
}
/* Computes r^2 between the focal site and `target_site` using the sample
 * counts of the current tree, storing the value in *r2.
 *
 * f_a is the frequency of the focal mutation, f_b that of the target
 * mutation and f_ab the frequency of samples carrying both (the tracked
 * samples below the target node). *r2 is left untouched if the target
 * site fails validation, in which case the validation error is returned. */
static int
tsk_ld_calc_compute_r2(tsk_ld_calc_t *self, const tsk_site_t *target_site, double *r2)
{
    const double num_samples = (double) self->total_samples;
    double freq_a, freq_b, freq_ab, disequilibrium, denominator;
    tsk_id_t target_node;
    int ret = tsk_ld_calc_check_site(self, target_site);

    if (ret != 0) {
        return ret;
    }
    target_node = target_site->mutations[0].node;
    freq_a = ((double) self->focal_samples) / num_samples;
    freq_b = ((double) self->tree.num_samples[target_node]) / num_samples;
    freq_ab = ((double) self->tree.num_tracked_samples[target_node]) / num_samples;
    disequilibrium = freq_ab - freq_a * freq_b;
    denominator = freq_a * freq_b * (1 - freq_a) * (1 - freq_b);
    *r2 = (disequilibrium * disequilibrium) / denominator;
    return ret;
}
/* Appends the r^2 value for `target_site` to the result array, provided the
 * site is within max_distance of the focal site and fewer than max_sites
 * values have been stored so far.
 *
 * On success *ret_done is set to true when either limit has been reached
 * (nothing is appended) and to false when a value was appended. If the r^2
 * computation fails, its error is returned and *ret_done is not written. */
static int
tsk_ld_calc_compute_and_append(
    tsk_ld_calc_t *self, const tsk_site_t *target_site, bool *ret_done)
{
    int ret = 0;
    double value;
    const double distance = fabs(self->focal_site.position - target_site->position);
    const bool within_limits
        = distance <= self->max_distance && self->result_length < self->max_sites;

    if (within_limits) {
        ret = tsk_ld_calc_compute_r2(self, target_site, &value);
        if (ret != 0) {
            return ret;
        }
        self->result[self->result_length] = value;
        self->result_length++;
    }
    *ret_done = !within_limits;
    return ret;
}
/* Collects r^2 values for sites to the right of the focal site.
 *
 * The sites of the current tree whose id exceeds the focal site's id are
 * handled first; the tree is then advanced with tsk_tree_next and every
 * site of each subsequent tree is processed, until a limit is reached or
 * the tree iteration stops returning TSK_TREE_OK. Returns 0 on success, or
 * the first error encountered. */
static int
tsk_ld_calc_run_forward(tsk_ld_calc_t *self)
{
    int ret = 0;
    tsk_size_t k;
    bool finished = false;

    for (k = 0; k < self->tree.sites_length && !finished; k++) {
        if (self->tree.sites[k].id <= self->focal_site.id) {
            continue;
        }
        ret = tsk_ld_calc_compute_and_append(self, &self->tree.sites[k], &finished);
        if (ret != 0) {
            return ret;
        }
    }
    for (;;) {
        /* The tree is always advanced once before the limit flag is tested. */
        ret = tsk_tree_next(&self->tree);
        if (ret != TSK_TREE_OK || finished) {
            break;
        }
        for (k = 0; k < self->tree.sites_length && !finished; k++) {
            ret = tsk_ld_calc_compute_and_append(
                self, &self->tree.sites[k], &finished);
            if (ret != 0) {
                return ret;
            }
        }
    }
    /* Non-negative iteration statuses are not errors. */
    return ret < 0 ? ret : 0;
}
/* Collects r^2 values for sites to the left of the focal site.
 *
 * The sites of the current tree whose id is below the focal site's id are
 * handled first, from last to first; the tree is then moved back with
 * tsk_tree_prev and every site of each preceding tree is processed in
 * reverse order, until a limit is reached or the tree iteration stops
 * returning TSK_TREE_OK. Returns 0 on success, or the first error
 * encountered. */
static int
tsk_ld_calc_run_reverse(tsk_ld_calc_t *self)
{
    int ret = 0;
    tsk_id_t k;
    bool finished = false;

    for (k = (tsk_id_t) self->tree.sites_length - 1; k >= 0 && !finished; k--) {
        if (self->tree.sites[k].id >= self->focal_site.id) {
            continue;
        }
        ret = tsk_ld_calc_compute_and_append(self, &self->tree.sites[k], &finished);
        if (ret != 0) {
            return ret;
        }
    }
    for (;;) {
        /* The tree is always moved once before the limit flag is tested. */
        ret = tsk_tree_prev(&self->tree);
        if (ret != TSK_TREE_OK || finished) {
            break;
        }
        for (k = (tsk_id_t) self->tree.sites_length - 1; k >= 0 && !finished; k--) {
            ret = tsk_ld_calc_compute_and_append(
                self, &self->tree.sites[k], &finished);
            if (ret != 0) {
                return ret;
            }
        }
    }
    /* Non-negative iteration statuses are not errors. */
    return ret < 0 ? ret : 0;
}
/* Computes r^2 between sites `a` and `b`, storing the value in *r2.
 *
 * Site `a` becomes the focal site; the tree is then repositioned at site
 * `b` before the statistic is evaluated. Returns 0 on success, or the
 * error code of the first step that fails. */
int
tsk_ld_calc_get_r2(tsk_ld_calc_t *self, tsk_id_t a, tsk_id_t b, double *r2)
{
    tsk_site_t site_b;
    int ret = tsk_ld_calc_initialise(self, a);

    if (ret == 0) {
        ret = tsk_treeseq_get_site(self->tree_sequence, b, &site_b);
    }
    if (ret == 0) {
        ret = tsk_tree_seek(&self->tree, site_b.position, 0);
    }
    if (ret == 0) {
        ret = tsk_ld_calc_compute_r2(self, &site_b, r2);
    }
    return ret;
}
/* Fills `r2` with r^2 values between focal site `a` and neighbouring sites
 * in the given direction (TSK_DIR_FORWARD or TSK_DIR_REVERSE).
 *
 * At most `max_sites` values are written, and only for sites within
 * `max_distance` of the focal site. On success the number of values
 * written is stored in *num_r2_values and 0 is returned. Any other
 * direction yields TSK_ERR_BAD_PARAM_VALUE; *num_r2_values is not written
 * on failure. */
int
tsk_ld_calc_get_r2_array(tsk_ld_calc_t *self, tsk_id_t a, int direction,
    tsk_size_t max_sites, double max_distance, double *r2, tsk_size_t *num_r2_values)
{
    int ret = tsk_ld_calc_initialise(self, a);

    if (ret != 0) {
        return ret;
    }
    self->result = r2;
    self->result_length = 0;
    self->max_distance = max_distance;
    self->max_sites = max_sites;

    if (direction != TSK_DIR_FORWARD && direction != TSK_DIR_REVERSE) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
    } else if (direction == TSK_DIR_FORWARD) {
        ret = tsk_ld_calc_run_forward(self);
    } else {
        ret = tsk_ld_calc_run_reverse(self);
    }
    if (ret == 0) {
        *num_r2_values = self->result_length;
    }
    return ret;
}
================================================
FILE: c/tskit/stats.h
================================================
/*
* MIT License
*
* Copyright (c) 2019-2021 Tskit Developers
* Copyright (c) 2016-2017 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#ifndef TSK_STATS_H
#define TSK_STATS_H
#ifdef __cplusplus
extern "C" {
#endif
#include
/* State for computing pairwise linkage disequilibrium (r^2) between sites
 * of a tree sequence. Initialise with tsk_ld_calc_init and release with
 * tsk_ld_calc_free. */
typedef struct {
/* Tree sequence being analysed; stored (not copied) by tsk_ld_calc_init. */
const tsk_treeseq_t *tree_sequence;
/* The focal site, loaded at the start of each r^2 query. */
tsk_site_t focal_site;
/* Number of samples in the tree sequence. */
tsk_size_t total_samples;
/* Number of tracked samples below the focal site's mutation node. */
tsk_size_t focal_samples;
/* Limits for array queries: maximum distance from the focal site and
 * maximum number of values to produce. */
double max_distance;
tsk_size_t max_sites;
/* Tree used to visit sites and count samples. */
tsk_tree_t tree;
/* Buffer with total_samples entries, allocated by tsk_ld_calc_init. */
tsk_id_t *sample_buffer;
/* Caller-provided output array for array queries and the number of values
 * written to it so far. */
double *result;
tsk_size_t result_length;
} tsk_ld_calc_t;
int tsk_ld_calc_init(tsk_ld_calc_t *self, const tsk_treeseq_t *tree_sequence);
int tsk_ld_calc_free(tsk_ld_calc_t *self);
void tsk_ld_calc_print_state(const tsk_ld_calc_t *self, FILE *out);
int tsk_ld_calc_get_r2(tsk_ld_calc_t *self, tsk_id_t a, tsk_id_t b, double *r2);
int tsk_ld_calc_get_r2_array(tsk_ld_calc_t *self, tsk_id_t a, int direction,
tsk_size_t max_sites, double max_distance, double *r2, tsk_size_t *num_r2_values);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: c/tskit/tables.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2025 Tskit Developers
* Copyright (c) 2017-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#define TABLE_SEP "-----------------------------------------\n"
#define TSK_COL_OPTIONAL (1 << 0)
/* Describes one fixed-width column to read from a kastore. Arrays of these
 * are terminated by an entry whose name is NULL. */
typedef struct {
/* Key of the column in the store. */
const char *name;
/* Receives the array fetched from the store. */
void **array_dest;
/* Expected kastore type of the column. */
int type;
/* TSK_COL_OPTIONAL if the column may be absent. */
tsk_flags_t options;
} read_table_col_t;
/* Describes one ragged column (a data array plus a "<name>_offset" array)
 * to read from a kastore. Arrays of these are terminated by an entry whose
 * name is NULL. */
typedef struct {
/* Key of the data column; the offset column key is name + "_offset". */
const char *name;
/* Receives the data array and its length. */
void **data_array_dest;
tsk_size_t *data_len_dest;
/* Expected kastore type of the data array. */
int data_type;
/* Receives the 64-bit offset array. */
tsk_size_t **offset_array_dest;
/* TSK_COL_OPTIONAL if the column may be absent. */
tsk_flags_t options;
} read_table_ragged_col_t;
/* Describes one table-level property (an array with its own length) to read
 * from a kastore. Arrays of these are terminated by an entry whose name is
 * NULL. */
typedef struct {
/* Key of the property in the store. */
const char *name;
/* Receives the array and its length when the key is present. */
void **array_dest;
tsk_size_t *len_dest;
/* Expected kastore type of the array. */
int type;
/* read_table_properties asserts that TSK_COL_OPTIONAL is set here. */
tsk_flags_t options;
} read_table_property_t;
/* Describes one fixed-width column to write to a kastore. Arrays of these
 * are terminated by an entry whose name is NULL. */
typedef struct {
/* Key under which the column is stored. */
const char *name;
/* The array to store, its element count and its kastore type. */
const void *array;
tsk_size_t len;
int type;
} write_table_col_t;
/* Describes one ragged column to write to a kastore as a data array plus a
 * "<name>_offset" array. Arrays of these are terminated by an entry whose
 * name is NULL. */
typedef struct {
/* Key of the data column; the offset column key is name + "_offset". */
const char *name;
/* The data array, its element count and its kastore type. */
const void *data_array;
tsk_size_t data_len;
int data_type;
/* Offset array holding num_rows + 1 entries. */
const tsk_size_t *offset_array;
tsk_size_t num_rows;
} write_table_ragged_col_t;
/* Returns true if growing a table of `current_size` rows by
 * `additional_rows` would exceed the row limit.
 *
 * Row indexes run from 0 to TSK_MAX_ID, so a table could in principle hold
 * TSK_MAX_ID + 1 rows; the limit used here is TSK_MAX_ID so that a row
 * count always fits in a tsk_id_t. */
static bool
check_table_overflow(tsk_size_t current_size, tsk_size_t additional_rows)
{
    const tsk_size_t limit = TSK_MAX_ID;

    if (additional_rows > limit) {
        return true;
    }
    /* limit - additional_rows cannot wrap because of the check above. */
    return current_size > limit - additional_rows;
}
/* Returns true if growing an offset column of `current_size` elements by
 * `additional_elements` would exceed TSK_MAX_SIZE. */
static bool
check_offset_overflow(tsk_size_t current_size, tsk_size_t additional_elements)
{
    const tsk_size_t limit = TSK_MAX_SIZE;

    if (additional_elements > limit) {
        return true;
    }
    /* limit - additional_elements cannot wrap because of the check above. */
    return current_size > limit - additional_elements;
}
#define TSK_NUM_ROWS_UNSET ((tsk_size_t) - 1)
#define TSK_MAX_COL_NAME_LEN 64
/* Reads the fixed-width columns described by the NULL-name-terminated
 * `cols` array from the store.
 *
 * The first column found fixes *num_rows if it is still
 * TSK_NUM_ROWS_UNSET; every column found must then have that length
 * (TSK_ERR_FILE_FORMAT otherwise) and the expected type
 * (TSK_ERR_BAD_COLUMN_TYPE otherwise). A missing column that is not marked
 * TSK_COL_OPTIONAL yields TSK_ERR_REQUIRED_COL_NOT_FOUND. */
static int
read_table_cols(kastore_t *store, tsk_size_t *num_rows, read_table_col_t *cols,
    tsk_flags_t TSK_UNUSED(flags))
{
    int ret = 0;
    int stored_type;
    size_t stored_len;
    read_table_col_t *col;

    for (col = cols; col->name != NULL; col++) {
        ret = kastore_containss(store, col->name);
        if (ret < 0) {
            ret = tsk_set_kas_error(ret);
            goto out;
        }
        if (ret != 1) {
            /* Column absent: only acceptable when optional. */
            if (!(col->options & TSK_COL_OPTIONAL)) {
                ret = tsk_trace_error(TSK_ERR_REQUIRED_COL_NOT_FOUND);
                goto out;
            }
            continue;
        }
        ret = kastore_gets(
            store, col->name, col->array_dest, &stored_len, &stored_type);
        if (ret != 0) {
            ret = tsk_set_kas_error(ret);
            goto out;
        }
        if (*num_rows == TSK_NUM_ROWS_UNSET) {
            *num_rows = (tsk_size_t) stored_len;
        } else if (*num_rows != (tsk_size_t) stored_len) {
            ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
            goto out;
        }
        if (stored_type != col->type) {
            ret = tsk_trace_error(TSK_ERR_BAD_COLUMN_TYPE);
            goto out;
        }
    }
out:
    return ret;
}
/* Allocates a 64-bit offset array of num_rows + 1 entries, stores it in
 * *col->offset_array_dest and fills it with the widened values of the
 * 32-bit `source` array. Returns TSK_ERR_NO_MEMORY if allocation fails, in
 * which case the destination is left untouched. */
static int
cast_offset_array(read_table_ragged_col_t *col, uint32_t *source, tsk_size_t num_rows)
{
    const tsk_size_t num_offsets = num_rows + 1;
    tsk_size_t k;
    uint64_t *widened = tsk_malloc(num_offsets * sizeof(*widened));

    if (widened == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    *col->offset_array_dest = widened;
    for (k = 0; k < num_offsets; k++) {
        widened[k] = source[k];
    }
    return 0;
}
/* Reads the ragged columns described by the NULL-name-terminated `cols`
 * array from the store.
 *
 * Each ragged column consists of a data array stored under col->name and
 * an offset array stored under col->name + "_offset". Both must be present
 * or both absent (TSK_ERR_BOTH_COLUMNS_REQUIRED otherwise). The offset
 * array fixes *num_rows (offset length - 1) if it is still unset, and must
 * agree with it otherwise. Offsets stored as 32-bit values are widened to
 * 64 bits; the final offset must equal the data length. */
static int
read_table_ragged_cols(kastore_t *store, tsk_size_t *num_rows,
read_table_ragged_col_t *cols, tsk_flags_t TSK_UNUSED(flags))
{
int ret = 0;
size_t data_len = 0; // initial value unused, just to keep the compiler happy.
size_t offset_len;
int type;
read_table_ragged_col_t *col;
char offset_col_name[TSK_MAX_COL_NAME_LEN];
bool data_col_present, offset_col_present;
/* Offset array as returned by the store. Set to NULL once it has been
 * handed to the destination or freed, so the cleanup at `out` only frees
 * it on error paths where it is still held here. */
void *store_offset_array = NULL;
tsk_size_t *offset_array;
for (col = cols; col->name != NULL; col++) {
ret = kastore_containss(store, col->name);
if (ret < 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
data_col_present = false;
if (ret == 1) {
ret = kastore_gets(store, col->name, col->data_array_dest, &data_len, &type);
if (ret != 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
if (type != col->data_type) {
ret = tsk_trace_error(TSK_ERR_BAD_COLUMN_TYPE);
goto out;
}
*col->data_len_dest = (tsk_size_t) data_len;
data_col_present = true;
} else if (!(col->options & TSK_COL_OPTIONAL)) {
ret = tsk_trace_error(TSK_ERR_REQUIRED_COL_NOT_FOUND);
goto out;
}
/* Build the key of the companion offset column. */
assert(strlen(col->name) + strlen("_offset") + 2 < sizeof(offset_col_name));
strcpy(offset_col_name, col->name);
strcat(offset_col_name, "_offset");
ret = kastore_containss(store, offset_col_name);
if (ret < 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
offset_col_present = ret == 1;
if (offset_col_present != data_col_present) {
ret = tsk_trace_error(TSK_ERR_BOTH_COLUMNS_REQUIRED);
goto out;
}
if (offset_col_present) {
ret = kastore_gets(
store, offset_col_name, &store_offset_array, &offset_len, &type);
if (ret != 0) {
ret = tsk_set_kas_error(ret);
goto out;
}
/* A table with zero rows will still have an offset length of 1;
* catching this here prevents underflows in the logic below */
if (offset_len == 0) {
ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
goto out;
}
/* Some tables have only ragged columns */
if (*num_rows == TSK_NUM_ROWS_UNSET) {
*num_rows = (tsk_size_t) offset_len - 1;
} else {
if (*num_rows != (tsk_size_t) offset_len - 1) {
ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
goto out;
}
}
if (type == KAS_UINT64) {
/* 64-bit offsets are used directly: the array now belongs to
 * the destination. */
*col->offset_array_dest = (uint64_t *) store_offset_array;
store_offset_array = NULL;
} else if (type == KAS_UINT32) {
/* 32-bit offsets are copied into a freshly allocated 64-bit
 * array, after which the stored array is released. */
ret = cast_offset_array(col, (uint32_t *) store_offset_array, *num_rows);
if (ret != 0) {
goto out;
}
tsk_safe_free(store_offset_array);
store_offset_array = NULL;
} else {
ret = tsk_trace_error(TSK_ERR_BAD_COLUMN_TYPE);
goto out;
}
/* The final offset must equal the length of the data array. */
offset_array = *col->offset_array_dest;
if (offset_array[*num_rows] != (tsk_size_t) data_len) {
ret = tsk_trace_error(TSK_ERR_BAD_OFFSET);
goto out;
}
}
}
out:
tsk_safe_free(store_offset_array);
return ret;
}
/* Reads the table properties described by the NULL-name-terminated
 * `properties` array from the store.
 *
 * For each key present, the array and its length are stored in the
 * property's destinations; an unexpected type yields
 * TSK_ERR_BAD_COLUMN_TYPE. Absent keys are skipped: every property is
 * asserted to be marked TSK_COL_OPTIONAL. */
static int
read_table_properties(
    kastore_t *store, read_table_property_t *properties, tsk_flags_t TSK_UNUSED(flags))
{
    int ret = 0;
    int stored_type;
    size_t stored_len;
    read_table_property_t *prop = properties;

    while (prop->name != NULL) {
        ret = kastore_containss(store, prop->name);
        if (ret < 0) {
            ret = tsk_set_kas_error(ret);
            break;
        }
        if (ret == 1) {
            ret = kastore_gets(
                store, prop->name, prop->array_dest, &stored_len, &stored_type);
            if (ret != 0) {
                ret = tsk_set_kas_error(ret);
                assert(ret != 0); /* Tell static analysers that we're handling errors */
                break;
            }
            if (stored_type != prop->type) {
                ret = tsk_trace_error(TSK_ERR_BAD_COLUMN_TYPE);
                break;
            }
            *prop->len_dest = (tsk_size_t) stored_len;
        }
        assert(prop->options & TSK_COL_OPTIONAL);
        prop++;
    }
    return ret;
}
/* Reads a whole table from the store: fixed-width columns, ragged columns
 * and properties, each group being skipped when its descriptor array is
 * NULL.
 *
 * *num_rows starts as TSK_NUM_ROWS_UNSET and is fixed by the first column
 * read; if no column set it, TSK_ERR_FILE_FORMAT is reported. Processing
 * stops at the first failure and that error is returned. */
static int
read_table(kastore_t *store, tsk_size_t *num_rows, read_table_col_t *cols,
    read_table_ragged_col_t *ragged_cols, read_table_property_t *properties,
    tsk_flags_t options)
{
    int ret = 0;

    *num_rows = TSK_NUM_ROWS_UNSET;
    if (cols != NULL) {
        ret = read_table_cols(store, num_rows, cols, options);
    }
    if (ret == 0 && ragged_cols != NULL) {
        ret = read_table_ragged_cols(store, num_rows, ragged_cols, options);
    }
    if (ret == 0 && *num_rows == TSK_NUM_ROWS_UNSET) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
    }
    if (ret == 0 && properties != NULL) {
        ret = read_table_properties(store, properties, options);
    }
    return ret;
}
/* Frees every destination array referenced by the given descriptor arrays
 * (each terminated by an entry with a NULL name). Any of the three
 * descriptor arrays may be NULL, in which case it is skipped. */
static void
free_read_table_mem(read_table_col_t *cols, read_table_ragged_col_t *ragged_cols,
    read_table_property_t *properties)
{
    read_table_col_t *col = cols;
    read_table_ragged_col_t *ragged_col = ragged_cols;
    read_table_property_t *property = properties;

    while (col != NULL && col->name != NULL) {
        tsk_safe_free(*(col->array_dest));
        col++;
    }
    while (ragged_col != NULL && ragged_col->name != NULL) {
        tsk_safe_free(*(ragged_col->data_array_dest));
        tsk_safe_free(*(ragged_col->offset_array_dest));
        ragged_col++;
    }
    while (property != NULL && property->name != NULL) {
        tsk_safe_free(*(property->array_dest));
        property++;
    }
}
/* Writes the offset array of a ragged column under the key
 * col->name + "_offset".
 *
 * The offsets are stored as 64-bit values, borrowing the caller's array,
 * when TSK_DUMP_FORCE_OFFSET_64 is set or the final offset exceeds
 * UINT32_MAX. Otherwise they are narrowed into a temporary 32-bit array
 * which is released before returning. */
static int
write_offset_col(
    kastore_t *store, const write_table_ragged_col_t *col, tsk_flags_t options)
{
    int ret = 0;
    char key[TSK_MAX_COL_NAME_LEN];
    uint32_t *narrow = NULL;
    const tsk_size_t num_offsets = col->num_rows + 1;
    const bool force_64 = (options & TSK_DUMP_FORCE_OFFSET_64) != 0;
    const bool overflows_32 = col->offset_array[col->num_rows] > UINT32_MAX;
    tsk_size_t k;
    /* Defaults describe the 64-bit case, in which the store borrows the
     * caller's array. */
    int kas_type = KAS_UINT64;
    int32_t kas_flags = KAS_BORROWS_ARRAY;
    const void *payload = col->offset_array;

    assert(strlen(col->name) + strlen("_offset") + 2 < sizeof(key));
    strcpy(key, col->name);
    strcat(key, "_offset");

    if (!(force_64 || overflows_32)) {
        narrow = tsk_malloc(num_offsets * sizeof(*narrow));
        if (narrow == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        for (k = 0; k < num_offsets; k++) {
            narrow[k] = (uint32_t) col->offset_array[k];
        }
        kas_type = KAS_UINT32;
        payload = narrow;
        /* The temporary buffer is freed below, so the store must not
         * borrow it. */
        kas_flags = 0;
    }
    ret = kastore_puts(store, key, payload, (size_t) num_offsets, kas_type, kas_flags);
    if (ret != 0) {
        ret = tsk_set_kas_error(ret);
    }
out:
    tsk_safe_free(narrow);
    return ret;
}
/* Writes every ragged column in the NULL-name-terminated `write_cols`
 * array: first the data array (borrowed by the store), then its offset
 * array. Stops at the first failure and returns that error. */
static int
write_table_ragged_cols(
    kastore_t *store, const write_table_ragged_col_t *write_cols, tsk_flags_t options)
{
    int ret = 0;
    const write_table_ragged_col_t *col = write_cols;

    while (col->name != NULL) {
        ret = kastore_puts(store, col->name, col->data_array, (size_t) col->data_len,
            col->data_type, KAS_BORROWS_ARRAY);
        if (ret != 0) {
            ret = tsk_set_kas_error(ret);
            break;
        }
        ret = write_offset_col(store, col, options);
        if (ret != 0) {
            break;
        }
        col++;
    }
    return ret;
}
/* Writes every fixed-width column in the NULL-name-terminated `write_cols`
 * array to the store, which borrows each array. Stops at the first failure
 * and returns the converted kastore error. */
static int
write_table_cols(kastore_t *store, const write_table_col_t *write_cols,
    tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    const write_table_col_t *col = write_cols;

    while (col->name != NULL) {
        ret = kastore_puts(store, col->name, col->array, (size_t) col->len, col->type,
            KAS_BORROWS_ARRAY);
        if (ret != 0) {
            ret = tsk_set_kas_error(ret);
            break;
        }
        col++;
    }
    return ret;
}
/* Writes a table to the store: all fixed-width columns, then all ragged
 * columns. Returns the first error encountered, or 0. */
static int
write_table(kastore_t *store, const write_table_col_t *cols,
    const write_table_ragged_col_t *ragged_cols, tsk_flags_t options)
{
    int ret = write_table_cols(store, cols, options);

    if (ret == 0) {
        ret = write_table_ragged_cols(store, ragged_cols, options);
    }
    return ret;
}
/* Checks that an offset array of num_rows + 1 entries is well-formed: it
 * must start at zero and never decrease. When `check_length` is true the
 * final offset must also equal `length`. Returns 0 if the array is valid
 * and TSK_ERR_BAD_OFFSET otherwise. */
static int
check_offsets(
    tsk_size_t num_rows, const tsk_size_t *offsets, tsk_size_t length, bool check_length)
{
    tsk_size_t row;
    bool well_formed = offsets[0] == 0;

    if (well_formed && check_length) {
        well_formed = offsets[num_rows] == length;
    }
    for (row = 0; well_formed && row < num_rows; row++) {
        well_formed = offsets[row] <= offsets[row + 1];
    }
    if (!well_formed) {
        return tsk_trace_error(TSK_ERR_BAD_OFFSET);
    }
    return 0;
}
/* Computes the row capacity a table needs in order to hold
 * `additional_rows` more rows, storing it in *ret_new_max_rows.
 *
 * If the current capacity suffices it is returned unchanged. Otherwise the
 * capacity grows by `max_rows_increment`, or, when that is zero, by
 * doubling subject to the bounds below. In both cases the result is at
 * least num_rows + additional_rows. Reports TSK_ERR_TABLE_OVERFLOW if the
 * row limit would be exceeded; *ret_new_max_rows is not written on error. */
static int
calculate_max_rows(tsk_size_t num_rows, tsk_size_t max_rows,
tsk_size_t max_rows_increment, tsk_size_t additional_rows,
tsk_size_t *ret_new_max_rows)
{
tsk_size_t new_max_rows;
int ret = 0;
/* After this check num_rows + additional_rows cannot exceed TSK_MAX_ID. */
if (check_table_overflow(num_rows, additional_rows)) {
ret = tsk_trace_error(TSK_ERR_TABLE_OVERFLOW);
goto out;
}
if (num_rows + additional_rows <= max_rows) {
new_max_rows = max_rows;
} else {
if (max_rows_increment == 0) {
/* Doubling by default */
new_max_rows = TSK_MIN(max_rows * 2, TSK_MAX_ID + (tsk_size_t) 1);
/* Add some constraints to prevent very small allocations */
if (new_max_rows < 1024) {
new_max_rows = 1024;
}
/* Prevent allocating more than ~2 million additional rows unless needed*/
if (new_max_rows - max_rows > 2097152) {
new_max_rows = max_rows + 2097152;
}
} else {
/* Use user increment value */
if (check_table_overflow(max_rows, max_rows_increment)) {
ret = tsk_trace_error(TSK_ERR_TABLE_OVERFLOW);
goto out;
}
new_max_rows = max_rows + max_rows_increment;
}
/* Whatever the growth policy produced, make room for the request. */
new_max_rows = TSK_MAX(new_max_rows, num_rows + additional_rows);
}
*ret_new_max_rows = new_max_rows;
out:
return ret;
}
/* Computes the capacity a ragged data column needs in order to hold
 * `additional_length` more elements, storing it in *ret_new_max_length.
 *
 * If the current capacity suffices it is returned unchanged. Otherwise the
 * capacity grows by `max_length_increment`, or, when that is zero, by
 * doubling subject to the bounds below. In both cases the result is at
 * least current_length + additional_length. Reports
 * TSK_ERR_COLUMN_OVERFLOW if the size limit would be exceeded;
 * *ret_new_max_length is not written on error. */
static int
calculate_max_length(tsk_size_t current_length, tsk_size_t max_length,
tsk_size_t max_length_increment, tsk_size_t additional_length,
tsk_size_t *ret_new_max_length)
{
tsk_size_t new_max_length;
int ret = 0;
/* After this check current_length + additional_length cannot exceed
 * TSK_MAX_SIZE. */
if (check_offset_overflow(current_length, additional_length)) {
ret = tsk_trace_error(TSK_ERR_COLUMN_OVERFLOW);
goto out;
}
if (current_length + additional_length <= max_length) {
new_max_length = max_length;
} else {
if (max_length_increment == 0) {
/* Doubling by default */
new_max_length = TSK_MIN(max_length * 2, TSK_MAX_SIZE);
/* Add some constraints to prevent very small allocations */
if (new_max_length < 65536) {
new_max_length = 65536;
}
/* Prevent allocating more than 100MB additional unless needed*/
if (new_max_length - max_length > 104857600) {
new_max_length = max_length + 104857600;
}
/* The same clamp is applied again after the if/else below. */
new_max_length = TSK_MAX(new_max_length, current_length + additional_length);
} else {
/* Use user increment value */
if (check_offset_overflow(max_length, max_length_increment)) {
/* Here we could allocate to the maximum size.
* Instead we are erroring out as this is much easier to test.
* The cost is that (at most) the last "max_length_increment"-1
* bytes of the possible array space can't be used. */
ret = tsk_trace_error(TSK_ERR_COLUMN_OVERFLOW);
goto out;
}
new_max_length = max_length + max_length_increment;
}
/* Whatever the growth policy produced, make room for the request. */
new_max_length = TSK_MAX(new_max_length, current_length + additional_length);
}
*ret_new_max_length = new_max_length;
out:
return ret;
}
/* Resizes *column to hold `new_max_rows` elements of `element_size` bytes.
 * On success *column points at the resized storage; on failure
 * TSK_ERR_NO_MEMORY is reported and *column is left unchanged. */
static int
expand_column(void **column, tsk_size_t new_max_rows, size_t element_size)
{
    void *resized = tsk_realloc(*column, new_max_rows * element_size);

    if (resized == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    *column = resized;
    return 0;
}
/* Ensures that the data array of a ragged column can hold
 * `additional_length` more elements, growing it if necessary.
 *
 * The new capacity comes from calculate_max_length; when it exceeds
 * *max_length the column is reallocated and *max_length updated. Returns
 * the first error encountered, or 0. */
static int
expand_ragged_column(tsk_size_t current_length, tsk_size_t additional_length,
    tsk_size_t max_length_increment, tsk_size_t *max_length, void **column,
    size_t element_size)
{
    tsk_size_t wanted;
    int ret = calculate_max_length(current_length, *max_length, max_length_increment,
        additional_length, &wanted);

    if (ret == 0 && wanted > *max_length) {
        ret = expand_column(column, wanted, element_size);
        if (ret == 0) {
            *max_length = wanted;
        }
    }
    return ret;
}
/* TODO rename to copy_string or replace_and_copy_string */
/* Replaces the buffer *str with a private copy of the first `new_len` bytes
 * of `new_str`.
 *
 * The previous buffer is always released. When `new_len` is zero the result
 * is the empty state (*str == NULL, *len == 0). If the copy cannot be
 * allocated, TSK_ERR_NO_MEMORY is reported and the destination is also left
 * in the empty state, so that *len never describes a buffer that does not
 * exist. `new_str` must not alias the buffer being replaced. */
static int
replace_string(
    char **str, tsk_size_t *len, const char *new_str, const tsk_size_t new_len)
{
    int ret = 0;

    tsk_safe_free(*str);
    *str = NULL;
    /* The length is only published once the copy below has succeeded. */
    *len = 0;
    if (new_len > 0) {
        *str = tsk_malloc(new_len * sizeof(char));
        if (*str == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        tsk_memcpy(*str, new_str, new_len * sizeof(char));
        *len = new_len;
    }
out:
    return ret;
}
/* Installs `new_str` (of length `new_len`) as the buffer of *str without
 * copying it, after releasing the previous buffer. Always returns 0. */
static int
takeset_string(char **str, tsk_size_t *len, char *new_str, const tsk_size_t new_len)
{
    tsk_safe_free(*str);
    *len = new_len;
    *str = new_str;
    return 0;
}
/* Allocates an empty ragged column for `num_rows` rows: a one-byte data
 * array and a zero-filled offset array of num_rows + 1 entries. Both
 * allocations are always attempted; TSK_ERR_NO_MEMORY is reported if either
 * one fails. */
static int
alloc_empty_ragged_column(tsk_size_t num_rows, void **data_col, tsk_size_t **offset_col)
{
    *data_col = tsk_malloc(1);
    *offset_col = tsk_calloc(num_rows + 1, sizeof(tsk_size_t));

    if (*data_col != NULL && *offset_col != NULL) {
        return 0;
    }
    return tsk_trace_error(TSK_ERR_NO_MEMORY);
}
/* Validates the arguments describing a ragged column. `data` and `offset`
 * must either both be NULL or both be non-NULL (TSK_ERR_BAD_PARAM_VALUE
 * otherwise); when present, the offsets must start at zero and be
 * non-decreasing. The final offset is not compared against a length. */
static int
check_ragged_column(tsk_size_t num_rows, void *data, tsk_size_t *offset)
{
    const bool has_data = data != NULL;
    const bool has_offset = offset != NULL;

    if (has_data != has_offset) {
        return tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
    }
    if (!has_data) {
        return 0;
    }
    return check_offsets(num_rows, offset, 0, false);
}
/* Installs a ragged column into the destination pointers without copying.
 *
 * When `data` is NULL an empty column is allocated instead; otherwise the
 * given arrays are stored as-is. On success *length_dest is set to the
 * final offset of the installed column. */
static int
takeset_ragged_column(tsk_size_t num_rows, void *data, tsk_size_t *offset,
    void **data_dest, tsk_size_t **offset_dest, tsk_size_t *length_dest)
{
    int ret = 0;

    if (data != NULL) {
        *data_dest = data;
        *offset_dest = offset;
    } else {
        ret = alloc_empty_ragged_column(num_rows, (void *) data_dest, offset_dest);
    }
    if (ret == 0) {
        *length_dest = (*offset_dest)[num_rows];
    }
    return ret;
}
/* Installs an optional ID column into *dest without copying.
 *
 * When `input` is NULL a column of `num_rows` entries is allocated with
 * every byte set to 0xff; otherwise `input` itself is stored. Reports
 * TSK_ERR_NO_MEMORY if the allocation fails, leaving *dest unchanged. */
static int
takeset_optional_id_column(tsk_size_t num_rows, tsk_id_t *input, tsk_id_t **dest)
{
    tsk_id_t *column = input;
    tsk_size_t num_bytes;

    if (column == NULL) {
        num_bytes = num_rows * sizeof(*column);
        column = tsk_malloc(num_bytes);
        if (column == NULL) {
            return tsk_trace_error(TSK_ERR_NO_MEMORY);
        }
        tsk_memset(column, 0xff, num_bytes);
    }
    *dest = column;
    return 0;
}
/* Prints the metadata schema, framed by marker lines and followed by a
 * table separator, to `out`. Returns the value returned by fprintf. */
static int
write_metadata_schema_header(
    FILE *out, const char *metadata_schema, tsk_size_t metadata_schema_length)
{
    const int schema_length = (int) metadata_schema_length;

    return fprintf(out,
        "#metadata_schema#\n"
        "%.*s\n"
        "#end#metadata_schema\n" TABLE_SEP,
        schema_length, metadata_schema);
}
/* Utilities for in-place subsetting columns */
/* Returns the number of non-zero entries among the first `num_rows`
 * elements of `keep`. */
static tsk_size_t
count_true(tsk_size_t num_rows, const tsk_bool_t *restrict keep)
{
    tsk_size_t total = 0;
    tsk_size_t row;

    for (row = 0; row < num_rows; row++) {
        total += keep[row] ? 1 : 0;
    }
    return total;
}
/* Converts a keep-mask into an ID map: kept rows are numbered
 * consecutively from zero in row order, and every other row maps to
 * TSK_NULL. */
static void
keep_mask_to_id_map(
    tsk_size_t num_rows, const tsk_bool_t *restrict keep, tsk_id_t *restrict id_map)
{
    tsk_id_t new_id = 0;
    tsk_size_t row;

    for (row = 0; row < num_rows; row++) {
        if (keep[row]) {
            id_map[row] = new_id;
            new_id++;
        } else {
            id_map[row] = TSK_NULL;
        }
    }
}
/* Compacts an ID column in place, retaining only the rows flagged in
 * `keep`, and translates every retained non-null value through `id_map`.
 * Returns the number of rows retained. */
static tsk_size_t
subset_remap_id_column(tsk_id_t *restrict column, tsk_size_t num_rows,
    const tsk_bool_t *restrict keep, const tsk_id_t *restrict id_map)
{
    tsk_size_t src;
    tsk_size_t dst = 0;
    tsk_id_t id;

    for (src = 0; src < num_rows; src++) {
        if (!keep[src]) {
            continue;
        }
        id = column[src];
        column[dst] = (id != TSK_NULL) ? id_map[id] : id;
        dst++;
    }
    return dst;
}
/* Trigger warning: C++ programmers should look away... This may be one of the
* few cases where some macro funkiness is warranted, as these are exact
* duplicates of the same function with just the type of the column
* parameter changed. */
/* Compacts an ID column in place, retaining only the rows flagged in
 * `keep`. Returns the number of rows retained. */
static tsk_size_t
subset_id_column(
    tsk_id_t *restrict column, tsk_size_t num_rows, const tsk_bool_t *restrict keep)
{
    tsk_size_t src;
    tsk_size_t dst = 0;

    for (src = 0; src < num_rows; src++) {
        if (!keep[src]) {
            continue;
        }
        column[dst] = column[src];
        dst++;
    }
    return dst;
}
/* Compacts a flags column in place, retaining only the rows flagged in
 * `keep`. Returns the number of rows retained. */
static tsk_size_t
subset_flags_column(
    tsk_flags_t *restrict column, tsk_size_t num_rows, const tsk_bool_t *restrict keep)
{
    tsk_size_t src;
    tsk_size_t dst = 0;

    for (src = 0; src < num_rows; src++) {
        if (!keep[src]) {
            continue;
        }
        column[dst] = column[src];
        dst++;
    }
    return dst;
}
/* Compacts a column of doubles in place, retaining only the rows flagged
 * in `keep`. Returns the number of rows retained. */
static tsk_size_t
subset_double_column(
    double *restrict column, tsk_size_t num_rows, const tsk_bool_t *restrict keep)
{
    tsk_size_t src;
    tsk_size_t dst = 0;

    for (src = 0; src < num_rows; src++) {
        if (!keep[src]) {
            continue;
        }
        column[dst] = column[src];
        dst++;
    }
    return dst;
}
/* Compacts a ragged char column in place, retaining only the rows flagged
 * in `keep`. Both the data and the offsets are rewritten; the offset array
 * ends with the new total length, which is also returned. */
static tsk_size_t
subset_ragged_char_column(char *restrict data, tsk_size_t *restrict offset_col,
    tsk_size_t num_rows, const tsk_bool_t *restrict keep)
{
    tsk_size_t row, src;
    tsk_size_t kept = 0;
    tsk_size_t write_pos = 0;

    for (row = 0; row < num_rows; row++) {
        if (!keep[row]) {
            continue;
        }
        /* The new offset is stored before this row's old offsets are read. */
        offset_col[kept] = write_pos;
        kept++;
        /* Copied element by element because the source and destination
         * regions may overlap. */
        for (src = offset_col[row]; src < offset_col[row + 1]; src++) {
            data[write_pos] = data[src];
            write_pos++;
        }
    }
    offset_col[kept] = write_pos;
    return write_pos;
}
/* Compacts a ragged column of doubles in place, retaining only the rows
 * flagged in `keep`. Both the data and the offsets are rewritten; the
 * offset array ends with the new total length, which is also returned. */
static tsk_size_t
subset_ragged_double_column(double *restrict data, tsk_size_t *restrict offset_col,
    tsk_size_t num_rows, const tsk_bool_t *restrict keep)
{
    tsk_size_t row, src;
    tsk_size_t kept = 0;
    tsk_size_t write_pos = 0;

    for (row = 0; row < num_rows; row++) {
        if (!keep[row]) {
            continue;
        }
        /* The new offset is stored before this row's old offsets are read. */
        offset_col[kept] = write_pos;
        kept++;
        /* Copied element by element because the source and destination
         * regions may overlap. */
        for (src = offset_col[row]; src < offset_col[row + 1]; src++) {
            data[write_pos] = data[src];
            write_pos++;
        }
    }
    offset_col[kept] = write_pos;
    return write_pos;
}
/* Compacts a ragged ID column in place, retaining only the rows flagged in
 * `keep`, and translates every retained non-null value through `id_map`.
 * The offset array ends with the new total length, which is also
 * returned. */
static tsk_size_t
subset_remap_ragged_id_column(tsk_id_t *restrict data, tsk_size_t *restrict offset_col,
    tsk_size_t num_rows, const tsk_bool_t *restrict keep,
    const tsk_id_t *restrict id_map)
{
    tsk_size_t row, src;
    tsk_size_t kept = 0;
    tsk_size_t write_pos = 0;
    tsk_id_t id;

    for (row = 0; row < num_rows; row++) {
        if (!keep[row]) {
            continue;
        }
        /* The new offset is stored before this row's old offsets are read. */
        offset_col[kept] = write_pos;
        kept++;
        for (src = offset_col[row]; src < offset_col[row + 1]; src++) {
            id = data[src];
            data[write_pos] = (id != TSK_NULL) ? id_map[id] : id;
            write_pos++;
        }
    }
    offset_col[kept] = write_pos;
    return write_pos;
}
/*************************
* reference sequence
*************************/
/* Puts the reference sequence into its empty state by zeroing the whole
 * structure. Nothing is freed. Always returns 0. */
int
tsk_reference_sequence_init(
    tsk_reference_sequence_t *self, tsk_flags_t TSK_UNUSED(options))
{
    tsk_memset(self, 0, sizeof(tsk_reference_sequence_t));
    return 0;
}
/* Releases the four buffers owned by the reference sequence (data, url,
 * metadata and metadata schema). The length fields are not modified.
 * Always returns 0. */
int
tsk_reference_sequence_free(tsk_reference_sequence_t *self)
{
    tsk_safe_free(self->metadata_schema);
    tsk_safe_free(self->metadata);
    tsk_safe_free(self->url);
    tsk_safe_free(self->data);
    return 0;
}
/* Returns true when the reference sequence is empty, i.e. the data, url,
 * metadata and metadata schema lengths are all zero. Only the lengths are
 * examined. */
bool
tsk_reference_sequence_is_null(const tsk_reference_sequence_t *self)
{
    if (self->data_length != 0 || self->url_length != 0) {
        return false;
    }
    if (self->metadata_length != 0) {
        return false;
    }
    return self->metadata_schema_length == 0;
}
/* Returns true if the two reference sequences have identical data and url.
 * Unless TSK_CMP_IGNORE_METADATA is set in `options`, the metadata and the
 * metadata schema must be identical as well. Contents are only compared
 * once the corresponding lengths are known to match. */
bool
tsk_reference_sequence_equals(const tsk_reference_sequence_t *self,
    const tsk_reference_sequence_t *other, tsk_flags_t options)
{
    if (self->data_length != other->data_length
        || self->url_length != other->url_length) {
        return false;
    }
    if (tsk_memcmp(self->data, other->data, self->data_length * sizeof(char)) != 0) {
        return false;
    }
    if (tsk_memcmp(self->url, other->url, self->url_length * sizeof(char)) != 0) {
        return false;
    }
    if (options & TSK_CMP_IGNORE_METADATA) {
        return true;
    }
    if (self->metadata_length != other->metadata_length
        || self->metadata_schema_length != other->metadata_schema_length) {
        return false;
    }
    if (tsk_memcmp(
            self->metadata, other->metadata, self->metadata_length * sizeof(char))
        != 0) {
        return false;
    }
    return tsk_memcmp(self->metadata_schema, other->metadata_schema,
               self->metadata_schema_length * sizeof(char))
           == 0;
}
/* Copies the reference sequence `self` into `dest`.
 *
 * Unless TSK_NO_INIT is set in `options`, `dest` is initialised first; with
 * TSK_NO_INIT it must already be a valid reference sequence, whose current
 * contents are replaced. Copying an empty source leaves `dest` empty as
 * well: its buffers are released and its pointers and lengths are reset,
 * so tsk_reference_sequence_is_null(dest) holds afterwards. Returns 0 on
 * success or the first error encountered while copying a field. */
int
tsk_reference_sequence_copy(const tsk_reference_sequence_t *self,
    tsk_reference_sequence_t *dest, tsk_flags_t options)
{
    int ret = 0;

    if (!(options & TSK_NO_INIT)) {
        ret = tsk_reference_sequence_init(dest, 0);
        if (ret != 0) {
            goto out;
        }
    }
    if (tsk_reference_sequence_is_null(self)) {
        /* Freeing releases the buffers but leaves the length fields as
         * they were, so re-initialise to reach a genuinely empty state with
         * no stale lengths or pointers. */
        tsk_reference_sequence_free(dest);
        ret = tsk_reference_sequence_init(dest, 0);
        if (ret != 0) {
            goto out;
        }
    } else {
        ret = tsk_reference_sequence_set_data(dest, self->data, self->data_length);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_reference_sequence_set_url(dest, self->url, self->url_length);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_reference_sequence_set_metadata(
            dest, self->metadata, self->metadata_length);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_reference_sequence_set_metadata_schema(
            dest, self->metadata_schema, self->metadata_schema_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Replaces the sequence data with a copy of the first `data_length` bytes
 * of `data`. Returns 0 on success or the error from the copy. */
int
tsk_reference_sequence_set_data(
    tsk_reference_sequence_t *self, const char *data, tsk_size_t data_length)
{
    int ret = replace_string(&self->data, &self->data_length, data, data_length);

    return ret;
}
/* Replaces the URL with a copy of the first `url_length` bytes of `url`.
 * Returns 0 on success or the error from the copy. */
int
tsk_reference_sequence_set_url(
    tsk_reference_sequence_t *self, const char *url, tsk_size_t url_length)
{
    int ret = replace_string(&self->url, &self->url_length, url, url_length);

    return ret;
}
/* Replaces the metadata with a copy of the first `metadata_length` bytes of
 * `metadata`. Returns 0 on success or the error from the copy. */
int
tsk_reference_sequence_set_metadata(
    tsk_reference_sequence_t *self, const char *metadata, tsk_size_t metadata_length)
{
    int ret = replace_string(
        &self->metadata, &self->metadata_length, metadata, metadata_length);

    return ret;
}
/* Replaces the metadata schema with a copy of the first
 * `metadata_schema_length` bytes of `metadata_schema`. Returns 0 on success
 * or the error from the copy. */
int
tsk_reference_sequence_set_metadata_schema(tsk_reference_sequence_t *self,
    const char *metadata_schema, tsk_size_t metadata_schema_length)
{
    int ret = replace_string(&self->metadata_schema, &self->metadata_schema_length,
        metadata_schema, metadata_schema_length);

    return ret;
}
/* Hands the caller-supplied data buffer (data_length bytes) to the reference
 * sequence via takeset_string, returning whatever that helper returns.
 * NOTE(review): the "takeset" name suggests ownership of the buffer passes
 * to the reference sequence - confirm against takeset_string. */
int
tsk_reference_sequence_takeset_data(
    tsk_reference_sequence_t *self, char *data, tsk_size_t data_length)
{
    int ret;

    ret = takeset_string(&self->data, &self->data_length, data, data_length);
    return ret;
}
/* Hands the caller-supplied metadata buffer (metadata_length bytes) to the
 * reference sequence via takeset_string, returning whatever that helper
 * returns.
 * NOTE(review): the "takeset" name suggests ownership of the buffer passes
 * to the reference sequence - confirm against takeset_string. */
int
tsk_reference_sequence_takeset_metadata(
    tsk_reference_sequence_t *self, char *metadata, tsk_size_t metadata_length)
{
    int ret;

    ret = takeset_string(
        &self->metadata, &self->metadata_length, metadata, metadata_length);
    return ret;
}
/*************************
* individual table
*************************/
/* Releases every column buffer owned by the table: flags, plus the
 * data/offset pairs for location, parents and metadata. The metadata schema
 * is not touched here. */
static void
tsk_individual_table_free_columns(tsk_individual_table_t *self)
{
    /* Fixed-width column */
    tsk_safe_free(self->flags);
    /* Ragged columns, each with its offset array */
    tsk_safe_free(self->location_offset);
    tsk_safe_free(self->location);
    tsk_safe_free(self->parents_offset);
    tsk_safe_free(self->parents);
    tsk_safe_free(self->metadata_offset);
    tsk_safe_free(self->metadata);
}
/* Frees all memory held by the table: the metadata schema and every column.
 * Always returns 0. */
int
tsk_individual_table_free(tsk_individual_table_t *self)
{
    tsk_safe_free(self->metadata_schema);
    tsk_individual_table_free_columns(self);
    return 0;
}
/* Ensures the per-row columns can hold additional_rows more rows.
 *
 * The new capacity is obtained from calculate_max_rows. If the current
 * capacity is insufficient, flags is grown to that many entries and each
 * offset column to one more than that, after which max_rows is updated.
 *
 * Returns 0 on success or the first non-zero code from calculate_max_rows
 * or expand_column. */
static int
tsk_individual_table_expand_main_columns(
    tsk_individual_table_t *self, tsk_size_t additional_rows)
{
    int ret;
    tsk_size_t target_rows;

    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,
        additional_rows, &target_rows);
    if (ret != 0) {
        return ret;
    }
    if ((self->num_rows + additional_rows) <= self->max_rows) {
        /* Existing capacity is already sufficient */
        return 0;
    }

    ret = expand_column((void **) &self->flags, target_rows, sizeof(tsk_flags_t));
    if (ret != 0) {
        return ret;
    }
    /* Offset columns carry one trailing entry beyond the last row */
    ret = expand_column(
        (void **) &self->location_offset, target_rows + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column(
        (void **) &self->parents_offset, target_rows + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column(
        (void **) &self->metadata_offset, target_rows + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    self->max_rows = target_rows;
    return 0;
}
/* Asks expand_ragged_column to make room for additional_length more
 * elements in the location column, returning that helper's result. */
static int
tsk_individual_table_expand_location(
    tsk_individual_table_t *self, tsk_size_t additional_length)
{
    const tsk_size_t current_length = self->location_length;
    const tsk_size_t increment = self->max_location_length_increment;

    return expand_ragged_column(current_length, additional_length, increment,
        &self->max_location_length, (void **) &self->location,
        sizeof(*self->location));
}
/* Asks expand_ragged_column to make room for additional_length more
 * elements in the parents column, returning that helper's result. */
static int
tsk_individual_table_expand_parents(
    tsk_individual_table_t *self, tsk_size_t additional_length)
{
    const tsk_size_t current_length = self->parents_length;
    const tsk_size_t increment = self->max_parents_length_increment;

    return expand_ragged_column(current_length, additional_length, increment,
        &self->max_parents_length, (void **) &self->parents,
        sizeof(*self->parents));
}
/* Asks expand_ragged_column to make room for additional_length more
 * elements in the metadata column, returning that helper's result. */
static int
tsk_individual_table_expand_metadata(
    tsk_individual_table_t *self, tsk_size_t additional_length)
{
    const tsk_size_t current_length = self->metadata_length;
    const tsk_size_t increment = self->max_metadata_length_increment;

    return expand_ragged_column(current_length, additional_length, increment,
        &self->max_metadata_length, (void **) &self->metadata,
        sizeof(*self->metadata));
}
/* Stores the row-capacity increment that is later passed to
 * calculate_max_rows when the main columns are expanded. Always returns 0. */
int
tsk_individual_table_set_max_rows_increment(
    tsk_individual_table_t *self, tsk_size_t max_rows_increment)
{
    const int ok = 0;

    self->max_rows_increment = max_rows_increment;
    return ok;
}
/* Stores the increment that is later passed to expand_ragged_column when the
 * metadata column is expanded. Always returns 0. */
int
tsk_individual_table_set_max_metadata_length_increment(
    tsk_individual_table_t *self, tsk_size_t max_metadata_length_increment)
{
    const int ok = 0;

    self->max_metadata_length_increment = max_metadata_length_increment;
    return ok;
}
/* Stores the increment that is later passed to expand_ragged_column when the
 * location column is expanded. Always returns 0. */
int
tsk_individual_table_set_max_location_length_increment(
    tsk_individual_table_t *self, tsk_size_t max_location_length_increment)
{
    const int ok = 0;

    self->max_location_length_increment = max_location_length_increment;
    return ok;
}
/* Stores the increment that is later passed to expand_ragged_column when the
 * parents column is expanded. Always returns 0. */
int
tsk_individual_table_set_max_parents_length_increment(
    tsk_individual_table_t *self, tsk_size_t max_parents_length_increment)
{
    const int ok = 0;

    self->max_parents_length_increment = max_parents_length_increment;
    return ok;
}
/* Initialises an individual table to the empty state.
 *
 * The struct is zeroed, then space for one row and one element of each
 * ragged column is allocated so that the column pointers are always valid,
 * even for an empty table. The first entry of every offset column is set
 * to 0, the increments are reset to 0 and the metadata schema is cleared.
 *
 * The options argument is unused.
 *
 * Returns 0 on success or the first non-zero code reported by an expansion
 * step or by tsk_individual_table_set_metadata_schema. On failure the table
 * may hold partially allocated columns.
 * NOTE(review): callers presumably still call tsk_individual_table_free in
 * that case - confirm. */
int
tsk_individual_table_init(tsk_individual_table_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;

    tsk_memset(self, 0, sizeof(tsk_individual_table_t));

    /* Allocate space for one row initially, ensuring we always have valid pointers
     * even if the table is empty */
    self->max_rows_increment = 1;
    self->max_location_length_increment = 1;
    self->max_parents_length_increment = 1;
    self->max_metadata_length_increment = 1;
    ret = tsk_individual_table_expand_main_columns(self, 1);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_individual_table_expand_location(self, 1);
    if (ret != 0) {
        goto out;
    }
    self->location_offset[0] = 0;
    ret = tsk_individual_table_expand_parents(self, 1);
    if (ret != 0) {
        goto out;
    }
    self->parents_offset[0] = 0;
    ret = tsk_individual_table_expand_metadata(self, 1);
    if (ret != 0) {
        goto out;
    }
    self->metadata_offset[0] = 0;

    /* Reset the temporary increments of one used for the initial allocation */
    self->max_rows_increment = 0;
    self->max_location_length_increment = 0;
    self->max_parents_length_increment = 0;
    self->max_metadata_length_increment = 0;

    /* Propagate the result rather than silently discarding it */
    ret = tsk_individual_table_set_metadata_schema(self, NULL, 0);
out:
    return ret;
}
/* Copies all rows and the metadata schema of self into dest.
 *
 * Unless TSK_NO_INIT is present in options, dest is initialised first. The
 * columns are then set from self, followed by the metadata schema.
 *
 * Returns 0 on success or the first non-zero code from a failing step. */
int TSK_WARN_UNUSED
tsk_individual_table_copy(const tsk_individual_table_t *self,
    tsk_individual_table_t *dest, tsk_flags_t options)
{
    int ret;

    if ((options & TSK_NO_INIT) == 0) {
        ret = tsk_individual_table_init(dest, 0);
        if (ret != 0) {
            return ret;
        }
    }
    ret = tsk_individual_table_set_columns(dest, self->num_rows, self->flags,
        self->location, self->location_offset, self->parents, self->parents_offset,
        self->metadata, self->metadata_offset);
    if (ret != 0) {
        return ret;
    }
    return tsk_individual_table_set_metadata_schema(
        dest, self->metadata_schema, self->metadata_schema_length);
}
/* Replaces the table contents with the supplied columns: the table is
 * cleared and the columns are then appended. Returns 0 on success or the
 * non-zero code from whichever of the two steps failed. */
int TSK_WARN_UNUSED
tsk_individual_table_set_columns(tsk_individual_table_t *self, tsk_size_t num_rows,
    const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset,
    const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata,
    const tsk_size_t *metadata_offset)
{
    int ret = tsk_individual_table_clear(self);

    if (ret == 0) {
        ret = tsk_individual_table_append_columns(self, num_rows, flags, location,
            location_offset, parents, parents_offset, metadata, metadata_offset);
    }
    return ret;
}
/* Replaces the table's columns with the caller-supplied buffers.
 *
 * All three ragged columns are validated with check_ragged_column before
 * anything is freed. The existing columns are then released, num_rows and
 * max_rows are set to num_rows, and the buffers are installed. When flags
 * is NULL a zero-filled flags column of num_rows entries is allocated
 * instead.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY if the default flags column cannot
 * be allocated, or the non-zero code from check_ragged_column or
 * takeset_ragged_column. */
int TSK_WARN_UNUSED
tsk_individual_table_takeset_columns(tsk_individual_table_t *self, tsk_size_t num_rows,
    tsk_flags_t *flags, double *location, tsk_size_t *location_offset, tsk_id_t *parents,
    tsk_size_t *parents_offset, char *metadata, tsk_size_t *metadata_offset)
{
    int ret;

    /* Every input must be checked before any memory is freed or taken */
    ret = check_ragged_column(num_rows, location, location_offset);
    if (ret != 0) {
        return ret;
    }
    ret = check_ragged_column(num_rows, parents, parents_offset);
    if (ret != 0) {
        return ret;
    }
    ret = check_ragged_column(num_rows, metadata, metadata_offset);
    if (ret != 0) {
        return ret;
    }

    tsk_individual_table_free_columns(self);
    self->max_rows = num_rows;
    self->num_rows = num_rows;

    if (flags != NULL) {
        self->flags = flags;
    } else {
        /* Flags defaults to all zeros if not specified. The column is often
         * unused so this is a worthwhile optimisation. */
        self->flags = tsk_calloc(num_rows, sizeof(*self->flags));
        if (self->flags == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            return ret;
        }
    }

    ret = takeset_ragged_column(num_rows, location, location_offset,
        (void *) &self->location, &self->location_offset, &self->location_length);
    if (ret != 0) {
        return ret;
    }
    ret = takeset_ragged_column(num_rows, parents, parents_offset,
        (void *) &self->parents, &self->parents_offset, &self->parents_length);
    if (ret != 0) {
        return ret;
    }
    return takeset_ragged_column(num_rows, metadata, metadata_offset,
        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);
}
/* Appends num_rows rows, given in columnar form, to the end of the table.
 *
 * flags is required. Each ragged column (location, parents, metadata) is
 * optional, but its data and offset pointers must be both NULL or both
 * non-NULL; a NULL column appends empty entries for every new row.
 *
 * The function works in three phases so that a failure never leaves the
 * table half-updated:
 *   1. validate the parameters and every supplied offset array;
 *   2. grow all the storage that will be needed;
 *   3. copy the data and commit the new lengths and row count.
 * Only phases 1 and 2 can fail, and neither changes num_rows or any of the
 * column lengths.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE for an invalid combination
 * of NULL arguments, or the non-zero code from check_offsets or an
 * expansion step. */
int
tsk_individual_table_append_columns(tsk_individual_table_t *self, tsk_size_t num_rows,
    const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset,
    const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata,
    const tsk_size_t *metadata_offset)
{
    int ret;
    tsk_size_t j;
    tsk_size_t location_length = 0;
    tsk_size_t parents_length = 0;
    tsk_size_t metadata_length = 0;

    /* Phase 1: validation. Nothing in the table is touched here. */
    if (flags == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if ((location == NULL) != (location_offset == NULL)) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if ((parents == NULL) != (parents_offset == NULL)) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if ((metadata == NULL) != (metadata_offset == NULL)) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if (location != NULL) {
        ret = check_offsets(num_rows, location_offset, 0, false);
        if (ret != 0) {
            goto out;
        }
        location_length = location_offset[num_rows];
    }
    if (parents != NULL) {
        ret = check_offsets(num_rows, parents_offset, 0, false);
        if (ret != 0) {
            goto out;
        }
        parents_length = parents_offset[num_rows];
    }
    if (metadata != NULL) {
        ret = check_offsets(num_rows, metadata_offset, 0, false);
        if (ret != 0) {
            goto out;
        }
        metadata_length = metadata_offset[num_rows];
    }

    /* Phase 2: reserve storage. Capacity may grow, but num_rows and the
     * column lengths are left alone so a failure leaves a valid table. */
    ret = tsk_individual_table_expand_main_columns(self, (tsk_size_t) num_rows);
    if (ret != 0) {
        goto out;
    }
    if (location != NULL) {
        ret = tsk_individual_table_expand_location(self, location_length);
        if (ret != 0) {
            goto out;
        }
    }
    if (parents != NULL) {
        ret = tsk_individual_table_expand_parents(self, parents_length);
        if (ret != 0) {
            goto out;
        }
    }
    if (metadata != NULL) {
        ret = tsk_individual_table_expand_metadata(self, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }

    /* Phase 3: copy and commit. Nothing below can fail. */
    tsk_memcpy(self->flags + self->num_rows, flags, num_rows * sizeof(tsk_flags_t));

    if (location == NULL) {
        for (j = 0; j < num_rows; j++) {
            self->location_offset[self->num_rows + j + 1]
                = (tsk_size_t) self->location_length;
        }
    } else {
        /* Rebase the caller's offsets onto the end of the existing data */
        for (j = 0; j < num_rows; j++) {
            self->location_offset[self->num_rows + j]
                = (tsk_size_t) self->location_length + location_offset[j];
        }
        tsk_memcpy(self->location + self->location_length, location,
            location_length * sizeof(double));
        self->location_length += location_length;
    }

    if (parents == NULL) {
        for (j = 0; j < num_rows; j++) {
            self->parents_offset[self->num_rows + j + 1]
                = (tsk_size_t) self->parents_length;
        }
    } else {
        for (j = 0; j < num_rows; j++) {
            self->parents_offset[self->num_rows + j]
                = (tsk_size_t) self->parents_length + parents_offset[j];
        }
        tsk_memcpy(self->parents + self->parents_length, parents,
            parents_length * sizeof(tsk_id_t));
        self->parents_length += parents_length;
    }

    if (metadata == NULL) {
        for (j = 0; j < num_rows; j++) {
            self->metadata_offset[self->num_rows + j + 1]
                = (tsk_size_t) self->metadata_length;
        }
    } else {
        for (j = 0; j < num_rows; j++) {
            self->metadata_offset[self->num_rows + j]
                = (tsk_size_t) self->metadata_length + metadata_offset[j];
        }
        tsk_memcpy(self->metadata + self->metadata_length, metadata,
            metadata_length * sizeof(char));
        self->metadata_length += metadata_length;
    }

    self->num_rows += (tsk_size_t) num_rows;
    /* Terminating offsets: one past the last row of each ragged column */
    self->location_offset[self->num_rows] = self->location_length;
    self->parents_offset[self->num_rows] = self->parents_length;
    self->metadata_offset[self->num_rows] = self->metadata_length;
out:
    return ret;
}
/* Writes one row at the end of the table without any capacity management.
 *
 * The caller must already have reserved space for one more row and for the
 * given number of location, parents and metadata elements; this is enforced
 * with tsk_bug_assert. tsk_memmove is used for the copies.
 *
 * Returns the ID (index) of the newly added row. */
static tsk_id_t
tsk_individual_table_add_row_internal(tsk_individual_table_t *self, tsk_flags_t flags,
    const double *location, tsk_size_t location_length, const tsk_id_t *parents,
    const tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length)
{
    tsk_size_t row;

    tsk_bug_assert(self->num_rows < self->max_rows);
    tsk_bug_assert(self->parents_length + parents_length <= self->max_parents_length);
    tsk_bug_assert(self->metadata_length + metadata_length <= self->max_metadata_length);
    tsk_bug_assert(self->location_length + location_length <= self->max_location_length);

    row = self->num_rows;
    self->flags[row] = flags;

    /* Append the data, then record the new end of each ragged column as the
     * row's closing offset */
    tsk_memmove(self->location + self->location_length, location,
        location_length * sizeof(*self->location));
    self->location_length += location_length;
    self->location_offset[row + 1] = self->location_length;

    tsk_memmove(self->parents + self->parents_length, parents,
        parents_length * sizeof(*self->parents));
    self->parents_length += parents_length;
    self->parents_offset[row + 1] = self->parents_length;

    tsk_memmove(self->metadata + self->metadata_length, metadata,
        metadata_length * sizeof(*self->metadata));
    self->metadata_length += metadata_length;
    self->metadata_offset[row + 1] = self->metadata_length;

    self->num_rows = row + 1;
    return (tsk_id_t) self->num_rows - 1;
}
/* Appends a single row to the table, growing the columns as required.
 *
 * Space is reserved for one more row and for the supplied location, parents
 * and metadata lengths, and the row is then written by
 * tsk_individual_table_add_row_internal.
 *
 * Returns the new row's ID on success, or the non-zero code from a failed
 * expansion step. */
tsk_id_t
tsk_individual_table_add_row(tsk_individual_table_t *self, tsk_flags_t flags,
    const double *location, tsk_size_t location_length, const tsk_id_t *parents,
    tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length)
{
    tsk_id_t ret;

    ret = tsk_individual_table_expand_main_columns(self, 1);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_individual_table_expand_location(self, location_length);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_individual_table_expand_parents(self, parents_length);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_individual_table_expand_metadata(self, metadata_length);
    if (ret != 0) {
        return ret;
    }
    return tsk_individual_table_add_row_internal(self, flags, location,
        location_length, parents, parents_length, metadata, metadata_length);
}
/* Replaces row index with new values by rewriting the tail of the table.
 *
 * Used when the new row's ragged lengths differ from the existing ones, so
 * the row cannot be overwritten in place. A full copy of the table is
 * taken, the table is truncated to index rows, the replacement row is
 * added, and rows index + 1 onwards are then re-appended from the copy.
 *
 * The caller is expected to have validated index (the truncate result is
 * checked with tsk_bug_assert).
 *
 * NOTE(review): if location, parents or metadata point into this table's
 * own column memory, adding the row may reallocate those columns before the
 * data is copied - confirm whether callers can pass such pointers on this
 * path.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY if the row-index scratch array
 * cannot be allocated, or the non-zero code from a failing step. */
static int
tsk_individual_table_update_row_rewrite(tsk_individual_table_t *self, tsk_id_t index,
    tsk_flags_t flags, const double *location, tsk_size_t location_length,
    const tsk_id_t *parents, tsk_size_t parents_length, const char *metadata,
    tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_id_t j, ret_id;
    tsk_individual_table_t copy;
    tsk_size_t num_rows;
    tsk_id_t *rows = NULL;

    ret = tsk_individual_table_copy(self, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    rows = tsk_malloc(self->num_rows * sizeof(*rows));
    if (rows == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Drop the row being replaced and everything after it */
    ret = tsk_individual_table_truncate(self, (tsk_size_t) index);
    tsk_bug_assert(ret == 0);
    ret_id = tsk_individual_table_add_row(self, flags, location, location_length,
        parents, parents_length, metadata, metadata_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }

    /* Re-append the rows that followed the updated one, taken from the copy */
    num_rows = 0;
    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {
        rows[num_rows] = j;
        num_rows++;
    }
    ret = tsk_individual_table_extend(self, &copy, num_rows, rows, 0);
    if (ret != 0) {
        goto out;
    }
out:
    tsk_individual_table_free(&copy);
    tsk_safe_free(rows);
    return ret;
}
/* Updates the row at index with the supplied values.
 *
 * If the new location, parents and metadata lengths all match the existing
 * row, the values are overwritten in place. Otherwise the update is handed
 * to tsk_individual_table_update_row_rewrite.
 *
 * Returns 0 on success, or the non-zero code from
 * tsk_individual_table_get_row (e.g. for an out-of-bounds index) or from
 * the rewrite path. */
int
tsk_individual_table_update_row(tsk_individual_table_t *self, tsk_id_t index,
    tsk_flags_t flags, const double *location, tsk_size_t location_length,
    const tsk_id_t *parents, tsk_size_t parents_length, const char *metadata,
    tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_individual_t current_row;

    /* Fetching the row also validates index */
    ret = tsk_individual_table_get_row(self, index, &current_row);
    if (ret != 0) {
        goto out;
    }
    if (current_row.location_length == location_length
        && current_row.parents_length == parents_length
        && current_row.metadata_length == metadata_length) {
        self->flags[index] = flags;
        /* Note: important to use tsk_memmove here as we may be provided pointers
         * to the column memory as input via get_row */
        tsk_memmove(&self->location[self->location_offset[index]], location,
            location_length * sizeof(*location));
        tsk_memmove(&self->parents[self->parents_offset[index]], parents,
            parents_length * sizeof(*parents));
        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,
            metadata_length * sizeof(*metadata));
    } else {
        ret = tsk_individual_table_update_row_rewrite(self, index, flags, location,
            location_length, parents, parents_length, metadata, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Removes every row from the table by truncating it to zero rows, and
 * returns the result of that truncation. */
int
tsk_individual_table_clear(tsk_individual_table_t *self)
{
    const tsk_size_t no_rows = 0;

    return tsk_individual_table_truncate(self, no_rows);
}
/* Shrinks the table to its first num_rows rows.
 *
 * The row count is reduced and each ragged column's length is reset to the
 * offset at which row num_rows starts. No memory is released.
 *
 * Returns 0 on success, or TSK_ERR_BAD_TABLE_POSITION if num_rows exceeds
 * the current number of rows. */
int
tsk_individual_table_truncate(tsk_individual_table_t *self, tsk_size_t num_rows)
{
    int ret = 0;

    if (num_rows <= self->num_rows) {
        self->num_rows = num_rows;
        self->location_length = self->location_offset[num_rows];
        self->parents_length = self->parents_offset[num_rows];
        self->metadata_length = self->metadata_offset[num_rows];
    } else {
        ret = tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);
    }
    return ret;
}
/* Appends num_rows rows taken from other to the end of self.
 *
 * When row_indexes is NULL the first num_rows rows of other are used in
 * order; otherwise row_indexes[j] names the source row for the j-th appended
 * row. The options argument is unused.
 *
 * Returns 0 on success, TSK_ERR_CANNOT_EXTEND_FROM_SELF when self and other
 * are the same table, or the non-zero code from a failed lookup, expansion
 * or row addition. Rows added before a failure are not removed. */
int
tsk_individual_table_extend(tsk_individual_table_t *self,
    const tsk_individual_table_t *other, tsk_size_t num_rows,
    const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t source_row, new_id;
    tsk_individual_t row;

    if (self == other) {
        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);
        goto out;
    }

    /* The growth of the non-ragged columns is known, so do it ahead of time */
    ret = tsk_individual_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        goto out;
    }

    for (k = 0; k < num_rows; k++) {
        if (row_indexes == NULL) {
            source_row = (tsk_id_t) k;
        } else {
            source_row = row_indexes[k];
        }
        ret = tsk_individual_table_get_row(other, source_row, &row);
        if (ret != 0) {
            goto out;
        }
        new_id = tsk_individual_table_add_row(self, row.flags, row.location,
            row.location_length, row.parents, row.parents_length, row.metadata,
            row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
    }
    ret = 0;
out:
    return ret;
}
/* Writes a debugging dump of the table to out: capacity information for the
 * rows and the metadata column, the metadata schema header, and then one
 * line per row that includes the raw offset columns. Return values of
 * fprintf are not checked. */
void
tsk_individual_table_print_state(const tsk_individual_table_t *self, FILE *out)
{
    tsk_size_t row, pos, stop;

    fprintf(out, "\n" TABLE_SEP);
    fprintf(out, "tsk_individual_tbl: %p:\n", (const void *) self);
    fprintf(out, "num_rows = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->num_rows, (long long) self->max_rows,
        (long long) self->max_rows_increment);
    fprintf(out, "metadata_length = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->metadata_length, (long long) self->max_metadata_length,
        (long long) self->max_metadata_length_increment);
    fprintf(out, TABLE_SEP);

    /* The dump_text code is duplicated here because the offset columns
     * must also be shown. */
    write_metadata_schema_header(
        out, self->metadata_schema, self->metadata_schema_length);
    fprintf(out, "id\tflags\tlocation_offset\tlocation\t");
    fprintf(out, "parents_offset\tparents\t");
    fprintf(out, "metadata_offset\tmetadata\n");

    for (row = 0; row < self->num_rows; row++) {
        fprintf(out, "%lld\t%lld\t", (long long) row, (long long) self->flags[row]);

        /* Location: comma separated values */
        fprintf(out, "%lld\t", (long long) self->location_offset[row]);
        stop = self->location_offset[row + 1];
        for (pos = self->location_offset[row]; pos < stop; pos++) {
            fprintf(out, "%f", self->location[pos]);
            if (pos + 1 < stop) {
                fprintf(out, ",");
            }
        }
        fprintf(out, "\t");

        /* Parents: comma separated IDs */
        fprintf(out, "%lld\t", (long long) self->parents_offset[row]);
        stop = self->parents_offset[row + 1];
        for (pos = self->parents_offset[row]; pos < stop; pos++) {
            fprintf(out, "%lld", (long long) self->parents[pos]);
            if (pos + 1 < stop) {
                fprintf(out, ",");
            }
        }
        fprintf(out, "\t");

        /* Metadata: raw bytes, one character at a time */
        fprintf(out, "%lld\t", (long long) self->metadata_offset[row]);
        stop = self->metadata_offset[row + 1];
        for (pos = self->metadata_offset[row]; pos < stop; pos++) {
            fprintf(out, "%c", self->metadata[pos]);
        }
        fprintf(out, "\n");
    }
}
/* Fills row with a view of the row at index, performing no bounds check.
 *
 * The location, parents and metadata pointers in row point directly into the
 * table's column memory; nothing is copied. The nodes fields are set to
 * empty (0 / NULL) here. */
static inline void
tsk_individual_table_get_row_unsafe(
    const tsk_individual_table_t *self, tsk_id_t index, tsk_individual_t *row)
{
    const tsk_size_t location_start = self->location_offset[index];
    const tsk_size_t parents_start = self->parents_offset[index];
    const tsk_size_t metadata_start = self->metadata_offset[index];

    row->id = (tsk_id_t) index;
    row->flags = self->flags[index];

    row->location_length = self->location_offset[index + 1] - location_start;
    row->location = self->location + location_start;

    row->parents_length = self->parents_offset[index + 1] - parents_start;
    row->parents = self->parents + parents_start;

    row->metadata_length = self->metadata_offset[index + 1] - metadata_start;
    row->metadata = self->metadata + metadata_start;

    /* Also have referencing individuals here. Should this be a different struct?
     * See also site. */
    row->nodes_length = 0;
    row->nodes = NULL;
}
/* Bounds-checked row lookup.
 *
 * Fills row with a view of the row at index and returns 0, or returns
 * TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS (leaving row untouched) when index is
 * negative or not less than the number of rows. */
int
tsk_individual_table_get_row(
    const tsk_individual_table_t *self, tsk_id_t index, tsk_individual_t *row)
{
    int ret = 0;

    if (index >= 0 && index < (tsk_id_t) self->num_rows) {
        tsk_individual_table_get_row_unsafe(self, index, row);
    } else {
        ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
    }
    return ret;
}
/* Sets the table's metadata schema from the metadata_schema_length bytes at
 * metadata_schema. The work is delegated to replace_string and its return
 * value is handed back unchanged. */
int
tsk_individual_table_set_metadata_schema(tsk_individual_table_t *self,
    const char *metadata_schema, tsk_size_t metadata_schema_length)
{
    int ret;

    ret = replace_string(&self->metadata_schema, &self->metadata_schema_length,
        metadata_schema, metadata_schema_length);
    return ret;
}
/* Writes the table to out as tab-separated text.
 *
 * The metadata schema header comes first, then a column header line, then
 * one line per row with the id, flags, comma-separated locations,
 * comma-separated parents and the row's metadata bytes.
 *
 * Returns 0 on success or TSK_ERR_IO as soon as any write reports a negative
 * result. */
int
tsk_individual_table_dump_text(const tsk_individual_table_t *self, FILE *out)
{
    int ret = TSK_ERR_IO;
    int err;
    tsk_size_t row, pos, stop;
    tsk_size_t metadata_len;

    err = write_metadata_schema_header(
        out, self->metadata_schema, self->metadata_schema_length);
    if (err < 0) {
        goto out;
    }
    err = fprintf(out, "id\tflags\tlocation\tparents\tmetadata\n");
    if (err < 0) {
        goto out;
    }

    for (row = 0; row < self->num_rows; row++) {
        err = fprintf(
            out, "%lld\t%lld\t", (long long) row, (long long) self->flags[row]);
        if (err < 0) {
            goto out;
        }

        /* Locations, separated by commas */
        stop = self->location_offset[row + 1];
        for (pos = self->location_offset[row]; pos < stop; pos++) {
            err = fprintf(out, "%.*g", TSK_DBL_DECIMAL_DIG, self->location[pos]);
            if (err < 0) {
                goto out;
            }
            if (pos + 1 < stop) {
                err = fprintf(out, ",");
                if (err < 0) {
                    goto out;
                }
            }
        }
        err = fprintf(out, "\t");
        if (err < 0) {
            goto out;
        }

        /* Parents, separated by commas */
        stop = self->parents_offset[row + 1];
        for (pos = self->parents_offset[row]; pos < stop; pos++) {
            err = fprintf(out, "%lld", (long long) self->parents[pos]);
            if (err < 0) {
                goto out;
            }
            if (pos + 1 < stop) {
                err = fprintf(out, ",");
                if (err < 0) {
                    goto out;
                }
            }
        }

        /* Metadata is not NUL terminated, so its length bounds the output */
        metadata_len = self->metadata_offset[row + 1] - self->metadata_offset[row];
        err = fprintf(out, "\t%.*s\n", (int) metadata_len,
            self->metadata + self->metadata_offset[row]);
        if (err < 0) {
            goto out;
        }
    }
    ret = 0;
out:
    return ret;
}
/* Reports whether two individual tables hold the same contents.
 *
 * The row count, flags, location offsets and data, and parents offsets and
 * data are always compared. Unless TSK_CMP_IGNORE_METADATA is set in
 * options, the metadata length, offsets and data and the metadata schema
 * must match as well.
 *
 * The checks run in a fixed order and stop at the first mismatch, so no
 * column is compared unless the row counts are equal. */
bool
tsk_individual_table_equals(const tsk_individual_table_t *self,
    const tsk_individual_table_t *other, tsk_flags_t options)
{
    if (self->num_rows != other->num_rows) {
        return false;
    }
    if (tsk_memcmp(self->flags, other->flags, self->num_rows * sizeof(tsk_flags_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(self->location_offset, other->location_offset,
            (self->num_rows + 1) * sizeof(tsk_size_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(
            self->location, other->location, self->location_length * sizeof(double))
        != 0) {
        return false;
    }
    if (tsk_memcmp(self->parents_offset, other->parents_offset,
            (self->num_rows + 1) * sizeof(tsk_size_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(
            self->parents, other->parents, self->parents_length * sizeof(tsk_id_t))
        != 0) {
        return false;
    }

    if (options & TSK_CMP_IGNORE_METADATA) {
        return true;
    }

    if (self->metadata_length != other->metadata_length) {
        return false;
    }
    if (self->metadata_schema_length != other->metadata_schema_length) {
        return false;
    }
    if (tsk_memcmp(self->metadata_offset, other->metadata_offset,
            (self->num_rows + 1) * sizeof(tsk_size_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(
            self->metadata, other->metadata, self->metadata_length * sizeof(char))
        != 0) {
        return false;
    }
    if (tsk_memcmp(self->metadata_schema, other->metadata_schema,
            self->metadata_schema_length * sizeof(char))
        != 0) {
        return false;
    }
    return true;
}
/* Removes, in place, every row j for which keep[j] is false.
 *
 * An old-ID to new-ID map is built with keep_mask_to_id_map. If ret_id_map
 * is non-NULL the map is written there; otherwise a temporary array is
 * allocated and freed before returning. Before anything is modified, the
 * parents of every retained row are checked: each non-null parent must be a
 * valid row index and must itself be retained. The columns are then
 * subsetted and parent references rewritten through the map. The options
 * argument is unused.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY if the temporary map cannot be
 * allocated, TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS for an invalid parent ID, or
 * TSK_ERR_KEEP_ROWS_MAP_TO_DELETED if a retained row refers to a removed
 * parent. */
int
tsk_individual_table_keep_rows(tsk_individual_table_t *self, const tsk_bool_t *keep,
    tsk_flags_t TSK_UNUSED(options), tsk_id_t *ret_id_map)
{
    int ret = 0;
    const tsk_size_t current_num_rows = self->num_rows;
    tsk_size_t row, pos, remaining_rows;
    tsk_id_t parent;
    tsk_id_t *id_map = ret_id_map;
    tsk_id_t *restrict parents = self->parents;
    tsk_size_t *restrict parents_offset = self->parents_offset;

    if (ret_id_map == NULL) {
        id_map = tsk_malloc(current_num_rows * sizeof(*id_map));
        if (id_map == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
    }
    keep_mask_to_id_map(current_num_rows, keep, id_map);

    /* See notes in tsk_mutation_table_keep_rows for possibilities
     * on making this more flexible */
    for (row = 0; row < current_num_rows; row++) {
        if (!keep[row]) {
            continue;
        }
        for (pos = parents_offset[row]; pos < parents_offset[row + 1]; pos++) {
            parent = parents[pos];
            if (parent == TSK_NULL) {
                continue;
            }
            if (parent < 0 || parent >= (tsk_id_t) current_num_rows) {
                ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
                goto out;
            }
            if (id_map[parent] == TSK_NULL) {
                ret = tsk_trace_error(TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
                goto out;
            }
        }
    }

    /* Validation passed: compact each column */
    remaining_rows = subset_flags_column(self->flags, current_num_rows, keep);
    self->parents_length = subset_remap_ragged_id_column(
        self->parents, self->parents_offset, current_num_rows, keep, id_map);
    self->location_length = subset_ragged_double_column(
        self->location, self->location_offset, current_num_rows, keep);
    if (self->metadata_length > 0) {
        /* Implementation note: we special case metadata here because
         * it'll make the common-case of no metadata a bit faster, and
         * to also potentially support more general use of the
         * TSK_TABLE_NO_METADATA option. This is done for all the tables
         * but only commented on here. */
        self->metadata_length = subset_ragged_char_column(
            self->metadata, self->metadata_offset, current_num_rows, keep);
    }
    self->num_rows = remaining_rows;
out:
    /* Only release the map if it was allocated here */
    if (ret_id_map == NULL) {
        tsk_safe_free(id_map);
    }
    return ret;
}
/* Writes the individual table to a kastore via write_table.
 *
 * Two NULL-name-terminated descriptor arrays are built: the plain columns
 * (flags and the metadata schema) and the ragged columns (location, parents
 * and metadata, each paired with its offset column and the row count).
 * Returns whatever write_table returns. */
static int
tsk_individual_table_dump(
const tsk_individual_table_t *self, kastore_t *store, tsk_flags_t options)
{
/* Plain columns: key, data pointer, number of elements, storage type */
const write_table_col_t write_cols[] = {
{ "individuals/flags", (void *) self->flags, self->num_rows,
TSK_FLAGS_STORAGE_TYPE },
{ "individuals/metadata_schema", (void *) self->metadata_schema,
self->metadata_schema_length, KAS_UINT8 },
{ .name = NULL },
};
/* Ragged columns: key, data pointer, data length, storage type, then the
 * offset array and the number of rows.
 * NOTE(review): field meanings are inferred from the values passed -
 * confirm against the write_table_ragged_col_t definition. */
const write_table_ragged_col_t ragged_cols[] = {
{ "individuals/location", (void *) self->location, self->location_length,
KAS_FLOAT64, self->location_offset, self->num_rows },
{ "individuals/parents", (void *) self->parents, self->parents_length,
TSK_ID_STORAGE_TYPE, self->parents_offset, self->num_rows },
{ "individuals/metadata", (void *) self->metadata, self->metadata_length,
KAS_UINT8, self->metadata_offset, self->num_rows },
{ .name = NULL },
};
return write_table(store, write_cols, ragged_cols, options);
}
/* Populates the individual table from a kastore.
 *
 * read_table fills the local column pointers through the descriptor arrays
 * below. If a metadata schema was read it is set on the table, and the
 * column buffers are then handed to the table with
 * tsk_individual_table_takeset_columns.
 *
 * Returns 0 on success or the first non-zero code from read_table,
 * tsk_individual_table_set_metadata_schema or
 * tsk_individual_table_takeset_columns. */
static int
tsk_individual_table_load(tsk_individual_table_t *self, kastore_t *store)
{
int ret = 0;
tsk_flags_t *flags = NULL;
double *location = NULL;
tsk_size_t *location_offset = NULL;
tsk_id_t *parents = NULL;
tsk_size_t *parents_offset = NULL;
char *metadata = NULL;
tsk_size_t *metadata_offset = NULL;
char *metadata_schema = NULL;
tsk_size_t num_rows, location_length, parents_length, metadata_length,
metadata_schema_length;
/* Each descriptor holds the address of one of the locals above, so the
 * reader and the final cleanup both operate on these variables. */
read_table_col_t cols[] = {
{ "individuals/flags", (void **) &flags, TSK_FLAGS_STORAGE_TYPE, 0 },
{ .name = NULL },
};
/* Only the parents column is marked TSK_COL_OPTIONAL */
read_table_ragged_col_t ragged_cols[] = {
{ "individuals/location", (void **) &location, &location_length, KAS_FLOAT64,
&location_offset, 0 },
{ "individuals/parents", (void **) &parents, &parents_length,
TSK_ID_STORAGE_TYPE, &parents_offset, TSK_COL_OPTIONAL },
{ "individuals/metadata", (void **) &metadata, &metadata_length, KAS_UINT8,
&metadata_offset, 0 },
{ .name = NULL },
};
read_table_property_t properties[] = {
{ "individuals/metadata_schema", (void **) &metadata_schema,
&metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },
{ .name = NULL },
};
ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);
if (ret != 0) {
goto out;
}
/* The schema is optional, so it is only set when one was read */
if (metadata_schema != NULL) {
ret = tsk_individual_table_set_metadata_schema(
self, metadata_schema, metadata_schema_length);
if (ret != 0) {
goto out;
}
}
ret = tsk_individual_table_takeset_columns(self, num_rows, flags, location,
location_offset, parents, parents_offset, metadata, metadata_offset);
if (ret != 0) {
goto out;
}
/* The table now holds these buffers. Clearing the locals presumably stops
 * free_read_table_mem from releasing them through the descriptors -
 * NOTE(review): confirm against free_read_table_mem. metadata_schema is
 * deliberately not cleared. */
flags = NULL;
location = NULL;
location_offset = NULL;
parents = NULL;
parents_offset = NULL;
metadata = NULL;
metadata_offset = NULL;
out:
free_read_table_mem(cols, ragged_cols, properties);
return ret;
}
/*************************
* node table
*************************/
/* Releases every column buffer owned by the node table: flags, time,
 * population, individual, and the metadata data/offset pair. The metadata
 * schema is not touched here. */
static void
tsk_node_table_free_columns(tsk_node_table_t *self)
{
    /* Ragged metadata column and its offsets */
    tsk_safe_free(self->metadata_offset);
    tsk_safe_free(self->metadata);
    /* Fixed-width columns */
    tsk_safe_free(self->individual);
    tsk_safe_free(self->population);
    tsk_safe_free(self->time);
    tsk_safe_free(self->flags);
}
/* Frees all memory held by the node table: the metadata schema and every
 * column. Always returns 0. */
int
tsk_node_table_free(tsk_node_table_t *self)
{
    tsk_safe_free(self->metadata_schema);
    tsk_node_table_free_columns(self);
    return 0;
}
/* Ensures the per-row columns can hold additional_rows more rows.
 *
 * The new capacity is obtained from calculate_max_rows. If it exceeds the
 * current capacity, flags, time, population and individual are grown to that
 * many entries and metadata_offset to one more, after which max_rows is
 * updated.
 *
 * Returns 0 on success or the first non-zero code from calculate_max_rows
 * or expand_column. */
static int
tsk_node_table_expand_main_columns(tsk_node_table_t *self, tsk_size_t additional_rows)
{
    int ret;
    tsk_size_t target_rows;

    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,
        additional_rows, &target_rows);
    if (ret != 0) {
        return ret;
    }
    if (target_rows <= self->max_rows) {
        /* No growth required */
        return 0;
    }

    ret = expand_column((void **) &self->flags, target_rows, sizeof(tsk_flags_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->time, target_rows, sizeof(double));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->population, target_rows, sizeof(tsk_id_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->individual, target_rows, sizeof(tsk_id_t));
    if (ret != 0) {
        return ret;
    }
    /* The offset column carries one trailing entry beyond the last row */
    ret = expand_column(
        (void **) &self->metadata_offset, target_rows + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    self->max_rows = target_rows;
    return 0;
}
/* Asks expand_ragged_column to make room for additional_length more
 * elements in the metadata column, returning that helper's result. */
static int
tsk_node_table_expand_metadata(tsk_node_table_t *self, tsk_size_t additional_length)
{
    const tsk_size_t current_length = self->metadata_length;
    const tsk_size_t increment = self->max_metadata_length_increment;

    return expand_ragged_column(current_length, additional_length, increment,
        &self->max_metadata_length, (void **) &self->metadata,
        sizeof(*self->metadata));
}
/* Stores the row-capacity increment that is later passed to
 * calculate_max_rows when the main columns are expanded. Always returns 0. */
int
tsk_node_table_set_max_rows_increment(
    tsk_node_table_t *self, tsk_size_t max_rows_increment)
{
    const int ok = 0;

    self->max_rows_increment = max_rows_increment;
    return ok;
}
/* Stores the increment that is later passed to expand_ragged_column when the
 * metadata column is expanded. Always returns 0. */
int
tsk_node_table_set_max_metadata_length_increment(
    tsk_node_table_t *self, tsk_size_t max_metadata_length_increment)
{
    const int ok = 0;

    self->max_metadata_length_increment = max_metadata_length_increment;
    return ok;
}
/* Initialises a node table to the empty state.
 *
 * The struct is zeroed, then space for one row and one metadata element is
 * allocated so that the column pointers are always valid, even for an empty
 * table. The first metadata offset is set to 0, the increments are reset to
 * 0 and the metadata schema is cleared.
 *
 * The options argument is unused.
 *
 * Returns 0 on success or the first non-zero code reported by an expansion
 * step or by tsk_node_table_set_metadata_schema. On failure the table may
 * hold partially allocated columns.
 * NOTE(review): callers presumably still call tsk_node_table_free in that
 * case - confirm. */
int
tsk_node_table_init(tsk_node_table_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;

    tsk_memset(self, 0, sizeof(tsk_node_table_t));

    /* Allocate space for one row initially, ensuring we always have valid pointers
     * even if the table is empty */
    self->max_rows_increment = 1;
    self->max_metadata_length_increment = 1;
    ret = tsk_node_table_expand_main_columns(self, 1);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_node_table_expand_metadata(self, 1);
    if (ret != 0) {
        goto out;
    }
    self->metadata_offset[0] = 0;

    /* Reset the temporary increments of one used for the initial allocation */
    self->max_rows_increment = 0;
    self->max_metadata_length_increment = 0;

    /* Propagate the result rather than silently discarding it */
    ret = tsk_node_table_set_metadata_schema(self, NULL, 0);
out:
    return ret;
}
/* Copies all rows and the metadata schema of self into dest.
 *
 * Unless TSK_NO_INIT is present in options, dest is initialised first. The
 * columns are then set from self, followed by the metadata schema.
 *
 * Returns 0 on success or the first non-zero code from a failing step. */
int TSK_WARN_UNUSED
tsk_node_table_copy(
    const tsk_node_table_t *self, tsk_node_table_t *dest, tsk_flags_t options)
{
    int ret;

    if ((options & TSK_NO_INIT) == 0) {
        ret = tsk_node_table_init(dest, 0);
        if (ret != 0) {
            return ret;
        }
    }
    ret = tsk_node_table_set_columns(dest, self->num_rows, self->flags, self->time,
        self->population, self->individual, self->metadata, self->metadata_offset);
    if (ret != 0) {
        return ret;
    }
    return tsk_node_table_set_metadata_schema(
        dest, self->metadata_schema, self->metadata_schema_length);
}
/* Replaces the table contents with the supplied columns: the table is
 * cleared and the columns are then appended. Returns 0 on success or the
 * non-zero code from whichever of the two steps failed. */
int TSK_WARN_UNUSED
tsk_node_table_set_columns(tsk_node_table_t *self, tsk_size_t num_rows,
    const tsk_flags_t *flags, const double *time, const tsk_id_t *population,
    const tsk_id_t *individual, const char *metadata, const tsk_size_t *metadata_offset)
{
    int ret = tsk_node_table_clear(self);

    if (ret == 0) {
        ret = tsk_node_table_append_columns(self, num_rows, flags, time, population,
            individual, metadata, metadata_offset);
    }
    return ret;
}
/* Replaces the node table's columns with the caller-supplied buffers.
 *
 * flags and time are required, and the metadata column is validated with
 * check_ragged_column, all before anything is freed. The existing columns
 * are then released, num_rows and max_rows are set to num_rows, and the
 * buffers are installed. population and individual are handled by
 * takeset_optional_id_column.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE when flags or time is NULL,
 * or the non-zero code from check_ragged_column, takeset_optional_id_column
 * or takeset_ragged_column. */
int TSK_WARN_UNUSED
tsk_node_table_takeset_columns(tsk_node_table_t *self, tsk_size_t num_rows,
    tsk_flags_t *flags, double *time, tsk_id_t *population, tsk_id_t *individual,
    char *metadata, tsk_size_t *metadata_offset)
{
    int ret;

    /* Every input must be checked before any memory is freed or taken */
    if (flags == NULL || time == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        return ret;
    }
    ret = check_ragged_column(num_rows, metadata, metadata_offset);
    if (ret != 0) {
        return ret;
    }

    tsk_node_table_free_columns(self);
    self->max_rows = num_rows;
    self->num_rows = num_rows;
    self->time = time;
    self->flags = flags;

    ret = takeset_optional_id_column(num_rows, population, &self->population);
    if (ret != 0) {
        return ret;
    }
    ret = takeset_optional_id_column(num_rows, individual, &self->individual);
    if (ret != 0) {
        return ret;
    }
    return takeset_ragged_column(num_rows, metadata, metadata_offset,
        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);
}
/* Appends num_rows rows, given in columnar form, to the end of the table.
 *
 * flags and time are required. metadata and metadata_offset must be both
 * NULL or both non-NULL; when NULL every new row gets empty metadata.
 * population and individual are optional; when NULL the new entries are
 * filled with 0xff bytes, i.e. -1.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE for an invalid combination
 * of NULL arguments, or the non-zero code from an expansion step or
 * check_offsets. num_rows and metadata_length only change on success. */
int
tsk_node_table_append_columns(tsk_node_table_t *self, tsk_size_t num_rows,
    const tsk_flags_t *flags, const double *time, const tsk_id_t *population,
    const tsk_id_t *individual, const char *metadata, const tsk_size_t *metadata_offset)
{
    int ret;
    tsk_size_t k, metadata_length;
    /* Index of the first appended row; num_rows is not updated until the end */
    const tsk_size_t first_new = self->num_rows;

    if (flags == NULL || time == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if ((metadata == NULL) != (metadata_offset == NULL)) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    ret = tsk_node_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        goto out;
    }
    tsk_memcpy(self->time + first_new, time, num_rows * sizeof(double));
    tsk_memcpy(self->flags + first_new, flags, num_rows * sizeof(tsk_flags_t));

    if (metadata != NULL) {
        ret = check_offsets(num_rows, metadata_offset, 0, false);
        if (ret != 0) {
            goto out;
        }
        /* Rebase the caller's offsets onto the end of the existing data */
        for (k = 0; k < num_rows; k++) {
            self->metadata_offset[first_new + k]
                = (tsk_size_t) self->metadata_length + metadata_offset[k];
        }
        metadata_length = metadata_offset[num_rows];
        ret = tsk_node_table_expand_metadata(self, metadata_length);
        if (ret != 0) {
            goto out;
        }
        tsk_memcpy(self->metadata + self->metadata_length, metadata,
            metadata_length * sizeof(char));
        self->metadata_length += metadata_length;
    } else {
        for (k = 0; k < num_rows; k++) {
            self->metadata_offset[first_new + k + 1] = self->metadata_length;
        }
    }

    if (population != NULL) {
        tsk_memcpy(
            self->population + first_new, population, num_rows * sizeof(tsk_id_t));
    } else {
        /* Set population to NULL_POPULATION (-1) if not specified */
        tsk_memset(self->population + first_new, 0xff, num_rows * sizeof(tsk_id_t));
    }
    if (individual != NULL) {
        tsk_memcpy(
            self->individual + first_new, individual, num_rows * sizeof(tsk_id_t));
    } else {
        /* Set individual to NULL_INDIVIDUAL (-1) if not specified */
        tsk_memset(self->individual + first_new, 0xff, num_rows * sizeof(tsk_id_t));
    }

    self->num_rows = first_new + (tsk_size_t) num_rows;
    self->metadata_offset[self->num_rows] = self->metadata_length;
out:
    return ret;
}
/* Write one row into space that the caller has already reserved.
 *
 * Both the row slot and the metadata capacity must already be available; this is
 * asserted rather than checked. Returns the ID of the newly written row. */
static tsk_id_t
tsk_node_table_add_row_internal(tsk_node_table_t *self, tsk_flags_t flags, double time,
    tsk_id_t population, tsk_id_t individual, const char *metadata,
    tsk_size_t metadata_length)
{
    const tsk_size_t row = self->num_rows;
    const tsk_size_t metadata_end = self->metadata_length + metadata_length;

    tsk_bug_assert(row < self->max_rows);
    tsk_bug_assert(metadata_end <= self->max_metadata_length);

    tsk_memmove(self->metadata + self->metadata_length, metadata, metadata_length);
    self->flags[row] = flags;
    self->time[row] = time;
    self->population[row] = population;
    self->individual[row] = individual;
    self->metadata_offset[row + 1] = metadata_end;
    self->metadata_length = metadata_end;
    self->num_rows = row + 1;
    return (tsk_id_t) self->num_rows - 1;
}
/* Append a single row to the node table, growing the columns as required.
 *
 * Returns the ID of the new row, or the error code from whichever expansion
 * step failed. */
tsk_id_t
tsk_node_table_add_row(tsk_node_table_t *self, tsk_flags_t flags, double time,
    tsk_id_t population, tsk_id_t individual, const char *metadata,
    tsk_size_t metadata_length)
{
    tsk_id_t ret = tsk_node_table_expand_main_columns(self, 1);

    if (ret == 0) {
        ret = tsk_node_table_expand_metadata(self, metadata_length);
    }
    if (ret == 0) {
        ret = tsk_node_table_add_row_internal(
            self, flags, time, population, individual, metadata, metadata_length);
    }
    return ret;
}
/* Slow path for tsk_node_table_update_row, used when the replacement metadata has
 * a different length from the stored metadata.
 *
 * The table is snapshotted, truncated to `index` rows, the replacement row is
 * appended, and the rows that followed `index` are re-appended from the snapshot.
 * NOTE: if an append fails part-way the table is left truncated. The snapshot and
 * the scratch index array are released on every path.
 *
 * Returns 0 on success or the error code of the failing step. */
static int
tsk_node_table_update_row_rewrite(tsk_node_table_t *self, tsk_id_t index,
    tsk_flags_t flags, double time, tsk_id_t population, tsk_id_t individual,
    const char *metadata, tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_id_t j, ret_id;
    tsk_node_table_t snapshot;
    tsk_size_t num_rows;
    tsk_id_t *rows = NULL;

    ret = tsk_node_table_copy(self, &snapshot, 0);
    if (ret != 0) {
        goto out;
    }
    rows = tsk_malloc(self->num_rows * sizeof(*rows));
    if (rows == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* index was validated by the caller, so truncation cannot fail */
    ret = tsk_node_table_truncate(self, (tsk_size_t) index);
    tsk_bug_assert(ret == 0);
    ret_id = tsk_node_table_add_row(
        self, flags, time, population, individual, metadata, metadata_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }

    /* Collect the IDs of the rows after `index` and restore them */
    num_rows = 0;
    for (j = index + 1; j < (tsk_id_t) snapshot.num_rows; j++) {
        rows[num_rows] = j;
        num_rows++;
    }
    ret = tsk_node_table_extend(self, &snapshot, num_rows, rows, 0);
    if (ret != 0) {
        goto out;
    }
out:
    tsk_node_table_free(&snapshot);
    tsk_safe_free(rows);
    return ret;
}
/* Overwrite the row at `index` with the supplied values.
 *
 * When the new metadata has the same length as the stored metadata the row is
 * updated in place; otherwise the table is rebuilt around the row by
 * tsk_node_table_update_row_rewrite.
 *
 * Returns 0 on success, the error from tsk_node_table_get_row if `index` is not
 * a valid row, or the error from the rewrite path. */
int
tsk_node_table_update_row(tsk_node_table_t *self, tsk_id_t index, tsk_flags_t flags,
    double time, tsk_id_t population, tsk_id_t individual, const char *metadata,
    tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_node_t existing;

    /* Fetching the row both validates index and gives the stored metadata size */
    ret = tsk_node_table_get_row(self, index, &existing);
    if (ret != 0) {
        goto out;
    }
    if (existing.metadata_length == metadata_length) {
        self->flags[index] = flags;
        self->time[index] = time;
        self->population[index] = population;
        self->individual[index] = individual;
        /* Note: important to use tsk_memmove here as we may be provided pointers
         * to the column memory as input via get_row */
        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,
            metadata_length * sizeof(*metadata));
    } else {
        ret = tsk_node_table_update_row_rewrite(
            self, index, flags, time, population, individual, metadata, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Remove every row from the table by truncating it to zero rows. */
int TSK_WARN_UNUSED
tsk_node_table_clear(tsk_node_table_t *self)
{
    const tsk_size_t no_rows = 0;

    return tsk_node_table_truncate(self, no_rows);
}
/* Shrink the table to its first num_rows rows.
 *
 * Only the row count and metadata length are adjusted; no memory is released
 * here. Returns TSK_ERR_BAD_TABLE_POSITION if num_rows exceeds the current row
 * count, otherwise 0. */
int
tsk_node_table_truncate(tsk_node_table_t *self, tsk_size_t num_rows)
{
    if (num_rows > self->num_rows) {
        return tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);
    }
    self->num_rows = num_rows;
    self->metadata_length = self->metadata_offset[num_rows];
    return 0;
}
/* Append num_rows rows taken from `other` to this table.
 *
 * If row_indexes is NULL the first num_rows rows of `other` are used in order;
 * otherwise row_indexes[k] names the k-th row to copy. Extending a table from
 * itself is rejected with TSK_ERR_CANNOT_EXTEND_FROM_SELF. The options argument
 * is unused.
 *
 * Returns 0 on success or the error code of the failing step. */
int
tsk_node_table_extend(tsk_node_table_t *self, const tsk_node_table_t *other,
    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t source_row, new_id;
    tsk_node_t row;

    if (self == other) {
        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);
        goto out;
    }
    /* The fixed-width columns can be grown once up front; metadata is grown
     * row by row inside add_row. */
    ret = tsk_node_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        goto out;
    }
    for (k = 0; k < num_rows; k++) {
        source_row = (row_indexes != NULL) ? row_indexes[k] : (tsk_id_t) k;
        ret = tsk_node_table_get_row(other, source_row, &row);
        if (ret != 0) {
            goto out;
        }
        new_id = tsk_node_table_add_row(self, row.flags, row.time, row.population,
            row.individual, row.metadata, row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
    }
out:
    return ret;
}
/* Write a debugging dump of the table's bookkeeping fields and rows to `out`,
 * then assert that the first and last metadata offsets are consistent with
 * metadata_length. */
void
tsk_node_table_print_state(const tsk_node_table_t *self, FILE *out)
{
    tsk_size_t row, pos;

    fprintf(out, "\n" TABLE_SEP);
    fprintf(out, "tsk_node_tbl: %p:\n", (const void *) self);
    fprintf(out, "num_rows = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->num_rows, (long long) self->max_rows,
        (long long) self->max_rows_increment);
    fprintf(out, "metadata_length = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->metadata_length, (long long) self->max_metadata_length,
        (long long) self->max_metadata_length_increment);
    fprintf(out, TABLE_SEP);

    /* dump_text is not reused because the raw flags value and the metadata
     * offset are wanted here. */
    write_metadata_schema_header(
        out, self->metadata_schema, self->metadata_schema_length);
    fprintf(out, "id\tflags\ttime\tpopulation\tindividual\tmetadata_offset\tmetadata\n");
    for (row = 0; row < self->num_rows; row++) {
        fprintf(out, "%lld\t%lld\t%f\t%lld\t%lld\t%lld\t", (long long) row,
            (long long) self->flags[row], self->time[row],
            (long long) self->population[row], (long long) self->individual[row],
            (long long) self->metadata_offset[row]);
        pos = self->metadata_offset[row];
        while (pos < self->metadata_offset[row + 1]) {
            fprintf(out, "%c", self->metadata[pos]);
            pos++;
        }
        fprintf(out, "\n");
    }

    tsk_bug_assert(self->metadata_offset[0] == 0);
    tsk_bug_assert(self->metadata_offset[self->num_rows] == self->metadata_length);
}
/* Replace the table's metadata schema with the given buffer of the given
 * length, delegating to replace_string. Returns replace_string's result. */
int
tsk_node_table_set_metadata_schema(tsk_node_table_t *self, const char *metadata_schema,
    tsk_size_t metadata_schema_length)
{
    char **schema = &self->metadata_schema;
    tsk_size_t *schema_length = &self->metadata_schema_length;

    return replace_string(schema, schema_length, metadata_schema, metadata_schema_length);
}
/* Write the table to `out` as tab-separated text: the metadata schema header, a
 * column header line, then one line per row. Only the TSK_NODE_IS_SAMPLE bit of
 * flags is printed.
 *
 * Returns 0 on success, or TSK_ERR_IO as soon as any write reports failure. */
int
tsk_node_table_dump_text(const tsk_node_table_t *self, FILE *out)
{
    int ret = TSK_ERR_IO;
    int status;
    tsk_size_t row, start, length;

    status = write_metadata_schema_header(
        out, self->metadata_schema, self->metadata_schema_length);
    if (status < 0) {
        goto out;
    }
    status = fprintf(out, "id\tis_sample\ttime\tpopulation\tindividual\tmetadata\n");
    if (status < 0) {
        goto out;
    }
    for (row = 0; row < self->num_rows; row++) {
        start = self->metadata_offset[row];
        length = self->metadata_offset[row + 1] - start;
        status = fprintf(out, "%lld\t%lld\t%f\t%lld\t%lld\t%.*s\n", (long long) row,
            (long long) (self->flags[row] & TSK_NODE_IS_SAMPLE), self->time[row],
            (long long) self->population[row], (long long) self->individual[row],
            (int) length, self->metadata + start);
        if (status < 0) {
            goto out;
        }
    }
    ret = 0;
out:
    return ret;
}
/* Compare two node tables column by column.
 *
 * The row count and the time, flags, population and individual columns must
 * match. Unless TSK_CMP_IGNORE_METADATA is set in options, the metadata, its
 * offsets and the metadata schema must match as well. */
bool
tsk_node_table_equals(
    const tsk_node_table_t *self, const tsk_node_table_t *other, tsk_flags_t options)
{
    const tsk_size_t n = self->num_rows;

    if (n != other->num_rows) {
        return false;
    }
    if (tsk_memcmp(self->time, other->time, n * sizeof(double)) != 0) {
        return false;
    }
    if (tsk_memcmp(self->flags, other->flags, n * sizeof(tsk_flags_t)) != 0) {
        return false;
    }
    if (tsk_memcmp(self->population, other->population, n * sizeof(tsk_id_t)) != 0) {
        return false;
    }
    if (tsk_memcmp(self->individual, other->individual, n * sizeof(tsk_id_t)) != 0) {
        return false;
    }
    if (options & TSK_CMP_IGNORE_METADATA) {
        return true;
    }
    return self->metadata_length == other->metadata_length
           && self->metadata_schema_length == other->metadata_schema_length
           && tsk_memcmp(self->metadata_offset, other->metadata_offset,
                  (n + 1) * sizeof(tsk_size_t))
                  == 0
           && tsk_memcmp(
                  self->metadata, other->metadata, self->metadata_length * sizeof(char))
                  == 0
           && tsk_memcmp(self->metadata_schema, other->metadata_schema,
                  self->metadata_schema_length * sizeof(char))
                  == 0;
}
/* Fill `row` with the values stored at `index`, without bounds checking.
 * row->metadata points into the table's own metadata buffer; nothing is
 * copied. */
static inline void
tsk_node_table_get_row_unsafe(
    const tsk_node_table_t *self, tsk_id_t index, tsk_node_t *row)
{
    const tsk_size_t *offset = &self->metadata_offset[index];

    row->id = (tsk_id_t) index;
    row->flags = self->flags[index];
    row->time = self->time[index];
    row->population = self->population[index];
    row->individual = self->individual[index];
    row->metadata_length = offset[1] - offset[0];
    row->metadata = self->metadata + offset[0];
}
/* Bounds-checked row accessor.
 *
 * Returns TSK_ERR_NODE_OUT_OF_BOUNDS, leaving `row` unmodified, when index is
 * negative or not less than the row count; otherwise fills `row` and returns 0. */
int
tsk_node_table_get_row(const tsk_node_table_t *self, tsk_id_t index, tsk_node_t *row)
{
    if (index < 0 || index >= (tsk_id_t) self->num_rows) {
        return tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
    }
    tsk_node_table_get_row_unsafe(self, index, row);
    return 0;
}
/* Retain only the rows selected by the `keep` mask, compacting every column in
 * place.
 *
 * If id_map is not NULL it is filled in by keep_mask_to_id_map before the
 * columns are subset. The options argument is unused. Always returns 0. */
int
tsk_node_table_keep_rows(tsk_node_table_t *self, const tsk_bool_t *keep,
    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)
{
    const tsk_size_t total_rows = self->num_rows;
    tsk_size_t kept_rows;

    if (id_map != NULL) {
        keep_mask_to_id_map(total_rows, keep, id_map);
    }

    /* Each subset call is given the same mask; the count returned for the
     * flags column is used as the new row count. */
    kept_rows = subset_flags_column(self->flags, total_rows, keep);
    subset_double_column(self->time, total_rows, keep);
    subset_id_column(self->population, total_rows, keep);
    subset_id_column(self->individual, total_rows, keep);
    if (self->metadata_length > 0) {
        self->metadata_length = subset_ragged_char_column(
            self->metadata, self->metadata_offset, total_rows, keep);
    }
    self->num_rows = kept_rows;
    return 0;
}
/* Describe the node table's columns under "nodes/..." keys and hand them to
 * write_table. Returns write_table's result. */
static int
tsk_node_table_dump(const tsk_node_table_t *self, kastore_t *store, tsk_flags_t options)
{
    const tsk_size_t num_rows = self->num_rows;
    /* Fixed-width columns, plus the schema stored as a byte array */
    const write_table_col_t fixed_cols[] = {
        { "nodes/time", (void *) self->time, num_rows, KAS_FLOAT64 },
        { "nodes/flags", (void *) self->flags, num_rows, TSK_FLAGS_STORAGE_TYPE },
        { "nodes/population", (void *) self->population, num_rows,
            TSK_ID_STORAGE_TYPE },
        { "nodes/individual", (void *) self->individual, num_rows,
            TSK_ID_STORAGE_TYPE },
        { "nodes/metadata_schema", (void *) self->metadata_schema,
            self->metadata_schema_length, KAS_UINT8 },
        { .name = NULL },
    };
    /* Variable-length columns, written together with their offsets */
    const write_table_ragged_col_t variable_cols[] = {
        { "nodes/metadata", (void *) self->metadata, self->metadata_length, KAS_UINT8,
            self->metadata_offset, num_rows },
        { .name = NULL },
    };
    int ret = write_table(store, fixed_cols, variable_cols, options);

    return ret;
}
/* Read the "nodes/..." columns from `store` and install them in the table.
 *
 * read_table fills the local buffers; the metadata schema, which is optional in
 * the store, is applied first if present, and the column buffers are then handed
 * to tsk_node_table_takeset_columns.
 *
 * Returns 0 on success or the error code of the failing step. */
static int
tsk_node_table_load(tsk_node_table_t *self, kastore_t *store)
{
    int ret = 0;
    tsk_size_t num_rows, metadata_length, metadata_schema_length;
    double *time = NULL;
    tsk_flags_t *flags = NULL;
    tsk_id_t *population = NULL;
    tsk_id_t *individual = NULL;
    char *metadata = NULL;
    tsk_size_t *metadata_offset = NULL;
    char *metadata_schema = NULL;
    read_table_col_t cols[] = {
        { "nodes/time", (void **) &time, KAS_FLOAT64, 0 },
        { "nodes/flags", (void **) &flags, TSK_FLAGS_STORAGE_TYPE, 0 },
        { "nodes/population", (void **) &population, TSK_ID_STORAGE_TYPE, 0 },
        { "nodes/individual", (void **) &individual, TSK_ID_STORAGE_TYPE, 0 },
        { .name = NULL },
    };
    read_table_ragged_col_t ragged_cols[] = {
        { "nodes/metadata", (void **) &metadata, &metadata_length, KAS_UINT8,
            &metadata_offset, 0 },
        { .name = NULL },
    };
    read_table_property_t properties[] = {
        { "nodes/metadata_schema", (void **) &metadata_schema, &metadata_schema_length,
            KAS_UINT8, TSK_COL_OPTIONAL },
        { .name = NULL },
    };

    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);
    if (ret != 0) {
        goto out;
    }
    if (metadata_schema != NULL) {
        ret = tsk_node_table_set_metadata_schema(
            self, metadata_schema, metadata_schema_length);
        if (ret != 0) {
            goto out;
        }
    }
    ret = tsk_node_table_takeset_columns(
        self, num_rows, flags, time, population, individual, metadata, metadata_offset);
    if (ret == 0) {
        /* The table has taken the buffers. Clear the locals, which the column
         * descriptors point at, presumably so that free_read_table_mem below
         * does not release memory the table now holds. */
        flags = NULL;
        time = NULL;
        population = NULL;
        individual = NULL;
        metadata = NULL;
        metadata_offset = NULL;
    }
out:
    free_read_table_mem(cols, ragged_cols, properties);
    return ret;
}
/*************************
* edge table
*************************/
/* Release every column buffer of the edge table. The metadata schema is not
 * touched here. */
static void
tsk_edge_table_free_columns(tsk_edge_table_t *self)
{
    /* Ragged metadata column and its offsets */
    tsk_safe_free(self->metadata_offset);
    tsk_safe_free(self->metadata);
    /* Fixed-width columns */
    tsk_safe_free(self->child);
    tsk_safe_free(self->parent);
    tsk_safe_free(self->right);
    tsk_safe_free(self->left);
}
/* Release all memory held by the edge table: the metadata schema and every
 * column. Always returns 0. */
int
tsk_edge_table_free(tsk_edge_table_t *self)
{
    int ret = 0;

    tsk_edge_table_free_columns(self);
    tsk_safe_free(self->metadata_schema);
    return ret;
}
/* Returns 1 unless the table was created with TSK_TABLE_NO_METADATA, in which
 * case it returns 0. */
static int
tsk_edge_table_has_metadata(const tsk_edge_table_t *self)
{
    return (self->options & TSK_TABLE_NO_METADATA) == 0;
}
/* Ensure the fixed-width columns have room for additional_rows more rows.
 *
 * The new capacity comes from calculate_max_rows. Columns are only reallocated
 * when the current capacity is insufficient; the metadata offset column, which
 * needs one extra slot, is only grown when metadata is enabled. max_rows is
 * updated only after every column has been expanded.
 *
 * Returns 0 on success or the error code of the failing helper. */
static int
tsk_edge_table_expand_main_columns(tsk_edge_table_t *self, tsk_size_t additional_rows)
{
    int ret;
    tsk_size_t new_max_rows;

    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,
        additional_rows, &new_max_rows);
    if (ret != 0 || (self->num_rows + additional_rows) <= self->max_rows) {
        /* Either an error, or there is already enough room */
        return ret;
    }

    ret = expand_column((void **) &self->left, new_max_rows, sizeof(double));
    if (ret == 0) {
        ret = expand_column((void **) &self->right, new_max_rows, sizeof(double));
    }
    if (ret == 0) {
        ret = expand_column((void **) &self->parent, new_max_rows, sizeof(tsk_id_t));
    }
    if (ret == 0) {
        ret = expand_column((void **) &self->child, new_max_rows, sizeof(tsk_id_t));
    }
    if (ret == 0 && tsk_edge_table_has_metadata(self)) {
        ret = expand_column(
            (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));
    }
    if (ret == 0) {
        self->max_rows = new_max_rows;
    }
    return ret;
}
/* Ensure the metadata buffer can take additional_length more bytes, delegating
 * to expand_ragged_column. Returns that helper's result. */
static int
tsk_edge_table_expand_metadata(tsk_edge_table_t *self, tsk_size_t additional_length)
{
    int ret = expand_ragged_column(self->metadata_length, additional_length,
        self->max_metadata_length_increment, &self->max_metadata_length,
        (void **) &self->metadata, sizeof(*self->metadata));

    return ret;
}
/* Store the row-growth increment that is passed to calculate_max_rows when the
 * main columns are expanded. Always returns 0. */
int
tsk_edge_table_set_max_rows_increment(
    tsk_edge_table_t *self, tsk_size_t max_rows_increment)
{
    int ret = 0;

    self->max_rows_increment = max_rows_increment;
    return ret;
}
/* Store the metadata-growth increment that is passed to expand_ragged_column
 * when the metadata buffer is expanded. Always returns 0. */
int
tsk_edge_table_set_max_metadata_length_increment(
    tsk_edge_table_t *self, tsk_size_t max_metadata_length_increment)
{
    int ret = 0;

    self->max_metadata_length_increment = max_metadata_length_increment;
    return ret;
}
/* Initialise an edge table.
 *
 * The struct is zeroed, `options` is recorded, and space for a single row (and,
 * unless TSK_TABLE_NO_METADATA is set, a single metadata byte) is allocated so
 * that the column pointers are valid even for an empty table. The growth
 * increments are temporarily set to 1 for that allocation and then reset to 0.
 * The metadata schema starts out empty.
 *
 * Returns 0 on success or the error code of the failing step. */
int
tsk_edge_table_init(tsk_edge_table_t *self, tsk_flags_t options)
{
    int ret = 0;

    tsk_memset(self, 0, sizeof(*self));
    self->options = options;

    /* Allocate space for one row initially, ensuring we always have valid
     * pointers even if the table is empty */
    self->max_rows_increment = 1;
    self->max_metadata_length_increment = 1;
    ret = tsk_edge_table_expand_main_columns(self, 1);
    if (ret != 0) {
        goto out;
    }
    if (tsk_edge_table_has_metadata(self)) {
        ret = tsk_edge_table_expand_metadata(self, 1);
        if (ret != 0) {
            goto out;
        }
        self->metadata_offset[0] = 0;
    }
    self->max_rows_increment = 0;
    self->max_metadata_length_increment = 0;
    /* Propagate the result rather than discarding it, as every other caller of
     * set_metadata_schema in this file does. */
    ret = tsk_edge_table_set_metadata_schema(self, NULL, 0);
out:
    return ret;
}
/* Append a single edge to the table.
 *
 * Supplying non-empty metadata to a table created with TSK_TABLE_NO_METADATA is
 * rejected with TSK_ERR_METADATA_DISABLED. Returns the ID of the new row, or an
 * error code if validation or an expansion step failed; the row count is only
 * incremented on success. */
tsk_id_t
tsk_edge_table_add_row(tsk_edge_table_t *self, double left, double right,
    tsk_id_t parent, tsk_id_t child, const char *metadata, tsk_size_t metadata_length)
{
    tsk_id_t ret = 0;
    tsk_size_t row;
    const int has_metadata = tsk_edge_table_has_metadata(self);

    if (!has_metadata && metadata_length > 0) {
        ret = tsk_trace_error(TSK_ERR_METADATA_DISABLED);
        goto out;
    }
    ret = tsk_edge_table_expand_main_columns(self, 1);
    if (ret != 0) {
        goto out;
    }

    row = self->num_rows;
    tsk_bug_assert(row < self->max_rows);
    self->left[row] = left;
    self->right[row] = right;
    self->parent[row] = parent;
    self->child[row] = child;

    if (has_metadata) {
        ret = tsk_edge_table_expand_metadata(self, metadata_length);
        if (ret != 0) {
            goto out;
        }
        tsk_bug_assert(
            self->metadata_length + metadata_length <= self->max_metadata_length);
        tsk_memmove(self->metadata + self->metadata_length, metadata, metadata_length);
        self->metadata_length += metadata_length;
        self->metadata_offset[row + 1] = self->metadata_length;
    }

    self->num_rows = row + 1;
    ret = (tsk_id_t) row;
out:
    return ret;
}
/* Slow path for tsk_edge_table_update_row, used when the replacement metadata has
 * a different length from the stored metadata.
 *
 * The table is snapshotted, truncated to `index` rows, the replacement row is
 * appended, and the rows that followed `index` are re-appended from the snapshot.
 * NOTE: if an append fails part-way the table is left truncated. The snapshot and
 * the scratch index array are released on every path.
 *
 * Returns 0 on success or the error code of the failing step. */
static int
tsk_edge_table_update_row_rewrite(tsk_edge_table_t *self, tsk_id_t index, double left,
    double right, tsk_id_t parent, tsk_id_t child, const char *metadata,
    tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_id_t j, ret_id;
    tsk_edge_table_t snapshot;
    tsk_size_t num_rows;
    tsk_id_t *rows = NULL;

    ret = tsk_edge_table_copy(self, &snapshot, 0);
    if (ret != 0) {
        goto out;
    }
    rows = tsk_malloc(self->num_rows * sizeof(*rows));
    if (rows == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* index was validated by the caller, so truncation cannot fail */
    ret = tsk_edge_table_truncate(self, (tsk_size_t) index);
    tsk_bug_assert(ret == 0);
    ret_id = tsk_edge_table_add_row(
        self, left, right, parent, child, metadata, metadata_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }

    /* Collect the IDs of the rows after `index` and restore them */
    num_rows = 0;
    for (j = index + 1; j < (tsk_id_t) snapshot.num_rows; j++) {
        rows[num_rows] = j;
        num_rows++;
    }
    ret = tsk_edge_table_extend(self, &snapshot, num_rows, rows, 0);
    if (ret != 0) {
        goto out;
    }
out:
    tsk_edge_table_free(&snapshot);
    tsk_safe_free(rows);
    return ret;
}
/* Overwrite the edge at `index` with the supplied values.
 *
 * When the new metadata has the same length as the stored metadata the row is
 * updated in place (the metadata bytes are only written if the table has
 * metadata enabled); otherwise the table is rebuilt around the row by
 * tsk_edge_table_update_row_rewrite.
 *
 * Returns 0 on success, the error from tsk_edge_table_get_row if `index` is not
 * a valid row, or the error from the rewrite path. */
int
tsk_edge_table_update_row(tsk_edge_table_t *self, tsk_id_t index, double left,
    double right, tsk_id_t parent, tsk_id_t child, const char *metadata,
    tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_edge_t existing;

    /* Fetching the row both validates index and gives the stored metadata size */
    ret = tsk_edge_table_get_row(self, index, &existing);
    if (ret != 0) {
        goto out;
    }
    if (existing.metadata_length == metadata_length) {
        self->left[index] = left;
        self->right[index] = right;
        self->parent[index] = parent;
        self->child[index] = child;
        if (tsk_edge_table_has_metadata(self)) {
            /* Note: important to use tsk_memmove here as we may be provided pointers
             * to the column memory as input via get_row */
            tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,
                metadata_length * sizeof(*metadata));
        }
    } else {
        ret = tsk_edge_table_update_row_rewrite(
            self, index, left, right, parent, child, metadata, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Copy this edge table into `dest`.
 *
 * Unless TSK_NO_INIT is set in options, `dest` is first initialised with those
 * options. Copying a table that holds metadata into a destination with metadata
 * disabled fails with TSK_ERR_METADATA_DISABLED. The metadata schema is copied
 * last.
 *
 * Returns 0 on success or the error code of the failing step. */
int TSK_WARN_UNUSED
tsk_edge_table_copy(
    const tsk_edge_table_t *self, tsk_edge_table_t *dest, tsk_flags_t options)
{
    int ret = 0;
    int dest_has_metadata;
    char *metadata = NULL;
    tsk_size_t *metadata_offset = NULL;

    if (!(options & TSK_NO_INIT)) {
        ret = tsk_edge_table_init(dest, options);
        if (ret != 0) {
            goto out;
        }
    }
    dest_has_metadata = tsk_edge_table_has_metadata(dest);

    /* We can't use TSK_TABLE_NO_METADATA in dest if metadata_length is non-zero.
     * This also captures the case where TSK_TABLE_NO_METADATA is set on this table.
     */
    if (!dest_has_metadata && self->metadata_length > 0) {
        ret = tsk_trace_error(TSK_ERR_METADATA_DISABLED);
        goto out;
    }
    /* Only pass the metadata columns on when dest is able to store them */
    if (dest_has_metadata) {
        metadata_offset = self->metadata_offset;
        metadata = self->metadata;
    }
    ret = tsk_edge_table_set_columns(dest, self->num_rows, self->left, self->right,
        self->parent, self->child, metadata, metadata_offset);
    if (ret == 0) {
        ret = tsk_edge_table_set_metadata_schema(
            dest, self->metadata_schema, self->metadata_schema_length);
    }
out:
    return ret;
}
/* Replace the contents of the edge table with copies of the supplied columns.
 *
 * The table is truncated to zero rows and the columns are then appended, so
 * argument validation is whatever tsk_edge_table_append_columns performs.
 * Returns 0 on success, or the error code of the step that failed. */
int
tsk_edge_table_set_columns(tsk_edge_table_t *self, tsk_size_t num_rows,
    const double *left, const double *right, const tsk_id_t *parent,
    const tsk_id_t *child, const char *metadata, const tsk_size_t *metadata_offset)
{
    int ret = tsk_edge_table_clear(self);

    if (ret == 0) {
        ret = tsk_edge_table_append_columns(
            self, num_rows, left, right, parent, child, metadata, metadata_offset);
    }
    return ret;
}
/* Replace the table's columns with caller-allocated buffers, taking ownership.
 *
 * left, right, parent and child are required. Supplying metadata to a table
 * created with TSK_TABLE_NO_METADATA is rejected. metadata and metadata_offset
 * are checked by check_ragged_column and then passed to takeset_ragged_column.
 * All argument checks run before the existing columns are freed, so a call
 * rejected by those checks leaves the table untouched.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE or TSK_ERR_METADATA_DISABLED for
 * invalid arguments, or the error code reported by one of the helpers. */
int TSK_WARN_UNUSED
tsk_edge_table_takeset_columns(tsk_edge_table_t *self, tsk_size_t num_rows, double *left,
    double *right, tsk_id_t *parent, tsk_id_t *child, char *metadata,
    tsk_size_t *metadata_offset)
{
    int ret = 0;

    /* We need to check all the inputs before we start freeing or taking memory */
    if (left == NULL || right == NULL || parent == NULL || child == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if (metadata != NULL && !tsk_edge_table_has_metadata(self)) {
        ret = tsk_trace_error(TSK_ERR_METADATA_DISABLED);
        goto out;
    }
    ret = check_ragged_column(num_rows, metadata, metadata_offset);
    if (ret != 0) {
        goto out;
    }

    tsk_edge_table_free_columns(self);
    self->num_rows = num_rows;
    self->max_rows = num_rows;
    self->left = left;
    self->right = right;
    self->parent = parent;
    self->child = child;

    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,
        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);
    if (ret != 0) {
        goto out;
    }
    /* The ragged helper is only given metadata_length, so the recorded capacity
     * still describes the buffer that was just freed. Reset it to the one size
     * the new buffer is known to hold; otherwise a stale, larger value would
     * presumably let a later append skip reallocation and write past the end. */
    self->max_metadata_length = self->metadata_length;
out:
    return ret;
}
/* Append num_rows edges, copied from the supplied columns, to the table.
 *
 * left, right, parent and child are required. metadata and metadata_offset must
 * be supplied together or both be NULL, and may only be supplied when the table
 * has metadata enabled. With metadata enabled and no metadata supplied, the new
 * rows get zero-length metadata.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE or TSK_ERR_METADATA_DISABLED for
 * invalid arguments, or the error code of a failing helper. */
int
tsk_edge_table_append_columns(tsk_edge_table_t *self, tsk_size_t num_rows,
    const double *left, const double *right, const tsk_id_t *parent,
    const tsk_id_t *child, const char *metadata, const tsk_size_t *metadata_offset)
{
    int ret;
    int has_metadata;
    tsk_size_t k, added_metadata;
    tsk_size_t *offset_dest;

    if (left == NULL || right == NULL || parent == NULL || child == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    /* Exactly one of metadata / metadata_offset being NULL is an error */
    if ((metadata == NULL && metadata_offset != NULL)
        || (metadata != NULL && metadata_offset == NULL)) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    has_metadata = tsk_edge_table_has_metadata(self);
    if (metadata != NULL && !has_metadata) {
        ret = tsk_trace_error(TSK_ERR_METADATA_DISABLED);
        goto out;
    }

    ret = tsk_edge_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        goto out;
    }
    tsk_memcpy(&self->left[self->num_rows], left, num_rows * sizeof(double));
    tsk_memcpy(&self->right[self->num_rows], right, num_rows * sizeof(double));
    tsk_memcpy(&self->parent[self->num_rows], parent, num_rows * sizeof(tsk_id_t));
    tsk_memcpy(&self->child[self->num_rows], child, num_rows * sizeof(tsk_id_t));

    if (has_metadata) {
        offset_dest = &self->metadata_offset[self->num_rows];
        if (metadata != NULL) {
            ret = check_offsets(num_rows, metadata_offset, 0, false);
            if (ret != 0) {
                goto out;
            }
            /* Rebase the caller's offsets onto the end of the existing metadata */
            for (k = 0; k < num_rows; k++) {
                offset_dest[k]
                    = (tsk_size_t) self->metadata_length + metadata_offset[k];
            }
            added_metadata = metadata_offset[num_rows];
            ret = tsk_edge_table_expand_metadata(self, added_metadata);
            if (ret != 0) {
                goto out;
            }
            tsk_memcpy(&self->metadata[self->metadata_length], metadata,
                added_metadata * sizeof(char));
            self->metadata_length += added_metadata;
        } else {
            /* No metadata: every new row ends where the current metadata ends */
            for (k = 1; k <= num_rows; k++) {
                offset_dest[k] = self->metadata_length;
            }
        }
    }

    self->num_rows += num_rows;
    if (has_metadata) {
        /* Terminating offset for the last row */
        self->metadata_offset[self->num_rows] = self->metadata_length;
    }
out:
    return ret;
}
/* Remove every row from the table by truncating it to zero rows. */
int
tsk_edge_table_clear(tsk_edge_table_t *self)
{
    const tsk_size_t no_rows = 0;

    return tsk_edge_table_truncate(self, no_rows);
}
/* Shrink the table to its first num_rows rows.
 *
 * Only the row count and, when metadata is enabled, the metadata length are
 * adjusted; no memory is released here. Returns TSK_ERR_BAD_TABLE_POSITION if
 * num_rows exceeds the current row count, otherwise 0. */
int
tsk_edge_table_truncate(tsk_edge_table_t *self, tsk_size_t num_rows)
{
    if (num_rows > self->num_rows) {
        return tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);
    }
    self->num_rows = num_rows;
    if (tsk_edge_table_has_metadata(self)) {
        self->metadata_length = self->metadata_offset[num_rows];
    }
    return 0;
}
/* Append num_rows edges taken from `other` to this table.
 *
 * If row_indexes is NULL the first num_rows rows of `other` are used in order;
 * otherwise row_indexes[k] names the k-th row to copy. Extending a table from
 * itself is rejected with TSK_ERR_CANNOT_EXTEND_FROM_SELF. The options argument
 * is unused.
 *
 * Returns 0 on success or the error code of the failing step. */
int
tsk_edge_table_extend(tsk_edge_table_t *self, const tsk_edge_table_t *other,
    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t source_row, new_id;
    tsk_edge_t row;

    if (self == other) {
        ret = tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);
        goto out;
    }
    /* The fixed-width columns can be grown once up front; metadata is grown
     * row by row inside add_row. */
    ret = tsk_edge_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        goto out;
    }
    for (k = 0; k < num_rows; k++) {
        source_row = (row_indexes != NULL) ? row_indexes[k] : (tsk_id_t) k;
        ret = tsk_edge_table_get_row(other, source_row, &row);
        if (ret != 0) {
            goto out;
        }
        new_id = tsk_edge_table_add_row(self, row.left, row.right, row.parent,
            row.child, row.metadata, row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
    }
out:
    return ret;
}
/* Fill `row` with the values stored at `index`, without bounds checking.
 *
 * With metadata enabled, row->metadata points into the table's own buffer.
 * With metadata disabled, it is NULL and the length is 0. */
static inline void
tsk_edge_table_get_row_unsafe(
    const tsk_edge_table_t *self, tsk_id_t index, tsk_edge_t *row)
{
    row->id = (tsk_id_t) index;
    row->left = self->left[index];
    row->right = self->right[index];
    row->parent = self->parent[index];
    row->child = self->child[index];

    /* Default to "no metadata", then fill in from the columns if they exist */
    row->metadata_length = 0;
    row->metadata = NULL;
    if (tsk_edge_table_has_metadata(self)) {
        row->metadata_length
            = self->metadata_offset[index + 1] - self->metadata_offset[index];
        row->metadata = self->metadata + self->metadata_offset[index];
    }
}
/* Bounds-checked row accessor.
 *
 * Returns TSK_ERR_EDGE_OUT_OF_BOUNDS, leaving `row` unmodified, when index is
 * negative or not less than the row count; otherwise fills `row` and returns 0. */
int
tsk_edge_table_get_row(const tsk_edge_table_t *self, tsk_id_t index, tsk_edge_t *row)
{
    if (index < 0 || index >= (tsk_id_t) self->num_rows) {
        return tsk_trace_error(TSK_ERR_EDGE_OUT_OF_BOUNDS);
    }
    tsk_edge_table_get_row_unsafe(self, index, row);
    return 0;
}
/* Write a debugging dump of the table's bookkeeping fields to `out`, followed by
 * the text dump of its rows. The text dump is asserted to succeed. */
void
tsk_edge_table_print_state(const tsk_edge_table_t *self, FILE *out)
{
    int status;
    const long long num_rows = (long long) self->num_rows;
    const long long max_rows = (long long) self->max_rows;
    const long long rows_increment = (long long) self->max_rows_increment;
    const long long metadata_length = (long long) self->metadata_length;
    const long long max_metadata_length = (long long) self->max_metadata_length;
    const long long metadata_increment
        = (long long) self->max_metadata_length_increment;

    fprintf(out, "\n" TABLE_SEP);
    fprintf(out, "edge_table: %p:\n", (const void *) self);
    fprintf(out, "options = 0x%X\n", self->options);
    fprintf(out, "num_rows = %lld\tmax= %lld\tincrement = %lld)\n", num_rows, max_rows,
        rows_increment);
    fprintf(out, "metadata_length = %lld\tmax= %lld\tincrement = %lld)\n",
        metadata_length, max_metadata_length, metadata_increment);
    fprintf(out, TABLE_SEP);

    status = tsk_edge_table_dump_text(self, out);
    tsk_bug_assert(status == 0);
}
/* Replace the table's metadata schema with the given buffer of the given
 * length, delegating to replace_string. Returns replace_string's result. */
int
tsk_edge_table_set_metadata_schema(tsk_edge_table_t *self, const char *metadata_schema,
    tsk_size_t metadata_schema_length)
{
    char **schema = &self->metadata_schema;
    tsk_size_t *schema_length = &self->metadata_schema_length;

    return replace_string(schema, schema_length, metadata_schema, metadata_schema_length);
}
/* Write the table to `out` as tab-separated text: the metadata schema header, a
 * column header line, then one line per edge.
 *
 * Returns 0 on success, or TSK_ERR_IO as soon as any write reports failure. */
int
tsk_edge_table_dump_text(const tsk_edge_table_t *self, FILE *out)
{
    tsk_id_t j;
    int ret = TSK_ERR_IO;
    tsk_edge_t row;
    const char *metadata;
    int err;

    err = write_metadata_schema_header(
        out, self->metadata_schema, self->metadata_schema_length);
    if (err < 0) {
        goto out;
    }
    err = fprintf(out, "id\tleft\tright\tparent\tchild\tmetadata\n");
    if (err < 0) {
        goto out;
    }
    for (j = 0; j < (tsk_id_t) self->num_rows; j++) {
        tsk_edge_table_get_row_unsafe(self, j, &row);
        /* Tables without metadata report a NULL pointer. Passing NULL to %s is
         * undefined even with a precision of 0, so print an empty string. */
        metadata = row.metadata == NULL ? "" : row.metadata;
        err = fprintf(out, "%lld\t%.3f\t%.3f\t%lld\t%lld\t%.*s\n", (long long) j,
            row.left, row.right, (long long) row.parent, (long long) row.child,
            (int) row.metadata_length, metadata);
        if (err < 0) {
            goto out;
        }
    }
    ret = 0;
out:
    return ret;
}
/* Compare two edge tables column by column.
 *
 * The row count and the left, right, parent and child columns must match.
 * Unless TSK_CMP_IGNORE_METADATA is set in options, the metadata schema and the
 * metadata must match too. A table with metadata disabled compares equal, as
 * far as metadata is concerned, to a table whose metadata is empty. */
bool
tsk_edge_table_equals(
    const tsk_edge_table_t *self, const tsk_edge_table_t *other, tsk_flags_t options)
{
    bool metadata_equal;
    bool ret
        = self->num_rows == other->num_rows
          && tsk_memcmp(self->left, other->left, self->num_rows * sizeof(double)) == 0
          && tsk_memcmp(self->right, other->right, self->num_rows * sizeof(double)) == 0
          && tsk_memcmp(self->parent, other->parent, self->num_rows * sizeof(tsk_id_t))
                 == 0
          && tsk_memcmp(self->child, other->child, self->num_rows * sizeof(tsk_id_t))
                 == 0;

    /* Only look at metadata once the fixed columns are known to match. The
     * offset comparison is sized by self->num_rows, so running it when the row
     * counts differ could read past the end of other->metadata_offset. */
    if (ret && !(options & TSK_CMP_IGNORE_METADATA)) {
        ret = self->metadata_schema_length == other->metadata_schema_length
              && tsk_memcmp(self->metadata_schema, other->metadata_schema,
                     self->metadata_schema_length * sizeof(char))
                     == 0;
        metadata_equal = false;
        if (self->metadata_length == other->metadata_length) {
            if (tsk_edge_table_has_metadata(self)
                && tsk_edge_table_has_metadata(other)) {
                metadata_equal
                    = tsk_memcmp(self->metadata_offset, other->metadata_offset,
                          (self->num_rows + 1) * sizeof(tsk_size_t))
                          == 0
                      && tsk_memcmp(self->metadata, other->metadata,
                             self->metadata_length * sizeof(char))
                             == 0;
            } else {
                /* The only way that the metadata lengths can be equal (which
                 * we've already tested) and either one or the other of the tables
                 * hasn't got metadata is if they are both zero length. */
                tsk_bug_assert(self->metadata_length == 0);
                metadata_equal = true;
            }
        }
        ret = ret && metadata_equal;
    }
    return ret;
}
/* Retain only the rows selected by the `keep` mask, compacting every column in
 * place.
 *
 * If id_map is not NULL it is filled in by keep_mask_to_id_map before the
 * columns are subset. The options argument is unused. Always returns 0. */
int
tsk_edge_table_keep_rows(tsk_edge_table_t *self, const tsk_bool_t *keep,
    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)
{
    const tsk_size_t total_rows = self->num_rows;
    tsk_size_t kept_rows;

    if (id_map != NULL) {
        keep_mask_to_id_map(total_rows, keep, id_map);
    }

    /* Each subset call is given the same mask; the count returned for the
     * left column is used as the new row count. */
    kept_rows = subset_double_column(self->left, total_rows, keep);
    subset_double_column(self->right, total_rows, keep);
    subset_id_column(self->parent, total_rows, keep);
    subset_id_column(self->child, total_rows, keep);
    if (self->metadata_length > 0) {
        /* Non-empty metadata implies the metadata columns exist */
        tsk_bug_assert(!(self->options & TSK_TABLE_NO_METADATA));
        self->metadata_length = subset_ragged_char_column(
            self->metadata, self->metadata_offset, total_rows, keep);
    }
    self->num_rows = kept_rows;
    return 0;
}
/* Write the edge table's columns to `store` under "edges/..." keys.
 *
 * The fixed-width columns and the metadata schema are always written; the
 * ragged metadata column is only written when the table has metadata enabled.
 * Returns 0 on success or the error code of the failing write. */
static int
tsk_edge_table_dump(const tsk_edge_table_t *self, kastore_t *store, tsk_flags_t options)
{
    int ret;
    const tsk_size_t num_rows = self->num_rows;
    const write_table_col_t fixed_cols[] = {
        { "edges/left", (void *) self->left, num_rows, KAS_FLOAT64 },
        { "edges/right", (void *) self->right, num_rows, KAS_FLOAT64 },
        { "edges/parent", (void *) self->parent, num_rows, TSK_ID_STORAGE_TYPE },
        { "edges/child", (void *) self->child, num_rows, TSK_ID_STORAGE_TYPE },
        { "edges/metadata_schema", (void *) self->metadata_schema,
            self->metadata_schema_length, KAS_UINT8 },
        { .name = NULL },
    };
    const write_table_ragged_col_t variable_cols[] = {
        { "edges/metadata", (void *) self->metadata, self->metadata_length, KAS_UINT8,
            self->metadata_offset, num_rows },
        { .name = NULL },
    };

    /* TODO when the general code has been updated to only write out the
     * column when the length of ragged columns is > 0 we can get rid of
     * this special case here and use write_table. */
    ret = write_table_cols(store, fixed_cols, options);
    if (ret == 0 && tsk_edge_table_has_metadata(self)) {
        ret = write_table_ragged_cols(store, variable_cols, options);
    }
    return ret;
}
/* Read the "edges/..." columns from `store` and install them in the table.
 *
 * read_table fills the local buffers; both the metadata column and the metadata
 * schema are optional in the store. The schema is applied first if present, and
 * the column buffers are then handed to tsk_edge_table_takeset_columns.
 *
 * Returns 0 on success or the error code of the failing step. */
static int
tsk_edge_table_load(tsk_edge_table_t *self, kastore_t *store)
{
    int ret = 0;
    tsk_size_t num_rows, metadata_length, metadata_schema_length;
    double *left = NULL;
    double *right = NULL;
    tsk_id_t *parent = NULL;
    tsk_id_t *child = NULL;
    char *metadata = NULL;
    tsk_size_t *metadata_offset = NULL;
    char *metadata_schema = NULL;
    read_table_col_t cols[] = {
        { "edges/left", (void **) &left, KAS_FLOAT64, 0 },
        { "edges/right", (void **) &right, KAS_FLOAT64, 0 },
        { "edges/parent", (void **) &parent, TSK_ID_STORAGE_TYPE, 0 },
        { "edges/child", (void **) &child, TSK_ID_STORAGE_TYPE, 0 },
        { .name = NULL },
    };
    read_table_ragged_col_t ragged_cols[] = {
        { "edges/metadata", (void **) &metadata, &metadata_length, KAS_UINT8,
            &metadata_offset, TSK_COL_OPTIONAL },
        { .name = NULL },
    };
    read_table_property_t properties[] = {
        { "edges/metadata_schema", (void **) &metadata_schema, &metadata_schema_length,
            KAS_UINT8, TSK_COL_OPTIONAL },
        { .name = NULL },
    };

    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);
    if (ret != 0) {
        goto out;
    }
    if (metadata_schema != NULL) {
        ret = tsk_edge_table_set_metadata_schema(
            self, metadata_schema, metadata_schema_length);
        if (ret != 0) {
            goto out;
        }
    }
    ret = tsk_edge_table_takeset_columns(
        self, num_rows, left, right, parent, child, metadata, metadata_offset);
    if (ret == 0) {
        /* The table has taken the buffers. Clear the locals, which the column
         * descriptors point at, presumably so that free_read_table_mem below
         * does not release memory the table now holds. */
        left = NULL;
        right = NULL;
        parent = NULL;
        child = NULL;
        metadata = NULL;
        metadata_offset = NULL;
    }
out:
    free_read_table_mem(cols, ragged_cols, properties);
    return ret;
}
/* Squash the edges of the table in place using tsk_squash_edges.
 *
 * The rows are copied into a temporary array of tsk_edge_t, squashed there, and
 * the resulting edges are written back over the start of the table. Tables that
 * hold any edge metadata are rejected.
 *
 * Returns 0 on success, TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA if the table
 * has metadata, TSK_ERR_NO_MEMORY if the temporary array cannot be allocated, or
 * the error code of a failing helper. */
int
tsk_edge_table_squash(tsk_edge_table_t *self)
{
    /* Row counts are tsk_size_t, so index with that type rather than int */
    tsk_size_t k;
    int ret = 0;
    tsk_edge_t *edges = NULL;
    tsk_size_t num_output_edges;

    if (self->metadata_length > 0) {
        ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);
        goto out;
    }

    edges = tsk_malloc(self->num_rows * sizeof(*edges));
    if (edges == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    for (k = 0; k < self->num_rows; k++) {
        edges[k].left = self->left[k];
        edges[k].right = self->right[k];
        edges[k].parent = self->parent[k];
        edges[k].child = self->child[k];
        /* No metadata: keep the pointer consistent with the zero length */
        edges[k].metadata = NULL;
        edges[k].metadata_length = 0;
    }

    ret = tsk_squash_edges(edges, self->num_rows, &num_output_edges);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_edge_table_clear(self);
    if (ret != 0) {
        goto out;
    }
    /* Clearing does not shrink the columns, so the output must fit in them */
    tsk_bug_assert(num_output_edges <= self->max_rows);
    self->num_rows = num_output_edges;
    for (k = 0; k < num_output_edges; k++) {
        self->left[k] = edges[k].left;
        self->right[k] = edges[k].right;
        self->parent[k] = edges[k].parent;
        self->child[k] = edges[k].child;
    }
out:
    tsk_safe_free(edges);
    return ret;
}
/*************************
* site table
*************************/
/* Release every column buffer owned by the site table. The metadata schema is
 * not touched here; tsk_site_table_free handles it. */
static void
tsk_site_table_free_columns(tsk_site_table_t *self)
{
    /* Fixed-width column */
    tsk_safe_free(self->position);
    /* Ragged columns: offsets first, then the data they index */
    tsk_safe_free(self->ancestral_state_offset);
    tsk_safe_free(self->ancestral_state);
    tsk_safe_free(self->metadata_offset);
    tsk_safe_free(self->metadata);
}
/* Free all memory held by the site table: the metadata schema and every
 * column. Always returns 0. */
int
tsk_site_table_free(tsk_site_table_t *self)
{
    tsk_safe_free(self->metadata_schema);
    tsk_site_table_free_columns(self);
    return 0;
}
/* Ensure the per-row columns can hold `additional_rows` more rows.
 *
 * The new capacity is computed by calculate_max_rows. When the current
 * capacity is insufficient, `position` is grown to that many entries and the
 * two offset columns to one more than that. `max_rows` is only updated once
 * every column has been expanded.
 *
 * Returns 0 on success or the error from calculate_max_rows/expand_column. */
static int
tsk_site_table_expand_main_columns(tsk_site_table_t *self, tsk_size_t additional_rows)
{
    int ret;
    tsk_size_t new_max_rows;

    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,
        additional_rows, &new_max_rows);
    if (ret != 0) {
        return ret;
    }
    if ((self->num_rows + additional_rows) <= self->max_rows) {
        /* Already enough room: nothing to reallocate. */
        return 0;
    }

    ret = expand_column((void **) &self->position, new_max_rows, sizeof(double));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->ancestral_state_offset, new_max_rows + 1,
        sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column(
        (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    self->max_rows = new_max_rows;
    return 0;
}
/* Make room for `additional_length` more elements in the ancestral_state data
 * column by delegating to expand_ragged_column. Returns its result. */
static int
tsk_site_table_expand_ancestral_state(
    tsk_site_table_t *self, tsk_size_t additional_length)
{
    int ret;

    ret = expand_ragged_column(self->ancestral_state_length, additional_length,
        self->max_ancestral_state_length_increment, &self->max_ancestral_state_length,
        (void **) &self->ancestral_state, sizeof(*self->ancestral_state));
    return ret;
}
/* Make room for `additional_length` more elements in the metadata data column
 * by delegating to expand_ragged_column. Returns its result. */
static int
tsk_site_table_expand_metadata(tsk_site_table_t *self, tsk_size_t additional_length)
{
    int ret;

    ret = expand_ragged_column(self->metadata_length, additional_length,
        self->max_metadata_length_increment, &self->max_metadata_length,
        (void **) &self->metadata, sizeof(*self->metadata));
    return ret;
}
/* Store the row-capacity increment that is passed to calculate_max_rows when
 * the main columns are expanded. Always returns 0. */
int
tsk_site_table_set_max_rows_increment(
    tsk_site_table_t *self, tsk_size_t max_rows_increment)
{
    self->max_rows_increment = max_rows_increment;

    return 0;
}
/* Store the increment that is passed to expand_ragged_column when the
 * metadata column is expanded. Always returns 0. */
int
tsk_site_table_set_max_metadata_length_increment(
    tsk_site_table_t *self, tsk_size_t max_metadata_length_increment)
{
    self->max_metadata_length_increment = max_metadata_length_increment;

    return 0;
}
/* Store the increment that is passed to expand_ragged_column when the
 * ancestral_state column is expanded. Always returns 0. */
int
tsk_site_table_set_max_ancestral_state_length_increment(
    tsk_site_table_t *self, tsk_size_t max_ancestral_state_length_increment)
{
    self->max_ancestral_state_length_increment = max_ancestral_state_length_increment;

    return 0;
}
/* Initialise an empty site table.
 *
 * The struct is zeroed, then one row / one element of space is allocated in
 * every column so that the column pointers are valid even while the table is
 * empty. The increments are temporarily set to 1 for that allocation and
 * reset to 0 afterwards. `options` is unused.
 *
 * Returns 0 on success or the error from the failing expansion. */
int
tsk_site_table_init(tsk_site_table_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret;

    tsk_memset(self, 0, sizeof(*self));

    /* Increments of 1 for the initial allocation only. */
    self->max_rows_increment = 1;
    self->max_ancestral_state_length_increment = 1;
    self->max_metadata_length_increment = 1;

    ret = tsk_site_table_expand_main_columns(self, 1);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_site_table_expand_ancestral_state(self, 1);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_site_table_expand_metadata(self, 1);
    if (ret != 0) {
        return ret;
    }

    /* An empty ragged column has a single offset entry of zero. */
    self->ancestral_state_offset[0] = 0;
    self->metadata_offset[0] = 0;

    self->max_rows_increment = 0;
    self->max_ancestral_state_length_increment = 0;
    self->max_metadata_length_increment = 0;
    tsk_site_table_set_metadata_schema(self, NULL, 0);
    return ret;
}
/* Append one site to the table.
 *
 * `ancestral_state` and `metadata` are copied into the ragged columns
 * (`ancestral_state_length` and `metadata_length` elements respectively).
 *
 * Returns the ID (row index) of the new site, or a negative error code from
 * one of the expansion helpers.
 *
 * All capacity is reserved before any table state is modified. Previously the
 * ancestral_state length and offset were committed before the metadata column
 * was expanded, so a failure there left `ancestral_state_length` out of step
 * with `ancestral_state_offset[num_rows]`. */
tsk_id_t
tsk_site_table_add_row(tsk_site_table_t *self, double position,
    const char *ancestral_state, tsk_size_t ancestral_state_length, const char *metadata,
    tsk_size_t metadata_length)
{
    tsk_id_t ret = 0;
    tsk_size_t ancestral_state_offset, metadata_offset;

    /* Phase 1: reserve space; nothing below may fail after this phase. */
    ret = tsk_site_table_expand_main_columns(self, 1);
    if (ret != 0) {
        goto out;
    }
    ancestral_state_offset = (tsk_size_t) self->ancestral_state_length;
    tsk_bug_assert(
        self->ancestral_state_offset[self->num_rows] == ancestral_state_offset);
    metadata_offset = (tsk_size_t) self->metadata_length;
    tsk_bug_assert(self->metadata_offset[self->num_rows] == metadata_offset);

    ret = tsk_site_table_expand_ancestral_state(self, ancestral_state_length);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_site_table_expand_metadata(self, metadata_length);
    if (ret != 0) {
        goto out;
    }

    /* Phase 2: commit the new row. */
    self->position[self->num_rows] = position;

    self->ancestral_state_length += ancestral_state_length;
    tsk_memmove(self->ancestral_state + ancestral_state_offset, ancestral_state,
        ancestral_state_length);
    self->ancestral_state_offset[self->num_rows + 1] = self->ancestral_state_length;

    self->metadata_length += metadata_length;
    tsk_memmove(self->metadata + metadata_offset, metadata, metadata_length);
    self->metadata_offset[self->num_rows + 1] = self->metadata_length;

    ret = (tsk_id_t) self->num_rows;
    self->num_rows++;
out:
    return ret;
}
/* Replace row `index` when the new ragged values differ in length from the
 * stored ones.
 *
 * The table is copied, truncated to `index` rows, the replacement row is
 * appended, and the rows after `index` are then re-appended from the copy.
 *
 * Returns 0 on success or a negative error code. Note that if a step after
 * the truncation fails, the table is left truncated.
 *
 * The address-of expressions on the table copy had been corrupted into a
 * stray "(c)"-style character in this source; they are restored here, and the
 * local is named `saved` so the text cannot be mistaken for a character
 * entity again. */
static int
tsk_site_table_update_row_rewrite(tsk_site_table_t *self, tsk_id_t index,
    double position, const char *ancestral_state, tsk_size_t ancestral_state_length,
    const char *metadata, tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_id_t j, ret_id;
    tsk_site_table_t saved;
    tsk_size_t num_rows;
    tsk_id_t *rows = NULL;

    /* tsk_site_table_copy initialises `saved` (it zeroes the struct first), so
     * the free at `out` is valid on every path. */
    ret = tsk_site_table_copy(self, &saved, 0);
    if (ret != 0) {
        goto out;
    }
    rows = tsk_malloc(self->num_rows * sizeof(*rows));
    if (rows == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* `index` is expected to be a valid row here, so truncation cannot fail. */
    ret = tsk_site_table_truncate(self, (tsk_size_t) index);
    tsk_bug_assert(ret == 0);
    ret_id = tsk_site_table_add_row(self, position, ancestral_state,
        ancestral_state_length, metadata, metadata_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }

    /* Collect the IDs of the rows that followed `index` in the original. */
    num_rows = 0;
    for (j = index + 1; j < (tsk_id_t) saved.num_rows; j++) {
        rows[num_rows] = j;
        num_rows++;
    }
    ret = tsk_site_table_extend(self, &saved, num_rows, rows, 0);
    if (ret != 0) {
        goto out;
    }
out:
    tsk_site_table_free(&saved);
    tsk_safe_free(rows);
    return ret;
}
/* Overwrite row `index` with the supplied values.
 *
 * When both ragged values keep their current length, the row is updated in
 * place. Otherwise the table is rebuilt around the row by
 * tsk_site_table_update_row_rewrite.
 *
 * Returns 0 on success, the error from tsk_site_table_get_row if `index` is
 * not a valid row, or the error from the rewrite path.
 *
 * The address-of expression on the row struct had been corrupted in this
 * source; it is restored here with the local named `prev_row`. */
int
tsk_site_table_update_row(tsk_site_table_t *self, tsk_id_t index, double position,
    const char *ancestral_state, tsk_size_t ancestral_state_length, const char *metadata,
    tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_site_t prev_row;

    ret = tsk_site_table_get_row(self, index, &prev_row);
    if (ret != 0) {
        goto out;
    }
    if (prev_row.metadata_length == metadata_length
        && prev_row.ancestral_state_length == ancestral_state_length) {
        self->position[index] = position;
        /* Note: important to use tsk_memmove here as we may be provided pointers
         * to the column memory as input via get_row */
        tsk_memmove(&self->ancestral_state[self->ancestral_state_offset[index]],
            ancestral_state, ancestral_state_length * sizeof(*ancestral_state));
        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,
            metadata_length * sizeof(*metadata));
    } else {
        ret = tsk_site_table_update_row_rewrite(self, index, position, ancestral_state,
            ancestral_state_length, metadata, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Append `num_rows` sites given in column form.
 *
 * `position`, `ancestral_state` and `ancestral_state_offset` are required.
 * `metadata` and `metadata_offset` must be either both NULL (every new row
 * gets empty metadata) or both non-NULL. Offset arrays hold `num_rows + 1`
 * entries; the last entry is used as the total data length.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE for the NULL-argument
 * violations above, or the error from check_offsets or an expansion helper.
 *
 * The function validates every input and reserves all capacity before it
 * modifies the table. Previously the metadata column was committed before the
 * ancestral_state offsets were checked, so a late failure left
 * `metadata_length` out of step with `metadata_offset[num_rows]`. */
int
tsk_site_table_append_columns(tsk_site_table_t *self, tsk_size_t num_rows,
    const double *position, const char *ancestral_state,
    const tsk_size_t *ancestral_state_offset, const char *metadata,
    const tsk_size_t *metadata_offset)
{
    int ret = 0;
    tsk_size_t j, ancestral_state_length, metadata_length;

    if (position == NULL || ancestral_state == NULL || ancestral_state_offset == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if ((metadata == NULL) != (metadata_offset == NULL)) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }

    /* Phase 1: validate the offsets. */
    metadata_length = 0;
    if (metadata != NULL) {
        ret = check_offsets(num_rows, metadata_offset, 0, false);
        if (ret != 0) {
            goto out;
        }
        metadata_length = metadata_offset[num_rows];
    }
    ret = check_offsets(num_rows, ancestral_state_offset, 0, false);
    if (ret != 0) {
        goto out;
    }
    ancestral_state_length = ancestral_state_offset[num_rows];

    /* Phase 2: reserve capacity. */
    ret = tsk_site_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        goto out;
    }
    if (metadata != NULL) {
        ret = tsk_site_table_expand_metadata(self, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
    ret = tsk_site_table_expand_ancestral_state(self, ancestral_state_length);
    if (ret != 0) {
        goto out;
    }

    /* Phase 3: commit. Nothing below can fail. */
    tsk_memcpy(self->position + self->num_rows, position, num_rows * sizeof(double));

    /* Metadata column */
    if (metadata == NULL) {
        for (j = 0; j < num_rows; j++) {
            self->metadata_offset[self->num_rows + j + 1] = self->metadata_length;
        }
    } else {
        tsk_memcpy(self->metadata + self->metadata_length, metadata,
            metadata_length * sizeof(char));
        /* Incoming offsets are relative to their own data; rebase them onto
         * the end of the existing data. */
        for (j = 0; j < num_rows; j++) {
            self->metadata_offset[self->num_rows + j]
                = self->metadata_length + metadata_offset[j];
        }
        self->metadata_length += metadata_length;
    }
    self->metadata_offset[self->num_rows + num_rows] = self->metadata_length;

    /* Ancestral state column */
    tsk_memcpy(self->ancestral_state + self->ancestral_state_length, ancestral_state,
        ancestral_state_length * sizeof(char));
    for (j = 0; j < num_rows; j++) {
        self->ancestral_state_offset[self->num_rows + j]
            = self->ancestral_state_length + ancestral_state_offset[j];
    }
    self->ancestral_state_length += ancestral_state_length;
    self->ancestral_state_offset[self->num_rows + num_rows]
        = self->ancestral_state_length;

    self->num_rows += num_rows;
out:
    return ret;
}
/* Copy the contents of `self` (columns and metadata schema) into `dest`.
 *
 * `dest` is initialised first unless TSK_NO_INIT is set in `options`.
 * Returns 0 on success or the first error encountered. */
int TSK_WARN_UNUSED
tsk_site_table_copy(
    const tsk_site_table_t *self, tsk_site_table_t *dest, tsk_flags_t options)
{
    int ret = 0;

    if ((options & TSK_NO_INIT) == 0) {
        ret = tsk_site_table_init(dest, 0);
        if (ret != 0) {
            return ret;
        }
    }
    ret = tsk_site_table_set_columns(dest, self->num_rows, self->position,
        self->ancestral_state, self->ancestral_state_offset, self->metadata,
        self->metadata_offset);
    if (ret != 0) {
        return ret;
    }
    return tsk_site_table_set_metadata_schema(
        dest, self->metadata_schema, self->metadata_schema_length);
}
/* Replace the table's rows with the supplied columns: the table is cleared
 * and the columns are then appended. Returns 0 on success or the error from
 * either step. */
int
tsk_site_table_set_columns(tsk_site_table_t *self, tsk_size_t num_rows,
    const double *position, const char *ancestral_state,
    const tsk_size_t *ancestral_state_offset, const char *metadata,
    const tsk_size_t *metadata_offset)
{
    int ret = tsk_site_table_clear(self);

    if (ret == 0) {
        ret = tsk_site_table_append_columns(self, num_rows, position, ancestral_state,
            ancestral_state_offset, metadata, metadata_offset);
    }
    return ret;
}
/* Replace the table's columns with the caller's buffers, without copying.
 *
 * `position`, `ancestral_state` and `ancestral_state_offset` are required;
 * both ragged columns are validated with check_ragged_column before any
 * existing column is freed. On success the table's own column pointers refer
 * to the supplied buffers (takeset_ragged_column decides what is stored for
 * the metadata column).
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE if a required column is NULL,
 * or the error from check_ragged_column/takeset_ragged_column.
 *
 * The ragged capacity fields are now resynchronised to the adopted lengths,
 * mirroring what is done for `max_rows`. Leaving the old capacities in place
 * would describe buffers that have just been freed.
 * NOTE(review): this assumes each adopted data buffer holds at least
 * `offset[num_rows]` elements - confirm against takeset_ragged_column. */
int
tsk_site_table_takeset_columns(tsk_site_table_t *self, tsk_size_t num_rows,
    double *position, char *ancestral_state, tsk_size_t *ancestral_state_offset,
    char *metadata, tsk_size_t *metadata_offset)
{
    int ret = 0;

    /* We need to check all the inputs before we start freeing or taking memory */
    if (position == NULL || ancestral_state == NULL || ancestral_state_offset == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    ret = check_ragged_column(num_rows, ancestral_state, ancestral_state_offset);
    if (ret != 0) {
        goto out;
    }
    ret = check_ragged_column(num_rows, metadata, metadata_offset);
    if (ret != 0) {
        goto out;
    }

    tsk_site_table_free_columns(self);
    self->num_rows = num_rows;
    self->max_rows = num_rows;
    self->position = position;

    ret = takeset_ragged_column(num_rows, ancestral_state, ancestral_state_offset,
        (void *) &self->ancestral_state, &self->ancestral_state_offset,
        &self->ancestral_state_length);
    if (ret != 0) {
        goto out;
    }
    /* The recorded capacity must describe the new buffer, not the old one. */
    self->max_ancestral_state_length = self->ancestral_state_length;

    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,
        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);
    if (ret != 0) {
        goto out;
    }
    self->max_metadata_length = self->metadata_length;
out:
    return ret;
}
/* Compare two site tables.
 *
 * The tables are equal when they have the same number of rows and identical
 * position and ancestral_state columns. Unless TSK_CMP_IGNORE_METADATA is set
 * in `options`, the metadata column and the metadata schema must match too. */
bool
tsk_site_table_equals(
    const tsk_site_table_t *self, const tsk_site_table_t *other, tsk_flags_t options)
{
    /* Cheap scalar comparisons first. */
    if (self->num_rows != other->num_rows
        || self->ancestral_state_length != other->ancestral_state_length) {
        return false;
    }
    if (tsk_memcmp(self->position, other->position, self->num_rows * sizeof(double))
        != 0) {
        return false;
    }
    if (tsk_memcmp(self->ancestral_state_offset, other->ancestral_state_offset,
            (self->num_rows + 1) * sizeof(tsk_size_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(self->ancestral_state, other->ancestral_state,
            self->ancestral_state_length * sizeof(char))
        != 0) {
        return false;
    }

    if (options & TSK_CMP_IGNORE_METADATA) {
        return true;
    }

    if (self->metadata_length != other->metadata_length
        || self->metadata_schema_length != other->metadata_schema_length) {
        return false;
    }
    return tsk_memcmp(self->metadata_offset, other->metadata_offset,
               (self->num_rows + 1) * sizeof(tsk_size_t))
               == 0
           && tsk_memcmp(self->metadata, other->metadata,
                  self->metadata_length * sizeof(char))
                  == 0
           && tsk_memcmp(self->metadata_schema, other->metadata_schema,
                  self->metadata_schema_length * sizeof(char))
                  == 0;
}
/* Remove every row from the table by truncating it to zero rows. Returns the
 * result of tsk_site_table_truncate. */
int
tsk_site_table_clear(tsk_site_table_t *self)
{
    const tsk_size_t no_rows = 0;

    return tsk_site_table_truncate(self, no_rows);
}
/* Drop all rows from index `num_rows` onwards.
 *
 * Only the row count and the ragged column lengths are adjusted; no memory is
 * released. Returns TSK_ERR_BAD_TABLE_POSITION if `num_rows` exceeds the
 * current number of rows, otherwise 0. */
int
tsk_site_table_truncate(tsk_site_table_t *self, tsk_size_t num_rows)
{
    if (num_rows > self->num_rows) {
        return tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);
    }
    /* The offset at the new row count marks the end of the retained data. */
    self->ancestral_state_length = self->ancestral_state_offset[num_rows];
    self->metadata_length = self->metadata_offset[num_rows];
    self->num_rows = num_rows;
    return 0;
}
/* Append `num_rows` rows taken from `other`.
 *
 * When `row_indexes` is NULL the first `num_rows` rows of `other` are used in
 * order; otherwise row j comes from `other` row `row_indexes[j]`. `options`
 * is unused.
 *
 * Returns 0 on success, TSK_ERR_CANNOT_EXTEND_FROM_SELF if `other` is `self`,
 * or the error from the row lookup / row append. */
int
tsk_site_table_extend(tsk_site_table_t *self, const tsk_site_table_t *other,
    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))
{
    int ret;
    tsk_size_t j;
    tsk_id_t source_row, new_id;
    tsk_site_t site;

    if (self == other) {
        return tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);
    }

    /* We know how much to expand the non-ragged columns, so do it ahead of time */
    ret = tsk_site_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        return ret;
    }
    for (j = 0; j < num_rows; j++) {
        source_row = (row_indexes == NULL) ? (tsk_id_t) j : row_indexes[j];
        ret = tsk_site_table_get_row(other, source_row, &site);
        if (ret != 0) {
            return ret;
        }
        new_id = tsk_site_table_add_row(self, site.position, site.ancestral_state,
            site.ancestral_state_length, site.metadata, site.metadata_length);
        if (new_id < 0) {
            return (int) new_id;
        }
    }
    return 0;
}
void
tsk_site_table_print_state(const tsk_site_table_t *self, FILE *out)
{
int ret;
fprintf(out, "\n" TABLE_SEP);
fprintf(out, "site_table: %p:\n", (const void *) self);
fprintf(out, "num_rows = %lld\t(max= %lld\tincrement = %lld)\n",
(long long) self->num_rows, (long long) self->max_rows,
(long long) self->max_rows_increment);
fprintf(out, "ancestral_state_length = %lld\t(max= %lld\tincrement = %lld)\n",
(long long) self->ancestral_state_length,
(long long) self->max_ancestral_state_length,
(long long) self->max_ancestral_state_length_increment);
fprintf(out, "metadata_length = %lld(\tmax= %lld\tincrement = %lld)\n",
(long long) self->metadata_length, (long long) self->max_metadata_length,
(long long) self->max_metadata_length_increment);
fprintf(out, TABLE_SEP);
ret = tsk_site_table_dump_text(self, out);
tsk_bug_assert(ret == 0);
tsk_bug_assert(self->ancestral_state_offset[0] == 0);
tsk_bug_assert(
self->ancestral_state_length == self->ancestral_state_offset[self->num_rows]);
tsk_bug_assert(self->metadata_offset[0] == 0);
tsk_bug_assert(self->metadata_length == self->metadata_offset[self->num_rows]);
}
/* Fill `row` from table row `index` without any bounds checking.
 *
 * The ancestral_state and metadata pointers stored in `row` point directly
 * into the table's column memory; nothing is copied. */
static inline void
tsk_site_table_get_row_unsafe(
    const tsk_site_table_t *self, tsk_id_t index, tsk_site_t *row)
{
    const tsk_size_t state_begin = self->ancestral_state_offset[index];
    const tsk_size_t state_end = self->ancestral_state_offset[index + 1];
    const tsk_size_t metadata_begin = self->metadata_offset[index];
    const tsk_size_t metadata_end = self->metadata_offset[index + 1];

    row->id = (tsk_id_t) index;
    row->position = self->position[index];
    row->ancestral_state_length = state_end - state_begin;
    row->ancestral_state = self->ancestral_state + state_begin;
    row->metadata_length = metadata_end - metadata_begin;
    row->metadata = self->metadata + metadata_begin;
    /* This struct has a placeholder for mutations. Probably should be separate
     * structs for this (tsk_site_table_row_t?) */
    row->mutations_length = 0;
    row->mutations = NULL;
}
/* Bounds-checked row lookup. Fills `row` from table row `index` and returns
 * 0, or returns TSK_ERR_SITE_OUT_OF_BOUNDS (leaving `row` untouched) when
 * `index` is negative or not less than the number of rows. */
int
tsk_site_table_get_row(const tsk_site_table_t *self, tsk_id_t index, tsk_site_t *row)
{
    if (index < 0 || index >= (tsk_id_t) self->num_rows) {
        return tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);
    }
    tsk_site_table_get_row_unsafe(self, index, row);
    return 0;
}
/* Set the table's metadata schema to the `metadata_schema_length` bytes at
 * `metadata_schema`, via replace_string. Returns replace_string's result. */
int
tsk_site_table_set_metadata_schema(tsk_site_table_t *self, const char *metadata_schema,
    tsk_size_t metadata_schema_length)
{
    int ret;

    ret = replace_string(&self->metadata_schema, &self->metadata_schema_length,
        metadata_schema, metadata_schema_length);
    return ret;
}
/* Write the table to `out` as tab-separated text: the metadata schema
 * header, a column header line, then one line per row.
 *
 * Returns 0 on success, or TSK_ERR_IO as soon as any write reports a
 * negative result. */
int
tsk_site_table_dump_text(const tsk_site_table_t *self, FILE *out)
{
    tsk_size_t j;
    tsk_size_t ancestral_state_len, metadata_len;

    if (write_metadata_schema_header(
            out, self->metadata_schema, self->metadata_schema_length)
        < 0) {
        return TSK_ERR_IO;
    }
    if (fprintf(out, "id\tposition\tancestral_state\tmetadata\n") < 0) {
        return TSK_ERR_IO;
    }
    for (j = 0; j < self->num_rows; j++) {
        ancestral_state_len
            = self->ancestral_state_offset[j + 1] - self->ancestral_state_offset[j];
        metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];
        /* The ragged values are not NUL-terminated, so their lengths are
         * passed as precisions. */
        if (fprintf(out, "%lld\t%f\t%.*s\t%.*s\n", (long long) j, self->position[j],
                (int) ancestral_state_len,
                self->ancestral_state + self->ancestral_state_offset[j],
                (int) metadata_len, self->metadata + self->metadata_offset[j])
            < 0) {
            return TSK_ERR_IO;
        }
    }
    return 0;
}
/* Retain only the rows selected by the `keep` mask.
 *
 * When `id_map` is not NULL it is filled by keep_mask_to_id_map before any
 * column is modified. Each column is then subset in place; the metadata
 * column is skipped when it holds no data. `options` is unused. Always
 * returns 0. */
int
tsk_site_table_keep_rows(tsk_site_table_t *self, const tsk_bool_t *keep,
    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)
{
    /* All subsetting calls need the row count from before the filtering. */
    const tsk_size_t rows_before = self->num_rows;
    tsk_size_t rows_after;

    if (id_map != NULL) {
        keep_mask_to_id_map(rows_before, keep, id_map);
    }

    rows_after = subset_double_column(self->position, rows_before, keep);
    self->ancestral_state_length = subset_ragged_char_column(
        self->ancestral_state, self->ancestral_state_offset, rows_before, keep);
    if (self->metadata_length > 0) {
        self->metadata_length = subset_ragged_char_column(
            self->metadata, self->metadata_offset, rows_before, keep);
    }
    self->num_rows = rows_after;
    return 0;
}
/* Write the site table to `store` under the "sites/" keys: the position
 * column and metadata schema as plain columns, ancestral_state and metadata
 * as ragged columns. Returns the result of write_table. */
static int
tsk_site_table_dump(const tsk_site_table_t *self, kastore_t *store, tsk_flags_t options)
{
    int ret;
    const write_table_col_t cols[] = {
        { "sites/position", (void *) self->position, self->num_rows, KAS_FLOAT64 },
        { "sites/metadata_schema", (void *) self->metadata_schema,
            self->metadata_schema_length, KAS_UINT8 },
        { .name = NULL },
    };
    const write_table_ragged_col_t ragged_cols[] = {
        { "sites/ancestral_state", (void *) self->ancestral_state,
            self->ancestral_state_length, KAS_UINT8, self->ancestral_state_offset,
            self->num_rows },
        { "sites/metadata", (void *) self->metadata, self->metadata_length, KAS_UINT8,
            self->metadata_offset, self->num_rows },
        { .name = NULL },
    };

    ret = write_table(store, cols, ragged_cols, options);
    return ret;
}
/* Load the site table from `store` into `self`.
 *
 * read_table fills in the position column, the two ragged columns and the
 * optional "sites/metadata_schema" property. The schema (when present) is
 * copied into the table, and the column buffers are then passed to
 * tsk_site_table_takeset_columns.
 *
 * Returns 0 on success, or the first non-zero code returned by a callee. */
static int
tsk_site_table_load(tsk_site_table_t *self, kastore_t *store)
{
    int ret = 0;
    tsk_size_t num_rows, ancestral_state_length, metadata_length, metadata_schema_length;
    char *metadata_schema = NULL;
    double *position = NULL;
    char *ancestral_state = NULL;
    tsk_size_t *ancestral_state_offset = NULL;
    char *metadata = NULL;
    tsk_size_t *metadata_offset = NULL;
    read_table_col_t cols[] = {
        { "sites/position", (void **) &position, KAS_FLOAT64, 0 },
        { .name = NULL },
    };
    read_table_ragged_col_t ragged_cols[] = {
        { "sites/ancestral_state", (void **) &ancestral_state, &ancestral_state_length,
            KAS_UINT8, &ancestral_state_offset, 0 },
        { "sites/metadata", (void **) &metadata, &metadata_length, KAS_UINT8,
            &metadata_offset, 0 },
        { .name = NULL },
    };
    read_table_property_t properties[] = {
        { "sites/metadata_schema", (void **) &metadata_schema, &metadata_schema_length,
            KAS_UINT8, TSK_COL_OPTIONAL },
        { .name = NULL },
    };

    ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);
    if (ret != 0) {
        goto out;
    }
    if (metadata_schema != NULL) {
        ret = tsk_site_table_set_metadata_schema(
            self, metadata_schema, metadata_schema_length);
        if (ret != 0) {
            goto out;
        }
    }
    ret = tsk_site_table_takeset_columns(self, num_rows, position, ancestral_state,
        ancestral_state_offset, metadata, metadata_offset);
    if (ret == 0) {
        /* The table now holds these buffers. Clear the locals so the cleanup
         * call below (which was given their addresses) presumably leaves them
         * alone. */
        position = NULL;
        ancestral_state = NULL;
        ancestral_state_offset = NULL;
        metadata = NULL;
        metadata_offset = NULL;
    }
out:
    free_read_table_mem(cols, ragged_cols, properties);
    return ret;
}
/*************************
* mutation table
*************************/
/* Release every column buffer owned by the mutation table. The metadata
 * schema is not touched here; tsk_mutation_table_free handles it. */
static void
tsk_mutation_table_free_columns(tsk_mutation_table_t *self)
{
    /* Fixed-width columns */
    tsk_safe_free(self->site);
    tsk_safe_free(self->node);
    tsk_safe_free(self->parent);
    tsk_safe_free(self->time);
    /* Ragged columns: offsets first, then the data they index */
    tsk_safe_free(self->derived_state_offset);
    tsk_safe_free(self->derived_state);
    tsk_safe_free(self->metadata_offset);
    tsk_safe_free(self->metadata);
}
/* Free all memory held by the mutation table: the metadata schema and every
 * column. Always returns 0. */
int
tsk_mutation_table_free(tsk_mutation_table_t *self)
{
    tsk_safe_free(self->metadata_schema);
    tsk_mutation_table_free_columns(self);
    return 0;
}
/* Ensure the per-row columns can hold `additional_rows` more rows.
 *
 * The new capacity is computed by calculate_max_rows. When the current
 * capacity is insufficient, the site, node, parent and time columns are grown
 * to that many entries and the two offset columns to one more than that.
 * `max_rows` is only updated once every column has been expanded.
 *
 * Returns 0 on success or the error from calculate_max_rows/expand_column. */
static int
tsk_mutation_table_expand_main_columns(
    tsk_mutation_table_t *self, tsk_size_t additional_rows)
{
    int ret;
    tsk_size_t new_max_rows;

    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,
        additional_rows, &new_max_rows);
    if (ret != 0) {
        return ret;
    }
    if ((self->num_rows + additional_rows) <= self->max_rows) {
        /* Already enough room: nothing to reallocate. */
        return 0;
    }

    ret = expand_column((void **) &self->site, new_max_rows, sizeof(tsk_id_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->node, new_max_rows, sizeof(tsk_id_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->parent, new_max_rows, sizeof(tsk_id_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->time, new_max_rows, sizeof(double));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column(
        (void **) &self->derived_state_offset, new_max_rows + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column(
        (void **) &self->metadata_offset, new_max_rows + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    self->max_rows = new_max_rows;
    return 0;
}
/* Make room for `additional_length` more elements in the derived_state data
 * column by delegating to expand_ragged_column. Returns its result. */
static int
tsk_mutation_table_expand_derived_state(
    tsk_mutation_table_t *self, tsk_size_t additional_length)
{
    int ret;

    ret = expand_ragged_column(self->derived_state_length, additional_length,
        self->max_derived_state_length_increment, &self->max_derived_state_length,
        (void **) &self->derived_state, sizeof(*self->derived_state));
    return ret;
}
/* Make room for `additional_length` more elements in the metadata data column
 * by delegating to expand_ragged_column. Returns its result. */
static int
tsk_mutation_table_expand_metadata(
    tsk_mutation_table_t *self, tsk_size_t additional_length)
{
    int ret;

    ret = expand_ragged_column(self->metadata_length, additional_length,
        self->max_metadata_length_increment, &self->max_metadata_length,
        (void **) &self->metadata, sizeof(*self->metadata));
    return ret;
}
/* Store the row-capacity increment that is passed to calculate_max_rows when
 * the main columns are expanded. Always returns 0. */
int
tsk_mutation_table_set_max_rows_increment(
    tsk_mutation_table_t *self, tsk_size_t max_rows_increment)
{
    self->max_rows_increment = max_rows_increment;

    return 0;
}
/* Store the increment that is passed to expand_ragged_column when the
 * metadata column is expanded. Always returns 0. */
int
tsk_mutation_table_set_max_metadata_length_increment(
    tsk_mutation_table_t *self, tsk_size_t max_metadata_length_increment)
{
    self->max_metadata_length_increment = max_metadata_length_increment;

    return 0;
}
/* Store the increment that is passed to expand_ragged_column when the
 * derived_state column is expanded. Always returns 0. */
int
tsk_mutation_table_set_max_derived_state_length_increment(
    tsk_mutation_table_t *self, tsk_size_t max_derived_state_length_increment)
{
    self->max_derived_state_length_increment = max_derived_state_length_increment;

    return 0;
}
/* Initialise an empty mutation table.
 *
 * The struct is zeroed, then one row / one element of space is allocated in
 * every column so that the column pointers are valid even while the table is
 * empty. The increments are temporarily set to 1 for that allocation and
 * reset to 0 afterwards. `options` is unused.
 *
 * Returns 0 on success or the error from the failing expansion. */
int
tsk_mutation_table_init(tsk_mutation_table_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret;

    tsk_memset(self, 0, sizeof(*self));

    /* Increments of 1 for the initial allocation only. */
    self->max_rows_increment = 1;
    self->max_derived_state_length_increment = 1;
    self->max_metadata_length_increment = 1;

    ret = tsk_mutation_table_expand_main_columns(self, 1);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_mutation_table_expand_derived_state(self, 1);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_mutation_table_expand_metadata(self, 1);
    if (ret != 0) {
        return ret;
    }

    /* An empty ragged column has a single offset entry of zero. */
    self->derived_state_offset[0] = 0;
    self->metadata_offset[0] = 0;

    self->max_rows_increment = 0;
    self->max_derived_state_length_increment = 0;
    self->max_metadata_length_increment = 0;
    tsk_mutation_table_set_metadata_schema(self, NULL, 0);
    return ret;
}
/* Append one mutation to the table.
 *
 * `derived_state` and `metadata` are copied into the ragged columns
 * (`derived_state_length` and `metadata_length` elements respectively).
 *
 * Returns the ID (row index) of the new mutation, or a negative error code
 * from one of the expansion helpers.
 *
 * All capacity is reserved before any table state is modified. Previously the
 * derived_state length and offset were committed before the metadata column
 * was expanded, so a failure there left `derived_state_length` out of step
 * with `derived_state_offset[num_rows]`. */
tsk_id_t
tsk_mutation_table_add_row(tsk_mutation_table_t *self, tsk_id_t site, tsk_id_t node,
    tsk_id_t parent, double time, const char *derived_state,
    tsk_size_t derived_state_length, const char *metadata, tsk_size_t metadata_length)
{
    tsk_id_t ret;
    tsk_size_t derived_state_offset, metadata_offset;

    /* Phase 1: reserve space; nothing below may fail after this phase. */
    ret = tsk_mutation_table_expand_main_columns(self, 1);
    if (ret != 0) {
        goto out;
    }
    derived_state_offset = self->derived_state_length;
    tsk_bug_assert(self->derived_state_offset[self->num_rows] == derived_state_offset);
    metadata_offset = self->metadata_length;
    tsk_bug_assert(self->metadata_offset[self->num_rows] == metadata_offset);

    ret = tsk_mutation_table_expand_derived_state(self, derived_state_length);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_mutation_table_expand_metadata(self, metadata_length);
    if (ret != 0) {
        goto out;
    }

    /* Phase 2: commit the new row. */
    self->site[self->num_rows] = site;
    self->node[self->num_rows] = node;
    self->parent[self->num_rows] = parent;
    self->time[self->num_rows] = time;

    self->derived_state_length += derived_state_length;
    tsk_memmove(
        self->derived_state + derived_state_offset, derived_state, derived_state_length);
    self->derived_state_offset[self->num_rows + 1] = self->derived_state_length;

    self->metadata_length += metadata_length;
    tsk_memmove(self->metadata + metadata_offset, metadata, metadata_length);
    self->metadata_offset[self->num_rows + 1] = self->metadata_length;

    ret = (tsk_id_t) self->num_rows;
    self->num_rows++;
out:
    return ret;
}
/* Replace row `index` when the new ragged values differ in length from the
 * stored ones.
 *
 * The table is copied, truncated to `index` rows, the replacement row is
 * appended, and the rows after `index` are then re-appended from the copy.
 *
 * Returns 0 on success or a negative error code. Note that if a step after
 * the truncation fails, the table is left truncated.
 *
 * The address-of expressions on the table copy had been corrupted into a
 * stray "(c)"-style character in this source; they are restored here, and the
 * local is named `saved` so the text cannot be mistaken for a character
 * entity again. */
static int
tsk_mutation_table_update_row_rewrite(tsk_mutation_table_t *self, tsk_id_t index,
    tsk_id_t site, tsk_id_t node, tsk_id_t parent, double time,
    const char *derived_state, tsk_size_t derived_state_length, const char *metadata,
    tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_id_t j, ret_id;
    tsk_mutation_table_t saved;
    tsk_size_t num_rows;
    tsk_id_t *rows = NULL;

    /* tsk_mutation_table_copy initialises `saved` (it zeroes the struct
     * first), so the free at `out` is valid on every path. */
    ret = tsk_mutation_table_copy(self, &saved, 0);
    if (ret != 0) {
        goto out;
    }
    rows = tsk_malloc(self->num_rows * sizeof(*rows));
    if (rows == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* `index` is expected to be a valid row here, so truncation cannot fail. */
    ret = tsk_mutation_table_truncate(self, (tsk_size_t) index);
    tsk_bug_assert(ret == 0);
    ret_id = tsk_mutation_table_add_row(self, site, node, parent, time, derived_state,
        derived_state_length, metadata, metadata_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }

    /* Collect the IDs of the rows that followed `index` in the original. */
    num_rows = 0;
    for (j = index + 1; j < (tsk_id_t) saved.num_rows; j++) {
        rows[num_rows] = j;
        num_rows++;
    }
    ret = tsk_mutation_table_extend(self, &saved, num_rows, rows, 0);
    if (ret != 0) {
        goto out;
    }
out:
    tsk_mutation_table_free(&saved);
    tsk_safe_free(rows);
    return ret;
}
/* Overwrite row `index` with the supplied values.
 *
 * When both ragged values keep their current length, the row is updated in
 * place. Otherwise the table is rebuilt around the row by
 * tsk_mutation_table_update_row_rewrite.
 *
 * Returns 0 on success, the error from tsk_mutation_table_get_row if `index`
 * is not a valid row, or the error from the rewrite path.
 *
 * The address-of expression on the row struct had been corrupted in this
 * source; it is restored here with the local named `prev_row`. */
int
tsk_mutation_table_update_row(tsk_mutation_table_t *self, tsk_id_t index, tsk_id_t site,
    tsk_id_t node, tsk_id_t parent, double time, const char *derived_state,
    tsk_size_t derived_state_length, const char *metadata, tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_mutation_t prev_row;

    ret = tsk_mutation_table_get_row(self, index, &prev_row);
    if (ret != 0) {
        goto out;
    }
    if (prev_row.metadata_length == metadata_length
        && prev_row.derived_state_length == derived_state_length) {
        self->site[index] = site;
        self->node[index] = node;
        self->parent[index] = parent;
        self->time[index] = time;
        /* Note: important to use tsk_memmove here as we may be provided pointers
         * to the column memory as input via get_row */
        tsk_memmove(&self->derived_state[self->derived_state_offset[index]],
            derived_state, derived_state_length * sizeof(*derived_state));
        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,
            metadata_length * sizeof(*metadata));
    } else {
        ret = tsk_mutation_table_update_row_rewrite(self, index, site, node, parent,
            time, derived_state, derived_state_length, metadata, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Append `num_rows` mutations given in column form.
 *
 * `site`, `node`, `derived_state` and `derived_state_offset` are required.
 * `parent` may be NULL (every new row gets the null mutation as parent) and
 * `time` may be NULL (every new row gets TSK_UNKNOWN_TIME). `metadata` and
 * `metadata_offset` must be either both NULL (empty metadata for every new
 * row) or both non-NULL. Offset arrays hold `num_rows + 1` entries; the last
 * entry is used as the total data length.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE for the NULL-argument
 * violations above, or the error from check_offsets or an expansion helper.
 *
 * The function validates every input and reserves all capacity before it
 * modifies the table. Previously the metadata column was committed before the
 * derived_state offsets were checked, so a late failure left
 * `metadata_length` out of step with `metadata_offset[num_rows]`. */
int
tsk_mutation_table_append_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,
    const tsk_id_t *site, const tsk_id_t *node, const tsk_id_t *parent,
    const double *time, const char *derived_state,
    const tsk_size_t *derived_state_offset, const char *metadata,
    const tsk_size_t *metadata_offset)
{
    int ret = 0;
    tsk_size_t j, derived_state_length, metadata_length;

    if (site == NULL || node == NULL || derived_state == NULL
        || derived_state_offset == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    if ((metadata == NULL) != (metadata_offset == NULL)) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }

    /* Phase 1: validate the offsets. */
    metadata_length = 0;
    if (metadata != NULL) {
        ret = check_offsets(num_rows, metadata_offset, 0, false);
        if (ret != 0) {
            goto out;
        }
        metadata_length = metadata_offset[num_rows];
    }
    ret = check_offsets(num_rows, derived_state_offset, 0, false);
    if (ret != 0) {
        goto out;
    }
    derived_state_length = derived_state_offset[num_rows];

    /* Phase 2: reserve capacity. */
    ret = tsk_mutation_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        goto out;
    }
    if (metadata != NULL) {
        ret = tsk_mutation_table_expand_metadata(self, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
    ret = tsk_mutation_table_expand_derived_state(self, derived_state_length);
    if (ret != 0) {
        goto out;
    }

    /* Phase 3: commit. Nothing below can fail. */
    tsk_memcpy(self->site + self->num_rows, site, num_rows * sizeof(tsk_id_t));
    tsk_memcpy(self->node + self->num_rows, node, num_rows * sizeof(tsk_id_t));
    if (parent == NULL) {
        /* If parent is NULL, set all parents to the null mutation */
        tsk_memset(self->parent + self->num_rows, 0xff, num_rows * sizeof(tsk_id_t));
    } else {
        tsk_memcpy(self->parent + self->num_rows, parent, num_rows * sizeof(tsk_id_t));
    }
    if (time == NULL) {
        /* If time is NULL, set all times to TSK_UNKNOWN_TIME which is the
         * default */
        for (j = 0; j < num_rows; j++) {
            self->time[self->num_rows + j] = TSK_UNKNOWN_TIME;
        }
    } else {
        tsk_memcpy(self->time + self->num_rows, time, num_rows * sizeof(double));
    }

    /* Metadata column */
    if (metadata == NULL) {
        for (j = 0; j < num_rows; j++) {
            self->metadata_offset[self->num_rows + j + 1] = self->metadata_length;
        }
    } else {
        tsk_memcpy(self->metadata + self->metadata_length, metadata,
            metadata_length * sizeof(char));
        /* Incoming offsets are relative to their own data; rebase them onto
         * the end of the existing data. */
        for (j = 0; j < num_rows; j++) {
            self->metadata_offset[self->num_rows + j]
                = self->metadata_length + metadata_offset[j];
        }
        self->metadata_length += metadata_length;
    }
    self->metadata_offset[self->num_rows + num_rows] = self->metadata_length;

    /* Derived state column */
    tsk_memcpy(self->derived_state + self->derived_state_length, derived_state,
        derived_state_length * sizeof(char));
    for (j = 0; j < num_rows; j++) {
        self->derived_state_offset[self->num_rows + j]
            = self->derived_state_length + derived_state_offset[j];
    }
    self->derived_state_length += derived_state_length;
    self->derived_state_offset[self->num_rows + num_rows] = self->derived_state_length;

    self->num_rows += num_rows;
out:
    return ret;
}
/* Replace the table's columns with the caller's buffers, without copying.
 *
 * `site`, `node`, `derived_state` and `derived_state_offset` are required.
 * `parent` is handled by takeset_optional_id_column. When `time` is NULL a
 * new column filled with TSK_UNKNOWN_TIME is allocated instead. Both ragged
 * columns are validated with check_ragged_column before any existing column
 * is freed.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE if a required column is NULL,
 * TSK_ERR_NO_MEMORY if the default time column cannot be allocated, or the
 * error from one of the helper calls.
 *
 * The ragged capacity fields are now resynchronised to the adopted lengths,
 * mirroring what is done for `max_rows`. Leaving the old capacities in place
 * would describe buffers that have just been freed.
 * NOTE(review): this assumes each adopted data buffer holds at least
 * `offset[num_rows]` elements - confirm against takeset_ragged_column. */
int TSK_WARN_UNUSED
tsk_mutation_table_takeset_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,
    tsk_id_t *site, tsk_id_t *node, tsk_id_t *parent, double *time, char *derived_state,
    tsk_size_t *derived_state_offset, char *metadata, tsk_size_t *metadata_offset)
{
    tsk_size_t j;
    int ret = 0;

    if (site == NULL || node == NULL || derived_state == NULL
        || derived_state_offset == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    /* We need to check all the inputs before we start freeing or taking memory */
    ret = check_ragged_column(num_rows, derived_state, derived_state_offset);
    if (ret != 0) {
        goto out;
    }
    ret = check_ragged_column(num_rows, metadata, metadata_offset);
    if (ret != 0) {
        goto out;
    }

    tsk_mutation_table_free_columns(self);
    self->num_rows = num_rows;
    self->max_rows = num_rows;
    self->site = site;
    self->node = node;

    ret = takeset_optional_id_column(num_rows, parent, &self->parent);
    if (ret != 0) {
        goto out;
    }
    if (time == NULL) {
        /* Time defaults to unknown time if not specified. */
        self->time = tsk_malloc(num_rows * sizeof(*self->time));
        if (self->time == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        for (j = 0; j < num_rows; j++) {
            self->time[j] = TSK_UNKNOWN_TIME;
        }
    } else {
        self->time = time;
    }

    ret = takeset_ragged_column(num_rows, derived_state, derived_state_offset,
        (void *) &self->derived_state, &self->derived_state_offset,
        &self->derived_state_length);
    if (ret != 0) {
        goto out;
    }
    /* The recorded capacity must describe the new buffer, not the old one. */
    self->max_derived_state_length = self->derived_state_length;

    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,
        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);
    if (ret != 0) {
        goto out;
    }
    self->max_metadata_length = self->metadata_length;
out:
    return ret;
}
/* Copies every row of self, plus its metadata schema, into dest.
 *
 * dest is initialised with tsk_mutation_table_init first unless TSK_NO_INIT is
 * set in options. Returns 0 on success or the first error encountered. */
int TSK_WARN_UNUSED
tsk_mutation_table_copy(
    const tsk_mutation_table_t *self, tsk_mutation_table_t *dest, tsk_flags_t options)
{
    int ret;
    const bool needs_init = (options & TSK_NO_INIT) == 0;

    if (needs_init) {
        ret = tsk_mutation_table_init(dest, 0);
        if (ret != 0) {
            return ret;
        }
    }
    ret = tsk_mutation_table_set_columns(dest, self->num_rows, self->site, self->node,
        self->parent, self->time, self->derived_state, self->derived_state_offset,
        self->metadata, self->metadata_offset);
    if (ret != 0) {
        return ret;
    }
    return tsk_mutation_table_set_metadata_schema(
        dest, self->metadata_schema, self->metadata_schema_length);
}
/* Replaces the table contents with copies of the given columns: the table is
 * cleared and the columns are then appended. Returns 0 on success or the error
 * code from whichever step failed. */
int
tsk_mutation_table_set_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,
    const tsk_id_t *site, const tsk_id_t *node, const tsk_id_t *parent,
    const double *time, const char *derived_state,
    const tsk_size_t *derived_state_offset, const char *metadata,
    const tsk_size_t *metadata_offset)
{
    int ret = tsk_mutation_table_clear(self);

    if (ret == 0) {
        ret = tsk_mutation_table_append_columns(self, num_rows, site, node, parent,
            time, derived_state, derived_state_offset, metadata, metadata_offset);
    }
    return ret;
}
/* Returns true when the two mutation tables hold identical data.
 *
 * The row counts and derived_state lengths must match, and the site, node,
 * parent, time, derived_state_offset and derived_state columns are compared
 * with tsk_memcmp (NOTE(review): presumably a bytewise memcmp, so the time
 * column is compared by bit pattern rather than by floating point value).
 *
 * Unless TSK_CMP_IGNORE_METADATA is set in options, the metadata column, its
 * offsets and the metadata schema must also match. */
bool
tsk_mutation_table_equals(const tsk_mutation_table_t *self,
    const tsk_mutation_table_t *other, tsk_flags_t options)
{
    bool ret
        = self->num_rows == other->num_rows
          && self->derived_state_length == other->derived_state_length
          && tsk_memcmp(self->site, other->site, self->num_rows * sizeof(tsk_id_t)) == 0
          && tsk_memcmp(self->node, other->node, self->num_rows * sizeof(tsk_id_t)) == 0
          && tsk_memcmp(self->parent, other->parent, self->num_rows * sizeof(tsk_id_t))
                 == 0
          && tsk_memcmp(self->time, other->time, self->num_rows * sizeof(double)) == 0
          && tsk_memcmp(self->derived_state_offset, other->derived_state_offset,
                 (self->num_rows + 1) * sizeof(tsk_size_t))
                 == 0
          && tsk_memcmp(self->derived_state, other->derived_state,
                 self->derived_state_length * sizeof(char))
                 == 0;

    if (!(options & TSK_CMP_IGNORE_METADATA)) {
        /* Lengths are checked before contents so that each memcmp only ever
         * runs over buffers known to be the same size. */
        ret = ret && self->metadata_length == other->metadata_length
              && self->metadata_schema_length == other->metadata_schema_length
              && tsk_memcmp(self->metadata_offset, other->metadata_offset,
                     (self->num_rows + 1) * sizeof(tsk_size_t))
                     == 0
              && tsk_memcmp(self->metadata, other->metadata,
                     self->metadata_length * sizeof(char))
                     == 0
              && tsk_memcmp(self->metadata_schema, other->metadata_schema,
                     self->metadata_schema_length * sizeof(char))
                     == 0;
    }
    return ret;
}
/* Removes all rows by truncating the table to zero rows. */
int
tsk_mutation_table_clear(tsk_mutation_table_t *self)
{
    const tsk_size_t no_rows = 0;

    return tsk_mutation_table_truncate(self, no_rows);
}
/* Shrinks the table to its first num_rows rows.
 *
 * Returns TSK_ERR_BAD_TABLE_POSITION if num_rows exceeds the current row
 * count, otherwise 0. The ragged column lengths are reset from the offset
 * stored at position num_rows. */
int
tsk_mutation_table_truncate(tsk_mutation_table_t *mutations, tsk_size_t num_rows)
{
    if (num_rows > mutations->num_rows) {
        return tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);
    }
    mutations->derived_state_length = mutations->derived_state_offset[num_rows];
    mutations->metadata_length = mutations->metadata_offset[num_rows];
    mutations->num_rows = num_rows;
    return 0;
}
/* Appends num_rows rows taken from other to the end of self.
 *
 * When row_indexes is NULL the rows 0..num_rows-1 of other are used in order;
 * otherwise row_indexes[k] names the source row for the k-th appended row.
 * Extending a table from itself is rejected with
 * TSK_ERR_CANNOT_EXTEND_FROM_SELF. Returns 0 on success or the first error
 * reported while fetching or adding a row. */
int
tsk_mutation_table_extend(tsk_mutation_table_t *self, const tsk_mutation_table_t *other,
    tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))
{
    int ret;
    tsk_size_t k;
    tsk_id_t source_row, new_id;
    tsk_mutation_t row;

    if (self == other) {
        return tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);
    }
    /* The fixed-width columns grow by a known amount, so reserve it once. */
    ret = tsk_mutation_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        return ret;
    }
    for (k = 0; k < num_rows; k++) {
        if (row_indexes == NULL) {
            source_row = (tsk_id_t) k;
        } else {
            source_row = row_indexes[k];
        }
        ret = tsk_mutation_table_get_row(other, source_row, &row);
        if (ret != 0) {
            return ret;
        }
        new_id = tsk_mutation_table_add_row(self, row.site, row.node, row.parent,
            row.time, row.derived_state, row.derived_state_length, row.metadata,
            row.metadata_length);
        if (new_id < 0) {
            return (int) new_id;
        }
    }
    return 0;
}
void
tsk_mutation_table_print_state(const tsk_mutation_table_t *self, FILE *out)
{
int ret;
fprintf(out, "\n" TABLE_SEP);
fprintf(out, "mutation_table: %p:\n", (const void *) self);
fprintf(out, "num_rows = %lld\tmax= %lld\tincrement = %lld)\n",
(long long) self->num_rows, (long long) self->max_rows,
(long long) self->max_rows_increment);
fprintf(out, "derived_state_length = %lld\tmax= %lld\tincrement = %lld)\n",
(long long) self->derived_state_length,
(long long) self->max_derived_state_length,
(long long) self->max_derived_state_length_increment);
fprintf(out, "metadata_length = %lld\tmax= %lld\tincrement = %lld)\n",
(long long) self->metadata_length, (long long) self->max_metadata_length,
(long long) self->max_metadata_length_increment);
fprintf(out, TABLE_SEP);
ret = tsk_mutation_table_dump_text(self, out);
tsk_bug_assert(ret == 0);
tsk_bug_assert(self->derived_state_offset[0] == 0);
tsk_bug_assert(
self->derived_state_length == self->derived_state_offset[self->num_rows]);
tsk_bug_assert(self->metadata_offset[0] == 0);
tsk_bug_assert(self->metadata_length == self->metadata_offset[self->num_rows]);
}
/* Fills row with the contents of the mutation at index, without any bounds
 * checking. The derived_state and metadata pointers point into the table's own
 * column memory; the edge field is set to TSK_NULL. */
static inline void
tsk_mutation_table_get_row_unsafe(
    const tsk_mutation_table_t *self, tsk_id_t index, tsk_mutation_t *row)
{
    const tsk_size_t state_begin = self->derived_state_offset[index];
    const tsk_size_t state_end = self->derived_state_offset[index + 1];
    const tsk_size_t metadata_begin = self->metadata_offset[index];
    const tsk_size_t metadata_end = self->metadata_offset[index + 1];

    row->id = (tsk_id_t) index;
    row->site = self->site[index];
    row->node = self->node[index];
    row->parent = self->parent[index];
    row->time = self->time[index];
    row->derived_state_length = state_end - state_begin;
    row->derived_state = self->derived_state + state_begin;
    row->metadata_length = metadata_end - metadata_begin;
    row->metadata = self->metadata + metadata_begin;
    row->edge = TSK_NULL;
}
/* Bounds-checked row lookup. Returns TSK_ERR_MUTATION_OUT_OF_BOUNDS when index
 * is negative or not less than the row count; otherwise fills row and
 * returns 0. */
int
tsk_mutation_table_get_row(
    const tsk_mutation_table_t *self, tsk_id_t index, tsk_mutation_t *row)
{
    const bool in_range = index >= 0 && index < (tsk_id_t) self->num_rows;

    if (!in_range) {
        return tsk_trace_error(TSK_ERR_MUTATION_OUT_OF_BOUNDS);
    }
    tsk_mutation_table_get_row_unsafe(self, index, row);
    return 0;
}
/* Replaces the table's metadata schema with the given string of
 * metadata_schema_length bytes, via replace_string. Returns its result. */
int
tsk_mutation_table_set_metadata_schema(tsk_mutation_table_t *self,
    const char *metadata_schema, tsk_size_t metadata_schema_length)
{
    int ret;

    ret = replace_string(&self->metadata_schema, &self->metadata_schema_length,
        metadata_schema, metadata_schema_length);
    return ret;
}
int
tsk_mutation_table_dump_text(const tsk_mutation_table_t *self, FILE *out)
{
int ret = TSK_ERR_IO;
int err;
tsk_size_t j, derived_state_len, metadata_len;
err = write_metadata_schema_header(
out, self->metadata_schema, self->metadata_schema_length);
if (err < 0) {
goto out;
}
err = fprintf(out, "id\tsite\tnode\tparent\ttime\tderived_state\tmetadata\n");
if (err < 0) {
goto out;
}
for (j = 0; j < self->num_rows; j++) {
derived_state_len
= self->derived_state_offset[j + 1] - self->derived_state_offset[j];
metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];
err = fprintf(out, "%lld\t%lld\t%lld\t%lld\t%f\t%.*s\t%.*s\n", (long long) j,
(long long) self->site[j], (long long) self->node[j],
(long long) self->parent[j], self->time[j], (int) derived_state_len,
self->derived_state + self->derived_state_offset[j], (int) metadata_len,
self->metadata + self->metadata_offset[j]);
if (err < 0) {
goto out;
}
}
ret = 0;
out:
return ret;
}
/* Removes, in place, every row j for which keep[j] is false, and remaps the
 * parent column so that it refers to the new row IDs.
 *
 * If ret_id_map is non-NULL it is used as the old-ID -> new-ID map and is left
 * filled in for the caller; otherwise a temporary map is allocated and freed
 * here. Fails with TSK_ERR_MUTATION_OUT_OF_BOUNDS if a kept row has a parent
 * outside the table, and with TSK_ERR_KEEP_ROWS_MAP_TO_DELETED if a kept row's
 * parent is itself being removed. Both checks run before any column is
 * modified. Returns 0 on success. */
int
tsk_mutation_table_keep_rows(tsk_mutation_table_t *self, const tsk_bool_t *keep,
tsk_flags_t TSK_UNUSED(options), tsk_id_t *ret_id_map)
{
int ret = 0;
const tsk_size_t current_num_rows = self->num_rows;
tsk_size_t j, remaining_rows;
tsk_id_t pj;
tsk_id_t *id_map = ret_id_map;
tsk_id_t *restrict parent = self->parent;
if (ret_id_map == NULL) {
/* The caller does not want the map back, but one is still needed to remap
 * the parent column, so use a scratch buffer. */
id_map = tsk_malloc(current_num_rows * sizeof(*id_map));
if (id_map == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
}
keep_mask_to_id_map(current_num_rows, keep, id_map);
/* Note: we could add some options to avoid these checks if we wanted.
* MAP_DELETED_TO_NULL is an obvious one, and I guess it might be
* helpful to also provide NO_REMAP to prevent reference remapping
* entirely. */
/* Validate the parent references of every kept row before touching any
 * column, so that an error does not leave the table half-compacted. */
for (j = 0; j < current_num_rows; j++) {
if (keep[j]) {
pj = parent[j];
if (pj != TSK_NULL) {
if (pj < 0 || pj >= (tsk_id_t) current_num_rows) {
ret = tsk_trace_error(TSK_ERR_MUTATION_OUT_OF_BOUNDS);
goto out;
}
if (id_map[pj] == TSK_NULL) {
ret = tsk_trace_error(TSK_ERR_KEEP_ROWS_MAP_TO_DELETED);
goto out;
}
}
}
}
/* Compact each column; the value returned for the first column is used as
 * the new row count. */
remaining_rows = subset_id_column(self->site, current_num_rows, keep);
subset_id_column(self->node, current_num_rows, keep);
subset_remap_id_column(parent, current_num_rows, keep, id_map);
subset_double_column(self->time, current_num_rows, keep);
self->derived_state_length = subset_ragged_char_column(
self->derived_state, self->derived_state_offset, current_num_rows, keep);
/* The metadata column is only compacted when it actually holds data. */
if (self->metadata_length > 0) {
self->metadata_length = subset_ragged_char_column(
self->metadata, self->metadata_offset, current_num_rows, keep);
}
self->num_rows = remaining_rows;
out:
/* Only free the map if it was allocated here rather than supplied. */
if (ret_id_map == NULL) {
tsk_safe_free(id_map);
}
return ret;
}
/* Writes the mutation table to the kastore under the "mutations/" key prefix
 * by describing its columns to write_table. Returns write_table's result. */
static int
tsk_mutation_table_dump(
const tsk_mutation_table_t *self, kastore_t *store, tsk_flags_t options)
{
/* Fixed-width columns plus the metadata schema; the entry with a NULL name
 * terminates the list. */
const write_table_col_t cols[] = {
{ "mutations/site", (void *) self->site, self->num_rows, TSK_ID_STORAGE_TYPE },
{ "mutations/node", (void *) self->node, self->num_rows, TSK_ID_STORAGE_TYPE },
{ "mutations/parent", (void *) self->parent, self->num_rows,
TSK_ID_STORAGE_TYPE },
{ "mutations/time", (void *) self->time, self->num_rows, KAS_FLOAT64 },
{ "mutations/metadata_schema", (void *) self->metadata_schema,
self->metadata_schema_length, KAS_UINT8 },
{ .name = NULL },
};
/* Ragged columns: the data buffer with its length, and the offset column
 * with the row count. */
const write_table_ragged_col_t ragged_cols[] = {
{ "mutations/derived_state", (void *) self->derived_state,
self->derived_state_length, KAS_UINT8, self->derived_state_offset,
self->num_rows },
{ "mutations/metadata", (void *) self->metadata, self->metadata_length,
KAS_UINT8, self->metadata_offset, self->num_rows },
{ .name = NULL },
};
return write_table(store, cols, ragged_cols, options);
}
/* Reads the mutation table from the kastore keys under "mutations/" and
 * installs the columns into self via tsk_mutation_table_takeset_columns.
 * The time column and the metadata schema are flagged TSK_COL_OPTIONAL.
 * Returns 0 on success or the first error encountered. */
static int
tsk_mutation_table_load(tsk_mutation_table_t *self, kastore_t *store)
{
int ret = 0;
tsk_id_t *node = NULL;
tsk_id_t *site = NULL;
tsk_id_t *parent = NULL;
double *time = NULL;
char *derived_state = NULL;
tsk_size_t *derived_state_offset = NULL;
char *metadata = NULL;
tsk_size_t *metadata_offset = NULL;
char *metadata_schema = NULL;
tsk_size_t num_rows, derived_state_length, metadata_length, metadata_schema_length;
/* Each descriptor holds the address of the local pointer that receives the
 * column; the entry with a NULL name terminates each list. */
read_table_col_t cols[] = {
{ "mutations/site", (void **) &site, TSK_ID_STORAGE_TYPE, 0 },
{ "mutations/node", (void **) &node, TSK_ID_STORAGE_TYPE, 0 },
{ "mutations/parent", (void **) &parent, TSK_ID_STORAGE_TYPE, 0 },
{ "mutations/time", (void **) &time, KAS_FLOAT64, TSK_COL_OPTIONAL },
{ .name = NULL },
};
read_table_ragged_col_t ragged_cols[] = {
{ "mutations/derived_state", (void **) &derived_state, &derived_state_length,
KAS_UINT8, &derived_state_offset, 0 },
{ "mutations/metadata", (void **) &metadata, &metadata_length, KAS_UINT8,
&metadata_offset, 0 },
{ .name = NULL },
};
read_table_property_t properties[] = {
{ "mutations/metadata_schema", (void **) &metadata_schema,
&metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },
{ .name = NULL },
};
ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);
if (ret != 0) {
goto out;
}
/* The schema is optional, so only set it when one was read. */
if (metadata_schema != NULL) {
ret = tsk_mutation_table_set_metadata_schema(
self, metadata_schema, metadata_schema_length);
if (ret != 0) {
goto out;
}
}
ret = tsk_mutation_table_takeset_columns(self, num_rows, site, node, parent, time,
derived_state, derived_state_offset, metadata, metadata_offset);
if (ret != 0) {
goto out;
}
/* The table now owns these buffers. Clearing the locals presumably stops
 * free_read_table_mem, which sees them through the descriptors above, from
 * releasing them -- TODO confirm against free_read_table_mem. */
site = NULL;
node = NULL;
parent = NULL;
time = NULL;
derived_state = NULL;
derived_state_offset = NULL;
metadata = NULL;
metadata_offset = NULL;
out:
free_read_table_mem(cols, ragged_cols, properties);
return ret;
}
/*************************
* migration table
*************************/
/* Releases every column buffer of the migration table. The metadata schema is
 * not touched here. */
static void
tsk_migration_table_free_columns(tsk_migration_table_t *self)
{
    /* Coordinate and time columns. */
    tsk_safe_free(self->left);
    tsk_safe_free(self->right);
    tsk_safe_free(self->time);
    /* ID columns. */
    tsk_safe_free(self->node);
    tsk_safe_free(self->source);
    tsk_safe_free(self->dest);
    /* Ragged metadata column. */
    tsk_safe_free(self->metadata);
    tsk_safe_free(self->metadata_offset);
}
/* Frees all memory held by the table: the metadata schema and every column.
 * Always returns 0. */
int
tsk_migration_table_free(tsk_migration_table_t *self)
{
    tsk_safe_free(self->metadata_schema);
    tsk_migration_table_free_columns(self);
    return 0;
}
/* Ensures the fixed-width columns (and the metadata offset column) have room
 * for additional_rows more rows.
 *
 * The new capacity is obtained from calculate_max_rows; columns are only
 * reallocated when the requested rows do not fit in the current capacity.
 * max_rows is updated only after every column has been expanded. Returns 0 on
 * success or the first error reported. */
static int
tsk_migration_table_expand_main_columns(
    tsk_migration_table_t *self, tsk_size_t additional_rows)
{
    int ret;
    tsk_size_t capacity;

    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,
        additional_rows, &capacity);
    if (ret != 0) {
        return ret;
    }
    if ((self->num_rows + additional_rows) <= self->max_rows) {
        /* Already enough room. */
        return 0;
    }

    ret = expand_column((void **) &self->left, capacity, sizeof(double));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->right, capacity, sizeof(double));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->node, capacity, sizeof(tsk_id_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->source, capacity, sizeof(tsk_id_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->dest, capacity, sizeof(tsk_id_t));
    if (ret != 0) {
        return ret;
    }
    ret = expand_column((void **) &self->time, capacity, sizeof(double));
    if (ret != 0) {
        return ret;
    }
    /* The offset column has one more entry than there are rows. */
    ret = expand_column(
        (void **) &self->metadata_offset, capacity + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    self->max_rows = capacity;
    return 0;
}
/* Ensures the metadata buffer can take additional_length more bytes, by
 * delegating to expand_ragged_column. Returns its result. */
static int
tsk_migration_table_expand_metadata(
    tsk_migration_table_t *self, tsk_size_t additional_length)
{
    int ret;

    ret = expand_ragged_column(self->metadata_length, additional_length,
        self->max_metadata_length_increment, &self->max_metadata_length,
        (void **) &self->metadata, sizeof(*self->metadata));
    return ret;
}
/* Stores the row-capacity increment that is later passed to
 * calculate_max_rows when the table grows. Always returns 0. */
int
tsk_migration_table_set_max_rows_increment(
    tsk_migration_table_t *self, tsk_size_t max_rows_increment)
{
    int ret = 0;

    self->max_rows_increment = max_rows_increment;
    return ret;
}
/* Stores the metadata-buffer increment that is later passed to
 * expand_ragged_column when the metadata column grows. Always returns 0. */
int
tsk_migration_table_set_max_metadata_length_increment(
    tsk_migration_table_t *self, tsk_size_t max_metadata_length_increment)
{
    int ret = 0;

    self->max_metadata_length_increment = max_metadata_length_increment;
    return ret;
}
/* Initialises an empty migration table.
 *
 * The struct is zeroed, then space for a single row and a single metadata byte
 * is allocated so that the column pointers are valid even while the table is
 * empty. The growth increments are set to 1 only for that initial allocation
 * and are reset to 0 afterwards. Returns 0 on success or the allocation
 * error. */
int
tsk_migration_table_init(tsk_migration_table_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret;

    tsk_memset(self, 0, sizeof(tsk_migration_table_t));

    /* Temporary increments, used only for the initial one-row allocation. */
    self->max_rows_increment = 1;
    self->max_metadata_length_increment = 1;
    ret = tsk_migration_table_expand_main_columns(self, 1);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_migration_table_expand_metadata(self, 1);
    if (ret != 0) {
        return ret;
    }
    self->metadata_offset[0] = 0;
    self->max_rows_increment = 0;
    self->max_metadata_length_increment = 0;
    tsk_migration_table_set_metadata_schema(self, NULL, 0);
    return ret;
}
/* Appends num_rows rows, copied from the given columns, to the table.
 *
 * left, right, node, source, dest and time are required. metadata and
 * metadata_offset must either both be given or both be NULL; when both are
 * NULL the appended rows get empty metadata. Supplied offsets are validated
 * with check_offsets and rebased onto the end of the existing metadata.
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE for invalid NULL combinations,
 * or the error from a failed validation or allocation. */
int
tsk_migration_table_append_columns(tsk_migration_table_t *self, tsk_size_t num_rows,
    const double *left, const double *right, const tsk_id_t *node,
    const tsk_id_t *source, const tsk_id_t *dest, const double *time,
    const char *metadata, const tsk_size_t *metadata_offset)
{
    int ret;
    tsk_size_t k, first_new, appended_length;

    if (left == NULL || right == NULL || node == NULL || source == NULL || dest == NULL
        || time == NULL) {
        return tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
    }
    if ((metadata == NULL) != (metadata_offset == NULL)) {
        return tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
    }
    ret = tsk_migration_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        return ret;
    }

    first_new = self->num_rows;
    tsk_memcpy(self->left + first_new, left, num_rows * sizeof(double));
    tsk_memcpy(self->right + first_new, right, num_rows * sizeof(double));
    tsk_memcpy(self->node + first_new, node, num_rows * sizeof(tsk_id_t));
    tsk_memcpy(self->source + first_new, source, num_rows * sizeof(tsk_id_t));
    tsk_memcpy(self->dest + first_new, dest, num_rows * sizeof(tsk_id_t));
    tsk_memcpy(self->time + first_new, time, num_rows * sizeof(double));

    if (metadata != NULL) {
        ret = check_offsets(num_rows, metadata_offset, 0, false);
        if (ret != 0) {
            return ret;
        }
        /* Rebase the incoming offsets onto the end of the current metadata. */
        for (k = 0; k < num_rows; k++) {
            self->metadata_offset[first_new + k]
                = (tsk_size_t) self->metadata_length + metadata_offset[k];
        }
        appended_length = metadata_offset[num_rows];
        ret = tsk_migration_table_expand_metadata(self, appended_length);
        if (ret != 0) {
            return ret;
        }
        tsk_memcpy(self->metadata + self->metadata_length, metadata,
            appended_length * sizeof(char));
        self->metadata_length += appended_length;
    } else {
        /* No metadata supplied: every new row ends where the metadata
         * currently ends, i.e. has zero length. */
        for (k = 0; k < num_rows; k++) {
            self->metadata_offset[first_new + k + 1] = self->metadata_length;
        }
    }
    self->num_rows += num_rows;
    self->metadata_offset[self->num_rows] = self->metadata_length;
    return 0;
}
/* Hands the supplied column buffers to this migration table, replacing the
 * columns it held before.
 *
 * left, right, node, source, dest and time are required; a NULL in any of them
 * yields TSK_ERR_BAD_PARAM_VALUE. The metadata column is validated before the
 * existing columns are freed, so a validation failure returns before the table
 * is modified. Returns 0 on success or the first error encountered. */
int TSK_WARN_UNUSED
tsk_migration_table_takeset_columns(tsk_migration_table_t *self, tsk_size_t num_rows,
    double *left, double *right, tsk_id_t *node, tsk_id_t *source, tsk_id_t *dest,
    double *time, char *metadata, tsk_size_t *metadata_offset)
{
    int ret;

    if (left == NULL || right == NULL || node == NULL || source == NULL || dest == NULL
        || time == NULL) {
        return tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
    }
    /* All inputs must be checked before anything is freed or adopted. */
    ret = check_ragged_column(num_rows, metadata, metadata_offset);
    if (ret != 0) {
        return ret;
    }

    tsk_migration_table_free_columns(self);
    self->left = left;
    self->right = right;
    self->node = node;
    self->source = source;
    self->dest = dest;
    self->time = time;
    self->num_rows = num_rows;
    self->max_rows = num_rows;

    return takeset_ragged_column(num_rows, metadata, metadata_offset,
        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);
}
/* Copies every row of self, plus its metadata schema, into dest.
 *
 * dest is initialised with tsk_migration_table_init first unless TSK_NO_INIT
 * is set in options. Returns 0 on success or the first error encountered. */
int TSK_WARN_UNUSED
tsk_migration_table_copy(
    const tsk_migration_table_t *self, tsk_migration_table_t *dest, tsk_flags_t options)
{
    int ret;
    const bool needs_init = (options & TSK_NO_INIT) == 0;

    if (needs_init) {
        ret = tsk_migration_table_init(dest, 0);
        if (ret != 0) {
            return ret;
        }
    }
    ret = tsk_migration_table_set_columns(dest, self->num_rows, self->left, self->right,
        self->node, self->source, self->dest, self->time, self->metadata,
        self->metadata_offset);
    if (ret != 0) {
        return ret;
    }
    return tsk_migration_table_set_metadata_schema(
        dest, self->metadata_schema, self->metadata_schema_length);
}
/* Replaces the table contents with copies of the given columns: the table is
 * cleared and the columns are then appended. Returns 0 on success or the error
 * code from whichever step failed. */
int
tsk_migration_table_set_columns(tsk_migration_table_t *self, tsk_size_t num_rows,
    const double *left, const double *right, const tsk_id_t *node,
    const tsk_id_t *source, const tsk_id_t *dest, const double *time,
    const char *metadata, const tsk_size_t *metadata_offset)
{
    int ret = tsk_migration_table_clear(self);

    if (ret == 0) {
        ret = tsk_migration_table_append_columns(self, num_rows, left, right, node,
            source, dest, time, metadata, metadata_offset);
    }
    return ret;
}
/* Appends a single migration row, copying metadata_length bytes of metadata.
 * Returns the ID of the new row (its index in the table), or the error code
 * from a failed expansion. */
tsk_id_t
tsk_migration_table_add_row(tsk_migration_table_t *self, double left, double right,
    tsk_id_t node, tsk_id_t source, tsk_id_t dest, double time, const char *metadata,
    tsk_size_t metadata_length)
{
    tsk_id_t ret;
    tsk_size_t new_row;

    ret = tsk_migration_table_expand_main_columns(self, 1);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_migration_table_expand_metadata(self, metadata_length);
    if (ret != 0) {
        return ret;
    }
    tsk_bug_assert(self->num_rows < self->max_rows);
    tsk_bug_assert(self->metadata_length + metadata_length <= self->max_metadata_length);

    new_row = self->num_rows;
    /* memmove rather than memcpy: the source may overlap the column memory. */
    tsk_memmove(self->metadata + self->metadata_length, metadata, metadata_length);
    self->left[new_row] = left;
    self->right[new_row] = right;
    self->node[new_row] = node;
    self->source[new_row] = source;
    self->dest[new_row] = dest;
    self->time[new_row] = time;
    self->metadata_length += metadata_length;
    self->metadata_offset[new_row + 1] = self->metadata_length;
    self->num_rows = new_row + 1;
    return (tsk_id_t) new_row;
}
/* Replaces row `index` with the given values by rebuilding the tail of the
 * table. This is the slow path of tsk_migration_table_update_row, taken when
 * the new metadata has a different length from the old one.
 *
 * The table is copied, truncated to `index` rows, the new row is appended, and
 * the rows that followed `index` are then re-appended from the copy. Returns 0
 * on success or the first error encountered; the copy and the scratch row list
 * are released on every path. */
static int
tsk_migration_table_update_row_rewrite(tsk_migration_table_t *self, tsk_id_t index,
    double left, double right, tsk_id_t node, tsk_id_t source, tsk_id_t dest,
    double time, const char *metadata, tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_id_t j, ret_id;
    tsk_migration_table_t copy;
    tsk_size_t num_rows;
    tsk_id_t *rows = NULL;

    ret = tsk_migration_table_copy(self, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    rows = tsk_malloc(self->num_rows * sizeof(*rows));
    if (rows == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* index is assumed to have been validated by the caller, so truncating to
     * it cannot fail. */
    ret = tsk_migration_table_truncate(self, (tsk_size_t) index);
    tsk_bug_assert(ret == 0);
    ret_id = tsk_migration_table_add_row(
        self, left, right, node, source, dest, time, metadata, metadata_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }
    /* Collect the IDs of the rows after `index` and restore them from the copy. */
    num_rows = 0;
    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {
        rows[num_rows] = j;
        num_rows++;
    }
    ret = tsk_migration_table_extend(self, &copy, num_rows, rows, 0);
    if (ret != 0) {
        goto out;
    }
out:
    tsk_migration_table_free(&copy);
    tsk_safe_free(rows);
    return ret;
}
/* Overwrites the row at `index` with the given values.
 *
 * When the new metadata has the same length as the existing metadata the row
 * is updated in place; otherwise the table is rebuilt from `index` onwards by
 * tsk_migration_table_update_row_rewrite. Returns 0 on success, the error from
 * tsk_migration_table_get_row if `index` is out of bounds, or the error from
 * the rewrite. */
int
tsk_migration_table_update_row(tsk_migration_table_t *self, tsk_id_t index, double left,
    double right, tsk_id_t node, tsk_id_t source, tsk_id_t dest, double time,
    const char *metadata, tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_migration_t current_row;

    /* Fetching the row both validates index and gives the old metadata length. */
    ret = tsk_migration_table_get_row(self, index, &current_row);
    if (ret != 0) {
        goto out;
    }
    if (current_row.metadata_length == metadata_length) {
        self->left[index] = left;
        self->right[index] = right;
        self->node[index] = node;
        self->source[index] = source;
        self->dest[index] = dest;
        self->time[index] = time;
        /* Note: important to use tsk_memmove here as we may be provided pointers
         * to the column memory as input via get_row */
        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,
            metadata_length * sizeof(*metadata));
    } else {
        ret = tsk_migration_table_update_row_rewrite(self, index, left, right, node,
            source, dest, time, metadata, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Removes all rows by truncating the table to zero rows. */
int
tsk_migration_table_clear(tsk_migration_table_t *self)
{
    const tsk_size_t no_rows = 0;

    return tsk_migration_table_truncate(self, no_rows);
}
/* Shrinks the table to its first num_rows rows.
 *
 * Returns TSK_ERR_BAD_TABLE_POSITION if num_rows exceeds the current row
 * count, otherwise 0. The metadata length is reset from the offset stored at
 * position num_rows. */
int
tsk_migration_table_truncate(tsk_migration_table_t *self, tsk_size_t num_rows)
{
    if (num_rows > self->num_rows) {
        return tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);
    }
    self->metadata_length = self->metadata_offset[num_rows];
    self->num_rows = num_rows;
    return 0;
}
/* Appends num_rows rows taken from other to the end of self.
 *
 * When row_indexes is NULL the rows 0..num_rows-1 of other are used in order;
 * otherwise row_indexes[k] names the source row for the k-th appended row.
 * Extending a table from itself is rejected with
 * TSK_ERR_CANNOT_EXTEND_FROM_SELF. Returns 0 on success or the first error
 * reported while fetching or adding a row. */
int
tsk_migration_table_extend(tsk_migration_table_t *self,
    const tsk_migration_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes,
    tsk_flags_t TSK_UNUSED(options))
{
    int ret;
    tsk_size_t k;
    tsk_id_t source_row, new_id;
    tsk_migration_t row;

    if (self == other) {
        return tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);
    }
    /* The fixed-width columns grow by a known amount, so reserve it once. */
    ret = tsk_migration_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        return ret;
    }
    for (k = 0; k < num_rows; k++) {
        if (row_indexes == NULL) {
            source_row = (tsk_id_t) k;
        } else {
            source_row = row_indexes[k];
        }
        ret = tsk_migration_table_get_row(other, source_row, &row);
        if (ret != 0) {
            return ret;
        }
        new_id = tsk_migration_table_add_row(self, row.left, row.right, row.node,
            row.source, row.dest, row.time, row.metadata, row.metadata_length);
        if (new_id < 0) {
            return (int) new_id;
        }
    }
    return 0;
}
/* Writes a debugging summary of the table to out: the row and metadata
 * capacities followed by the text dump of the rows. Asserts that the text dump
 * succeeded. */
void
tsk_migration_table_print_state(const tsk_migration_table_t *self, FILE *out)
{
    int status;

    fprintf(out, "\n" TABLE_SEP);
    fprintf(out, "migration_table: %p:\n", (const void *) self);
    fprintf(out, "num_rows = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->num_rows, (long long) self->max_rows,
        (long long) self->max_rows_increment);
    fprintf(out, "metadata_length = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->metadata_length, (long long) self->max_metadata_length,
        (long long) self->max_metadata_length_increment);
    fprintf(out, TABLE_SEP);

    status = tsk_migration_table_dump_text(self, out);
    tsk_bug_assert(status == 0);
}
/* Fills row with the contents of the migration at index, without any bounds
 * checking. The metadata pointer points into the table's own column memory. */
static inline void
tsk_migration_table_get_row_unsafe(
    const tsk_migration_table_t *self, tsk_id_t index, tsk_migration_t *row)
{
    const tsk_size_t metadata_begin = self->metadata_offset[index];
    const tsk_size_t metadata_end = self->metadata_offset[index + 1];

    row->id = (tsk_id_t) index;
    row->left = self->left[index];
    row->right = self->right[index];
    row->node = self->node[index];
    row->source = self->source[index];
    row->dest = self->dest[index];
    row->time = self->time[index];
    row->metadata_length = metadata_end - metadata_begin;
    row->metadata = self->metadata + metadata_begin;
}
/* Bounds-checked row lookup. Returns TSK_ERR_MIGRATION_OUT_OF_BOUNDS when
 * index is negative or not less than the row count; otherwise fills row and
 * returns 0. */
int
tsk_migration_table_get_row(
    const tsk_migration_table_t *self, tsk_id_t index, tsk_migration_t *row)
{
    const bool in_range = index >= 0 && index < (tsk_id_t) self->num_rows;

    if (!in_range) {
        return tsk_trace_error(TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
    }
    tsk_migration_table_get_row_unsafe(self, index, row);
    return 0;
}
/* Replaces the table's metadata schema with the given string of
 * metadata_schema_length bytes, via replace_string. Returns its result. */
int
tsk_migration_table_set_metadata_schema(tsk_migration_table_t *self,
    const char *metadata_schema, tsk_size_t metadata_schema_length)
{
    int ret;

    ret = replace_string(&self->metadata_schema, &self->metadata_schema_length,
        metadata_schema, metadata_schema_length);
    return ret;
}
int
tsk_migration_table_dump_text(const tsk_migration_table_t *self, FILE *out)
{
tsk_size_t j;
int ret = TSK_ERR_IO;
tsk_size_t metadata_len;
int err;
err = write_metadata_schema_header(
out, self->metadata_schema, self->metadata_schema_length);
if (err < 0) {
goto out;
}
err = fprintf(out, "left\tright\tnode\tsource\tdest\ttime\tmetadata\n");
if (err < 0) {
goto out;
}
for (j = 0; j < self->num_rows; j++) {
metadata_len = self->metadata_offset[j + 1] - self->metadata_offset[j];
err = fprintf(out, "%.3f\t%.3f\t%lld\t%lld\t%lld\t%f\t%.*s\n", self->left[j],
self->right[j], (long long) self->node[j], (long long) self->source[j],
(long long) self->dest[j], self->time[j], (int) metadata_len,
self->metadata + self->metadata_offset[j]);
if (err < 0) {
goto out;
}
}
ret = 0;
out:
return ret;
}
/* Returns true when the two migration tables hold identical data.
 *
 * The row counts must match and the left, right, node, source, dest and time
 * columns are compared with tsk_memcmp. Unless TSK_CMP_IGNORE_METADATA is set
 * in options, the metadata column, its offsets and the metadata schema must
 * also match. */
bool
tsk_migration_table_equals(const tsk_migration_table_t *self,
    const tsk_migration_table_t *other, tsk_flags_t options)
{
    if (self->num_rows != other->num_rows) {
        return false;
    }
    if (tsk_memcmp(self->left, other->left, self->num_rows * sizeof(double)) != 0) {
        return false;
    }
    if (tsk_memcmp(self->right, other->right, self->num_rows * sizeof(double)) != 0) {
        return false;
    }
    if (tsk_memcmp(self->node, other->node, self->num_rows * sizeof(tsk_id_t)) != 0) {
        return false;
    }
    if (tsk_memcmp(self->source, other->source, self->num_rows * sizeof(tsk_id_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(self->dest, other->dest, self->num_rows * sizeof(tsk_id_t)) != 0) {
        return false;
    }
    if (tsk_memcmp(self->time, other->time, self->num_rows * sizeof(double)) != 0) {
        return false;
    }
    if (options & TSK_CMP_IGNORE_METADATA) {
        return true;
    }

    /* Lengths first, so each memcmp runs over equally sized buffers. */
    if (self->metadata_length != other->metadata_length) {
        return false;
    }
    if (self->metadata_schema_length != other->metadata_schema_length) {
        return false;
    }
    if (tsk_memcmp(self->metadata_offset, other->metadata_offset,
            (self->num_rows + 1) * sizeof(tsk_size_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(
            self->metadata, other->metadata, self->metadata_length * sizeof(char))
        != 0) {
        return false;
    }
    if (tsk_memcmp(self->metadata_schema, other->metadata_schema,
            self->metadata_schema_length * sizeof(char))
        != 0) {
        return false;
    }
    return true;
}
/* Removes, in place, every row j for which keep[j] is false.
 *
 * If id_map is non-NULL it is filled in by keep_mask_to_id_map for the
 * original row count. The metadata column is only compacted when it holds
 * data. Always returns 0. */
int
tsk_migration_table_keep_rows(tsk_migration_table_t *self, const tsk_bool_t *keep,
    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)
{
    const tsk_size_t original_rows = self->num_rows;
    tsk_size_t kept_rows;

    if (id_map != NULL) {
        keep_mask_to_id_map(original_rows, keep, id_map);
    }

    /* The value returned for the first column is used as the new row count. */
    kept_rows = subset_double_column(self->left, original_rows, keep);
    subset_double_column(self->right, original_rows, keep);
    subset_id_column(self->node, original_rows, keep);
    subset_id_column(self->source, original_rows, keep);
    subset_id_column(self->dest, original_rows, keep);
    subset_double_column(self->time, original_rows, keep);
    if (self->metadata_length > 0) {
        self->metadata_length = subset_ragged_char_column(
            self->metadata, self->metadata_offset, original_rows, keep);
    }
    self->num_rows = kept_rows;
    return 0;
}
/* Writes the migration table to the kastore under the "migrations/" key
 * prefix by describing its columns to write_table. Returns write_table's
 * result. */
static int
tsk_migration_table_dump(
const tsk_migration_table_t *self, kastore_t *store, tsk_flags_t options)
{
/* Fixed-width columns plus the metadata schema; the entry with a NULL name
 * terminates the list. */
const write_table_col_t cols[] = {
{ "migrations/left", (void *) self->left, self->num_rows, KAS_FLOAT64 },
{ "migrations/right", (void *) self->right, self->num_rows, KAS_FLOAT64 },
{ "migrations/node", (void *) self->node, self->num_rows, TSK_ID_STORAGE_TYPE },
{ "migrations/source", (void *) self->source, self->num_rows,
TSK_ID_STORAGE_TYPE },
{ "migrations/dest", (void *) self->dest, self->num_rows, TSK_ID_STORAGE_TYPE },
{ "migrations/time", (void *) self->time, self->num_rows, KAS_FLOAT64 },
{ "migrations/metadata_schema", (void *) self->metadata_schema,
self->metadata_schema_length, KAS_UINT8 },
{ .name = NULL },
};
/* Ragged column: the data buffer with its length, and the offset column
 * with the row count. */
const write_table_ragged_col_t ragged_cols[] = {
{ "migrations/metadata", (void *) self->metadata, self->metadata_length,
KAS_UINT8, self->metadata_offset, self->num_rows },
{ .name = NULL },
};
return write_table(store, cols, ragged_cols, options);
}
/* Reads the migration table from the kastore keys under "migrations/" and
 * installs the columns into self via tsk_migration_table_takeset_columns.
 * The metadata column and the metadata schema are flagged TSK_COL_OPTIONAL.
 * Returns 0 on success or the first error encountered. */
static int
tsk_migration_table_load(tsk_migration_table_t *self, kastore_t *store)
{
int ret = 0;
tsk_id_t *source = NULL;
tsk_id_t *dest = NULL;
tsk_id_t *node = NULL;
double *left = NULL;
double *right = NULL;
double *time = NULL;
char *metadata = NULL;
tsk_size_t *metadata_offset = NULL;
char *metadata_schema = NULL;
tsk_size_t num_rows, metadata_length, metadata_schema_length;
/* Each descriptor holds the address of the local pointer that receives the
 * column; the entry with a NULL name terminates each list. */
read_table_col_t cols[] = {
{ "migrations/left", (void **) &left, KAS_FLOAT64, 0 },
{ "migrations/right", (void **) &right, KAS_FLOAT64, 0 },
{ "migrations/node", (void **) &node, TSK_ID_STORAGE_TYPE, 0 },
{ "migrations/source", (void **) &source, TSK_ID_STORAGE_TYPE, 0 },
{ "migrations/dest", (void **) &dest, TSK_ID_STORAGE_TYPE, 0 },
{ "migrations/time", (void **) &time, KAS_FLOAT64, 0 },
{ .name = NULL },
};
read_table_ragged_col_t ragged_cols[] = {
{ "migrations/metadata", (void **) &metadata, &metadata_length, KAS_UINT8,
&metadata_offset, TSK_COL_OPTIONAL },
{ .name = NULL },
};
read_table_property_t properties[] = {
{ "migrations/metadata_schema", (void **) &metadata_schema,
&metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },
{ .name = NULL },
};
ret = read_table(store, &num_rows, cols, ragged_cols, properties, 0);
if (ret != 0) {
goto out;
}
/* The schema is optional, so only set it when one was read. */
if (metadata_schema != NULL) {
ret = tsk_migration_table_set_metadata_schema(
self, metadata_schema, metadata_schema_length);
if (ret != 0) {
goto out;
}
}
ret = tsk_migration_table_takeset_columns(self, num_rows, left, right, node, source,
dest, time, metadata, metadata_offset);
if (ret != 0) {
goto out;
}
/* The table now owns these buffers. Clearing the locals presumably stops
 * free_read_table_mem, which sees them through the descriptors above, from
 * releasing them -- TODO confirm against free_read_table_mem. */
left = NULL;
right = NULL;
node = NULL;
source = NULL;
dest = NULL;
time = NULL;
metadata = NULL;
metadata_offset = NULL;
out:
free_read_table_mem(cols, ragged_cols, properties);
return ret;
}
/*************************
* population table
*************************/
/* Releases the population table's only column, the ragged metadata column
 * (offsets and data). The metadata schema is not touched here. */
static void
tsk_population_table_free_columns(tsk_population_table_t *self)
{
    tsk_safe_free(self->metadata_offset);
    tsk_safe_free(self->metadata);
}
/* Frees all memory held by the table: the metadata schema and the columns.
 * Always returns 0. */
int
tsk_population_table_free(tsk_population_table_t *self)
{
    tsk_safe_free(self->metadata_schema);
    tsk_population_table_free_columns(self);
    return 0;
}
/* Ensures the metadata offset column has room for additional_rows more rows.
 *
 * The new capacity is obtained from calculate_max_rows; the column is only
 * reallocated when the requested rows do not fit in the current capacity.
 * Returns 0 on success or the first error reported. */
static int
tsk_population_table_expand_main_columns(
    tsk_population_table_t *self, tsk_size_t additional_rows)
{
    int ret;
    tsk_size_t capacity;

    ret = calculate_max_rows(self->num_rows, self->max_rows, self->max_rows_increment,
        additional_rows, &capacity);
    if (ret != 0) {
        return ret;
    }
    if ((self->num_rows + additional_rows) <= self->max_rows) {
        /* Already enough room. */
        return 0;
    }
    /* The offset column has one more entry than there are rows. */
    ret = expand_column(
        (void **) &self->metadata_offset, capacity + 1, sizeof(tsk_size_t));
    if (ret != 0) {
        return ret;
    }
    self->max_rows = capacity;
    return 0;
}
/* Ensures the metadata buffer can take additional_length more bytes, by
 * delegating to expand_ragged_column. Returns its result. */
static int
tsk_population_table_expand_metadata(
    tsk_population_table_t *self, tsk_size_t additional_length)
{
    int ret;

    ret = expand_ragged_column(self->metadata_length, additional_length,
        self->max_metadata_length_increment, &self->max_metadata_length,
        (void **) &self->metadata, sizeof(*self->metadata));
    return ret;
}
/* Stores the row-capacity increment that is later passed to
 * calculate_max_rows when the table grows. Always returns 0. */
int
tsk_population_table_set_max_rows_increment(
    tsk_population_table_t *self, tsk_size_t max_rows_increment)
{
    int ret = 0;

    self->max_rows_increment = max_rows_increment;
    return ret;
}
/* Stores the metadata-buffer increment that is later passed to
 * expand_ragged_column when the metadata column grows. Always returns 0. */
int
tsk_population_table_set_max_metadata_length_increment(
    tsk_population_table_t *self, tsk_size_t max_metadata_length_increment)
{
    int ret = 0;

    self->max_metadata_length_increment = max_metadata_length_increment;
    return ret;
}
/* Initialises an empty population table.
 *
 * The struct is zeroed, then space for a single row and a single metadata byte
 * is allocated so that the column pointers are valid even while the table is
 * empty. The growth increments are set to 1 only for that initial allocation
 * and are reset to 0 afterwards. Returns 0 on success or the allocation
 * error. */
int
tsk_population_table_init(tsk_population_table_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret;

    tsk_memset(self, 0, sizeof(tsk_population_table_t));

    /* Temporary increments, used only for the initial one-row allocation. */
    self->max_rows_increment = 1;
    self->max_metadata_length_increment = 1;
    ret = tsk_population_table_expand_main_columns(self, 1);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_population_table_expand_metadata(self, 1);
    if (ret != 0) {
        return ret;
    }
    self->metadata_offset[0] = 0;
    self->max_rows_increment = 0;
    self->max_metadata_length_increment = 0;
    tsk_population_table_set_metadata_schema(self, NULL, 0);
    return ret;
}
/* Copy the rows and metadata schema of `self` into `dest`.
 *
 * Unless TSK_NO_INIT is set in `options`, `dest` is initialised first.
 * Returns 0 on success or the first error encountered. */
int TSK_WARN_UNUSED
tsk_population_table_copy(const tsk_population_table_t *self,
    tsk_population_table_t *dest, tsk_flags_t options)
{
    int err = 0;

    if ((options & TSK_NO_INIT) == 0) {
        err = tsk_population_table_init(dest, 0);
    }
    if (err == 0) {
        err = tsk_population_table_set_columns(
            dest, self->num_rows, self->metadata, self->metadata_offset);
    }
    if (err == 0) {
        err = tsk_population_table_set_metadata_schema(
            dest, self->metadata_schema, self->metadata_schema_length);
    }
    return err;
}
/* Replace the table contents with the supplied columns: the table is
 * cleared and the columns are then appended (copied). Returns 0 on
 * success or the error from either step. */
int
tsk_population_table_set_columns(tsk_population_table_t *self, tsk_size_t num_rows,
    const char *metadata, const tsk_size_t *metadata_offset)
{
    int err = tsk_population_table_clear(self);

    if (err == 0) {
        err = tsk_population_table_append_columns(
            self, num_rows, metadata, metadata_offset);
    }
    return err;
}
/* Append `num_rows` rows, copied from the supplied ragged metadata column,
 * to the end of the table.
 *
 * `metadata_offset` must have num_rows + 1 entries; its last entry is used
 * as the number of metadata bytes to copy. Both pointers must be non-NULL,
 * otherwise TSK_ERR_BAD_PARAM_VALUE is returned. Returns 0 on success. */
int
tsk_population_table_append_columns(tsk_population_table_t *self, tsk_size_t num_rows,
    const char *metadata, const tsk_size_t *metadata_offset)
{
    int err;
    tsk_size_t row, appended_length;
    tsk_size_t *offset_dest;

    if (metadata == NULL || metadata_offset == NULL) {
        return tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
    }
    err = tsk_population_table_expand_main_columns(self, num_rows);
    if (err != 0) {
        return err;
    }
    err = check_offsets(num_rows, metadata_offset, 0, false);
    if (err != 0) {
        return err;
    }

    /* Rebase the incoming offsets onto the end of the existing metadata. */
    offset_dest = self->metadata_offset + self->num_rows;
    for (row = 0; row < num_rows; row++) {
        offset_dest[row] = self->metadata_length + metadata_offset[row];
    }

    appended_length = metadata_offset[num_rows];
    err = tsk_population_table_expand_metadata(self, appended_length);
    if (err != 0) {
        return err;
    }
    tsk_memcpy(self->metadata + self->metadata_length, metadata,
        appended_length * sizeof(char));

    self->metadata_length += appended_length;
    self->num_rows += num_rows;
    /* Terminating offset: one past the last byte of the last row. */
    self->metadata_offset[self->num_rows] = self->metadata_length;
    return 0;
}
/* Replace the table's columns with the supplied buffers.
 *
 * The existing column buffers are freed and the new ones are installed via
 * takeset_ragged_column. Both pointers must be non-NULL
 * (TSK_ERR_BAD_PARAM_VALUE otherwise) and must pass check_ragged_column.
 * All validation happens before anything is freed, so a rejected call
 * leaves the table untouched. Returns 0 on success. */
int
tsk_population_table_takeset_columns(tsk_population_table_t *self, tsk_size_t num_rows,
    char *metadata, tsk_size_t *metadata_offset)
{
    int ret = 0;

    /* We need to check all the inputs before we start freeing or taking memory */
    if (metadata == NULL || metadata_offset == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    ret = check_ragged_column(num_rows, metadata, metadata_offset);
    if (ret != 0) {
        goto out;
    }

    tsk_population_table_free_columns(self);
    self->num_rows = num_rows;
    self->max_rows = num_rows;

    ret = takeset_ragged_column(num_rows, metadata, metadata_offset,
        (void *) &self->metadata, &self->metadata_offset, &self->metadata_length);
    if (ret != 0) {
        goto out;
    }
    /* The recorded metadata capacity described the buffer we just freed.
     * Resync it with the new buffer (just as max_rows is resynced above);
     * otherwise a stale, larger capacity would let a later add_row skip
     * the reallocation and write past the end of the new buffer. Using the
     * current length is conservative: the next append simply reallocates. */
    self->max_metadata_length = self->metadata_length;
out:
    return ret;
}
/* Write one row at the end of the table without growing any buffer.
 * The caller must already have reserved space for the row and for
 * `metadata_length` bytes of metadata; both are asserted. Returns the ID
 * (index) of the new row. */
static tsk_id_t
tsk_population_table_add_row_internal(
    tsk_population_table_t *self, const char *metadata, tsk_size_t metadata_length)
{
    const tsk_size_t row = self->num_rows;
    tsk_size_t new_length;

    tsk_bug_assert(self->num_rows < self->max_rows);
    tsk_bug_assert(self->metadata_length + metadata_length <= self->max_metadata_length);

    new_length = self->metadata_length + metadata_length;
    tsk_memmove(self->metadata + self->metadata_length, metadata, metadata_length);
    self->metadata_offset[row + 1] = new_length;
    self->metadata_length = new_length;
    self->num_rows = row + 1;
    return (tsk_id_t) row;
}
/* Append a single row with the given metadata, growing the buffers as
 * required. Returns the new row's ID, or a negative error code if either
 * expansion fails. */
tsk_id_t
tsk_population_table_add_row(
    tsk_population_table_t *self, const char *metadata, tsk_size_t metadata_length)
{
    tsk_id_t status;

    status = tsk_population_table_expand_main_columns(self, 1);
    if (status == 0) {
        status = tsk_population_table_expand_metadata(self, metadata_length);
    }
    if (status == 0) {
        status = tsk_population_table_add_row_internal(self, metadata, metadata_length);
    }
    return status;
}
/* Slow path of update_row, used when the new metadata has a different
 * length from the stored one.
 *
 * The table is copied, truncated to `index`, the replacement row is
 * appended, and the rows after `index` are re-appended from the copy.
 * Returns 0 on success or a negative error code. */
static int
tsk_population_table_update_row_rewrite(tsk_population_table_t *self, tsk_id_t index,
    const char *metadata, tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_id_t j, ret_id;
    tsk_population_table_t copy;
    tsk_size_t num_rows;
    tsk_id_t *rows = NULL;
    char *metadata_copy = NULL;

    ret = tsk_population_table_copy(self, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    rows = tsk_malloc(self->num_rows * sizeof(*rows));
    if (rows == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* The caller may pass a pointer into this table's own metadata column
     * (e.g. obtained from get_row). add_row below can reallocate that
     * column before reading from `metadata`, so take a private snapshot of
     * the input first. */
    if (metadata_length > 0) {
        metadata_copy = tsk_malloc(metadata_length * sizeof(*metadata_copy));
        if (metadata_copy == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        tsk_memcpy(metadata_copy, metadata, metadata_length * sizeof(*metadata_copy));
        metadata = metadata_copy;
    }

    ret = tsk_population_table_truncate(self, (tsk_size_t) index);
    tsk_bug_assert(ret == 0);
    ret_id = tsk_population_table_add_row(self, metadata, metadata_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }
    /* Collect the IDs of the rows that followed `index` in the original. */
    num_rows = 0;
    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {
        rows[num_rows] = j;
        num_rows++;
    }
    ret = tsk_population_table_extend(self, &copy, num_rows, rows, 0);
    if (ret != 0) {
        goto out;
    }
out:
    tsk_population_table_free(&copy);
    tsk_safe_free(rows);
    tsk_safe_free(metadata_copy);
    return ret;
}
/* Replace the metadata of the row at `index`.
 *
 * If the new metadata has the same length as the stored one it is
 * overwritten in place; otherwise the table is rebuilt via
 * update_row_rewrite. Returns 0 on success, or the error from get_row
 * (out-of-range index) or from the rewrite. */
int
tsk_population_table_update_row(tsk_population_table_t *self, tsk_id_t index,
    const char *metadata, tsk_size_t metadata_length)
{
    int ret = 0;
    tsk_population_t current_row;

    ret = tsk_population_table_get_row(self, index, &current_row);
    if (ret != 0) {
        goto out;
    }
    if (current_row.metadata_length == metadata_length) {
        /* Note: important to use tsk_memmove here as we may be provided pointers
         * to the column memory as input via get_row */
        tsk_memmove(&self->metadata[self->metadata_offset[index]], metadata,
            metadata_length * sizeof(*metadata));
    } else {
        ret = tsk_population_table_update_row_rewrite(
            self, index, metadata, metadata_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Remove every row by truncating the table to zero rows. */
int
tsk_population_table_clear(tsk_population_table_t *self)
{
    const tsk_size_t no_rows = 0;

    return tsk_population_table_truncate(self, no_rows);
}
/* Drop all rows from `num_rows` onwards. The buffers are kept; only the
 * row count and metadata length are rewound. Returns
 * TSK_ERR_BAD_TABLE_POSITION if `num_rows` exceeds the current row count. */
int
tsk_population_table_truncate(tsk_population_table_t *self, tsk_size_t num_rows)
{
    if (num_rows > self->num_rows) {
        return tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);
    }
    self->metadata_length = self->metadata_offset[num_rows];
    self->num_rows = num_rows;
    return 0;
}
/* Append rows taken from `other` to `self`.
 *
 * If `row_indexes` is NULL the first `num_rows` rows of `other` are
 * appended in order; otherwise row_indexes[k] selects the k-th row to
 * append. `other` must not be `self` (TSK_ERR_CANNOT_EXTEND_FROM_SELF).
 * The `options` argument is unused. Returns 0 on success or the first
 * error encountered; rows appended before the error remain in `self`. */
int
tsk_population_table_extend(tsk_population_table_t *self,
    const tsk_population_table_t *other, tsk_size_t num_rows,
    const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))
{
    int err;
    tsk_size_t k;
    tsk_id_t source_row, new_id;
    tsk_population_t row;

    if (self == other) {
        return tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);
    }
    /* The number of new rows is known, so reserve row space in one go. */
    err = tsk_population_table_expand_main_columns(self, num_rows);
    if (err != 0) {
        return err;
    }
    for (k = 0; k < num_rows; k++) {
        source_row = (row_indexes != NULL) ? row_indexes[k] : (tsk_id_t) k;
        err = tsk_population_table_get_row(other, source_row, &row);
        if (err != 0) {
            return err;
        }
        new_id = tsk_population_table_add_row(self, row.metadata, row.metadata_length);
        if (new_id < 0) {
            return (int) new_id;
        }
    }
    return 0;
}
/* Write a human-readable dump of the table's internal state (sizes,
 * capacities, schema header and every row) to `out`, then assert that the
 * offset column is consistent with the recorded metadata length. */
void
tsk_population_table_print_state(const tsk_population_table_t *self, FILE *out)
{
    tsk_size_t row, pos, row_end;

    fprintf(out, "\n" TABLE_SEP);
    fprintf(out, "population_table: %p:\n", (const void *) self);
    fprintf(out, "num_rows = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->num_rows, (long long) self->max_rows,
        (long long) self->max_rows_increment);
    fprintf(out, "metadata_length = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->metadata_length, (long long) self->max_metadata_length,
        (long long) self->max_metadata_length_increment);
    fprintf(out, TABLE_SEP);
    write_metadata_schema_header(
        out, self->metadata_schema, self->metadata_schema_length);
    fprintf(out, "index\tmetadata_offset\tmetadata\n");

    for (row = 0; row < self->num_rows; row++) {
        pos = self->metadata_offset[row];
        row_end = self->metadata_offset[row + 1];
        fprintf(out, "%lld\t%lld\t", (long long) row, (long long) pos);
        /* Metadata is raw bytes, so it is emitted one character at a time. */
        while (pos < row_end) {
            fprintf(out, "%c", self->metadata[pos]);
            pos++;
        }
        fprintf(out, "\n");
    }

    tsk_bug_assert(self->metadata_offset[0] == 0);
    tsk_bug_assert(self->metadata_offset[self->num_rows] == self->metadata_length);
}
/* Fill `row` with a view of the row at `index`. No bounds checking is
 * done; `row->metadata` points directly into the table's metadata buffer
 * rather than at a copy. */
static inline void
tsk_population_table_get_row_unsafe(
    const tsk_population_table_t *self, tsk_id_t index, tsk_population_t *row)
{
    const tsk_size_t start = self->metadata_offset[index];
    const tsk_size_t stop = self->metadata_offset[index + 1];

    row->id = (tsk_id_t) index;
    row->metadata = self->metadata + start;
    row->metadata_length = stop - start;
}
/* Bounds-checked row lookup. Returns TSK_ERR_POPULATION_OUT_OF_BOUNDS if
 * `index` is negative or not less than the number of rows; otherwise
 * fills `row` and returns 0. */
int
tsk_population_table_get_row(
    const tsk_population_table_t *self, tsk_id_t index, tsk_population_t *row)
{
    const bool in_bounds = index >= 0 && index < (tsk_id_t) self->num_rows;

    if (!in_bounds) {
        return tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);
    }
    tsk_population_table_get_row_unsafe(self, index, row);
    return 0;
}
/* Replace the table's metadata schema string with the given bytes; the
 * work is delegated to replace_string and its result is returned. */
int
tsk_population_table_set_metadata_schema(tsk_population_table_t *self,
    const char *metadata_schema, tsk_size_t metadata_schema_length)
{
    int err;

    err = replace_string(&self->metadata_schema, &self->metadata_schema_length,
        metadata_schema, metadata_schema_length);
    return err;
}
/* Write the table as text to `out`: the metadata schema header, a
 * "metadata" column heading, then one line per row containing that row's
 * metadata bytes. Returns 0 on success or TSK_ERR_IO if any write fails. */
int
tsk_population_table_dump_text(const tsk_population_table_t *self, FILE *out)
{
    tsk_size_t row, start, length;

    if (write_metadata_schema_header(
            out, self->metadata_schema, self->metadata_schema_length)
        < 0) {
        return TSK_ERR_IO;
    }
    if (fprintf(out, "metadata\n") < 0) {
        return TSK_ERR_IO;
    }
    for (row = 0; row < self->num_rows; row++) {
        start = self->metadata_offset[row];
        length = self->metadata_offset[row + 1] - start;
        /* The precision bounds the print, as metadata is not NUL-terminated. */
        if (fprintf(out, "%.*s\n", (int) length, self->metadata + start) < 0) {
            return TSK_ERR_IO;
        }
    }
    return 0;
}
/* Compare two population tables.
 *
 * The row counts must always match. Unless TSK_CMP_IGNORE_METADATA is set
 * in `options`, the metadata offsets, metadata bytes and metadata schema
 * must match as well. */
bool
tsk_population_table_equals(const tsk_population_table_t *self,
    const tsk_population_table_t *other, tsk_flags_t options)
{
    if (self->num_rows != other->num_rows) {
        return false;
    }
    if (options & TSK_CMP_IGNORE_METADATA) {
        /* Metadata is the only column, so the row count is all that is left. */
        return true;
    }
    if (self->metadata_length != other->metadata_length
        || self->metadata_schema_length != other->metadata_schema_length) {
        return false;
    }
    if (tsk_memcmp(self->metadata_offset, other->metadata_offset,
            (self->num_rows + 1) * sizeof(tsk_size_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(
            self->metadata, other->metadata, self->metadata_length * sizeof(char))
        != 0) {
        return false;
    }
    return tsk_memcmp(self->metadata_schema, other->metadata_schema,
               self->metadata_schema_length * sizeof(char))
           == 0;
}
/* Keep only the rows whose entry in `keep` is true, compacting the table
 * in place.
 *
 * If `id_map` is non-NULL it is filled in by keep_mask_to_id_map before
 * any row is moved. The `options` argument is unused. Always returns 0. */
int
tsk_population_table_keep_rows(tsk_population_table_t *self, const tsk_bool_t *keep,
    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)
{
    const tsk_size_t old_num_rows = self->num_rows;

    if (id_map != NULL) {
        keep_mask_to_id_map(old_num_rows, keep, id_map);
    }
    /* The metadata column is only compacted when it holds any bytes. */
    if (self->metadata_length > 0) {
        self->metadata_length = subset_ragged_char_column(
            self->metadata, self->metadata_offset, old_num_rows, keep);
    }
    self->num_rows = count_true(old_num_rows, keep);
    return 0;
}
/* Write the population table to the kastore under the "populations/"
 * keys: the metadata schema as a plain column and the metadata as a
 * ragged column. Returns the result of write_table. */
static int
tsk_population_table_dump(
    const tsk_population_table_t *self, kastore_t *store, tsk_flags_t options)
{
    int err;
    /* Both arrays are terminated by an entry whose name is NULL. */
    const write_table_ragged_col_t ragged_cols[] = {
        { "populations/metadata", (void *) self->metadata, self->metadata_length,
            KAS_UINT8, self->metadata_offset, self->num_rows },
        { .name = NULL },
    };
    const write_table_col_t cols[] = {
        { "populations/metadata_schema", (void *) self->metadata_schema,
            self->metadata_schema_length, KAS_UINT8 },
        { .name = NULL },
    };

    err = write_table(store, cols, ragged_cols, options);
    return err;
}
/* Read the population table from the kastore.
 *
 * The metadata schema (optional in the store) is copied into the table,
 * while the metadata column buffers are handed to the table with
 * takeset_columns. Returns 0 on success or the first error encountered. */
static int
tsk_population_table_load(tsk_population_table_t *self, kastore_t *store)
{
    int err = 0;
    tsk_size_t num_rows, metadata_length, metadata_schema_length;
    char *metadata = NULL;
    tsk_size_t *metadata_offset = NULL;
    char *metadata_schema = NULL;
    read_table_ragged_col_t ragged_cols[] = {
        { "populations/metadata", (void **) &metadata, &metadata_length, KAS_UINT8,
            &metadata_offset, 0 },
        { .name = NULL },
    };
    read_table_property_t properties[] = {
        { "populations/metadata_schema", (void **) &metadata_schema,
            &metadata_schema_length, KAS_UINT8, TSK_COL_OPTIONAL },
        { .name = NULL },
    };

    err = read_table(store, &num_rows, NULL, ragged_cols, properties, 0);
    if (err == 0 && metadata_schema != NULL) {
        err = tsk_population_table_set_metadata_schema(
            self, metadata_schema, metadata_schema_length);
    }
    if (err == 0) {
        err = tsk_population_table_takeset_columns(
            self, num_rows, metadata, metadata_offset);
    }
    if (err == 0) {
        /* The table now holds these buffers; clear the locals so the
         * cleanup call below does not see them. */
        metadata = NULL;
        metadata_offset = NULL;
    }
    free_read_table_mem(NULL, ragged_cols, properties);
    return err;
}
/*************************
* provenance table
*************************/
/* Release the four column buffers owned by the provenance table: the
 * timestamp and record bytes and their respective offset arrays. */
static void
tsk_provenance_table_free_columns(tsk_provenance_table_t *self)
{
    tsk_safe_free(self->record_offset);
    tsk_safe_free(self->record);
    tsk_safe_free(self->timestamp_offset);
    tsk_safe_free(self->timestamp);
}
/* Free everything owned by the provenance table, which is just its
 * column buffers. Always returns 0. */
int
tsk_provenance_table_free(tsk_provenance_table_t *self)
{
    int ret = 0;

    tsk_provenance_table_free_columns(self);
    return ret;
}
/* Make sure both per-row offset columns have room for `additional_rows`
 * more rows. Each offset column holds one more entry than there are rows,
 * hence the "+ 1". max_rows is only updated once both columns have grown.
 * Returns 0 on success or the error from the helper that failed. */
static int
tsk_provenance_table_expand_main_columns(
    tsk_provenance_table_t *self, tsk_size_t additional_rows)
{
    tsk_size_t capacity;
    int err = calculate_max_rows(self->num_rows, self->max_rows,
        self->max_rows_increment, additional_rows, &capacity);

    if (err != 0) {
        return err;
    }
    if ((self->num_rows + additional_rows) <= self->max_rows) {
        /* Already enough space; nothing to reallocate. */
        return 0;
    }
    err = expand_column(
        (void **) &self->timestamp_offset, capacity + 1, sizeof(tsk_size_t));
    if (err != 0) {
        return err;
    }
    err = expand_column(
        (void **) &self->record_offset, capacity + 1, sizeof(tsk_size_t));
    if (err != 0) {
        return err;
    }
    self->max_rows = capacity;
    return 0;
}
/* Make sure the ragged timestamp buffer can take `additional_length` more
 * bytes; the work is delegated to expand_ragged_column. */
static int
tsk_provenance_table_expand_timestamp(
    tsk_provenance_table_t *self, tsk_size_t additional_length)
{
    int err;

    err = expand_ragged_column(self->timestamp_length, additional_length,
        self->max_timestamp_length_increment, &self->max_timestamp_length,
        (void **) &self->timestamp, sizeof(*self->timestamp));
    return err;
}
/* Make sure the ragged record buffer can take `additional_length` more
 * bytes; the work is delegated to expand_ragged_column. */
static int
tsk_provenance_table_expand_record(
    tsk_provenance_table_t *self, tsk_size_t additional_length)
{
    int err;

    err = expand_ragged_column(self->record_length, additional_length,
        self->max_record_length_increment, &self->max_record_length,
        (void **) &self->record, sizeof(*self->record));
    return err;
}
/* Store the row-capacity growth increment used when the table expands.
 * Always returns 0. */
int
tsk_provenance_table_set_max_rows_increment(
    tsk_provenance_table_t *self, tsk_size_t max_rows_increment)
{
    int ret = 0;

    self->max_rows_increment = max_rows_increment;
    return ret;
}
/* Store the growth increment used when the timestamp buffer expands.
 * Always returns 0. */
int
tsk_provenance_table_set_max_timestamp_length_increment(
    tsk_provenance_table_t *self, tsk_size_t max_timestamp_length_increment)
{
    int ret = 0;

    self->max_timestamp_length_increment = max_timestamp_length_increment;
    return ret;
}
/* Store the growth increment used when the record buffer expands.
 * Always returns 0. */
int
tsk_provenance_table_set_max_record_length_increment(
    tsk_provenance_table_t *self, tsk_size_t max_record_length_increment)
{
    int ret = 0;

    self->max_record_length_increment = max_record_length_increment;
    return ret;
}
/* Initialise an empty provenance table.
 *
 * The struct is zeroed, then space for one row, one timestamp byte and one
 * record byte is allocated so the column pointers are valid even while the
 * table is empty. The increments are set to 1 only for those first
 * allocations and reset to 0 afterwards. The `options` argument is unused.
 * Returns 0 on success or the error from the failed allocation. */
int
tsk_provenance_table_init(tsk_provenance_table_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int err;

    tsk_memset(self, 0, sizeof(*self));

    self->max_rows_increment = 1;
    self->max_timestamp_length_increment = 1;
    self->max_record_length_increment = 1;
    err = tsk_provenance_table_expand_main_columns(self, 1);
    if (err != 0) {
        return err;
    }
    err = tsk_provenance_table_expand_timestamp(self, 1);
    if (err != 0) {
        return err;
    }
    self->timestamp_offset[0] = 0;
    err = tsk_provenance_table_expand_record(self, 1);
    if (err != 0) {
        return err;
    }
    self->record_offset[0] = 0;

    self->max_rows_increment = 0;
    self->max_timestamp_length_increment = 0;
    self->max_record_length_increment = 0;
    return 0;
}
/* Copy the rows of `self` into `dest`. Unless TSK_NO_INIT is set in
 * `options`, `dest` is initialised first. Returns 0 on success or the
 * first error encountered. */
int TSK_WARN_UNUSED
tsk_provenance_table_copy(const tsk_provenance_table_t *self,
    tsk_provenance_table_t *dest, tsk_flags_t options)
{
    int err = 0;

    if ((options & TSK_NO_INIT) == 0) {
        err = tsk_provenance_table_init(dest, 0);
    }
    if (err == 0) {
        err = tsk_provenance_table_set_columns(dest, self->num_rows, self->timestamp,
            self->timestamp_offset, self->record, self->record_offset);
    }
    return err;
}
/* Replace the table contents with the supplied columns: the table is
 * cleared and the columns are then appended (copied). Returns 0 on
 * success or the error from either step. */
int
tsk_provenance_table_set_columns(tsk_provenance_table_t *self, tsk_size_t num_rows,
    const char *timestamp, const tsk_size_t *timestamp_offset, const char *record,
    const tsk_size_t *record_offset)
{
    int err = tsk_provenance_table_clear(self);

    if (err == 0) {
        err = tsk_provenance_table_append_columns(
            self, num_rows, timestamp, timestamp_offset, record, record_offset);
    }
    return err;
}
/* Append `num_rows` rows, copied from the supplied ragged timestamp and
 * record columns, to the end of the table.
 *
 * Each offset array must have num_rows + 1 entries; its last entry is used
 * as the number of bytes to copy from the matching data array. All four
 * pointers must be non-NULL (TSK_ERR_BAD_PARAM_VALUE otherwise).
 *
 * Every check and every buffer expansion is done before any length or row
 * count is changed, so a failure leaves the table's lengths consistent
 * with its offset columns. Returns 0 on success. */
int
tsk_provenance_table_append_columns(tsk_provenance_table_t *self, tsk_size_t num_rows,
    const char *timestamp, const tsk_size_t *timestamp_offset, const char *record,
    const tsk_size_t *record_offset)
{
    int ret;
    tsk_size_t j, timestamp_length, record_length;

    if (timestamp == NULL || timestamp_offset == NULL || record == NULL
        || record_offset == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    ret = tsk_provenance_table_expand_main_columns(self, num_rows);
    if (ret != 0) {
        goto out;
    }

    /* Validate both offset columns up front. */
    ret = check_offsets(num_rows, timestamp_offset, 0, false);
    if (ret != 0) {
        goto out;
    }
    ret = check_offsets(num_rows, record_offset, 0, false);
    if (ret != 0) {
        goto out;
    }
    timestamp_length = timestamp_offset[num_rows];
    record_length = record_offset[num_rows];

    /* Reserve space in both data buffers before committing anything. */
    ret = tsk_provenance_table_expand_timestamp(self, timestamp_length);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_provenance_table_expand_record(self, record_length);
    if (ret != 0) {
        goto out;
    }

    /* Nothing below can fail: rebase the offsets onto the end of the
     * existing data, copy the bytes and commit the new sizes. */
    for (j = 0; j < num_rows; j++) {
        self->timestamp_offset[self->num_rows + j]
            = self->timestamp_length + timestamp_offset[j];
        self->record_offset[self->num_rows + j] = self->record_length + record_offset[j];
    }
    tsk_memcpy(self->timestamp + self->timestamp_length, timestamp,
        timestamp_length * sizeof(char));
    tsk_memcpy(self->record + self->record_length, record, record_length * sizeof(char));
    self->timestamp_length += timestamp_length;
    self->record_length += record_length;
    self->num_rows += num_rows;
    self->timestamp_offset[self->num_rows] = self->timestamp_length;
    self->record_offset[self->num_rows] = self->record_length;
out:
    return ret;
}
/* Replace the table's columns with the supplied buffers.
 *
 * The existing column buffers are freed and the new ones are installed via
 * takeset_ragged_column. All four pointers must be non-NULL
 * (TSK_ERR_BAD_PARAM_VALUE otherwise) and both ragged columns must pass
 * check_ragged_column. All validation happens before anything is freed,
 * so a rejected call leaves the table untouched. Returns 0 on success. */
int
tsk_provenance_table_takeset_columns(tsk_provenance_table_t *self, tsk_size_t num_rows,
    char *timestamp, tsk_size_t *timestamp_offset, char *record,
    tsk_size_t *record_offset)
{
    int ret = 0;

    /* We need to check all the inputs before we start freeing or taking memory */
    if (timestamp == NULL || timestamp_offset == NULL || record == NULL
        || record_offset == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    ret = check_ragged_column(num_rows, timestamp, timestamp_offset);
    if (ret != 0) {
        goto out;
    }
    ret = check_ragged_column(num_rows, record, record_offset);
    if (ret != 0) {
        goto out;
    }

    tsk_provenance_table_free_columns(self);
    self->num_rows = num_rows;
    self->max_rows = num_rows;

    ret = takeset_ragged_column(num_rows, timestamp, timestamp_offset,
        (void *) &self->timestamp, &self->timestamp_offset, &self->timestamp_length);
    if (ret != 0) {
        goto out;
    }
    /* The recorded capacities described the buffers we just freed. Resync
     * them with the new buffers (just as max_rows is resynced above);
     * otherwise a stale, larger capacity would let a later add_row skip
     * the reallocation and write past the end of a new buffer. Using the
     * current length is conservative: the next append simply reallocates. */
    self->max_timestamp_length = self->timestamp_length;

    ret = takeset_ragged_column(num_rows, record, record_offset, (void *) &self->record,
        &self->record_offset, &self->record_length);
    if (ret != 0) {
        goto out;
    }
    self->max_record_length = self->record_length;
out:
    return ret;
}
/* Write one row at the end of the table without growing any buffer.
 * The caller must already have reserved space for the row and for the
 * timestamp and record bytes; all three are asserted. Returns the ID
 * (index) of the new row. */
static tsk_id_t
tsk_provenance_table_add_row_internal(tsk_provenance_table_t *self,
    const char *timestamp, tsk_size_t timestamp_length, const char *record,
    tsk_size_t record_length)
{
    const tsk_size_t row = self->num_rows;
    tsk_size_t new_length;

    tsk_bug_assert(self->num_rows < self->max_rows);

    /* Timestamp column. */
    tsk_bug_assert(
        self->timestamp_length + timestamp_length <= self->max_timestamp_length);
    new_length = self->timestamp_length + timestamp_length;
    tsk_memmove(self->timestamp + self->timestamp_length, timestamp, timestamp_length);
    self->timestamp_offset[row + 1] = new_length;
    self->timestamp_length = new_length;

    /* Record column. */
    tsk_bug_assert(self->record_length + record_length <= self->max_record_length);
    new_length = self->record_length + record_length;
    tsk_memmove(self->record + self->record_length, record, record_length);
    self->record_offset[row + 1] = new_length;
    self->record_length = new_length;

    self->num_rows = row + 1;
    return (tsk_id_t) row;
}
/* Append a single row with the given timestamp and record, growing the
 * buffers as required. Returns the new row's ID, or a negative error code
 * if any expansion fails. */
tsk_id_t
tsk_provenance_table_add_row(tsk_provenance_table_t *self, const char *timestamp,
    tsk_size_t timestamp_length, const char *record, tsk_size_t record_length)
{
    tsk_id_t status;

    status = tsk_provenance_table_expand_main_columns(self, 1);
    if (status == 0) {
        status = tsk_provenance_table_expand_timestamp(self, timestamp_length);
    }
    if (status == 0) {
        status = tsk_provenance_table_expand_record(self, record_length);
    }
    if (status == 0) {
        status = tsk_provenance_table_add_row_internal(
            self, timestamp, timestamp_length, record, record_length);
    }
    return status;
}
/* Slow path of update_row, used when the new timestamp or record has a
 * different length from the stored one.
 *
 * The table is copied, truncated to `index`, the replacement row is
 * appended, and the rows after `index` are re-appended from the copy.
 * Returns 0 on success or a negative error code. */
static int
tsk_provenance_table_update_row_rewrite(tsk_provenance_table_t *self, tsk_id_t index,
    const char *timestamp, tsk_size_t timestamp_length, const char *record,
    tsk_size_t record_length)
{
    int ret = 0;
    tsk_id_t j, ret_id;
    tsk_provenance_table_t copy;
    tsk_size_t num_rows;
    tsk_id_t *rows = NULL;
    char *timestamp_copy = NULL;
    char *record_copy = NULL;

    ret = tsk_provenance_table_copy(self, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    rows = tsk_malloc(self->num_rows * sizeof(*rows));
    if (rows == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* The caller may pass pointers into this table's own columns (e.g.
     * obtained from get_row). add_row below can reallocate those columns
     * before reading from the inputs, so take private snapshots first. */
    if (timestamp_length > 0) {
        timestamp_copy = tsk_malloc(timestamp_length * sizeof(*timestamp_copy));
        if (timestamp_copy == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        tsk_memcpy(
            timestamp_copy, timestamp, timestamp_length * sizeof(*timestamp_copy));
        timestamp = timestamp_copy;
    }
    if (record_length > 0) {
        record_copy = tsk_malloc(record_length * sizeof(*record_copy));
        if (record_copy == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        tsk_memcpy(record_copy, record, record_length * sizeof(*record_copy));
        record = record_copy;
    }

    ret = tsk_provenance_table_truncate(self, (tsk_size_t) index);
    tsk_bug_assert(ret == 0);
    ret_id = tsk_provenance_table_add_row(
        self, timestamp, timestamp_length, record, record_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }
    /* Collect the IDs of the rows that followed `index` in the original. */
    num_rows = 0;
    for (j = index + 1; j < (tsk_id_t) copy.num_rows; j++) {
        rows[num_rows] = j;
        num_rows++;
    }
    ret = tsk_provenance_table_extend(self, &copy, num_rows, rows, 0);
    if (ret != 0) {
        goto out;
    }
out:
    tsk_provenance_table_free(&copy);
    tsk_safe_free(rows);
    tsk_safe_free(timestamp_copy);
    tsk_safe_free(record_copy);
    return ret;
}
/* Replace the timestamp and record of the row at `index`.
 *
 * If both new values have the same lengths as the stored ones they are
 * overwritten in place; otherwise the table is rebuilt via
 * update_row_rewrite. Returns 0 on success, or the error from get_row
 * (out-of-range index) or from the rewrite. */
int
tsk_provenance_table_update_row(tsk_provenance_table_t *self, tsk_id_t index,
    const char *timestamp, tsk_size_t timestamp_length, const char *record,
    tsk_size_t record_length)
{
    int ret = 0;
    tsk_provenance_t current_row;

    ret = tsk_provenance_table_get_row(self, index, &current_row);
    if (ret != 0) {
        goto out;
    }
    if (current_row.timestamp_length == timestamp_length
        && current_row.record_length == record_length) {
        /* Note: important to use tsk_memmove here as we may be provided pointers
         * to the column memory as input via get_row */
        tsk_memmove(&self->timestamp[self->timestamp_offset[index]], timestamp,
            timestamp_length * sizeof(*timestamp));
        tsk_memmove(&self->record[self->record_offset[index]], record,
            record_length * sizeof(*record));
    } else {
        ret = tsk_provenance_table_update_row_rewrite(
            self, index, timestamp, timestamp_length, record, record_length);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Remove every row by truncating the table to zero rows. */
int
tsk_provenance_table_clear(tsk_provenance_table_t *self)
{
    const tsk_size_t no_rows = 0;

    return tsk_provenance_table_truncate(self, no_rows);
}
/* Drop all rows from `num_rows` onwards. The buffers are kept; only the
 * row count and the two data lengths are rewound. Returns
 * TSK_ERR_BAD_TABLE_POSITION if `num_rows` exceeds the current row count. */
int
tsk_provenance_table_truncate(tsk_provenance_table_t *self, tsk_size_t num_rows)
{
    if (num_rows > self->num_rows) {
        return tsk_trace_error(TSK_ERR_BAD_TABLE_POSITION);
    }
    self->record_length = self->record_offset[num_rows];
    self->timestamp_length = self->timestamp_offset[num_rows];
    self->num_rows = num_rows;
    return 0;
}
/* Append rows taken from `other` to `self`.
 *
 * If `row_indexes` is NULL the first `num_rows` rows of `other` are
 * appended in order; otherwise row_indexes[k] selects the k-th row to
 * append. `other` must not be `self` (TSK_ERR_CANNOT_EXTEND_FROM_SELF).
 * The `options` argument is unused. Returns 0 on success or the first
 * error encountered; rows appended before the error remain in `self`. */
int
tsk_provenance_table_extend(tsk_provenance_table_t *self,
    const tsk_provenance_table_t *other, tsk_size_t num_rows,
    const tsk_id_t *row_indexes, tsk_flags_t TSK_UNUSED(options))
{
    int err;
    tsk_size_t k;
    tsk_id_t source_row, new_id;
    tsk_provenance_t row;

    if (self == other) {
        return tsk_trace_error(TSK_ERR_CANNOT_EXTEND_FROM_SELF);
    }
    /* The number of new rows is known, so reserve row space in one go. */
    err = tsk_provenance_table_expand_main_columns(self, num_rows);
    if (err != 0) {
        return err;
    }
    for (k = 0; k < num_rows; k++) {
        source_row = (row_indexes != NULL) ? row_indexes[k] : (tsk_id_t) k;
        err = tsk_provenance_table_get_row(other, source_row, &row);
        if (err != 0) {
            return err;
        }
        new_id = tsk_provenance_table_add_row(
            self, row.timestamp, row.timestamp_length, row.record, row.record_length);
        if (new_id < 0) {
            return (int) new_id;
        }
    }
    return 0;
}
/* Write a human-readable dump of the table's internal state (sizes,
 * capacities and every row) to `out`, then assert that both offset
 * columns are consistent with the recorded data lengths. */
void
tsk_provenance_table_print_state(const tsk_provenance_table_t *self, FILE *out)
{
    tsk_size_t row, pos, row_end;

    fprintf(out, "\n" TABLE_SEP);
    fprintf(out, "provenance_table: %p:\n", (const void *) self);
    fprintf(out, "num_rows = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->num_rows, (long long) self->max_rows,
        (long long) self->max_rows_increment);
    fprintf(out, "timestamp_length = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->timestamp_length, (long long) self->max_timestamp_length,
        (long long) self->max_timestamp_length_increment);
    fprintf(out, "record_length = %lld\tmax= %lld\tincrement = %lld)\n",
        (long long) self->record_length, (long long) self->max_record_length,
        (long long) self->max_record_length_increment);
    fprintf(out, TABLE_SEP);
    fprintf(out, "index\ttimestamp_offset\ttimestamp\trecord_offset\tprovenance\n");

    for (row = 0; row < self->num_rows; row++) {
        /* Timestamp: offset followed by the raw bytes. */
        pos = self->timestamp_offset[row];
        row_end = self->timestamp_offset[row + 1];
        fprintf(out, "%lld\t%lld\t", (long long) row, (long long) pos);
        while (pos < row_end) {
            fprintf(out, "%c", self->timestamp[pos]);
            pos++;
        }
        /* Record: offset followed by the raw bytes. */
        pos = self->record_offset[row];
        row_end = self->record_offset[row + 1];
        fprintf(out, "\t%lld\t", (long long) pos);
        while (pos < row_end) {
            fprintf(out, "%c", self->record[pos]);
            pos++;
        }
        fprintf(out, "\n");
    }

    tsk_bug_assert(self->timestamp_offset[0] == 0);
    tsk_bug_assert(self->timestamp_offset[self->num_rows] == self->timestamp_length);
    tsk_bug_assert(self->record_offset[0] == 0);
    tsk_bug_assert(self->record_offset[self->num_rows] == self->record_length);
}
/* Fill `row` with a view of the row at `index`. No bounds checking is
 * done; the timestamp and record pointers refer directly to the table's
 * buffers rather than to copies. */
static inline void
tsk_provenance_table_get_row_unsafe(
    const tsk_provenance_table_t *self, tsk_id_t index, tsk_provenance_t *row)
{
    const tsk_size_t timestamp_start = self->timestamp_offset[index];
    const tsk_size_t timestamp_stop = self->timestamp_offset[index + 1];
    const tsk_size_t record_start = self->record_offset[index];
    const tsk_size_t record_stop = self->record_offset[index + 1];

    row->id = (tsk_id_t) index;
    row->timestamp = self->timestamp + timestamp_start;
    row->timestamp_length = timestamp_stop - timestamp_start;
    row->record = self->record + record_start;
    row->record_length = record_stop - record_start;
}
/* Bounds-checked row lookup. Returns TSK_ERR_PROVENANCE_OUT_OF_BOUNDS if
 * `index` is negative or not less than the number of rows; otherwise
 * fills `row` and returns 0. */
int
tsk_provenance_table_get_row(
    const tsk_provenance_table_t *self, tsk_id_t index, tsk_provenance_t *row)
{
    const bool in_bounds = index >= 0 && index < (tsk_id_t) self->num_rows;

    if (!in_bounds) {
        return tsk_trace_error(TSK_ERR_PROVENANCE_OUT_OF_BOUNDS);
    }
    tsk_provenance_table_get_row_unsafe(self, index, row);
    return 0;
}
/* Write the table as tab-separated text to `out`: a "record<TAB>timestamp"
 * heading, then one line per row with the record followed by the
 * timestamp. Returns 0 on success or TSK_ERR_IO if any write fails. */
int
tsk_provenance_table_dump_text(const tsk_provenance_table_t *self, FILE *out)
{
    tsk_size_t row, record_start, record_len, timestamp_start, timestamp_len;

    if (fprintf(out, "record\ttimestamp\n") < 0) {
        return TSK_ERR_IO;
    }
    for (row = 0; row < self->num_rows; row++) {
        record_start = self->record_offset[row];
        record_len = self->record_offset[row + 1] - record_start;
        timestamp_start = self->timestamp_offset[row];
        timestamp_len = self->timestamp_offset[row + 1] - timestamp_start;
        /* The precisions bound the prints, as neither field is NUL-terminated. */
        if (fprintf(out, "%.*s\t%.*s\n", (int) record_len, self->record + record_start,
                (int) timestamp_len, self->timestamp + timestamp_start)
            < 0) {
            return TSK_ERR_IO;
        }
    }
    return 0;
}
/* Compare two provenance tables.
 *
 * The row counts and the record column (offsets and bytes) must always
 * match. Unless TSK_CMP_IGNORE_TIMESTAMPS is set in `options`, the
 * timestamp column must match as well. */
bool
tsk_provenance_table_equals(const tsk_provenance_table_t *self,
    const tsk_provenance_table_t *other, tsk_flags_t options)
{
    const tsk_size_t offset_bytes = (self->num_rows + 1) * sizeof(tsk_size_t);

    if (self->num_rows != other->num_rows
        || self->record_length != other->record_length) {
        return false;
    }
    if (tsk_memcmp(self->record_offset, other->record_offset,
            (self->num_rows + 1) * sizeof(tsk_size_t))
        != 0) {
        return false;
    }
    if (tsk_memcmp(self->record, other->record, self->record_length * sizeof(char))
        != 0) {
        return false;
    }
    if (options & TSK_CMP_IGNORE_TIMESTAMPS) {
        return true;
    }
    if (self->timestamp_length != other->timestamp_length) {
        return false;
    }
    if (tsk_memcmp(self->timestamp_offset, other->timestamp_offset, offset_bytes)
        != 0) {
        return false;
    }
    return tsk_memcmp(self->timestamp, other->timestamp,
               self->timestamp_length * sizeof(char))
           == 0;
}
/* Keep only the rows whose entry in `keep` is true, compacting both
 * ragged columns in place.
 *
 * If `id_map` is non-NULL it is filled in by keep_mask_to_id_map before
 * any row is moved. The `options` argument is unused. Always returns 0. */
int
tsk_provenance_table_keep_rows(tsk_provenance_table_t *self, const tsk_bool_t *keep,
    tsk_flags_t TSK_UNUSED(options), tsk_id_t *id_map)
{
    const tsk_size_t old_num_rows = self->num_rows;

    if (id_map != NULL) {
        keep_mask_to_id_map(old_num_rows, keep, id_map);
    }
    self->timestamp_length = subset_ragged_char_column(
        self->timestamp, self->timestamp_offset, old_num_rows, keep);
    self->record_length = subset_ragged_char_column(
        self->record, self->record_offset, old_num_rows, keep);
    self->num_rows = count_true(old_num_rows, keep);
    return 0;
}
/* Write the provenance table to the kastore under the "provenances/"
 * keys, as two ragged columns (timestamp and record). Returns the result
 * of write_table_ragged_cols. */
static int
tsk_provenance_table_dump(
    const tsk_provenance_table_t *self, kastore_t *store, tsk_flags_t options)
{
    int err;
    /* The array is terminated by an entry whose name is NULL. */
    write_table_ragged_col_t ragged_cols[] = {
        { "provenances/timestamp", (void *) self->timestamp, self->timestamp_length,
            KAS_UINT8, self->timestamp_offset, self->num_rows },
        { "provenances/record", (void *) self->record, self->record_length, KAS_UINT8,
            self->record_offset, self->num_rows },
        { .name = NULL },
    };

    err = write_table_ragged_cols(store, ragged_cols, options);
    return err;
}
/* Read the provenance table from the kastore.
 *
 * The timestamp and record ragged columns are read and their buffers are
 * handed to the table with takeset_columns. Returns 0 on success or the
 * first error encountered. */
static int
tsk_provenance_table_load(tsk_provenance_table_t *self, kastore_t *store)
{
    int ret;
    char *timestamp = NULL;
    tsk_size_t *timestamp_offset = NULL;
    char *record = NULL;
    tsk_size_t *record_offset = NULL;
    tsk_size_t num_rows, timestamp_length, record_length;
    read_table_ragged_col_t ragged_cols[] = {
        { "provenances/timestamp", (void **) &timestamp, &timestamp_length, KAS_UINT8,
            &timestamp_offset, 0 },
        { "provenances/record", (void **) &record, &record_length, KAS_UINT8,
            &record_offset, 0 },
        { .name = NULL },
    };

    ret = read_table(store, &num_rows, NULL, ragged_cols, NULL, 0);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_provenance_table_takeset_columns(
        self, num_rows, timestamp, timestamp_offset, record, record_offset);
    if (ret != 0) {
        goto out;
    }
    /* The table now holds these buffers; clear the locals so the cleanup
     * call below does not see them. */
    timestamp = NULL;
    timestamp_offset = NULL;
    record = NULL;
    record_offset = NULL;
out:
    free_read_table_mem(NULL, ragged_cols, NULL);
    return ret;
}
/*************************
* sort_tables
*************************/
/* Sort record for one edge. cmp_edge orders these by time, then parent,
 * then child, then left. */
typedef struct {
double left;
double right;
tsk_id_t parent;
tsk_id_t child;
/* Primary sort key in cmp_edge. */
double time;
/* It would be a little bit more convenient to store a pointer to the
* metadata here in the struct rather than an offset back into the
* original array. However, this would increase the size of the struct
* from 40 bytes to 48 and we will allocate very large numbers of these.
*/
/* NOTE(review): the 40/48 byte figures depend on the widths of tsk_id_t
 * and tsk_size_t, which are defined elsewhere - confirm they still hold. */
tsk_size_t metadata_offset;
tsk_size_t metadata_length;
} edge_sort_t;
/* Sort record for one mutation: the mutation row plus the two extra keys
 * read by cmp_mutation. */
typedef struct {
tsk_mutation_t mut;
/* Tie-breaker in cmp_mutation: larger values sort first. */
int num_descendants;
/* Used by cmp_mutation when mutation times are unknown or equal; larger
 * values sort first. */
double node_time;
} mutation_sort_t;
/* Sort record for one individual, as compared by cmp_individual_canonical:
 * num_descendants (larger first), then first_node, then the individual's ID. */
typedef struct {
tsk_individual_t ind;
tsk_id_t first_node;
tsk_size_t num_descendants;
} individual_canonical_sort_t;
/* Sort record for one migration. cmp_migration orders these by time, then
 * source, then dest, then left, then node. */
typedef struct {
double left;
double right;
tsk_id_t node;
tsk_id_t source;
tsk_id_t dest;
double time;
/* Metadata is referenced by offset and length rather than by pointer. */
tsk_size_t metadata_offset;
tsk_size_t metadata_length;
} migration_sort_t;
/* Comparator for tsk_site_t records: ascending position, with ties broken
 * by ascending ID. Returns -1, 0 or 1. */
static int
cmp_site(const void *a, const void *b)
{
    const tsk_site_t *lhs = (const tsk_site_t *) a;
    const tsk_site_t *rhs = (const tsk_site_t *) b;
    int order = (lhs->position > rhs->position) - (lhs->position < rhs->position);

    if (order != 0) {
        return order;
    }
    /* Sites sharing a position keep their relative (ID) order. Redundant
     * sites are compacted later by clean_tables(), but until then changing
     * their order would corrupt the sort order of the mutations, which
     * follow their sites. */
    return (lhs->id > rhs->id) - (lhs->id < rhs->id);
}
/* Comparator for mutation_sort_t records. Keys, in order of precedence:
 *   1. site, ascending;
 *   2. mutation time, descending (only when both times are known);
 *   3. node time, descending;
 *   4. number of descendants, descending;
 *   5. node, ascending;
 *   6. mutation ID, ascending.
 * Returns -1, 0 or 1. */
static int
cmp_mutation(const void *a, const void *b)
{
    const mutation_sort_t *lhs = (const mutation_sort_t *) a;
    const mutation_sort_t *rhs = (const mutation_sort_t *) b;
    int order = (lhs->mut.site > rhs->mut.site) - (lhs->mut.site < rhs->mut.site);

    if (order != 0) {
        return order;
    }
    if (!tsk_is_unknown_time(lhs->mut.time) && !tsk_is_unknown_time(rhs->mut.time)) {
        order = (lhs->mut.time < rhs->mut.time) - (lhs->mut.time > rhs->mut.time);
        if (order != 0) {
            return order;
        }
    }
    order = (lhs->node_time < rhs->node_time) - (lhs->node_time > rhs->node_time);
    if (order != 0) {
        return order;
    }
    order = (lhs->num_descendants < rhs->num_descendants)
            - (lhs->num_descendants > rhs->num_descendants);
    if (order != 0) {
        return order;
    }
    order = (lhs->mut.node > rhs->mut.node) - (lhs->mut.node < rhs->mut.node);
    if (order != 0) {
        return order;
    }
    return (lhs->mut.id > rhs->mut.id) - (lhs->mut.id < rhs->mut.id);
}
/* qsort comparator for individual_canonical_sort_t records: most descendants
 * first, then ascending first referring node, then ascending individual ID. */
static int
cmp_individual_canonical(const void *a, const void *b)
{
    const individual_canonical_sort_t *lhs = (const individual_canonical_sort_t *) a;
    const individual_canonical_sort_t *rhs = (const individual_canonical_sort_t *) b;

    if (lhs->num_descendants > rhs->num_descendants) {
        return -1;
    }
    if (lhs->num_descendants < rhs->num_descendants) {
        return 1;
    }
    if (lhs->first_node < rhs->first_node) {
        return -1;
    }
    if (lhs->first_node > rhs->first_node) {
        return 1;
    }
    return (lhs->ind.id > rhs->ind.id) - (lhs->ind.id < rhs->ind.id);
}
/* qsort comparator for edge_sort_t records. All keys ascending: parent time,
 * then parent ID, then child ID, then left coordinate. */
static int
cmp_edge(const void *a, const void *b)
{
    const edge_sort_t *lhs = (const edge_sort_t *) a;
    const edge_sort_t *rhs = (const edge_sort_t *) b;

    if (lhs->time < rhs->time) {
        return -1;
    }
    if (lhs->time > rhs->time) {
        return 1;
    }
    /* Same time: order by parent node */
    if (lhs->parent < rhs->parent) {
        return -1;
    }
    if (lhs->parent > rhs->parent) {
        return 1;
    }
    /* Same parent: order by child node */
    if (lhs->child < rhs->child) {
        return -1;
    }
    if (lhs->child > rhs->child) {
        return 1;
    }
    /* Same child: order by left coordinate */
    return (lhs->left > rhs->left) - (lhs->left < rhs->left);
}
/* qsort comparator for migration_sort_t records. All keys ascending: time,
 * then source population, then dest population, then left coordinate, then
 * node. */
static int
cmp_migration(const void *a, const void *b)
{
    const migration_sort_t *lhs = (const migration_sort_t *) a;
    const migration_sort_t *rhs = (const migration_sort_t *) b;

    if (lhs->time < rhs->time) {
        return -1;
    }
    if (lhs->time > rhs->time) {
        return 1;
    }
    /* Same time: order by source population */
    if (lhs->source < rhs->source) {
        return -1;
    }
    if (lhs->source > rhs->source) {
        return 1;
    }
    /* Same source: order by destination population */
    if (lhs->dest < rhs->dest) {
        return -1;
    }
    if (lhs->dest > rhs->dest) {
        return 1;
    }
    /* Same destination: order by left coordinate */
    if (lhs->left < rhs->left) {
        return -1;
    }
    if (lhs->left > rhs->left) {
        return 1;
    }
    /* Everything else equal: order by node */
    return (lhs->node > rhs->node) - (lhs->node < rhs->node);
}
/* Default edge sorting method. Sorts the rows of the edge table from row
 * ``start`` to the end of the table in place, using cmp_edge (parent time,
 * parent, child, left). Rows before ``start`` are left untouched.
 *
 * If the table has metadata, the metadata of the sorted rows is permuted
 * along with them. Returns 0 on success or TSK_ERR_NO_MEMORY if the
 * temporary buffers cannot be allocated.
 */
static int
tsk_table_sorter_sort_edges(tsk_table_sorter_t *self, tsk_size_t start)
{
    int ret = 0;
    const tsk_edge_table_t *edges = &self->tables->edges;
    const double *restrict node_time = self->tables->nodes.time;
    edge_sort_t *e;
    tsk_size_t j, k, metadata_offset;
    tsk_size_t n = edges->num_rows - start;
    edge_sort_t *sorted_edges = tsk_malloc(n * sizeof(*sorted_edges));
    char *old_metadata = tsk_malloc(edges->metadata_length);
    bool has_metadata = tsk_edge_table_has_metadata(edges);

    if (sorted_edges == NULL || old_metadata == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* Keep a copy of the whole metadata column; the offsets saved in each
     * sort record index into this copy. */
    tsk_memcpy(old_metadata, edges->metadata, edges->metadata_length);
    for (j = 0; j < n; j++) {
        e = sorted_edges + j;
        k = start + j;
        e->left = edges->left[k];
        e->right = edges->right[k];
        e->parent = edges->parent[k];
        e->child = edges->child[k];
        e->time = node_time[e->parent];
        if (has_metadata) {
            e->metadata_offset = edges->metadata_offset[k];
            e->metadata_length
                = edges->metadata_offset[k + 1] - edges->metadata_offset[k];
        }
    }

    qsort(sorted_edges, (size_t) n, sizeof(edge_sort_t), cmp_edge);

    /* Copy the edges back into the table. The metadata of the sorted rows
     * must be written back to the region those rows occupied, which begins
     * at metadata_offset[start]. Starting at zero would overwrite the
     * metadata of the untouched rows before ``start`` and leave the offset
     * column non-monotonic. */
    metadata_offset = 0;
    if (has_metadata) {
        metadata_offset = edges->metadata_offset[start];
    }
    for (j = 0; j < n; j++) {
        e = sorted_edges + j;
        k = start + j;
        edges->left[k] = e->left;
        edges->right[k] = e->right;
        edges->parent[k] = e->parent;
        edges->child[k] = e->child;
        if (has_metadata) {
            tsk_memcpy(edges->metadata + metadata_offset,
                old_metadata + e->metadata_offset, e->metadata_length);
            edges->metadata_offset[k] = metadata_offset;
            metadata_offset += e->metadata_length;
        }
    }
out:
    tsk_safe_free(sorted_edges);
    tsk_safe_free(old_metadata);
    return ret;
}
/* Sorts the rows of the migration table from row ``start`` to the end of the
 * table in place, using cmp_migration (time, source, dest, left, node). Rows
 * before ``start`` are left untouched, and the metadata of the sorted rows
 * is permuted along with them.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if the temporary buffers cannot
 * be allocated.
 */
static int
tsk_table_sorter_sort_migrations(tsk_table_sorter_t *self, tsk_size_t start)
{
    int ret = 0;
    const tsk_migration_table_t *migrations = &self->tables->migrations;
    migration_sort_t *m;
    tsk_size_t j, k, metadata_offset;
    tsk_size_t n = migrations->num_rows - start;
    migration_sort_t *sorted_migrations = tsk_malloc(n * sizeof(*sorted_migrations));
    char *old_metadata = tsk_malloc(migrations->metadata_length);

    if (sorted_migrations == NULL || old_metadata == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* Keep a copy of the whole metadata column; the offsets saved in each
     * sort record index into this copy. */
    tsk_memcpy(old_metadata, migrations->metadata, migrations->metadata_length);
    for (j = 0; j < n; j++) {
        m = sorted_migrations + j;
        k = start + j;
        m->left = migrations->left[k];
        m->right = migrations->right[k];
        m->node = migrations->node[k];
        m->source = migrations->source[k];
        m->dest = migrations->dest[k];
        m->time = migrations->time[k];
        m->metadata_offset = migrations->metadata_offset[k];
        m->metadata_length
            = migrations->metadata_offset[k + 1] - migrations->metadata_offset[k];
    }

    qsort(sorted_migrations, (size_t) n, sizeof(migration_sort_t), cmp_migration);

    /* Copy the migrations back into the table. The metadata of the sorted
     * rows must be written back to the region those rows occupied, which
     * begins at metadata_offset[start]. Starting at zero would overwrite the
     * metadata of the untouched rows before ``start`` and leave the offset
     * column non-monotonic. */
    metadata_offset = migrations->metadata_offset[start];
    for (j = 0; j < n; j++) {
        m = sorted_migrations + j;
        k = start + j;
        migrations->left[k] = m->left;
        migrations->right[k] = m->right;
        migrations->node[k] = m->node;
        migrations->source[k] = m->source;
        migrations->dest[k] = m->dest;
        migrations->time[k] = m->time;
        tsk_memcpy(migrations->metadata + metadata_offset,
            old_metadata + m->metadata_offset, m->metadata_length);
        migrations->metadata_offset[k] = metadata_offset;
        metadata_offset += m->metadata_length;
    }
out:
    tsk_safe_free(sorted_migrations);
    tsk_safe_free(old_metadata);
    return ret;
}
/* Sorts the whole site table using cmp_site (position, then ID) and records
 * the resulting old-ID -> new-ID mapping in self->site_id_map.
 *
 * The table is rebuilt row by row from a temporary copy. Returns 0 on
 * success, or a negative error code if the copy, the allocation or the
 * re-insertion of a row fails.
 */
static int
tsk_table_sorter_sort_sites(tsk_table_sorter_t *self)
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_site_table_t *sites = &self->tables->sites;
    tsk_site_table_t copy;
    tsk_size_t j;
    tsk_size_t num_sites = sites->num_rows;
    tsk_site_t *sorted_sites = tsk_malloc(num_sites * sizeof(*sorted_sites));

    /* The copy is made before any other jump to ``out``, where it is freed */
    ret = tsk_site_table_copy(sites, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    if (sorted_sites == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* The row structs reference the copy, so they remain usable after the
     * live table is cleared below. */
    for (j = 0; j < num_sites; j++) {
        tsk_site_table_get_row_unsafe(&copy, (tsk_id_t) j, sorted_sites + j);
    }

    /* Sort the sites by position */
    qsort(sorted_sites, (size_t) num_sites, sizeof(*sorted_sites), cmp_site);

    /* Build the mapping from old site IDs to new site IDs and copy back into
     * the table */
    tsk_site_table_clear(sites);
    for (j = 0; j < num_sites; j++) {
        self->site_id_map[sorted_sites[j].id] = (tsk_id_t) j;
        ret_id = tsk_site_table_add_row(sites, sorted_sites[j].position,
            sorted_sites[j].ancestral_state, sorted_sites[j].ancestral_state_length,
            sorted_sites[j].metadata, sorted_sites[j].metadata_length);
        if (ret_id < 0) {
            ret = (int) ret_id;
            goto out;
        }
    }
    ret = 0;
out:
    tsk_safe_free(sorted_sites);
    tsk_site_table_free(&copy);
    return ret;
}
/* Default mutation sorting method. Sorts the whole mutation table using
 * cmp_mutation, after remapping each mutation's site through
 * self->site_id_map, and rewrites the parent column so that it refers to the
 * new mutation IDs.
 *
 * Returns 0 on success, TSK_ERR_MUTATION_PARENT_INCONSISTENT if following
 * parent links yields more descendants than there are mutations (which
 * indicates a cycle), or another negative error code on allocation or table
 * failure.
 */
static int
tsk_table_sorter_sort_mutations(tsk_table_sorter_t *self)
{
    int ret = 0;
    tsk_size_t j;
    tsk_id_t ret_id, parent, mapped_parent, p;
    tsk_mutation_table_t *mutations = &self->tables->mutations;
    tsk_node_table_t *nodes = &self->tables->nodes;
    tsk_size_t num_mutations = mutations->num_rows;
    tsk_mutation_table_t copy;
    mutation_sort_t *sorted_mutations
        = tsk_malloc(num_mutations * sizeof(*sorted_mutations));
    tsk_id_t *mutation_id_map = tsk_malloc(num_mutations * sizeof(*mutation_id_map));

    /* The copy is made before any other jump to ``out``, where it is freed */
    ret = tsk_mutation_table_copy(mutations, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    if (mutation_id_map == NULL || sorted_mutations == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* compute numbers of descendants for each mutation */
    for (j = 0; j < num_mutations; j++) {
        sorted_mutations[j].num_descendants = 0;
    }
    for (j = 0; j < num_mutations; j++) {
        /* Walk up the parent chain, crediting every ancestor with one more
         * descendant. */
        p = mutations->parent[j];
        while (p != TSK_NULL) {
            sorted_mutations[p].num_descendants += 1;
            /* No mutation can have more descendants than there are
             * mutations, so exceeding that means the chain loops. */
            if (sorted_mutations[p].num_descendants > (int) num_mutations) {
                ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_INCONSISTENT);
                goto out;
            }
            p = mutations->parent[p];
        }
    }

    for (j = 0; j < num_mutations; j++) {
        tsk_mutation_table_get_row_unsafe(&copy, (tsk_id_t) j, &sorted_mutations[j].mut);
        sorted_mutations[j].mut.site = self->site_id_map[sorted_mutations[j].mut.site];
        sorted_mutations[j].node_time = nodes->time[sorted_mutations[j].mut.node];
    }
    ret = tsk_mutation_table_clear(mutations);
    if (ret != 0) {
        goto out;
    }

    qsort(sorted_mutations, (size_t) num_mutations, sizeof(*sorted_mutations),
        cmp_mutation);

    /* Make a first pass through the sorted mutations to build the ID map. */
    for (j = 0; j < num_mutations; j++) {
        mutation_id_map[sorted_mutations[j].mut.id] = (tsk_id_t) j;
    }

    /* Second pass: re-insert the rows in sorted order with remapped parents */
    for (j = 0; j < num_mutations; j++) {
        mapped_parent = TSK_NULL;
        parent = sorted_mutations[j].mut.parent;
        if (parent != TSK_NULL) {
            mapped_parent = mutation_id_map[parent];
        }
        ret_id = tsk_mutation_table_add_row(mutations, sorted_mutations[j].mut.site,
            sorted_mutations[j].mut.node, mapped_parent, sorted_mutations[j].mut.time,
            sorted_mutations[j].mut.derived_state,
            sorted_mutations[j].mut.derived_state_length,
            sorted_mutations[j].mut.metadata, sorted_mutations[j].mut.metadata_length);
        if (ret_id < 0) {
            ret = (int) ret_id;
            goto out;
        }
    }
    ret = 0;
out:
    tsk_safe_free(mutation_id_map);
    tsk_safe_free(sorted_mutations);
    tsk_mutation_table_free(&copy);
    return ret;
}
/* Computes a children-before-parents ordering of the individuals in ``self``.
 *
 * On success ``traversal_order`` (one slot per individual) holds individual
 * IDs such that every individual appears before all of its parents. If
 * ``num_descendants`` is not NULL it is filled, per individual, with the
 * running total accumulated as ``1 + num_descendants[child]`` over each
 * child link.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY on allocation failure, or
 * TSK_ERR_INDIVIDUAL_PARENT_CYCLE if the parent links contain a cycle.
 */
static int
tsk_individual_table_topological_sort(
    tsk_individual_table_t *self, tsk_id_t *traversal_order, tsk_size_t *num_descendants)
{
    int ret = 0;
    tsk_id_t k, child, parent;
    tsk_individual_t row;
    const tsk_size_t num_individuals = self->num_rows;
    const bool count_descendants = (num_descendants != NULL);
    /* traversal_order doubles as the work queue: entries in [head, tail)
     * are waiting to be processed. */
    tsk_size_t head = 0;
    tsk_size_t tail = 0;
    tsk_size_t *num_children = tsk_malloc(num_individuals * sizeof(*num_children));

    if (num_children == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    for (k = 0; k < (tsk_id_t) num_individuals; k++) {
        num_children[k] = 0;
        traversal_order[k] = TSK_NULL;
    }
    if (count_descendants) {
        for (k = 0; k < (tsk_id_t) num_individuals; k++) {
            num_descendants[k] = 0;
        }
    }

    /* Count, for every individual, how many parent slots refer to it */
    for (k = 0; k < (tsk_id_t) self->parents_length; k++) {
        parent = self->parents[k];
        if (parent != TSK_NULL) {
            num_children[parent]++;
        }
    }

    /* Seed the queue with the childless individuals. Walking the IDs in
     * reverse makes the sort stable. */
    k = (tsk_id_t) num_individuals;
    while (k > 0) {
        k--;
        if (num_children[k] == 0) {
            traversal_order[tail] = k;
            tail++;
        }
    }

    /* Process the queue: each processed individual releases its parents,
     * and a parent joins the queue once its last child has been handled. */
    for (; head < tail; head++) {
        child = traversal_order[head];
        tsk_individual_table_get_row_unsafe(self, child, &row);
        for (k = 0; k < (tsk_id_t) row.parents_length; k++) {
            parent = row.parents[k];
            if (parent == TSK_NULL) {
                continue;
            }
            num_children[parent]--;
            if (count_descendants) {
                num_descendants[parent] += 1 + num_descendants[child];
            }
            if (num_children[parent] == 0) {
                traversal_order[tail] = parent;
                tail++;
            }
        }
    }

    /* Anything with an outstanding child count is part of a cycle */
    for (k = 0; k < (tsk_id_t) num_individuals; k++) {
        if (num_children[k] > 0) {
            ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_PARENT_CYCLE);
            goto out;
        }
    }
out:
    tsk_safe_free(num_children);
    return ret;
}
/* Reorders the individual table so that parents come before their children,
 * and rewrites the individual parent column and the node table's individual
 * column to use the new IDs. ``options`` is currently unused.
 *
 * Returns 0 on success. On failure a negative error code is returned:
 * TSK_ERR_NO_MEMORY, an integrity-check error, or
 * TSK_ERR_INDIVIDUAL_PARENT_CYCLE if the parent links contain a cycle. The
 * individual table is only cleared once the ordering has been computed, so
 * it is left intact when any of those errors is reported.
 */
int
tsk_table_collection_individual_topological_sort(
    tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    tsk_id_t i, ret_id;
    tsk_individual_table_t copy;
    tsk_individual_t individual;
    tsk_individual_table_t *individuals = &self->individuals;
    tsk_node_table_t *nodes = &self->nodes;
    tsk_size_t num_individuals = individuals->num_rows;
    tsk_id_t *traversal_order = tsk_malloc(num_individuals * sizeof(*traversal_order));
    tsk_id_t *new_id_map = tsk_malloc(num_individuals * sizeof(*new_id_map));

    /* The copy must be made before any other jump to ``out``: it is freed
     * there, and freeing an uninitialised table is undefined behaviour. */
    ret = tsk_individual_table_copy(individuals, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    if (new_id_map == NULL || traversal_order == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* Fill the map with all-ones bytes */
    tsk_memset(new_id_map, 0xff, num_individuals * sizeof(*new_id_map));

    ret_id = tsk_table_collection_check_integrity(self, 0);
    if (ret_id != 0) {
        ret = (int) ret_id;
        goto out;
    }

    /* Compute the ordering from the copy before touching the live table, so
     * that a parent cycle is reported without discarding the individuals. */
    ret = tsk_individual_table_topological_sort(&copy, traversal_order, NULL);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_individual_table_clear(individuals);
    if (ret != 0) {
        goto out;
    }

    /* The sorted individuals are in reverse order */
    for (i = (tsk_id_t) num_individuals - 1; i >= 0; i--) {
        tsk_individual_table_get_row_unsafe(&copy, traversal_order[i], &individual);
        ret_id = tsk_individual_table_add_row(individuals, individual.flags,
            individual.location, individual.location_length, individual.parents,
            individual.parents_length, individual.metadata, individual.metadata_length);
        if (ret_id < 0) {
            ret = (int) ret_id;
            goto out;
        }
        new_id_map[traversal_order[i]] = ret_id;
    }

    /* Rewrite the parent ids */
    for (i = 0; i < (tsk_id_t) individuals->parents_length; i++) {
        if (individuals->parents[i] != TSK_NULL) {
            individuals->parents[i] = new_id_map[individuals->parents[i]];
        }
    }
    /* Rewrite the node individual ids */
    for (i = 0; i < (tsk_id_t) nodes->num_rows; i++) {
        if (nodes->individual[i] != TSK_NULL) {
            nodes->individual[i] = new_id_map[nodes->individual[i]];
        }
    }
    ret = 0;
out:
    tsk_safe_free(traversal_order);
    tsk_safe_free(new_id_map);
    tsk_individual_table_free(&copy);
    return ret;
}
/* Sorts the individual table into canonical order using
 * cmp_individual_canonical (most descendants first, then first referring
 * node, then original ID), remapping the individual parent column and the
 * node table's individual column to the new IDs.
 *
 * Returns 0 on success, or a negative error code: TSK_ERR_NO_MEMORY,
 * TSK_ERR_INDIVIDUAL_PARENT_CYCLE, or a table error. The individual table is
 * only cleared once the descendant counts have been computed, so it is left
 * intact when a parent cycle is reported.
 */
static int
tsk_table_sorter_sort_individuals_canonical(tsk_table_sorter_t *self)
{
    int ret = 0;
    tsk_id_t ret_id, i, j, parent, mapped_parent;
    tsk_individual_table_t *individuals = &self->tables->individuals;
    tsk_node_table_t *nodes = &self->tables->nodes;
    tsk_individual_table_t copy;
    tsk_size_t num_individuals = individuals->num_rows;
    individual_canonical_sort_t *sorted_individuals
        = tsk_malloc(num_individuals * sizeof(*sorted_individuals));
    tsk_id_t *individual_id_map
        = tsk_malloc(num_individuals * sizeof(*individual_id_map));
    tsk_size_t *num_descendants = tsk_malloc(num_individuals * sizeof(*num_descendants));
    tsk_id_t *traversal_order = tsk_malloc(num_individuals * sizeof(*traversal_order));

    /* The copy must be made before any other jump to ``out``: it is freed
     * there, and freeing an uninitialised table is undefined behaviour. */
    ret = tsk_individual_table_copy(individuals, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    if (individual_id_map == NULL || sorted_individuals == NULL
        || traversal_order == NULL || num_descendants == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Work from the copy before touching the live table, so that a parent
     * cycle is reported without discarding the individuals. Only the
     * descendant counts are used below; traversal_order is just scratch
     * space required by the call. */
    ret = tsk_individual_table_topological_sort(&copy, traversal_order, num_descendants);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_individual_table_clear(individuals);
    if (ret != 0) {
        goto out;
    }

    for (i = 0; i < (tsk_id_t) num_individuals; i++) {
        sorted_individuals[i].num_descendants = num_descendants[i];
        /* num_rows acts as "no referring node" and sorts after every real ID */
        sorted_individuals[i].first_node = (tsk_id_t) nodes->num_rows;
    }

    /* find first referring node */
    for (j = 0; j < (tsk_id_t) nodes->num_rows; j++) {
        if (nodes->individual[j] != TSK_NULL) {
            sorted_individuals[nodes->individual[j]].first_node
                = TSK_MIN(j, sorted_individuals[nodes->individual[j]].first_node);
        }
    }

    for (j = 0; j < (tsk_id_t) num_individuals; j++) {
        tsk_individual_table_get_row_unsafe(
            &copy, (tsk_id_t) j, &sorted_individuals[j].ind);
    }

    qsort(sorted_individuals, (size_t) num_individuals, sizeof(*sorted_individuals),
        cmp_individual_canonical);

    /* Make a first pass through the sorted individuals to build the ID map. */
    for (j = 0; j < (tsk_id_t) num_individuals; j++) {
        individual_id_map[sorted_individuals[j].ind.id] = (tsk_id_t) j;
    }

    /* Second pass: remap each row's parents in place (they live in the
     * copy) and append the row to the live table. */
    for (i = 0; i < (tsk_id_t) num_individuals; i++) {
        for (j = 0; j < (tsk_id_t) sorted_individuals[i].ind.parents_length; j++) {
            parent = sorted_individuals[i].ind.parents[j];
            if (parent != TSK_NULL) {
                mapped_parent = individual_id_map[parent];
                sorted_individuals[i].ind.parents[j] = mapped_parent;
            }
        }
        ret_id = tsk_individual_table_add_row(individuals,
            sorted_individuals[i].ind.flags, sorted_individuals[i].ind.location,
            sorted_individuals[i].ind.location_length, sorted_individuals[i].ind.parents,
            sorted_individuals[i].ind.parents_length, sorted_individuals[i].ind.metadata,
            sorted_individuals[i].ind.metadata_length);
        if (ret_id < 0) {
            ret = (int) ret_id;
            goto out;
        }
    }
    ret = 0;

    /* remap individuals in the node table */
    for (i = 0; i < (tsk_id_t) nodes->num_rows; i++) {
        j = nodes->individual[i];
        if (j != TSK_NULL) {
            nodes->individual[i] = individual_id_map[j];
        }
    }
out:
    tsk_safe_free(sorted_individuals);
    tsk_safe_free(individual_id_map);
    tsk_safe_free(traversal_order);
    tsk_safe_free(num_descendants);
    tsk_individual_table_free(&copy);
    return ret;
}
/* Runs the sorter over its table collection.
 *
 * ``start`` may be NULL to sort everything. Otherwise its ``edges`` and
 * ``migrations`` fields give the first row to sort in those tables, while
 * ``sites`` and ``mutations`` must either both be zero (sort them fully) or
 * both equal their table's row count (skip sorting them).
 *
 * The indexes are dropped, then edges, migrations, sites + mutations and
 * (if a method is installed) individuals are sorted in that order.
 * Returns 0 on success or a negative error code.
 */
int
tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
{
    int ret = 0;
    tsk_size_t edge_start = 0;
    tsk_size_t migration_start = 0;
    bool sort_sites_and_mutations = true;

    if (start != NULL) {
        if (start->edges > self->tables->edges.num_rows) {
            ret = tsk_trace_error(TSK_ERR_EDGE_OUT_OF_BOUNDS);
            goto out;
        }
        if (start->migrations > self->tables->migrations.num_rows) {
            ret = tsk_trace_error(TSK_ERR_MIGRATION_OUT_OF_BOUNDS);
            goto out;
        }
        edge_start = start->edges;
        migration_start = start->migrations;

        /* Site and mutation offsets are only accepted as an all-or-nothing
         * switch: both at the end of their tables means "skip", both zero
         * means "sort"; anything else is unsupported. */
        if (start->sites == self->tables->sites.num_rows
            && start->mutations == self->tables->mutations.num_rows) {
            sort_sites_and_mutations = false;
        } else if (start->sites != 0 || start->mutations != 0) {
            ret = tsk_trace_error(TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
            goto out;
        }
    }

    /* Sorting invalidates the indexes, so get rid of them first */
    ret = tsk_table_collection_drop_index(self->tables, 0);
    if (ret != 0) {
        goto out;
    }

    if (self->sort_edges != NULL) {
        ret = self->sort_edges(self, edge_start);
        if (ret != 0) {
            goto out;
        }
    }

    /* Sorting an empty migration table is a no-op, and that is the common
     * case, so skip the call entirely */
    if (self->tables->migrations.num_rows > 0) {
        ret = tsk_table_sorter_sort_migrations(self, migration_start);
        if (ret != 0) {
            goto out;
        }
    }

    if (sort_sites_and_mutations) {
        /* Sites go first: the mutation sort consumes the site ID map */
        ret = tsk_table_sorter_sort_sites(self);
        if (ret != 0) {
            goto out;
        }
        ret = self->sort_mutations(self);
        if (ret != 0) {
            goto out;
        }
    }

    if (self->sort_individuals != NULL) {
        ret = self->sort_individuals(self);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Initialises a sorter for ``tables``.
 *
 * Unless TSK_NO_CHECK_INTEGRITY is set in ``options``, the integrity of the
 * table collection is checked first. The sorter is zeroed, given a site ID
 * map with one entry per site, and set up with the default edge and mutation
 * sort methods; no individual sort method is installed.
 *
 * Returns 0 on success or a negative error code.
 */
int
tsk_table_sorter_init(
    tsk_table_sorter_t *self, tsk_table_collection_t *tables, tsk_flags_t options)
{
    int ret = 0;
    tsk_id_t ret_id;
    const bool check_integrity = (options & TSK_NO_CHECK_INTEGRITY) == 0;

    tsk_memset(self, 0, sizeof(tsk_table_sorter_t));

    if (check_integrity) {
        ret_id = tsk_table_collection_check_integrity(tables, 0);
        if (ret_id != 0) {
            ret = (int) ret_id;
            goto out;
        }
    }

    self->tables = tables;
    self->site_id_map = tsk_malloc(tables->sites.num_rows * sizeof(tsk_id_t));
    if (self->site_id_map == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Install the default methods. Individuals are not touched by the
     * default sort, so that slot stays empty. */
    self->sort_individuals = NULL;
    self->sort_mutations = tsk_table_sorter_sort_mutations;
    self->sort_edges = tsk_table_sorter_sort_edges;
out:
    return ret;
}
/* Releases the site ID map owned by the sorter. The table collection the
 * sorter points at is not freed here. Always returns 0. */
int
tsk_table_sorter_free(tsk_table_sorter_t *self)
{
    int ret = 0;

    tsk_safe_free(self->site_id_map);
    return ret;
}
/*************************
* segment overlapper
*************************/
/* Node of a singly linked list of (left, right) coordinate pairs. Used to
 * buffer the intervals of the output edges for a given child. */
typedef struct _interval_list_t {
double left;
double right;
/* Next interval in the list, or NULL at the tail */
struct _interval_list_t *next;
} interval_list_t;
/* Node of a singly linked list of mutation IDs. */
typedef struct _mutation_id_list_t {
tsk_id_t mutation;
/* Next entry in the list (presumably NULL at the tail) */
struct _mutation_id_list_t *next;
} mutation_id_list_t;
/* A (left, right) coordinate pair tagged with a node ID. Segments are held
 * both in linked lists (via ``next``) and in flat arrays that are sorted
 * with cmp_segment. */
typedef struct _tsk_segment_t {
double left;
double right;
/* Next segment when held in a linked list */
struct _tsk_segment_t *next;
tsk_id_t node;
} tsk_segment_t;
/* State for the segment overlap finding algorithm. After
 * segment_overlapper_start() has been given a buffer of segments, repeated
 * calls to segment_overlapper_next() report successive intervals along with
 * the segments overlapping each of them. */
typedef struct {
/* The input segments. This buffer is sorted by the algorithm and we also
 * assume that there is space for an extra element at the end */
tsk_segment_t *segments;
tsk_size_t num_segments;
/* Position in ``segments`` of the next segment to be consumed */
tsk_size_t index;
/* Number of valid entries in ``overlapping`` */
tsk_size_t num_overlapping;
/* Bounds of the interval most recently reported */
double left;
double right;
/* Output buffer */
/* Capacity of ``overlapping``, in pointers */
tsk_size_t max_overlapping;
tsk_segment_t **overlapping;
} segment_overlapper_t;
/* Working state of the simplifier.
 * NOTE(review): the roles of fields that carry no comment of their own are
 * only suggested by their names; confirm against the simplifier functions. */
typedef struct {
tsk_size_t num_samples;
tsk_flags_t options;
tsk_table_collection_t *tables;
/* Keep a copy of the input tables */
tsk_table_collection_t input_tables;
/* State for topology */
tsk_segment_t **ancestor_map_head;
tsk_segment_t **ancestor_map_tail;
/* Mapping of input node IDs to output node IDs. */
tsk_id_t *node_id_map;
bool *is_sample;
/* Segments for a particular parent that are processed together */
tsk_segment_t *segment_queue;
tsk_size_t segment_queue_size;
tsk_size_t max_segment_queue_size;
segment_overlapper_t segment_overlapper;
tsk_blkalloc_t segment_heap;
/* Buffer for output edges. For each child we keep a linked list of
 * intervals, and also store the actual children that have been buffered. */
tsk_blkalloc_t interval_list_heap;
interval_list_t **child_edge_map_head;
interval_list_t **child_edge_map_tail;
tsk_id_t *buffered_children;
tsk_size_t num_buffered_children;
/* For each mutation, map its output node. */
tsk_id_t *mutation_node_map;
/* Map of input nodes to the list of input mutation IDs */
mutation_id_list_t **node_mutation_list_map_head;
mutation_id_list_t **node_mutation_list_map_tail;
mutation_id_list_t *node_mutation_list_mem;
/* When reducing topology, we need a map from positions to their
 * corresponding sites. */
double *position_lookup;
int64_t edge_sort_offset;
} simplifier_t;
/* qsort comparator for tsk_segment_t records: ascending left coordinate,
 * with ties broken by ascending node ID. */
static int
cmp_segment(const void *a, const void *b)
{
    const tsk_segment_t *lhs = (const tsk_segment_t *) a;
    const tsk_segment_t *rhs = (const tsk_segment_t *) b;

    if (lhs->left < rhs->left) {
        return -1;
    }
    if (lhs->left > rhs->left) {
        return 1;
    }
    /* Same left coordinate: the node decides */
    return (lhs->node > rhs->node) - (lhs->node < rhs->node);
}
/* Zeroes the overlapper and allocates its initial output buffer.
 * Returns 0 on success or TSK_ERR_NO_MEMORY. */
static int TSK_WARN_UNUSED
segment_overlapper_alloc(segment_overlapper_t *self)
{
    int ret = 0;

    tsk_memset(self, 0, sizeof(*self));
    /* Deliberately small, so that the tsk_realloc path is exercised in tests */
    self->max_overlapping = 8;
    self->overlapping = tsk_malloc(self->max_overlapping * sizeof(*self->overlapping));
    if (self->overlapping == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    return ret;
}
/* Releases the overlapper's output buffer. The input segment buffer is not
 * freed here. Always returns 0. */
static int
segment_overlapper_free(segment_overlapper_t *self)
{
    int ret = 0;

    tsk_safe_free(self->overlapping);
    return ret;
}
/* Initialise the segment overlapper for use. Note that the segments
 * array must have space for num_segments + 1 elements!
 *
 * The segments are sorted in place with cmp_segment and a sentinel whose
 * left coordinate is DBL_MAX is written at index num_segments. The output
 * buffer is grown if it cannot hold num_segments pointers.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY, in which case the overlapper's
 * existing buffer and recorded capacity are left unchanged.
 */
static int TSK_WARN_UNUSED
segment_overlapper_start(
    segment_overlapper_t *self, tsk_segment_t *segments, tsk_size_t num_segments)
{
    int ret = 0;
    tsk_segment_t *sentinel;
    void *p;

    if (self->max_overlapping < num_segments) {
        p = tsk_realloc(self->overlapping, num_segments * sizeof(*self->overlapping));
        if (p == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        /* Only record the new capacity once the buffer really has it, so
         * that the two never disagree after a failed reallocation. */
        self->overlapping = p;
        self->max_overlapping = num_segments;
    }
    self->segments = segments;
    self->num_segments = num_segments;
    self->index = 0;
    self->num_overlapping = 0;
    self->left = 0;
    self->right = DBL_MAX;

    /* Sort the segments in the buffer by left coordinate */
    qsort(
        self->segments, (size_t) self->num_segments, sizeof(tsk_segment_t), cmp_segment);
    /* NOTE! We are assuming that there's space for another element on the end
     * here. This is to insert a sentinel which simplifies the logic. */
    sentinel = self->segments + self->num_segments;
    sentinel->left = DBL_MAX;
out:
    return ret;
}
/* Advances the overlapper to its next interval.
 *
 * On return ``*left`` and ``*right`` hold the bounds of the interval and
 * ``*overlapping`` / ``*num_overlapping`` describe the segments recorded as
 * overlapping it; the array is the overlapper's own buffer and is rewritten
 * by the following call. The outputs are written on every call.
 *
 * Returns 1 if an interval was produced and 0 once the input is exhausted
 * and no overlapping segments remain.
 */
static int TSK_WARN_UNUSED
segment_overlapper_next(segment_overlapper_t *self, double *left, double *right,
tsk_segment_t ***overlapping, tsk_size_t *num_overlapping)
{
int ret = 0;
tsk_size_t j, k;
tsk_size_t n = self->num_segments;
tsk_segment_t *S = self->segments;
if (self->index < n) {
/* There are still unconsumed input segments. The new interval starts
 * where the previous one ended. */
self->left = self->right;
/* Remove any elements of X with right <= left */
k = 0;
for (j = 0; j < self->num_overlapping; j++) {
if (self->overlapping[j]->right > self->left) {
self->overlapping[k] = self->overlapping[j];
k++;
}
}
self->num_overlapping = k;
/* Nothing carried over: jump ahead to the next input segment */
if (k == 0) {
self->left = S[self->index].left;
}
/* Pull in every input segment that starts exactly at ``left`` */
while (self->index < n && S[self->index].left == self->left) {
tsk_bug_assert(self->num_overlapping < self->max_overlapping);
self->overlapping[self->num_overlapping] = &S[self->index];
self->num_overlapping++;
self->index++;
}
/* Step back one so that S[index + 1] is the first segment not yet
 * consumed (or the sentinel); the increment below undoes this. */
self->index--;
/* The interval ends at the next segment start or at the nearest right
 * end among the overlapping segments, whichever comes first. */
self->right = S[self->index + 1].left;
for (j = 0; j < self->num_overlapping; j++) {
self->right = TSK_MIN(self->right, self->overlapping[j]->right);
}
tsk_bug_assert(self->left < self->right);
self->index++;
ret = 1;
} else {
/* All input segments have been consumed: drain what is left of the
 * overlapping set. */
self->left = self->right;
self->right = DBL_MAX;
k = 0;
for (j = 0; j < self->num_overlapping; j++) {
if (self->overlapping[j]->right > self->left) {
self->right = TSK_MIN(self->right, self->overlapping[j]->right);
self->overlapping[k] = self->overlapping[j];
k++;
}
}
self->num_overlapping = k;
if (k > 0) {
ret = 1;
}
}
*left = self->left;
*right = self->right;
*overlapping = self->overlapping;
*num_overlapping = self->num_overlapping;
return ret;
}
/* qsort comparator ordering tsk_id_t values ascending. */
static int
cmp_node_id(const void *a, const void *b)
{
    const tsk_id_t lhs = *(const tsk_id_t *) a;
    const tsk_id_t rhs = *(const tsk_id_t *) b;

    if (lhs < rhs) {
        return -1;
    }
    return lhs > rhs;
}
/*************************
* Ancestor mapper
*************************/
/* NOTE: this struct shares a lot with the simplifier_t, mostly in
* terms of infrastructure for managing the list of intervals, saving
* edges etc. We should try to abstract the common functionality out
* into a separate class, which handles this.
*/
/* Working state of the ancestor mapper. */
typedef struct {
/* Caller-provided sample node IDs; the pointer is stored, not copied */
tsk_id_t *samples;
tsk_size_t num_samples;
/* Caller-provided ancestor node IDs; the pointer is stored, not copied */
tsk_id_t *ancestors;
tsk_size_t num_ancestors;
/* The input tables */
tsk_table_collection_t *tables;
/* Edge table that receives the output edges; it is cleared during
 * initialisation */
tsk_edge_table_t *result;
/* Per input node: head and tail of its linked list of ancestry segments */
tsk_segment_t **ancestor_map_head;
tsk_segment_t **ancestor_map_tail;
/* Per node flags recording membership of ``samples`` / ``ancestors`` */
bool *is_sample;
bool *is_ancestor;
/* Growable array of queued segments: segment_queue_size entries are in
 * use out of a capacity of max_segment_queue_size, and one slot is always
 * kept spare for a tail sentinel */
tsk_segment_t *segment_queue;
tsk_size_t segment_queue_size;
tsk_size_t max_segment_queue_size;
segment_overlapper_t segment_overlapper;
/* Block allocator backing the tsk_segment_t list nodes */
tsk_blkalloc_t segment_heap;
/* Block allocator backing the interval_list_t nodes; reset whenever the
 * buffered edges are flushed */
tsk_blkalloc_t interval_list_heap;
/* Per child node: head and tail of its list of buffered edge intervals */
interval_list_t **child_edge_map_head;
interval_list_t **child_edge_map_tail;
/* The children that currently have buffered intervals */
tsk_id_t *buffered_children;
tsk_size_t num_buffered_children;
/* Copy of tables->sequence_length taken at initialisation */
double sequence_length;
/* Largest node time found among the ancestors and the samples */
double oldest_node_time;
} ancestor_mapper_t;
/* Takes a segment from the mapper's segment heap and fills it in with the
 * given coordinates and node, with no successor. Returns NULL if the heap
 * cannot supply the memory. */
static tsk_segment_t *TSK_WARN_UNUSED
ancestor_mapper_alloc_segment(
    ancestor_mapper_t *self, double left, double right, tsk_id_t node)
{
    tsk_segment_t *seg = tsk_blkalloc_get(&self->segment_heap, sizeof(*seg));

    if (seg != NULL) {
        seg->left = left;
        seg->right = right;
        seg->node = node;
        seg->next = NULL;
    }
    return seg;
}
/* Takes an interval list node from the mapper's interval heap and fills it
 * in with the given coordinates, with no successor. Returns NULL if the
 * heap cannot supply the memory. */
static interval_list_t *TSK_WARN_UNUSED
ancestor_mapper_alloc_interval_list(ancestor_mapper_t *self, double left, double right)
{
    interval_list_t *item = tsk_blkalloc_get(&self->interval_list_heap, sizeof(*item));

    if (item != NULL) {
        item->left = left;
        item->right = right;
        item->next = NULL;
    }
    return item;
}
/* Writes every buffered interval out to the result edge table as an edge
 * from ``parent``, visiting the buffered children in ascending ID order, and
 * then empties the buffer and resets the interval heap.
 *
 * On success ``*ret_num_edges`` is set to the number of edges added and the
 * return value is that of the heap reset. If adding a row fails, the
 * negative error code is returned and ``*ret_num_edges`` is not written.
 */
static int
ancestor_mapper_flush_edges(
    ancestor_mapper_t *self, tsk_id_t parent, tsk_size_t *ret_num_edges)
{
    int ret = 0;
    tsk_id_t row_id, child;
    tsk_size_t k;
    tsk_size_t edges_added = 0;
    interval_list_t *interval;

    qsort(self->buffered_children, (size_t) self->num_buffered_children,
        sizeof(tsk_id_t), cmp_node_id);

    for (k = 0; k < self->num_buffered_children; k++) {
        child = self->buffered_children[k];
        interval = self->child_edge_map_head[child];
        while (interval != NULL) {
            row_id = tsk_edge_table_add_row(
                self->result, interval->left, interval->right, parent, child, NULL, 0);
            if (row_id < 0) {
                ret = (int) row_id;
                goto out;
            }
            edges_added++;
            interval = interval->next;
        }
        /* This child's list has been consumed */
        self->child_edge_map_head[child] = NULL;
        self->child_edge_map_tail[child] = NULL;
    }
    self->num_buffered_children = 0;
    *ret_num_edges = edges_added;
    ret = tsk_blkalloc_reset(&self->interval_list_heap);
out:
    return ret;
}
/* Buffers the interval (left, right) for ``child``. If the interval starts
 * exactly where the child's last buffered interval ends, that interval is
 * extended; otherwise a new list node is appended. A child seen for the
 * first time is also added to the list of buffered children.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY.
 */
static int
ancestor_mapper_record_edge(
    ancestor_mapper_t *self, double left, double right, tsk_id_t child)
{
    int ret = 0;
    interval_list_t *last = self->child_edge_map_tail[child];
    interval_list_t *item;

    if (last != NULL && last->right == left) {
        /* Contiguous with the previous interval: just stretch it */
        last->right = right;
        goto out;
    }
    if (last == NULL) {
        /* First interval for this child */
        tsk_bug_assert(self->num_buffered_children < self->tables->nodes.num_rows);
        self->buffered_children[self->num_buffered_children] = child;
        self->num_buffered_children++;
    }
    item = ancestor_mapper_alloc_interval_list(self, left, right);
    if (item == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (last == NULL) {
        self->child_edge_map_head[child] = item;
    } else {
        last->next = item;
    }
    self->child_edge_map_tail[child] = item;
out:
    return ret;
}
/* Appends the segment (left, right) mapping to ``output_id`` to the ancestry
 * list of ``input_id``. If the list's last segment ends exactly at ``left``
 * and maps to the same node it is extended instead of adding a new segment.
 * ``left`` must be strictly less than ``right``.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY.
 */
static int TSK_WARN_UNUSED
ancestor_mapper_add_ancestry(ancestor_mapper_t *self, tsk_id_t input_id, double left,
    double right, tsk_id_t output_id)
{
    int ret = 0;
    tsk_segment_t *last = self->ancestor_map_tail[input_id];
    tsk_segment_t *seg;

    tsk_bug_assert(left < right);

    if (last != NULL && last->right == left && last->node == output_id) {
        /* Contiguous and for the same node: just stretch the last segment */
        last->right = right;
        goto out;
    }
    seg = ancestor_mapper_alloc_segment(self, left, right, output_id);
    if (seg == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (last == NULL) {
        self->ancestor_map_head[input_id] = seg;
    } else {
        last->next = seg;
    }
    self->ancestor_map_tail[input_id] = seg;
out:
    return ret;
}
/* Stores in self->oldest_node_time the largest node time found among the
 * ancestors and the samples. The search starts from -1. */
static void
ancestor_mapper_find_oldest_node(ancestor_mapper_t *self)
{
    const double *node_time = self->tables->nodes.time;
    double oldest = -1;
    double t;
    tsk_size_t k;

    for (k = 0; k < self->num_ancestors; k++) {
        t = node_time[self->ancestors[k]];
        oldest = TSK_MAX(oldest, t);
    }
    for (k = 0; k < self->num_samples; k++) {
        t = node_time[self->samples[k]];
        oldest = TSK_MAX(oldest, t);
    }
    self->oldest_node_time = oldest;
}
/* Validates the sample node IDs, flags each of them in self->is_sample and
 * gives each an initial ancestry segment that spans the whole sequence and
 * maps to itself.
 *
 * Returns 0 on success, TSK_ERR_NODE_OUT_OF_BOUNDS if an ID is not a valid
 * row of the node table, TSK_ERR_DUPLICATE_SAMPLE if an ID is repeated, or
 * TSK_ERR_NO_MEMORY.
 */
static int
ancestor_mapper_init_samples(ancestor_mapper_t *self, tsk_id_t *samples)
{
    int ret = 0;
    tsk_size_t j;
    const tsk_id_t num_nodes = (tsk_id_t) self->tables->nodes.num_rows;

    /* Go through the samples to check for errors. */
    for (j = 0; j < self->num_samples; j++) {
        /* Valid IDs are 0 .. num_nodes - 1. An ID equal to num_nodes must
         * be rejected too, or the per-node arrays are indexed one past
         * their end. */
        if (samples[j] < 0 || samples[j] >= num_nodes) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }
        if (self->is_sample[samples[j]]) {
            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
            goto out;
        }
        self->is_sample[samples[j]] = true;
        ret = ancestor_mapper_add_ancestry(
            self, samples[j], 0, self->tables->sequence_length, samples[j]);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Validates the ancestor node IDs and flags each of them in
 * self->is_ancestor.
 *
 * Returns 0 on success, TSK_ERR_NODE_OUT_OF_BOUNDS if an ID is not a valid
 * row of the node table, or TSK_ERR_DUPLICATE_SAMPLE if an ID is repeated
 * (the sample error code is reused for duplicate ancestors).
 */
static int
ancestor_mapper_init_ancestors(ancestor_mapper_t *self, tsk_id_t *ancestors)
{
    int ret = 0;
    tsk_size_t j;
    const tsk_id_t num_nodes = (tsk_id_t) self->tables->nodes.num_rows;

    /* Go through the ancestors to check for errors. */
    for (j = 0; j < self->num_ancestors; j++) {
        /* Valid IDs are 0 .. num_nodes - 1. An ID equal to num_nodes must
         * be rejected too, or the per-node arrays are indexed one past
         * their end. */
        if (ancestors[j] < 0 || ancestors[j] >= num_nodes) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }
        if (self->is_ancestor[ancestors[j]]) {
            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
            goto out;
        }
        self->is_ancestor[ancestors[j]] = true;
    }
out:
    return ret;
}
/* Initialises an ancestor mapper.
 *
 * ``samples`` and ``ancestors`` must both be non-NULL and non-empty; the
 * pointers are stored, not copied. ``tables`` is the input table collection
 * and ``result`` the edge table that will receive the output; ``result`` is
 * cleared here.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE for a missing or empty node
 * list, TSK_ERR_NO_MEMORY, or an error from validating the node lists. The
 * struct is zeroed first, before any error can be reported.
 */
static int
ancestor_mapper_init(ancestor_mapper_t *self, tsk_id_t *samples, tsk_size_t num_samples,
    tsk_id_t *ancestors, tsk_size_t num_ancestors, tsk_table_collection_t *tables,
    tsk_edge_table_t *result)
{
    int ret = 0;
    tsk_size_t node_count;
    bool alloc_failed;

    tsk_memset(self, 0, sizeof(ancestor_mapper_t));
    self->samples = samples;
    self->num_samples = num_samples;
    self->ancestors = ancestors;
    self->num_ancestors = num_ancestors;
    self->tables = tables;
    self->result = result;
    self->sequence_length = self->tables->sequence_length;

    if (samples == NULL || num_samples == 0 || ancestors == NULL || num_ancestors == 0) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }

    /* Set up the block allocators for the small list nodes, using 8K chunks */
    ret = tsk_blkalloc_init(&self->segment_heap, 8192);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_blkalloc_init(&self->interval_list_heap, 8192);
    if (ret != 0) {
        goto out;
    }
    ret = segment_overlapper_alloc(&self->segment_overlapper);
    if (ret != 0) {
        goto out;
    }

    /* Allocate the per-node maps and the segment queue. The maps and flags
     * come from tsk_calloc; buffered_children and the queue are left
     * uninitialised. */
    node_count = tables->nodes.num_rows;
    self->ancestor_map_head = tsk_calloc(node_count, sizeof(tsk_segment_t *));
    self->ancestor_map_tail = tsk_calloc(node_count, sizeof(tsk_segment_t *));
    self->child_edge_map_head = tsk_calloc(node_count, sizeof(interval_list_t *));
    self->child_edge_map_tail = tsk_calloc(node_count, sizeof(interval_list_t *));
    self->buffered_children = tsk_malloc(node_count * sizeof(tsk_id_t));
    self->is_sample = tsk_calloc(node_count, sizeof(bool));
    self->is_ancestor = tsk_calloc(node_count, sizeof(bool));
    self->max_segment_queue_size = 64;
    self->segment_queue
        = tsk_malloc(self->max_segment_queue_size * sizeof(tsk_segment_t));

    alloc_failed = self->ancestor_map_head == NULL || self->ancestor_map_tail == NULL
                   || self->child_edge_map_head == NULL
                   || self->child_edge_map_tail == NULL || self->is_sample == NULL
                   || self->is_ancestor == NULL || self->segment_queue == NULL
                   || self->buffered_children == NULL;
    if (alloc_failed) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Validate and register the node lists */
    ret = ancestor_mapper_init_samples(self, samples);
    if (ret != 0) {
        goto out;
    }
    ret = ancestor_mapper_init_ancestors(self, ancestors);
    if (ret != 0) {
        goto out;
    }
    ancestor_mapper_find_oldest_node(self);

    ret = tsk_edge_table_clear(self->result);
    if (ret != 0) {
        goto out;
    }
out:
    return ret;
}
/* Release everything owned by the mapper: the per-node working arrays, the
 * segment queue, the segment overlapper and both block allocators. Always
 * returns 0. */
static int
ancestor_mapper_free(ancestor_mapper_t *self)
{
    /* Plain heap arrays. */
    tsk_safe_free(self->buffered_children);
    tsk_safe_free(self->is_ancestor);
    tsk_safe_free(self->is_sample);
    tsk_safe_free(self->segment_queue);
    tsk_safe_free(self->child_edge_map_tail);
    tsk_safe_free(self->child_edge_map_head);
    tsk_safe_free(self->ancestor_map_tail);
    tsk_safe_free(self->ancestor_map_head);

    /* Helper objects with their own teardown routines. */
    segment_overlapper_free(&self->segment_overlapper);
    tsk_blkalloc_free(&self->interval_list_heap);
    tsk_blkalloc_free(&self->segment_heap);

    return 0;
}
/* Append the segment (left, right, node) to the mapper's segment queue,
 * doubling the queue's capacity first when only one free slot remains.
 *
 * Requires left < right (checked with tsk_bug_assert). Returns 0 on success
 * or TSK_ERR_NO_MEMORY if the queue cannot be grown. */
static int TSK_WARN_UNUSED
ancestor_mapper_enqueue_segment(
    ancestor_mapper_t *self, double left, double right, tsk_id_t node)
{
    int ret = 0;
    tsk_segment_t *slot;
    void *grown;

    tsk_bug_assert(left < right);

    /* One slot is always kept spare so that a tail sentinel can be placed
     * after the last queued segment. */
    if (self->segment_queue_size + 1 == self->max_segment_queue_size) {
        self->max_segment_queue_size *= 2;
        grown = tsk_realloc(self->segment_queue,
            self->max_segment_queue_size * sizeof(*self->segment_queue));
        if (grown == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        self->segment_queue = grown;
    }

    slot = &self->segment_queue[self->segment_queue_size];
    slot->left = left;
    slot->right = right;
    slot->node = node;
    self->segment_queue_size++;
out:
    return ret;
}
/* Merge the segments currently held in the segment queue into the ancestry
 * recorded for `input_id`.
 *
 * The queue is handed to the segment overlapper, which yields successive
 * (left, right) intervals together with the num_overlapping queued segments
 * X[0..num_overlapping) that cover each interval. For every interval:
 *  - if `input_id` is a sample or an ancestor, an edge is recorded to the
 *    node of each overlapping segment and the interval is added to the
 *    ancestry of `input_id` mapped to `input_id` itself; for samples any gap
 *    between the previous interval and this one is also added;
 *  - otherwise the interval is added to the ancestry of `input_id` once per
 *    overlapping segment, mapped to that segment's node.
 * Afterwards a trailing gap is filled for samples and the buffered edges are
 * flushed.
 *
 * Returns 0 on success or the first non-zero code returned by a helper. */
static int TSK_WARN_UNUSED
ancestor_mapper_merge_ancestors(ancestor_mapper_t *self, tsk_id_t input_id)
{
int ret = 0;
tsk_segment_t **X, *x;
tsk_size_t j, num_overlapping, num_flushed_edges;
double left, right, prev_right;
bool is_sample = self->is_sample[input_id];
bool is_ancestor = self->is_ancestor[input_id];
if (is_sample) {
/* Free up the existing ancestry mapping. */
/* The tail segment is expected to span the whole sequence; after the
 * check the list is simply unlinked (no memory is returned here). */
x = self->ancestor_map_tail[input_id];
tsk_bug_assert(x->left == 0 && x->right == self->sequence_length);
self->ancestor_map_head[input_id] = NULL;
self->ancestor_map_tail[input_id] = NULL;
}
ret = segment_overlapper_start(
&self->segment_overlapper, self->segment_queue, self->segment_queue_size);
if (ret != 0) {
goto out;
}
/* Right end of the previously emitted interval; used to detect gaps. */
prev_right = 0;
/* The loop runs for as long as the overlapper returns 1; ret is then
 * overwritten by the helper calls in the body. */
while ((ret = segment_overlapper_next(
&self->segment_overlapper, &left, &right, &X, &num_overlapping))
== 1) {
tsk_bug_assert(left < right);
tsk_bug_assert(num_overlapping > 0);
if (is_ancestor || is_sample) {
for (j = 0; j < num_overlapping; j++) {
ret = ancestor_mapper_record_edge(self, left, right, X[j]->node);
if (ret != 0) {
goto out;
}
}
ret = ancestor_mapper_add_ancestry(self, input_id, left, right, input_id);
if (ret != 0) {
goto out;
}
if (is_sample && left != prev_right) {
/* Fill in any gaps in ancestry for the sample */
/* Note that the gap segment is appended after the interval that
 * follows it. */
ret = ancestor_mapper_add_ancestry(
self, input_id, prev_right, left, input_id);
if (ret != 0) {
goto out;
}
}
} else {
for (j = 0; j < num_overlapping; j++) {
ret = ancestor_mapper_add_ancestry(
self, input_id, left, right, X[j]->node);
if (ret != 0) {
goto out;
}
}
}
prev_right = right;
}
if (is_sample && prev_right != self->tables->sequence_length) {
/* If a trailing gap exists in the sample ancestry, fill it in. */
ret = ancestor_mapper_add_ancestry(
self, input_id, prev_right, self->sequence_length, input_id);
if (ret != 0) {
goto out;
}
}
/* NOTE(review): input_id has already been used as an array index above, so
 * this test can only matter if TSK_NULL is never passed in; confirm whether
 * the check is still needed. */
if (input_id != TSK_NULL) {
ret = ancestor_mapper_flush_edges(self, input_id, &num_flushed_edges);
if (ret != 0) {
goto out;
}
}
out:
return ret;
}
/* Process the input edges with indexes in [start, end), all of which must
 * have `parent` as their parent.
 *
 * For each edge, every ancestry segment of the child that overlaps the edge
 * is clipped to the edge and queued; the queue is then merged into the
 * ancestry of `parent`. Returns 0 on success or the first error reported by
 * the enqueue or merge step. */
static int TSK_WARN_UNUSED
ancestor_mapper_process_parent_edges(
    ancestor_mapper_t *self, tsk_id_t parent, tsk_size_t start, tsk_size_t end)
{
    int ret = 0;
    tsk_size_t e;
    tsk_segment_t *seg;
    const tsk_edge_table_t *edges = &self->tables->edges;
    double edge_left, edge_right;

    /* Start from an empty queue for this parent. */
    self->segment_queue_size = 0;
    for (e = start; e < end; e++) {
        tsk_bug_assert(parent == edges->parent[e]);
        edge_left = edges->left[e];
        edge_right = edges->right[e];
        seg = self->ancestor_map_head[edges->child[e]];
        while (seg != NULL) {
            /* Only segments that intersect the edge contribute. */
            if (seg->right > edge_left && edge_right > seg->left) {
                ret = ancestor_mapper_enqueue_segment(self,
                    TSK_MAX(seg->left, edge_left), TSK_MIN(seg->right, edge_right),
                    seg->node);
                if (ret != 0) {
                    goto out;
                }
            }
            seg = seg->next;
        }
    }
    /* Everything relevant to this parent is queued; merge it. */
    ret = ancestor_mapper_merge_ancestors(self, parent);
out:
    return ret;
}
/* Walk the input edge table, handing each run of consecutive edges that share
 * a parent to ancestor_mapper_process_parent_edges.
 *
 * The walk stops, without processing the new run, as soon as a run begins
 * whose parent has a time greater than self->oldest_node_time. Returns 0 on
 * success or the first error reported while processing a run. */
static int TSK_WARN_UNUSED
ancestor_mapper_run(ancestor_mapper_t *self)
{
    int ret = 0;
    const tsk_edge_table_t *edges = &self->tables->edges;
    const tsk_size_t edge_count = edges->num_rows;
    const double *time = self->tables->nodes.time;
    tsk_size_t j, run_start;
    tsk_id_t parent, run_parent;

    if (edge_count == 0) {
        goto out;
    }

    run_start = 0;
    run_parent = edges->parent[0];
    for (j = 0; j < edge_count; j++) {
        parent = edges->parent[j];
        if (parent == run_parent) {
            continue;
        }
        /* Edge j opens a new run, so the previous one is complete. */
        ret = ancestor_mapper_process_parent_edges(self, run_parent, run_start, j);
        if (ret != 0) {
            goto out;
        }
        if (time[parent] > self->oldest_node_time) {
            /* Early exit: the remaining edges are not processed. */
            goto out;
        }
        run_start = j;
        run_parent = parent;
    }
    /* The loop ran to completion, so the final run is still pending. */
    ret = ancestor_mapper_process_parent_edges(self, run_parent, run_start, edge_count);
out:
    return ret;
}
/*************************
* IBD Segments
*************************/
/* Encode an unordered pair of IDs as the single integer min * N + max, where
 * min and max are the smaller and larger of `a` and `b`.
 *
 * For the result to overflow an int64, N would need to be > sqrt(2^63),
 * ~3 * 10^9. The maximum value for a 32 bit int is ~2 * 10^9, so this can't
 * happen here; it is theoretically possible with 64 bit IDs, but would need a
 * *very* large node table --- assuming 24 bytes per row it would be at least
 * 67GiB. To rule this out there is a tsk_bug_assert in
 * tsk_identity_segments_init.
 */
static inline int64_t
pair_to_integer(tsk_id_t a, tsk_id_t b, tsk_size_t N)
{
    const tsk_id_t lo = a > b ? b : a;
    const tsk_id_t hi = a > b ? a : b;

    return ((int64_t) lo) * (int64_t) N + (int64_t) hi;
}
/* Decode a key produced by pair_to_integer: *a receives index / N and *b
 * receives index % N. */
static inline void
integer_to_pair(int64_t index, tsk_size_t N, tsk_id_t *a, tsk_id_t *b)
{
    const int64_t base = (int64_t) N;
    const int64_t quotient = index / base;
    const int64_t remainder = index % base;

    *a = (tsk_id_t) quotient;
    *b = (tsk_id_t) remainder;
}
/* Validate a pair of node IDs and return its key in the pair map.
 *
 * Returns a non-negative key on success. Returns TSK_ERR_NODE_OUT_OF_BOUNDS
 * if either ID lies outside [0, num_nodes), or TSK_ERR_SAME_NODES_IN_PAIR if
 * the two IDs are equal; callers treat a negative result as an error. */
static int64_t
tsk_identity_segments_get_key(
    const tsk_identity_segments_t *self, tsk_id_t a, tsk_id_t b)
{
    const tsk_id_t limit = (tsk_id_t) self->num_nodes;

    if (a < 0 || b < 0 || a >= limit || b >= limit) {
        return tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
    }
    if (a == b) {
        return tsk_trace_error(TSK_ERR_SAME_NODES_IN_PAIR);
    }
    return pair_to_integer(a, b, self->num_nodes);
}
/* Take a new identity segment from the result's block allocator and fill it
 * in with (left, right, node), unlinked (next == NULL).
 *
 * Returns NULL if the allocator cannot supply the memory. On a successful
 * allocation, left < right and 0 <= node < num_nodes are enforced with
 * tsk_bug_assert. */
static tsk_identity_segment_t *TSK_WARN_UNUSED
tsk_identity_segments_alloc_segment(
    tsk_identity_segments_t *self, double left, double right, tsk_id_t node)
{
    tsk_identity_segment_t *seg = tsk_blkalloc_get(&self->heap, sizeof(*seg));

    if (seg != NULL) {
        tsk_bug_assert(left < right);
        tsk_bug_assert(node >= 0 && node < (tsk_id_t) self->num_nodes);
        seg->next = NULL;
        seg->left = left;
        seg->right = right;
        seg->node = node;
    }
    return seg;
}
/* Allocate, from the result's block allocator, an AVL node for `key` whose
 * value is a freshly zeroed segment list. Both allocations are attempted
 * before either is checked. Returns NULL if either one fails. */
static tsk_avl_node_int_t *
tsk_identity_segments_alloc_new_pair(tsk_identity_segments_t *self, int64_t key)
{
    tsk_avl_node_int_t *pair_node = tsk_blkalloc_get(&self->heap, sizeof(*pair_node));
    tsk_identity_segment_list_t *segments
        = tsk_blkalloc_get(&self->heap, sizeof(*segments));

    if (pair_node != NULL && segments != NULL) {
        memset(segments, 0, sizeof(*segments));
        pair_node->key = key;
        pair_node->value = segments;
        return pair_node;
    }
    return NULL;
}
/* Initialise an identity segments result for a table collection with
 * `num_nodes` nodes.
 *
 * `options` selects what is stored: TSK_IBD_STORE_SEGMENTS keeps individual
 * segments (which implies keeping pairs) and TSK_IBD_STORE_PAIRS keeps the
 * per-pair summaries only. Returns 0 on success or the error reported while
 * setting up the pair map or the block allocator.
 *
 * Deliberately kept out of the public interface for now, so that the
 * signature can still change freely. */
static int
tsk_identity_segments_init(
    tsk_identity_segments_t *self, tsk_size_t num_nodes, tsk_flags_t options)
{
    int ret;
    /* Upper bound on the node count for which the pair key cannot overflow;
     * see the comments on pair_to_integer for details. */
    const double node_limit = sqrt(1ULL << 63);

    tsk_bug_assert((double) num_nodes < node_limit);

    memset(self, 0, sizeof(*self));
    self->num_nodes = num_nodes;
    self->store_segments = !!(options & TSK_IBD_STORE_SEGMENTS);
    self->store_pairs = self->store_segments || !!(options & TSK_IBD_STORE_PAIRS);

    ret = tsk_avl_tree_int_init(&self->pair_map);
    if (ret == 0) {
        /* Heap memory is requested in 1MiB blocks. */
        ret = tsk_blkalloc_init(&self->heap, 1024 * 1024);
    }
    return ret;
}
/* Write a human-readable dump of the result to `out`: the overall totals,
 * the storage flags and, when pairs are stored, one line per pair in key
 * order (followed by that pair's segments when segments are stored), and
 * finally the state of the block allocator.
 *
 * A temporary array of pair_map.size node pointers is allocated for the
 * ordered listing; failure to allocate it trips a tsk_bug_assert. */
void
tsk_identity_segments_print_state(tsk_identity_segments_t *self, FILE *out)
{
    tsk_avl_node_int_t **nodes = tsk_malloc(self->pair_map.size * sizeof(*nodes));
    int64_t key;
    tsk_identity_segment_list_t *value;
    tsk_identity_segment_t *seg;
    tsk_size_t j;
    tsk_id_t a, b;

    tsk_bug_assert(nodes != NULL);
    fprintf(out, "===\nIBD Result\n===\n");
    fprintf(out, "total_span = %f\n", self->total_span);
    /* The argument is unsigned long long, so the conversion must be %llu
     * (the original %lld did not match the argument type). */
    fprintf(out, "num_segments = %llu\n", (unsigned long long) self->num_segments);
    fprintf(out, "store_pairs = %d\n", self->store_pairs);
    fprintf(out, "store_segments = %d\n", self->store_segments);
    if (self->store_pairs) {
        fprintf(out, "num_keys = %d\n", (int) self->pair_map.size);
        tsk_avl_tree_int_ordered_nodes(&self->pair_map, nodes);
        for (j = 0; j < self->pair_map.size; j++) {
            key = nodes[j]->key;
            value = (tsk_identity_segment_list_t *) nodes[j]->value;
            /* Recover the two node IDs encoded in the key. */
            integer_to_pair(key, self->num_nodes, &a, &b);
            fprintf(out, "%lld\t(%d,%d) n=%d total_span=%f\t", (long long) key, (int) a,
                (int) b, (int) value->num_segments, value->total_span);
            if (self->store_segments) {
                for (seg = value->head; seg != NULL; seg = seg->next) {
                    fprintf(
                        out, "(%f, %f)->%d, ", seg->left, seg->right, (int) seg->node);
                }
            }
            fprintf(out, "\n");
        }
    }
    fprintf(out, "Segment memory\n");
    tsk_blkalloc_print_state(&self->heap, out);
    tsk_safe_free(nodes);
}
/* Return the total number of segments counted in this result. */
tsk_size_t
tsk_identity_segments_get_num_segments(const tsk_identity_segments_t *self)
{
    const tsk_size_t count = self->num_segments;

    return count;
}
/* Return the summed span (right - left) of all segments counted in this
 * result. */
double
tsk_identity_segments_get_total_span(const tsk_identity_segments_t *self)
{
    const double span = self->total_span;

    return span;
}
/* Return the number of distinct pairs held in the pair map. */
tsk_size_t
tsk_identity_segments_get_num_pairs(const tsk_identity_segments_t *self)
{
    const tsk_size_t pair_count = self->pair_map.size;

    return pair_count;
}
/* In-order walk of the pair map's AVL tree, decoding each key into
 * pairs[2 * i] and pairs[2 * i + 1], where i counts visited nodes starting
 * from `index`. Returns the index following the last one written.
 *
 * Recursion is safe here because the tree is balanced (see the AVL tree code
 * for notes on this).
 */
static int
get_keys_traverse(tsk_avl_node_int_t *node, int index, tsk_size_t N, tsk_id_t *pairs)
{
    if (node != NULL) {
        /* Left subtree first, then this node, then the right subtree. */
        index = get_keys_traverse(node->llink, index, N, pairs);
        integer_to_pair(node->key, N, &pairs[2 * index], &pairs[2 * index + 1]);
        index = get_keys_traverse(node->rlink, index + 1, N, pairs);
    }
    return index;
}
/* Copy the stored sample pairs, in key order, into `pairs` as consecutive
 * (a, b) entries; two entries are written per stored pair, so the caller
 * must provide room for 2 * num_pairs IDs.
 *
 * Returns 0 on success or TSK_ERR_IBD_PAIRS_NOT_STORED if the result was
 * created without pair storage. The error goes through tsk_trace_error, in
 * line with the other error paths in this file. */
int
tsk_identity_segments_get_keys(const tsk_identity_segments_t *self, tsk_id_t *pairs)
{
    int ret = 0;

    if (!self->store_pairs) {
        ret = tsk_trace_error(TSK_ERR_IBD_PAIRS_NOT_STORED);
        goto out;
    }
    get_keys_traverse(
        tsk_avl_tree_int_get_root(&self->pair_map), 0, self->num_nodes, pairs);
out:
    return ret;
}
/* In-order walk of the pair map's AVL tree that records, for the i-th node
 * visited (counting from `index`), the decoded pair in pairs[2 * i] and
 * pairs[2 * i + 1] and the node's value in lists[i]. Returns the index
 * following the last one written. */
static int
get_items_traverse(tsk_avl_node_int_t *node, int index, tsk_size_t N, tsk_id_t *pairs,
    tsk_identity_segment_list_t **lists)
{
    if (node != NULL) {
        index = get_items_traverse(node->llink, index, N, pairs, lists);
        integer_to_pair(node->key, N, &pairs[2 * index], &pairs[2 * index + 1]);
        lists[index] = node->value;
        index = get_items_traverse(node->rlink, index + 1, N, pairs, lists);
    }
    return index;
}
/* Copy the stored sample pairs and their segment lists, in key order, into
 * `pairs` (two IDs per pair) and `lists` (one pointer per pair). The caller
 * must provide room for 2 * num_pairs IDs and num_pairs list pointers.
 *
 * Returns 0 on success or TSK_ERR_IBD_PAIRS_NOT_STORED if the result was
 * created without pair storage. The error goes through tsk_trace_error, in
 * line with the other error paths in this file. */
int
tsk_identity_segments_get_items(const tsk_identity_segments_t *self, tsk_id_t *pairs,
    tsk_identity_segment_list_t **lists)
{
    int ret = 0;

    if (!self->store_pairs) {
        ret = tsk_trace_error(TSK_ERR_IBD_PAIRS_NOT_STORED);
        goto out;
    }
    get_items_traverse(
        tsk_avl_tree_int_get_root(&self->pair_map), 0, self->num_nodes, pairs, lists);
out:
    return ret;
}
/* Release the memory held by the result: first its block allocator, then the
 * pair map. Always returns 0. */
int
tsk_identity_segments_free(tsk_identity_segments_t *self)
{
    tsk_identity_segments_t *const result = self;

    tsk_blkalloc_free(&result->heap);
    tsk_avl_tree_int_free(&result->pair_map);

    return 0;
}
/* Account for one IBD segment (left, right) inherited through `node` by the
 * sample pair (a, b).
 *
 * The pair's entry in the pair map is created on first use; its segment count
 * and total span are then updated and, when segments are being stored, the
 * segment itself is appended to the pair's list.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if the pair entry or the segment
 * cannot be allocated. */
static int TSK_WARN_UNUSED
tsk_identity_segments_update_pair(tsk_identity_segments_t *self, tsk_id_t a, tsk_id_t b,
    double left, double right, tsk_id_t node)
{
    int ret = 0;
    tsk_identity_segment_t *x;
    tsk_identity_segment_list_t *list;
    /* skip the error checking here since this an internal API */
    int64_t key = pair_to_integer(a, b, self->num_nodes);
    tsk_avl_node_int_t *avl_node = tsk_avl_tree_int_search(&self->pair_map, key);

    if (avl_node == NULL) {
        /* We haven't seen this pair before */
        avl_node = tsk_identity_segments_alloc_new_pair(self, key);
        if (avl_node == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        /* The key was just searched for and not found, so the insert is
         * expected to succeed. */
        ret = tsk_avl_tree_int_insert(&self->pair_map, avl_node);
        tsk_bug_assert(ret == 0);
    }
    list = (tsk_identity_segment_list_t *) avl_node->value;
    list->num_segments++;
    list->total_span += right - left;
    if (self->store_segments) {
        x = tsk_identity_segments_alloc_segment(self, left, right, node);
        if (x == NULL) {
            /* Report the failure; previously this path fell through to
             * `out` with ret == 0, so the caller saw success. */
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        if (list->tail == NULL) {
            list->head = x;
            list->tail = x;
        } else {
            list->tail->next = x;
            list->tail = x;
        }
    }
out:
    return ret;
}
/* Add one IBD segment for the pair (a, b) to the result.
 *
 * When pairs are stored the per-pair record is updated first; the overall
 * span and segment totals are only incremented if that step succeeds (or is
 * not needed). Returns 0 on success or the error from the per-pair update. */
static int TSK_WARN_UNUSED
tsk_identity_segments_add_segment(tsk_identity_segments_t *self, tsk_id_t a, tsk_id_t b,
    double left, double right, tsk_id_t node)
{
    int ret = 0;

    if (self->store_pairs) {
        ret = tsk_identity_segments_update_pair(self, a, b, left, right, node);
    }
    if (ret == 0) {
        self->total_span += right - left;
        self->num_segments++;
    }
    return ret;
}
/* Look up the segment list stored for the pair (sample_a, sample_b).
 *
 * On success returns 0 and sets *ret_list to the pair's list, or to NULL when
 * nothing is stored for that pair. Returns the (negative) error from key
 * validation for invalid IDs, or TSK_ERR_IBD_PAIRS_NOT_STORED if the result
 * does not store pairs; *ret_list is left untouched on error. */
int TSK_WARN_UNUSED
tsk_identity_segments_get(const tsk_identity_segments_t *self, tsk_id_t sample_a,
    tsk_id_t sample_b, tsk_identity_segment_list_t **ret_list)
{
    const int64_t key = tsk_identity_segments_get_key(self, sample_a, sample_b);
    tsk_avl_node_int_t *found;

    /* A negative key is an error code from the validation step. */
    if (key < 0) {
        return (int) key;
    }
    if (!self->store_pairs) {
        return tsk_trace_error(TSK_ERR_IBD_PAIRS_NOT_STORED);
    }

    found = tsk_avl_tree_int_search(&self->pair_map, key);
    if (found == NULL) {
        *ret_list = NULL;
    } else {
        *ret_list = (tsk_identity_segment_list_t *) found->value;
    }
    return 0;
}
/*************************
* IBD finder
*************************/
/* Working state for a single IBD-finding run over a table collection. */
typedef struct {
/* Destination to which every accepted IBD segment is added. */
tsk_identity_segments_t *result;
/* Segments whose span (right - left) is <= min_span are discarded. */
double min_span;
/* The run stops at the first edge whose parent time exceeds max_time. */
double max_time;
/* Input tables; only read by the finder. */
const tsk_table_collection_t *tables;
/* Maps nodes to their sample set IDs. Input samples map to set 0
 * in the "within" case. */
/* Nodes that are not samples hold TSK_NULL. */
tsk_id_t *sample_set_id;
/* True if we're finding IBD between sample sets, false otherwise. */
bool finding_between;
/* Per-node head and tail of the linked list of ancestry segments. */
tsk_segment_t **ancestor_map_head;
tsk_segment_t **ancestor_map_tail;
/* Growable array of segments queued for the edge currently being
 * processed, with its length and capacity. */
tsk_segment_t *segment_queue;
tsk_size_t segment_queue_size;
tsk_size_t max_segment_queue_size;
/* Block allocator supplying the segments in the ancestor map. */
tsk_blkalloc_t segment_heap;
} tsk_ibd_finder_t;
/* Take a new segment from the finder's block allocator and fill it in with
 * (left, right, node), unlinked (next == NULL). Returns NULL if the
 * allocator cannot supply the memory. */
static tsk_segment_t *TSK_WARN_UNUSED
tsk_ibd_finder_alloc_segment(
    tsk_ibd_finder_t *self, double left, double right, tsk_id_t node)
{
    tsk_segment_t *fresh = tsk_blkalloc_get(&self->segment_heap, sizeof(*fresh));

    if (fresh != NULL) {
        fresh->next = NULL;
        fresh->left = left;
        fresh->right = right;
        fresh->node = node;
    }
    return fresh;
}
/* Append the segment (left, right) -> output_id to the end of the ancestry
 * list of node `input_id`.
 *
 * Requires left < right (checked with tsk_bug_assert). Returns 0 on success
 * or TSK_ERR_NO_MEMORY if the segment cannot be allocated. */
static int TSK_WARN_UNUSED
tsk_ibd_finder_add_ancestry(tsk_ibd_finder_t *self, tsk_id_t input_id, double left,
    double right, tsk_id_t output_id)
{
    int ret = 0;
    tsk_segment_t *const old_tail = self->ancestor_map_tail[input_id];
    tsk_segment_t *seg;

    tsk_bug_assert(left < right);
    seg = tsk_ibd_finder_alloc_segment(self, left, right, output_id);
    if (seg == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* Link after the current tail, or start the list if it is empty. */
    if (old_tail != NULL) {
        old_tail->next = seg;
    } else {
        self->ancestor_map_head[input_id] = seg;
    }
    self->ancestor_map_tail[input_id] = seg;
out:
    return ret;
}
/* Mark each node in `samples` as belonging to sample set 0.
 *
 * Returns 0 on success, TSK_ERR_NODE_OUT_OF_BOUNDS if an ID is negative or
 * not less than the number of rows in the node table, or
 * TSK_ERR_DUPLICATE_SAMPLE if an ID appears more than once. */
static int
tsk_ibd_finder_init_samples_from_set(
    tsk_ibd_finder_t *self, const tsk_id_t *samples, tsk_size_t num_samples)
{
    int ret = 0;
    tsk_size_t j;
    tsk_id_t u;
    const tsk_id_t num_nodes = (tsk_id_t) self->tables->nodes.num_rows;

    for (j = 0; j < num_samples; j++) {
        u = samples[j];
        /* Valid IDs are 0 .. num_nodes - 1. The upper bound must be
         * rejected with >=, otherwise u == num_nodes would index one past
         * the end of sample_set_id below. */
        if (u < 0 || u >= num_nodes) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }
        if (self->sample_set_id[u] != TSK_NULL) {
            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
            goto out;
        }
        self->sample_set_id[u] = 0;
    }
out:
    return ret;
}
/* Mark every node whose flags include TSK_NODE_IS_SAMPLE as belonging to
 * sample set 0; other nodes are left unchanged. */
static void
tsk_ibd_finder_init_samples_from_nodes(tsk_ibd_finder_t *self)
{
    const tsk_flags_t *restrict node_flags = self->tables->nodes.flags;
    const tsk_id_t node_count = (tsk_id_t) self->tables->nodes.num_rows;
    tsk_id_t node;

    for (node = 0; node < node_count; node++) {
        if ((node_flags[node] & TSK_NODE_IS_SAMPLE) == 0) {
            continue;
        }
        self->sample_set_id[node] = 0;
    }
}
/* Give every node that has been assigned to a sample set an initial ancestry
 * segment covering (0, sequence_length) and mapping to the node itself.
 * Returns 0 on success or the first error from adding a segment. */
static int
tsk_ibd_finder_add_sample_ancestry(tsk_ibd_finder_t *self)
{
    int ret = 0;
    const double seq_len = self->tables->sequence_length;
    const tsk_id_t node_count = (tsk_id_t) self->tables->nodes.num_rows;
    tsk_id_t node;

    for (node = 0; node < node_count; node++) {
        if (self->sample_set_id[node] == TSK_NULL) {
            continue;
        }
        ret = tsk_ibd_finder_add_ancestry(self, node, 0, seq_len, node);
        if (ret != 0) {
            break;
        }
    }
    return ret;
}
/* Initialise an IBD finder over `tables` that reports into `result`.
 *
 * The structure is zeroed, the parameters are recorded, and the segment
 * allocator, the per-node ancestry maps, the sample-set map (every entry
 * starts as TSK_NULL) and the segment queue are created.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE if min_span or max_time is
 * negative, TSK_ERR_NO_MEMORY if an array cannot be allocated, or the error
 * from setting up the block allocator. */
static int TSK_WARN_UNUSED
tsk_ibd_finder_init(tsk_ibd_finder_t *self, const tsk_table_collection_t *tables,
    tsk_identity_segments_t *result, double min_span, double max_time)
{
    int ret = 0;
    tsk_size_t node_count;

    tsk_memset(self, 0, sizeof(*self));

    if (min_span < 0 || max_time < 0) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    self->tables = tables;
    self->result = result;
    self->min_span = min_span;
    self->max_time = max_time;

    ret = tsk_blkalloc_init(&self->segment_heap, 8192);
    if (ret != 0) {
        goto out;
    }

    node_count = tables->nodes.num_rows;
    self->ancestor_map_head = tsk_calloc(node_count, sizeof(*self->ancestor_map_head));
    self->ancestor_map_tail = tsk_calloc(node_count, sizeof(*self->ancestor_map_tail));
    self->sample_set_id = tsk_malloc(node_count * sizeof(*self->sample_set_id));
    self->segment_queue_size = 0;
    self->max_segment_queue_size = 64;
    self->segment_queue
        = tsk_malloc(self->max_segment_queue_size * sizeof(*self->segment_queue));
    if (self->segment_queue == NULL || self->sample_set_id == NULL
        || self->ancestor_map_tail == NULL || self->ancestor_map_head == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* Initially no node belongs to a sample set. */
    tsk_memset(
        self->sample_set_id, TSK_NULL, node_count * sizeof(*self->sample_set_id));
out:
    return ret;
}
/* Queue the segment (left, right, node), provided its span (right - left) is
 * strictly greater than min_span; shorter segments are silently dropped.
 * The queue's capacity is doubled when only one free slot remains.
 *
 * Returns 0 on success (including when the segment is dropped) or
 * TSK_ERR_NO_MEMORY if the queue cannot be grown. */
static int TSK_WARN_UNUSED
tsk_ibd_finder_enqueue_segment(
    tsk_ibd_finder_t *self, double left, double right, tsk_id_t node)
{
    int ret = 0;
    tsk_segment_t *slot;
    void *grown;

    if ((right - left) > self->min_span) {
        /* One slot is always kept spare so that a tail sentinel can be
         * placed after the last queued segment. */
        if (self->segment_queue_size + 1 == self->max_segment_queue_size) {
            self->max_segment_queue_size *= 2;
            grown = tsk_realloc(self->segment_queue,
                self->max_segment_queue_size * sizeof(*self->segment_queue));
            if (grown == NULL) {
                ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
                goto out;
            }
            self->segment_queue = grown;
        }
        slot = &self->segment_queue[self->segment_queue_size];
        slot->left = left;
        slot->right = right;
        slot->node = node;
        self->segment_queue_size++;
    }
out:
    return ret;
}
/* Decide whether the candidate segment (left, right) shared by nodes `a` and
 * `b` should be reported.
 *
 * It is rejected when a == b or when its span is <= min_span. Otherwise it is
 * accepted, except that in "between" mode the two nodes must also belong to
 * different sample sets. */
static bool
tsk_ibd_finder_passes_filters(
    const tsk_ibd_finder_t *self, tsk_id_t a, tsk_id_t b, double left, double right)
{
    if (a == b || (right - left) <= self->min_span) {
        return false;
    }
    return !self->finding_between
           || self->sample_set_id[a] != self->sample_set_id[b];
}
/* Compare every segment already in the ancestry of `parent` with every
 * segment in the queue, and add the intersection of each pair that passes
 * the filters to the result, attributed to `parent`.
 *
 * Returns 0 on success or the first error from adding a segment. */
static int TSK_WARN_UNUSED
tsk_ibd_finder_record_ibd(tsk_ibd_finder_t *self, tsk_id_t parent)
{
    int ret = 0;
    tsk_size_t k;
    const tsk_segment_t *existing, *queued;
    double lo, hi;

    existing = self->ancestor_map_head[parent];
    while (existing != NULL) {
        for (k = 0; k < self->segment_queue_size; k++) {
            queued = self->segment_queue + k;
            /* Intersection of the two segments. */
            lo = TSK_MAX(existing->left, queued->left);
            hi = TSK_MIN(existing->right, queued->right);
            if (!tsk_ibd_finder_passes_filters(
                    self, existing->node, queued->node, lo, hi)) {
                continue;
            }
            ret = tsk_identity_segments_add_segment(
                self->result, existing->node, queued->node, lo, hi, parent);
            if (ret != 0) {
                goto out;
            }
        }
        existing = existing->next;
    }
out:
    return ret;
}
/* Move every queued segment into the ancestry of `parent`, in queue order,
 * then empty the queue.
 *
 * Returns 0 on success. On the first failure the error is returned
 * immediately and the queue length is left as it was. */
static int TSK_WARN_UNUSED
tsk_ibd_finder_add_queued_ancestry(tsk_ibd_finder_t *self, tsk_id_t parent)
{
    int ret = 0;
    tsk_size_t k;
    const tsk_segment_t *queued;

    for (k = 0; k < self->segment_queue_size; k++) {
        queued = &self->segment_queue[k];
        ret = tsk_ibd_finder_add_ancestry(
            self, parent, queued->left, queued->right, queued->node);
        if (ret != 0) {
            goto out;
        }
    }
    self->segment_queue_size = 0;
out:
    return ret;
}
/* Write a human-readable dump of the finder to `out`: its parameters, the
 * input edges and nodes (with each node's sample set), the per-node ancestry
 * lists and finally the state of the result object. */
static void
tsk_ibd_finder_print_state(tsk_ibd_finder_t *self, FILE *out)
{
    const tsk_edge_table_t *edges = &self->tables->edges;
    const tsk_node_table_t *nodes = &self->tables->nodes;
    const tsk_segment_t *seg;
    tsk_size_t row;

    fprintf(out, "--ibd-finder stats--\n");
    fprintf(out, "max_time = %f\n", self->max_time);
    fprintf(out, "min_span = %f\n", self->min_span);
    fprintf(out, "finding_between = %d\n", self->finding_between);

    fprintf(out, "===\nEdges\n===\n");
    for (row = 0; row < edges->num_rows; row++) {
        fprintf(out, "L:%f, R:%f, P:%lld, C:%lld\n", edges->left[row],
            edges->right[row], (long long) edges->parent[row],
            (long long) edges->child[row]);
    }

    fprintf(out, "===\nNodes\n===\n");
    for (row = 0; row < nodes->num_rows; row++) {
        fprintf(out, "ID:%d, Time:%f, Flag:%lld Sample set:%d\n", (int) row,
            nodes->time[row], (long long) nodes->flags[row],
            (int) self->sample_set_id[row]);
    }

    fprintf(out, "===\nAncestral map\n===\n");
    for (row = 0; row < nodes->num_rows; row++) {
        fprintf(out, "Node %lld: ", (long long) row);
        seg = self->ancestor_map_head[row];
        while (seg != NULL) {
            fprintf(
                out, "(%f,%f->%lld)", seg->left, seg->right, (long long) seg->node);
            seg = seg->next;
        }
        fprintf(out, "\n");
    }

    tsk_identity_segments_print_state(self->result, out);
}
/* Configure the finder to look for IBD within a single set of samples.
 *
 * When `samples` is NULL the samples are taken from the node table's flags;
 * otherwise the `num_samples` IDs in `samples` are used. The initial sample
 * ancestry is then added. Returns 0 on success, or the error from validating
 * the sample list or from adding the ancestry. */
static int TSK_WARN_UNUSED
tsk_ibd_finder_init_within(
    tsk_ibd_finder_t *self, const tsk_id_t *samples, tsk_size_t num_samples)
{
    int ret;

    if (samples != NULL) {
        ret = tsk_ibd_finder_init_samples_from_set(self, samples, num_samples);
        if (ret != 0) {
            return ret;
        }
    } else {
        tsk_ibd_finder_init_samples_from_nodes(self);
    }
    self->finding_between = false;
    ret = tsk_ibd_finder_add_sample_ancestry(self);
    return ret;
}
/* Configure the finder to look for IBD between sample sets.
 *
 * `sample_sets` holds the members of all sets back to back; set j contributes
 * the next sample_set_sizes[j] entries, each of which is labelled with set
 * ID j. The initial sample ancestry is then added.
 *
 * Returns 0 on success, TSK_ERR_NODE_OUT_OF_BOUNDS if an ID is negative or
 * not less than the number of rows in the node table,
 * TSK_ERR_DUPLICATE_SAMPLE if an ID appears more than once (in any set), or
 * the error from adding the ancestry. */
static int TSK_WARN_UNUSED
tsk_ibd_finder_init_between(tsk_ibd_finder_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets)
{
    int ret = 0;
    tsk_size_t j, k, index;
    tsk_id_t u;
    const tsk_id_t num_nodes = (tsk_id_t) self->tables->nodes.num_rows;

    index = 0;
    for (j = 0; j < num_sample_sets; j++) {
        for (k = 0; k < sample_set_sizes[j]; k++) {
            u = sample_sets[index];
            /* Valid IDs are 0 .. num_nodes - 1. The upper bound must be
             * rejected with >=, otherwise u == num_nodes would index one
             * past the end of sample_set_id below. */
            if (u < 0 || u >= num_nodes) {
                ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
                goto out;
            }
            if (self->sample_set_id[u] != TSK_NULL) {
                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
                goto out;
            }
            self->sample_set_id[u] = (tsk_id_t) j;
            index++;
        }
    }
    self->finding_between = true;
    ret = tsk_ibd_finder_add_sample_ancestry(self);
out:
    return ret;
}
/* Run the IBD finder over the input edges in table order.
 *
 * For each edge, the child's ancestry segments are clipped to the edge and
 * queued (subject to the min_span filter in the enqueue step), IBD between
 * the queued segments and the parent's existing ancestry is recorded, and
 * the queued segments are then appended to the parent's ancestry. Processing
 * stops at the first edge whose parent time exceeds max_time.
 *
 * Returns 0 on success or the first error reported by a helper. */
static int TSK_WARN_UNUSED
tsk_ibd_finder_run(tsk_ibd_finder_t *self)
{
    int ret = 0;
    const tsk_edge_table_t *edges = &self->tables->edges;
    const tsk_size_t edge_count = edges->num_rows;
    const double *node_time = self->tables->nodes.time;
    const tsk_segment_t *seg;
    tsk_size_t e;
    tsk_id_t parent;
    double edge_left, edge_right;

    for (e = 0; e < edge_count; e++) {
        parent = edges->parent[e];
        if (node_time[parent] > self->max_time) {
            break;
        }
        edge_left = edges->left[e];
        edge_right = edges->right[e];

        seg = self->ancestor_map_head[edges->child[e]];
        while (seg != NULL) {
            ret = tsk_ibd_finder_enqueue_segment(self, TSK_MAX(edge_left, seg->left),
                TSK_MIN(edge_right, seg->right), seg->node);
            if (ret != 0) {
                goto out;
            }
            seg = seg->next;
        }
        ret = tsk_ibd_finder_record_ibd(self, parent);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_ibd_finder_add_queued_ancestry(self, parent);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Release the finder's working arrays and its segment allocator. The result
 * object and the input tables are not touched. Always returns 0. */
static int
tsk_ibd_finder_free(tsk_ibd_finder_t *self)
{
    tsk_safe_free(self->segment_queue);
    tsk_safe_free(self->ancestor_map_tail);
    tsk_safe_free(self->ancestor_map_head);
    tsk_safe_free(self->sample_set_id);
    tsk_blkalloc_free(&self->segment_heap);

    return 0;
}
/*************************
* simplifier
*************************/
/* Debugging aid: verify the simplifier's internal invariants with
 * tsk_bug_assert. Nothing is modified. The checks cover, in order, the
 * per-node ancestry lists, the segment queue, the per-node mutation lists and
 * the buffered child edges. */
static void
simplifier_check_state(simplifier_t *self)
{
tsk_size_t j, k;
tsk_segment_t *u;
mutation_id_list_t *list_node;
tsk_id_t site;
interval_list_t *int_list;
tsk_id_t child;
double position, last_position;
bool found;
tsk_size_t num_intervals;
/* Ancestry lists: head and tail are either both NULL or both set; every
 * segment is non-empty; consecutive segments do not overlap and, where they
 * abut, map to different nodes; the last segment is the recorded tail. */
for (j = 0; j < self->input_tables.nodes.num_rows; j++) {
tsk_bug_assert((self->ancestor_map_head[j] == NULL)
== (self->ancestor_map_tail[j] == NULL));
for (u = self->ancestor_map_head[j]; u != NULL; u = u->next) {
tsk_bug_assert(u->left < u->right);
if (u->next != NULL) {
tsk_bug_assert(u->right <= u->next->left);
if (u->right == u->next->left) {
tsk_bug_assert(u->node != u->next->node);
}
} else {
tsk_bug_assert(u == self->ancestor_map_tail[j]);
}
}
}
/* Every queued segment is non-empty. */
for (j = 0; j < self->segment_queue_size; j++) {
tsk_bug_assert(self->segment_queue[j].left < self->segment_queue[j].right);
}
/* Mutation lists: each mutation listed under node j has node j in the input
 * mutation table, and the site positions along a list never decrease. */
for (j = 0; j < self->input_tables.nodes.num_rows; j++) {
last_position = -1;
for (list_node = self->node_mutation_list_map_head[j]; list_node != NULL;
list_node = list_node->next) {
tsk_bug_assert(
self->input_tables.mutations.node[list_node->mutation] == (tsk_id_t) j);
site = self->input_tables.mutations.site[list_node->mutation];
position = self->input_tables.sites.position[site];
tsk_bug_assert(last_position <= position);
last_position = position;
}
}
/* check the buffered edges */
/* Head and tail agree on NULL-ness, and any node with a non-empty interval
 * list must appear in buffered_children. */
for (j = 0; j < self->input_tables.nodes.num_rows; j++) {
tsk_bug_assert((self->child_edge_map_head[j] == NULL)
== (self->child_edge_map_tail[j] == NULL));
if (self->child_edge_map_head[j] != NULL) {
/* Make sure that the child is in our list */
found = false;
for (k = 0; k < self->num_buffered_children; k++) {
if (self->buffered_children[k] == (tsk_id_t) j) {
found = true;
break;
}
}
tsk_bug_assert(found);
}
}
/* Conversely, every buffered child has a non-empty list of non-empty,
 * strictly separated intervals. The intervals are counted as we go. */
num_intervals = 0;
for (j = 0; j < self->num_buffered_children; j++) {
child = self->buffered_children[j];
tsk_bug_assert(self->child_edge_map_head[child] != NULL);
for (int_list = self->child_edge_map_head[child]; int_list != NULL;
int_list = int_list->next) {
tsk_bug_assert(int_list->left < int_list->right);
if (int_list->next != NULL) {
tsk_bug_assert(int_list->right < int_list->next->left);
}
num_intervals++;
}
}
/* The count must match the number of interval_list_t objects that the
 * interval allocator reports as allocated. */
tsk_bug_assert(
num_intervals
== self->interval_list_heap.total_allocated / (sizeof(interval_list_t)));
}
/* Print each segment of the linked list starting at `head` to `out` as
 * "(left,right->node)", with no separators and no trailing newline. */
static void
print_segment_chain(tsk_segment_t *head, FILE *out)
{
    const tsk_segment_t *seg = head;

    while (seg != NULL) {
        fprintf(out, "(%f,%f->%lld)", seg->left, seg->right, (long long) seg->node);
        seg = seg->next;
    }
}
/* Debugging aid: write a human-readable dump of the simplifier to `out`
 * (options, input and output tables, allocator statistics, ancestry lists,
 * node ID map, segment queue, buffered children and the mutation maps), then
 * run simplifier_check_state on it. */
static void
simplifier_print_state(simplifier_t *self, FILE *out)
{
tsk_size_t j;
tsk_segment_t *u;
mutation_id_list_t *list_node;
interval_list_t *int_list;
tsk_id_t child;
fprintf(out, "--simplifier state--\n");
/* Each option is printed as 0 or 1 depending on whether its bit is set. */
fprintf(out, "options:\n");
fprintf(out, "\tfilter_unreferenced_sites : %d\n",
!!(self->options & TSK_SIMPLIFY_FILTER_SITES));
fprintf(out, "\tno_filter_nodes : %d\n",
!!(self->options & TSK_SIMPLIFY_NO_FILTER_NODES));
fprintf(out, "\treduce_to_site_topology : %d\n",
!!(self->options & TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY));
fprintf(out, "\tkeep_unary : %d\n",
!!(self->options & TSK_SIMPLIFY_KEEP_UNARY));
fprintf(out, "\tkeep_input_roots : %d\n",
!!(self->options & TSK_SIMPLIFY_KEEP_INPUT_ROOTS));
fprintf(out, "\tkeep_unary_in_individuals : %d\n",
!!(self->options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS));
fprintf(out, "===\nInput tables\n==\n");
tsk_table_collection_print_state(&self->input_tables, out);
fprintf(out, "===\nOutput tables\n==\n");
tsk_table_collection_print_state(self->tables, out);
fprintf(out, "===\nmemory heaps\n==\n");
fprintf(out, "segment_heap:\n");
tsk_blkalloc_print_state(&self->segment_heap, out);
fprintf(out, "interval_list_heap:\n");
tsk_blkalloc_print_state(&self->interval_list_heap, out);
/* One line per input node, listing its ancestry segments. */
fprintf(out, "===\nancestors\n==\n");
for (j = 0; j < self->input_tables.nodes.num_rows; j++) {
fprintf(out, "%lld:\t", (long long) j);
print_segment_chain(self->ancestor_map_head[j], out);
fprintf(out, "\n");
}
/* Only input nodes that have an output ID are listed. */
fprintf(out, "===\nnode_id map (input->output)\n==\n");
for (j = 0; j < self->input_tables.nodes.num_rows; j++) {
if (self->node_id_map[j] != TSK_NULL) {
fprintf(
out, "%lld->%lld\n", (long long) j, (long long) self->node_id_map[j]);
}
}
fprintf(out, "===\nsegment queue\n==\n");
for (j = 0; j < self->segment_queue_size; j++) {
u = &self->segment_queue[j];
fprintf(out, "(%f,%f->%lld)", u->left, u->right, (long long) u->node);
fprintf(out, "\n");
}
fprintf(out, "===\nbuffered children\n==\n");
for (j = 0; j < self->num_buffered_children; j++) {
child = self->buffered_children[j];
/* NOTE(review): the label printed is the position j in
 * buffered_children, not the child node ID; confirm this is intended. */
fprintf(out, "%lld -> ", (long long) j);
for (int_list = self->child_edge_map_head[child]; int_list != NULL;
int_list = int_list->next) {
fprintf(out, "(%f, %f), ", int_list->left, int_list->right);
}
fprintf(out, "\n");
}
fprintf(out, "===\nmutation node map\n==\n");
for (j = 0; j < self->input_tables.mutations.num_rows; j++) {
fprintf(out, "%lld\t-> %lld\n", (long long) j,
(long long) self->mutation_node_map[j]);
}
/* Only nodes with at least one listed mutation are printed. */
fprintf(out, "===\nnode mutation id list map\n==\n");
for (j = 0; j < self->input_tables.nodes.num_rows; j++) {
if (self->node_mutation_list_map_head[j] != NULL) {
fprintf(out, "%lld\t-> [", (long long) j);
for (list_node = self->node_mutation_list_map_head[j]; list_node != NULL;
list_node = list_node->next) {
fprintf(out, "%lld,", (long long) list_node->mutation);
}
fprintf(out, "]\n");
}
}
/* The position lookup is only read when reducing to site topology; it has
 * num_sites + 2 entries. */
if (!!(self->options & TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY)) {
fprintf(out, "===\nposition_lookup\n==\n");
for (j = 0; j < self->input_tables.sites.num_rows + 2; j++) {
fprintf(out, "%lld\t-> %f\n", (long long) j, self->position_lookup[j]);
}
}
/* Finish by asserting the internal invariants. */
simplifier_check_state(self);
}
/* Take a new segment from the simplifier's segment allocator and fill it in
 * with (left, right, node), unlinked (next == NULL). Returns NULL if the
 * allocator cannot supply the memory. */
static tsk_segment_t *TSK_WARN_UNUSED
simplifier_alloc_segment(simplifier_t *self, double left, double right, tsk_id_t node)
{
    tsk_segment_t *fresh = tsk_blkalloc_get(&self->segment_heap, sizeof(*fresh));

    if (fresh != NULL) {
        fresh->next = NULL;
        fresh->left = left;
        fresh->right = right;
        fresh->node = node;
    }
    return fresh;
}
/* Take a new interval list element from the simplifier's interval allocator
 * and fill it in with (left, right), unlinked (next == NULL). Returns NULL if
 * the allocator cannot supply the memory. */
static interval_list_t *TSK_WARN_UNUSED
simplifier_alloc_interval_list(simplifier_t *self, double left, double right)
{
    interval_list_t *item = tsk_blkalloc_get(&self->interval_list_heap, sizeof(*item));

    if (item != NULL) {
        item->next = NULL;
        item->left = left;
        item->right = right;
    }
    return item;
}
/* Copy input node `input_id` into the output node table and return the ID of
 * the new row (or the negative error code from adding it).
 *
 * Unless TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS is set, the copy's sample flag is
 * set or cleared according to is_sample[input_id]. The input->output mapping
 * is written to node_id_map before the row is added. */
static tsk_id_t TSK_WARN_UNUSED
simplifier_record_node(simplifier_t *self, tsk_id_t input_id)
{
    tsk_node_t row;

    tsk_node_table_get_row_unsafe(&self->input_tables.nodes, (tsk_id_t) input_id, &row);
    if (!(self->options & TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS)) {
        if (self->is_sample[input_id]) {
            row.flags |= TSK_NODE_IS_SAMPLE;
        } else {
            /* Zero out the sample bit */
            row.flags &= (tsk_flags_t) ~TSK_NODE_IS_SAMPLE;
        }
    }
    /* The new row will be appended, so its ID is the current row count. */
    self->node_id_map[input_id] = (tsk_id_t) self->tables->nodes.num_rows;
    return tsk_node_table_add_row(&self->tables->nodes, row.flags, row.time,
        row.population, row.individual, row.metadata, row.metadata_length);
}
/* Undo the recording of a node: clear the mapping for `input_id` and truncate
 * the output node table to `output_id` rows. Returns the result of the
 * truncation. */
static int
simplifier_rewind_node(simplifier_t *self, tsk_id_t input_id, tsk_id_t output_id)
{
    const tsk_size_t kept_rows = (tsk_size_t) output_id;

    self->node_id_map[input_id] = TSK_NULL;
    return tsk_node_table_truncate(&self->tables->nodes, kept_rows);
}
/* Write the edges buffered for `parent` to the output edge table.
 *
 * The buffered children are sorted with cmp_node_id, then one edge is added
 * per buffered interval of each child and the child's interval list is
 * cleared. On success the buffer is emptied, *ret_num_edges receives the
 * number of edges written and the interval allocator is reset, whose status
 * is returned. If adding an edge fails its error code is returned at once and
 * *ret_num_edges is not written. */
static int
simplifier_flush_edges(simplifier_t *self, tsk_id_t parent, tsk_size_t *ret_num_edges)
{
    int ret = 0;
    tsk_size_t k;
    tsk_size_t written = 0;
    tsk_id_t child, row_id;
    const interval_list_t *span;

    qsort(self->buffered_children, (size_t) self->num_buffered_children,
        sizeof(tsk_id_t), cmp_node_id);

    for (k = 0; k < self->num_buffered_children; k++) {
        child = self->buffered_children[k];
        span = self->child_edge_map_head[child];
        while (span != NULL) {
            row_id = tsk_edge_table_add_row(
                &self->tables->edges, span->left, span->right, parent, child, NULL, 0);
            if (row_id < 0) {
                ret = (int) row_id;
                goto out;
            }
            written++;
            span = span->next;
        }
        self->child_edge_map_head[child] = NULL;
        self->child_edge_map_tail[child] = NULL;
    }

    self->num_buffered_children = 0;
    *ret_num_edges = written;
    ret = tsk_blkalloc_reset(&self->interval_list_heap);
out:
    return ret;
}
/* When we are reducing topology down to what is visible at the sites we need a
 * lookup table to find the closest site position for each edge. We do this with
 * a sorted array and binary search.
 *
 * The table has num_sites + 2 entries: 0, the input site positions in table
 * order, and the sequence length.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if the table cannot be
 * allocated. */
static int
simplifier_init_position_lookup(simplifier_t *self)
{
    int ret = 0;
    tsk_size_t num_sites = self->input_tables.sites.num_rows;

    self->position_lookup = tsk_malloc((num_sites + 2) * sizeof(*self->position_lookup));
    if (self->position_lookup == NULL) {
        /* Report the failure; previously this path returned 0, so the
         * caller would go on to use a NULL lookup table. */
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    self->position_lookup[0] = 0;
    self->position_lookup[num_sites + 1] = self->input_tables.sequence_length;
    tsk_memcpy(self->position_lookup + 1, self->input_tables.sites.position,
        num_sites * sizeof(double));
out:
    return ret;
}
/*
 * Find the smallest site position index greater than or equal to left
 * and right, i.e., slide each endpoint of an interval to the right
 * until they hit a site position. If both left and right map to the
 * same position then we discard this edge. We also discard an
 * edge if left = 0 and right is less than the first site position.
 *
 * Returns true if the edge should be skipped, in which case *left and
 * *right are left unmodified. Otherwise returns false and overwrites
 * *left and *right with the remapped coordinates.
 */
static bool
simplifier_map_reduced_coordinates(simplifier_t *self, double *left, double *right)
{
    double *lookup = self->position_lookup;
    /* Two extra slots: the leading 0 and the trailing sequence length. */
    tsk_size_t lookup_size = self->input_tables.sites.num_rows + 2;
    tsk_size_t lo, hi;

    lo = tsk_search_sorted(lookup, lookup_size, *left);
    hi = tsk_search_sorted(lookup, lookup_size, *right);

    if (lo == hi) {
        return true;
    }
    if (lo == 0 && hi == 1) {
        return true;
    }
    /* Remap back to zero if the left end maps to the first site. */
    if (lo == 1) {
        lo = 0;
    }
    *left = lookup[lo];
    *right = lookup[hi];
    return false;
}
/* Buffers the edge (left, right, child) for the parent currently being
 * processed. Intervals are kept in a per-child list; an interval that
 * starts exactly where the child's last one ends is merged into it.
 *
 * Returns 0 on success (including when the edge is skipped because of
 * TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY), or TSK_ERR_NO_MEMORY. */
static int
simplifier_record_edge(simplifier_t *self, double left, double right, tsk_id_t child)
{
    int ret = 0;
    interval_list_t *last, *fresh;

    if (self->options & TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY) {
        /* NOTE: we exit early here when the coordinate mapping has told us
         * to skip this edge, as it is not visible in the reduced tree
         * sequence */
        if (simplifier_map_reduced_coordinates(self, &left, &right)) {
            goto out;
        }
    }

    last = self->child_edge_map_tail[child];
    if (last != NULL && last->right == left) {
        /* Contiguous with the previous interval: just extend it. */
        last->right = right;
        goto out;
    }

    if (last == NULL) {
        /* First interval for this child under the current parent, so
         * register the child in the buffer before allocating. */
        tsk_bug_assert(self->num_buffered_children < self->input_tables.nodes.num_rows);
        self->buffered_children[self->num_buffered_children] = child;
        self->num_buffered_children++;
    }
    fresh = simplifier_alloc_interval_list(self, left, right);
    if (fresh == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (last == NULL) {
        self->child_edge_map_head[child] = fresh;
    } else {
        last->next = fresh;
    }
    self->child_edge_map_tail[child] = fresh;
out:
    return ret;
}
/* Allocates the mutation bookkeeping: mutation_node_map (every byte set to
 * 0xff) and, for each input node, a singly linked list of the IDs of the
 * mutations that sit on that node, in mutation-table order.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY. */
static int
simplifier_init_sites(simplifier_t *self)
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t node;
    mutation_id_list_t *item;
    const tsk_size_t num_mutations = self->input_tables.mutations.num_rows;
    const tsk_size_t num_nodes = self->input_tables.nodes.num_rows;

    self->mutation_node_map = tsk_calloc(num_mutations, sizeof(tsk_id_t));
    self->node_mutation_list_mem = tsk_malloc(num_mutations * sizeof(mutation_id_list_t));
    self->node_mutation_list_map_head
        = tsk_calloc(num_nodes, sizeof(mutation_id_list_t *));
    self->node_mutation_list_map_tail
        = tsk_calloc(num_nodes, sizeof(mutation_id_list_t *));
    if (self->mutation_node_map == NULL || self->node_mutation_list_mem == NULL
        || self->node_mutation_list_map_head == NULL
        || self->node_mutation_list_map_tail == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    tsk_memset(self->mutation_node_map, 0xff, num_mutations * sizeof(tsk_id_t));

    /* One list cell per mutation, taken from the preallocated block and
     * appended to the list of the node it is on. */
    for (k = 0; k < num_mutations; k++) {
        node = self->input_tables.mutations.node[k];
        item = &self->node_mutation_list_mem[k];
        item->mutation = (tsk_id_t) k;
        item->next = NULL;
        if (self->node_mutation_list_map_head[node] == NULL) {
            self->node_mutation_list_map_head[node] = item;
        } else {
            self->node_mutation_list_map_tail[node]->next = item;
        }
        self->node_mutation_list_map_tail[node] = item;
    }
out:
    return ret;
}
/* For every mutation on input node input_id whose site position lies in
 * [left, right), record output_id as the node the mutation maps to. */
static void
simplifier_map_mutations(
    simplifier_t *self, tsk_id_t input_id, double left, double right, tsk_id_t output_id)
{
    const mutation_id_list_t *item;
    tsk_id_t site_id;
    double pos;

    for (item = self->node_mutation_list_map_head[input_id]; item != NULL;
         item = item->next) {
        site_id = self->input_tables.mutations.site[item->mutation];
        pos = self->input_tables.sites.position[site_id];
        if (left <= pos && pos < right) {
            self->mutation_node_map[item->mutation] = output_id;
        }
    }
}
/* Appends the ancestry segment (left, right, output_id) to the segment list
 * of input_id. A segment that starts where the current tail ends and maps to
 * the same output node is merged into the tail. Mutations on input_id that
 * fall within [left, right) are then mapped to output_id.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY. */
static int TSK_WARN_UNUSED
simplifier_add_ancestry(
    simplifier_t *self, tsk_id_t input_id, double left, double right, tsk_id_t output_id)
{
    int ret = 0;
    tsk_segment_t *last = self->ancestor_map_tail[input_id];
    tsk_segment_t *fresh;

    tsk_bug_assert(left < right);
    if (last != NULL && last->right == left && last->node == output_id) {
        /* Contiguous and same node: extend the tail in place. */
        last->right = right;
    } else {
        fresh = simplifier_alloc_segment(self, left, right, output_id);
        if (fresh == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        if (last == NULL) {
            self->ancestor_map_head[input_id] = fresh;
        } else {
            last->next = fresh;
        }
        self->ancestor_map_tail[input_id] = fresh;
    }
    simplifier_map_mutations(self, input_id, left, right, output_id);
out:
    return ret;
}
/* Sets up the internal working copies of the various tables, as needed
 * depending on the specified options: the tables are copied into
 * self->input_tables and the output tables are then truncated, keeping all
 * rows only for tables that are not being filtered (and for provenances). */
static int
simplifier_init_tables(simplifier_t *self)
{
    int ret;
    const tsk_flags_t options = self->options;
    tsk_bookmark_t keep;

    /* NOTE: this is a bit inefficient here as we're taking copies of
     * the tables even in the no-filter case where the original tables
     * won't be touched (beyond references to external tables that may
     * need updating). Future versions may do something a bit more
     * complicated like temporarily stealing the pointers to the
     * underlying column memory in these tables, and then being careful
     * not to free the table at the end.
     */
    ret = tsk_table_collection_copy(self->tables, &self->input_tables, 0);
    if (ret != 0) {
        goto out;
    }

    /* Any table whose entry stays zero is truncated to zero rows. */
    memset(&keep, 0, sizeof(keep));
    keep.provenances = self->tables->provenances.num_rows;
    if (options & TSK_SIMPLIFY_NO_FILTER_NODES) {
        keep.nodes = self->tables->nodes.num_rows;
    }
    if (!(options & TSK_SIMPLIFY_FILTER_POPULATIONS)) {
        keep.populations = self->tables->populations.num_rows;
    }
    if (!(options & TSK_SIMPLIFY_FILTER_INDIVIDUALS)) {
        keep.individuals = self->tables->individuals.num_rows;
    }
    if (!(options & TSK_SIMPLIFY_FILTER_SITES)) {
        keep.sites = self->tables->sites.num_rows;
    }
    ret = tsk_table_collection_truncate(self->tables, &keep);
out:
    return ret;
}
/* Prepares the output node table and the input->output node ID map, then
 * gives every sample ancestry over the whole sequence, mapped to its own
 * output node.
 *
 * When nodes are filtered the output node table must be empty and one row
 * is recorded per sample, in the order given. Otherwise the output table
 * must already hold every input node, the map is the identity, and the
 * sample flags are rewritten in place unless
 * TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS is set. */
static int
simplifier_init_nodes(simplifier_t *self, const tsk_id_t *samples)
{
    int ret = 0;
    tsk_id_t new_id, sample;
    tsk_size_t k;
    tsk_flags_t *flags;
    const tsk_size_t num_nodes = self->input_tables.nodes.num_rows;
    const tsk_flags_t options = self->options;

    if (!(options & TSK_SIMPLIFY_NO_FILTER_NODES)) {
        tsk_bug_assert(self->tables->nodes.num_rows == 0);
        /* The node table has been cleared. Add nodes for the samples. */
        for (k = 0; k < self->num_samples; k++) {
            new_id = simplifier_record_node(self, samples[k]);
            if (new_id < 0) {
                ret = (int) new_id;
                goto out;
            }
        }
    } else {
        tsk_bug_assert(self->tables->nodes.num_rows == num_nodes);
        if (!(options & TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS)) {
            flags = self->tables->nodes.flags;
            for (k = 0; k < num_nodes; k++) {
                /* Reset the sample flags */
                flags[k] &= (tsk_flags_t) ~TSK_NODE_IS_SAMPLE;
                if (self->is_sample[k]) {
                    flags[k] |= TSK_NODE_IS_SAMPLE;
                }
            }
        }
        for (k = 0; k < num_nodes; k++) {
            self->node_id_map[k] = (tsk_id_t) k;
        }
    }

    /* Add the initial ancestry */
    for (k = 0; k < self->num_samples; k++) {
        sample = samples[k];
        ret = simplifier_add_ancestry(self, sample, 0,
            self->input_tables.sequence_length, self->node_id_map[sample]);
        if (ret != 0) {
            goto out;
        }
    }
out:
    return ret;
}
/* Initialises a simplifier for `tables` and the given sample node IDs.
 *
 * The struct is zeroed first. The tables are then integrity-checked, the
 * allocators and per-node maps are allocated, and the samples are validated
 * (each must be a valid node ID and appear only once) before the tables are
 * copied and truncated. Returns 0 on success or a non-zero error code.
 * Nothing is released here on failure. NOTE(review): presumably the caller
 * always pairs this with simplifier_free -- confirm at the call site. */
static int
simplifier_init(simplifier_t *self, const tsk_id_t *samples, tsk_size_t num_samples,
    tsk_table_collection_t *tables, tsk_flags_t options)
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t check_ret, sample;
    tsk_size_t num_nodes;

    tsk_memset(self, 0, sizeof(simplifier_t));
    self->num_samples = num_samples;
    self->options = options;
    self->tables = tables;

    /* TODO we can add a flag to skip these checks for when we know they are
     * unnecessary */
    /* TODO Current unit tests require TSK_CHECK_SITE_DUPLICATES but it's
     * debatable whether we need it. If we remove, we definitely need explicit
     * tests to ensure we're doing sensible things with duplicate sites.
     * (Particularly, re TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY.) */
    check_ret = tsk_table_collection_check_integrity(tables,
        TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING | TSK_CHECK_SITE_DUPLICATES);
    if (check_ret != 0) {
        ret = (int) check_ret;
        goto out;
    }

    /* Allocate the heaps used for small objects. Assuming 8K is a good chunk
     * size. */
    ret = tsk_blkalloc_init(&self->segment_heap, 8192);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_blkalloc_init(&self->interval_list_heap, 8192);
    if (ret != 0) {
        goto out;
    }
    ret = segment_overlapper_alloc(&self->segment_overlapper);
    if (ret != 0) {
        goto out;
    }

    /* Make the maps and set the initial state */
    num_nodes = tables->nodes.num_rows;
    self->ancestor_map_head = tsk_calloc(num_nodes, sizeof(tsk_segment_t *));
    self->ancestor_map_tail = tsk_calloc(num_nodes, sizeof(tsk_segment_t *));
    self->child_edge_map_head = tsk_calloc(num_nodes, sizeof(interval_list_t *));
    self->child_edge_map_tail = tsk_calloc(num_nodes, sizeof(interval_list_t *));
    self->node_id_map = tsk_malloc(num_nodes * sizeof(tsk_id_t));
    self->buffered_children = tsk_malloc(num_nodes * sizeof(tsk_id_t));
    self->is_sample = tsk_calloc(num_nodes, sizeof(bool));
    self->max_segment_queue_size = 64;
    self->segment_queue
        = tsk_malloc(self->max_segment_queue_size * sizeof(tsk_segment_t));
    if (self->ancestor_map_head == NULL || self->ancestor_map_tail == NULL
        || self->child_edge_map_head == NULL || self->child_edge_map_tail == NULL
        || self->node_id_map == NULL || self->is_sample == NULL
        || self->segment_queue == NULL || self->buffered_children == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Go through the samples to check for errors before we clear the tables. */
    for (k = 0; k < self->num_samples; k++) {
        sample = samples[k];
        if (sample < 0 || sample >= (tsk_id_t) num_nodes) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }
        if (self->is_sample[sample]) {
            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
            goto out;
        }
        self->is_sample[sample] = true;
    }
    /* Every byte of the map is set to 0xff: no input node has an output
     * node yet. */
    tsk_memset(self->node_id_map, 0xff, num_nodes * sizeof(tsk_id_t));

    ret = simplifier_init_tables(self);
    if (ret != 0) {
        goto out;
    }
    ret = simplifier_init_sites(self);
    if (ret != 0) {
        goto out;
    }
    ret = simplifier_init_nodes(self, samples);
    if (ret != 0) {
        goto out;
    }
    if (self->options & TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY) {
        ret = simplifier_init_position_lookup(self);
        if (ret != 0) {
            goto out;
        }
    }
    self->edge_sort_offset = TSK_NULL;
out:
    return ret;
}
/* Releases everything owned by the simplifier: the copy of the input
 * tables, the block allocators, the segment overlapper and all working
 * arrays. Always returns 0. */
static int
simplifier_free(simplifier_t *self)
{
    tsk_table_collection_free(&self->input_tables);
    tsk_blkalloc_free(&self->segment_heap);
    tsk_blkalloc_free(&self->interval_list_heap);
    segment_overlapper_free(&self->segment_overlapper);

    /* Per-node ancestry and edge buffering state */
    tsk_safe_free(self->ancestor_map_tail);
    tsk_safe_free(self->ancestor_map_head);
    tsk_safe_free(self->child_edge_map_tail);
    tsk_safe_free(self->child_edge_map_head);
    tsk_safe_free(self->buffered_children);
    tsk_safe_free(self->node_id_map);
    tsk_safe_free(self->is_sample);
    tsk_safe_free(self->segment_queue);

    /* Mutation bookkeeping */
    tsk_safe_free(self->mutation_node_map);
    tsk_safe_free(self->node_mutation_list_map_tail);
    tsk_safe_free(self->node_mutation_list_map_head);
    tsk_safe_free(self->node_mutation_list_mem);

    /* Only allocated for TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY */
    tsk_safe_free(self->position_lookup);
    return 0;
}
/* Appends the segment (left, right, node) to the segment queue, doubling
 * the queue's capacity when only one free slot is left.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if the queue cannot grow. */
static int TSK_WARN_UNUSED
simplifier_enqueue_segment(simplifier_t *self, double left, double right, tsk_id_t node)
{
    int ret = 0;
    tsk_segment_t *slot;
    void *grown;

    tsk_bug_assert(left < right);
    /* Make sure we always have room for one more segment in the queue so we
     * can put a tail sentinel on it */
    if (self->segment_queue_size == self->max_segment_queue_size - 1) {
        self->max_segment_queue_size *= 2;
        grown = tsk_realloc(self->segment_queue,
            self->max_segment_queue_size * sizeof(*self->segment_queue));
        if (grown == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        self->segment_queue = grown;
    }
    slot = &self->segment_queue[self->segment_queue_size];
    slot->left = left;
    slot->right = right;
    slot->node = node;
    self->segment_queue_size++;
out:
    return ret;
}
/* Merges the ancestry segments currently held in the segment queue into the
 * ancestry of input node input_id.
 *
 * The queue is fed to the segment overlapper, which is iterated for as long
 * as it returns 1; each iteration yields an interval [left, right) and the
 * segments X overlapping it:
 *  - a single overlapping segment passes straight through, unless input_id is
 *    a sample or unary nodes are being kept, in which case an edge is
 *    recorded;
 *  - several overlapping segments coalesce here: an output node is recorded
 *    for input_id if it has none yet, and one edge is recorded per segment.
 * For samples, gaps between intervals (and a trailing gap) are filled with
 * ancestry mapped to the sample's own output node.
 *
 * Finally the buffered edges are flushed; when nodes are being filtered, a
 * non-sample node that ended up with no edges is rewound.
 *
 * Returns 0 on success or a negative error code. */
static int TSK_WARN_UNUSED
simplifier_merge_ancestors(simplifier_t *self, tsk_id_t input_id)
{
    int ret = 0;
    tsk_segment_t **X, *x;
    tsk_size_t j, num_overlapping, num_flushed_edges;
    double left, right, prev_right;
    tsk_id_t ancestry_node;
    tsk_id_t output_id = self->node_id_map[input_id];
    bool is_sample = self->is_sample[input_id];
    bool filter_nodes = !(self->options & TSK_SIMPLIFY_NO_FILTER_NODES);
    bool keep_unary = self->options & TSK_SIMPLIFY_KEEP_UNARY;

    if ((self->options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS)
        && (self->input_tables.nodes.individual[input_id] != TSK_NULL)) {
        keep_unary = true;
    }

    if (is_sample) {
        /* Free up the existing ancestry mapping. */
        x = self->ancestor_map_tail[input_id];
        tsk_bug_assert(x->left == 0 && x->right == self->tables->sequence_length);
        self->ancestor_map_head[input_id] = NULL;
        self->ancestor_map_tail[input_id] = NULL;
    }

    ret = segment_overlapper_start(
        &self->segment_overlapper, self->segment_queue, self->segment_queue_size);
    if (ret != 0) {
        goto out;
    }
    prev_right = 0;
    while ((ret = segment_overlapper_next(
                &self->segment_overlapper, &left, &right, &X, &num_overlapping))
           == 1) {
        tsk_bug_assert(left < right);
        tsk_bug_assert(num_overlapping > 0);
        if (num_overlapping == 1) {
            ancestry_node = X[0]->node;
            if (is_sample) {
                ret = simplifier_record_edge(self, left, right, ancestry_node);
                if (ret != 0) {
                    goto out;
                }
                ancestry_node = output_id;
            } else if (keep_unary) {
                if (output_id == TSK_NULL) {
                    output_id = simplifier_record_node(self, input_id);
                    /* A negative value is an error code, not a node ID;
                     * propagate it rather than using it as one. */
                    if (output_id < 0) {
                        ret = (int) output_id;
                        goto out;
                    }
                }
                ret = simplifier_record_edge(self, left, right, ancestry_node);
                if (ret != 0) {
                    goto out;
                }
            }
        } else {
            if (output_id == TSK_NULL) {
                output_id = simplifier_record_node(self, input_id);
                if (output_id < 0) {
                    ret = (int) output_id;
                    goto out;
                }
            }
            ancestry_node = output_id;
            for (j = 0; j < num_overlapping; j++) {
                ret = simplifier_record_edge(self, left, right, X[j]->node);
                if (ret != 0) {
                    goto out;
                }
            }
        }
        if (is_sample && left != prev_right) {
            /* Fill in any gaps in ancestry for the sample */
            ret = simplifier_add_ancestry(self, input_id, prev_right, left, output_id);
            if (ret != 0) {
                goto out;
            }
        }
        if (keep_unary) {
            ancestry_node = output_id;
        }
        ret = simplifier_add_ancestry(self, input_id, left, right, ancestry_node);
        if (ret != 0) {
            goto out;
        }
        prev_right = right;
    }
    /* Check for errors occurring in the loop condition */
    if (ret != 0) {
        goto out;
    }
    if (is_sample && prev_right != self->tables->sequence_length) {
        /* If a trailing gap exists in the sample ancestry, fill it in. */
        ret = simplifier_add_ancestry(
            self, input_id, prev_right, self->tables->sequence_length, output_id);
        if (ret != 0) {
            goto out;
        }
    }
    if (output_id != TSK_NULL) {
        ret = simplifier_flush_edges(self, output_id, &num_flushed_edges);
        if (ret != 0) {
            goto out;
        }
        /* A non-sample node that produced no edges is not needed in the
         * output, so take it back out of the node table. */
        if (filter_nodes && (num_flushed_edges == 0) && !is_sample) {
            ret = simplifier_rewind_node(self, input_id, output_id);
        }
    }
out:
    return ret;
}
/* Extract the ancestry for the specified input node over the specified
 * interval and queue it up for merging.
 *
 * Walks the segment list of input_id. For every segment that overlaps
 * (left, right), the intersection is pushed onto the segment queue and
 * removed from the list: the part to the left of the intersection (if any)
 * is kept as a newly allocated segment, and the part to the right (if any)
 * is kept by shrinking the existing segment in place. Segments that do not
 * overlap are left in the list untouched. The head and tail pointers for
 * input_id are rewritten at the end.
 *
 * Returns 0 on success or an error code from enqueueing/allocation. On
 * error the head/tail pointers for input_id are not updated.
 */
static int TSK_WARN_UNUSED
simplifier_extract_ancestry(
simplifier_t *self, double left, double right, tsk_id_t input_id)
{
int ret = 0;
tsk_segment_t *x = self->ancestor_map_head[input_id];
tsk_segment_t y; /* y is the segment that has been removed */
tsk_segment_t *x_head, *x_prev, *seg_left, *seg_right;
/* x_head / x_prev track the head and the last kept segment of the
 * rebuilt list. */
x_head = NULL;
x_prev = NULL;
while (x != NULL) {
if (x->right > left && right > x->left) {
/* x overlaps the interval: y is the intersection. */
y.left = TSK_MAX(x->left, left);
y.right = TSK_MIN(x->right, right);
y.node = x->node;
ret = simplifier_enqueue_segment(self, y.left, y.right, y.node);
if (ret != 0) {
goto out;
}
seg_left = NULL;
seg_right = NULL;
if (x->left != y.left) {
/* Part of x lies to the left of the intersection; keep it as a new
 * segment linked after the last kept segment. */
seg_left = simplifier_alloc_segment(self, x->left, y.left, x->node);
if (seg_left == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
if (x_prev == NULL) {
x_head = seg_left;
} else {
x_prev->next = seg_left;
}
x_prev = seg_left;
}
if (x->right != y.right) {
/* Part of x lies to the right: reuse x itself for that remainder. */
x->left = y.right;
seg_right = x;
} else {
/* x is consumed up to its right end; continue with its successor. */
seg_right = x->next;
// TODO free x
}
if (x_prev == NULL) {
x_head = seg_right;
} else {
x_prev->next = seg_right;
}
/* seg_right is examined on the next iteration; a shrunken x now starts
 * at y.right. */
x = seg_right;
} else {
/* No overlap: keep x as it is and move on. */
if (x_prev == NULL) {
x_head = x;
}
x_prev = x;
x = x->next;
}
}
self->ancestor_map_head[input_id] = x_head;
self->ancestor_map_tail[input_id] = x_prev;
out:
return ret;
}
/* Processes the input edges [start, end), all of which must have `parent`
 * as their parent: the ancestry of each child over its edge interval is
 * extracted into a freshly emptied segment queue, and the queue is then
 * merged into the parent's ancestry.
 *
 * Returns 0 on success or the first error encountered. */
static int TSK_WARN_UNUSED
simplifier_process_parent_edges(
    simplifier_t *self, tsk_id_t parent, tsk_size_t start, tsk_size_t end)
{
    int ret = 0;
    tsk_size_t k;
    const tsk_edge_table_t *edges = &self->input_tables.edges;

    /* Go through the edges and queue up ancestry segments for processing. */
    self->segment_queue_size = 0;
    for (k = start; k < end; k++) {
        tsk_bug_assert(parent == edges->parent[k]);
        ret = simplifier_extract_ancestry(
            self, edges->left[k], edges->right[k], edges->child[k]);
        if (ret != 0) {
            goto out;
        }
    }
    /* We can now merge the ancestral segments for the parent */
    ret = simplifier_merge_ancestors(self, parent);
out:
    return ret;
}
/* Fills site_id_map with the output ID of every input site.
 *
 * Without TSK_SIMPLIFY_FILTER_SITES the output site table must already hold
 * every input site and the map is the identity. With it, each site flagged
 * in site_referenced is appended to the output site table and mapped to its
 * new row; all other sites map to TSK_NULL.
 *
 * Returns 0 on success or the error code from adding a site row. */
static int TSK_WARN_UNUSED
simplifier_finalise_site_references(
    simplifier_t *self, const bool *site_referenced, tsk_id_t *site_id_map)
{
    int ret = 0;
    tsk_id_t new_id;
    tsk_size_t k;
    tsk_site_t row;
    const tsk_size_t num_sites = self->input_tables.sites.num_rows;
    const bool filter_sites = !!(self->options & TSK_SIMPLIFY_FILTER_SITES);

    if (!filter_sites) {
        tsk_bug_assert(self->tables->sites.num_rows == num_sites);
        for (k = 0; k < num_sites; k++) {
            site_id_map[k] = (tsk_id_t) k;
        }
        goto out;
    }

    for (k = 0; k < num_sites; k++) {
        tsk_site_table_get_row_unsafe(&self->input_tables.sites, (tsk_id_t) k, &row);
        site_id_map[k] = TSK_NULL;
        if (!site_referenced[k]) {
            continue;
        }
        new_id = tsk_site_table_add_row(&self->tables->sites, row.position,
            row.ancestral_state, row.ancestral_state_length, row.metadata,
            row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
        site_id_map[k] = new_id;
    }
out:
    return ret;
}
/* Rebuilds the output population table so that it holds only the
 * populations referenced by output nodes, then rewrites the population
 * column of the output node table to the new IDs. Must only be called when
 * TSK_SIMPLIFY_FILTER_POPULATIONS is set.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or the error code from adding a
 * population row. */
static int TSK_WARN_UNUSED
simplifier_finalise_population_references(simplifier_t *self)
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t old_id, new_id;
    tsk_population_t row;
    tsk_id_t *node_population = self->tables->nodes.population;
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const tsk_size_t num_populations = self->input_tables.populations.num_rows;
    bool *referenced = tsk_calloc(num_populations, sizeof(*referenced));
    tsk_id_t *id_map = tsk_malloc(num_populations * sizeof(*id_map));

    tsk_bug_assert(self->options & TSK_SIMPLIFY_FILTER_POPULATIONS);

    if (referenced == NULL || id_map == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Pass 1: find the populations that output nodes point at. */
    for (k = 0; k < num_nodes; k++) {
        old_id = node_population[k];
        if (old_id != TSK_NULL) {
            referenced[old_id] = true;
        }
    }

    /* Pass 2: copy the referenced populations across, in input order. */
    for (k = 0; k < num_populations; k++) {
        tsk_population_table_get_row_unsafe(
            &self->input_tables.populations, (tsk_id_t) k, &row);
        id_map[k] = TSK_NULL;
        if (!referenced[k]) {
            continue;
        }
        new_id = tsk_population_table_add_row(
            &self->tables->populations, row.metadata, row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
        id_map[k] = new_id;
    }

    /* Pass 3: remap the IDs in the node table */
    for (k = 0; k < num_nodes; k++) {
        old_id = node_population[k];
        if (old_id != TSK_NULL) {
            node_population[k] = id_map[old_id];
        }
    }
out:
    tsk_safe_free(id_map);
    tsk_safe_free(referenced);
    return ret;
}
/* Rebuilds the output individual table so that it holds only the
 * individuals referenced by output nodes, then rewrites the individual
 * column of the output node table and the parents column of the output
 * individual table to the new IDs. Must only be called when
 * TSK_SIMPLIFY_FILTER_INDIVIDUALS is set.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or the error code from adding an
 * individual row. */
static int TSK_WARN_UNUSED
simplifier_finalise_individual_references(simplifier_t *self)
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t old_id, new_id;
    tsk_individual_t row;
    tsk_id_t *node_individual = self->tables->nodes.individual;
    tsk_id_t *parents;
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const tsk_size_t num_individuals = self->input_tables.individuals.num_rows;
    bool *referenced = tsk_calloc(num_individuals, sizeof(*referenced));
    tsk_id_t *id_map = tsk_malloc(num_individuals * sizeof(*id_map));

    tsk_bug_assert(self->options & TSK_SIMPLIFY_FILTER_INDIVIDUALS);

    if (referenced == NULL || id_map == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Pass 1: find the individuals that output nodes point at. */
    for (k = 0; k < num_nodes; k++) {
        old_id = node_individual[k];
        if (old_id != TSK_NULL) {
            referenced[old_id] = true;
        }
    }

    /* Pass 2: copy the referenced individuals across, in input order. */
    for (k = 0; k < num_individuals; k++) {
        tsk_individual_table_get_row_unsafe(
            &self->input_tables.individuals, (tsk_id_t) k, &row);
        id_map[k] = TSK_NULL;
        if (!referenced[k]) {
            continue;
        }
        /* Can't remap the parents inline here because we have no
         * guarantees about sortedness */
        new_id = tsk_individual_table_add_row(&self->tables->individuals, row.flags,
            row.location, row.location_length, row.parents, row.parents_length,
            row.metadata, row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
        id_map[k] = new_id;
    }

    /* Pass 3: remap the IDs in the node table */
    for (k = 0; k < num_nodes; k++) {
        old_id = node_individual[k];
        if (old_id != TSK_NULL) {
            node_individual[k] = id_map[old_id];
        }
    }

    /* Pass 4: remap parent IDs.
     * NOTE! must take the pointer reference here as it can change from
     * the start of the function */
    parents = self->tables->individuals.parents;
    for (k = 0; k < self->tables->individuals.parents_length; k++) {
        if (parents[k] != TSK_NULL) {
            parents[k] = id_map[parents[k]];
        }
    }
out:
    tsk_safe_free(id_map);
    tsk_safe_free(referenced);
    return ret;
}
/* Writes the mutations that were mapped to an output node, together with
 * the sites they need, to the output tables.
 *
 * A site counts as referenced if at least one of its mutations has an entry
 * other than TSK_NULL in mutation_node_map. Each kept mutation is copied
 * with its node, site and parent rewritten to output IDs.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or the error code from adding a
 * row. */
static int TSK_WARN_UNUSED
simplifier_output_sites(simplifier_t *self)
{
    int ret = 0;
    tsk_id_t new_id;
    tsk_size_t k;
    tsk_mutation_t row;
    const tsk_size_t num_sites = self->input_tables.sites.num_rows;
    const tsk_size_t num_mutations = self->input_tables.mutations.num_rows;
    bool *site_referenced = tsk_calloc(num_sites, sizeof(*site_referenced));
    tsk_id_t *site_id_map = tsk_malloc(num_sites * sizeof(*site_id_map));
    tsk_id_t *mutation_id_map = tsk_malloc(num_mutations * sizeof(*mutation_id_map));
    const tsk_id_t *mapped_node = self->mutation_node_map;
    const tsk_id_t *input_site = self->input_tables.mutations.site;

    if (site_referenced == NULL || site_id_map == NULL || mutation_id_map == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    for (k = 0; k < num_mutations; k++) {
        if (mapped_node[k] != TSK_NULL) {
            site_referenced[input_site[k]] = true;
        }
    }
    ret = simplifier_finalise_site_references(self, site_referenced, site_id_map);
    if (ret != 0) {
        goto out;
    }

    for (k = 0; k < num_mutations; k++) {
        mutation_id_map[k] = TSK_NULL;
        if (mapped_node[k] == TSK_NULL) {
            continue;
        }
        tsk_mutation_table_get_row_unsafe(
            &self->input_tables.mutations, (tsk_id_t) k, &row);
        row.node = mapped_node[k];
        row.site = site_id_map[row.site];
        if (row.parent != TSK_NULL) {
            /* Uses the output ID already assigned to the parent mutation. */
            row.parent = mutation_id_map[row.parent];
        }
        new_id = tsk_mutation_table_add_row(&self->tables->mutations, row.site,
            row.node, row.parent, row.time, row.derived_state,
            row.derived_state_length, row.metadata, row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
        mutation_id_map[k] = new_id;
    }
out:
    tsk_safe_free(site_referenced);
    tsk_safe_free(site_id_map);
    tsk_safe_free(mutation_id_map);
    return ret;
}
/* Flush the remaining non-edge and node data in the model to the
 * output tables: sites and mutations always, populations and individuals
 * only when the corresponding filter option is set.
 *
 * Returns TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED if the input has any
 * migrations, otherwise 0 or the first error encountered. */
static int TSK_WARN_UNUSED
simplifier_flush_output(simplifier_t *self)
{
    int ret = 0;
    const tsk_flags_t options = self->options;

    /* TODO Migrations fit reasonably neatly into the pattern that we have here. We
     * can consider references to populations from migration objects in the same way
     * as from nodes, so that we only remove a population if it's referenced by
     * neither. Mapping the population IDs in migrations is then easy. In principle
     * nodes are similar, but the semantics are slightly different because we've
     * already allocated all the nodes by their references from edges. We then
     * need to decide whether we remove migrations that reference unmapped nodes
     * or whether to add these nodes back in (probably the former is the correct
     * approach).*/
    if (self->input_tables.migrations.num_rows != 0) {
        ret = tsk_trace_error(TSK_ERR_SIMPLIFY_MIGRATIONS_NOT_SUPPORTED);
        goto out;
    }

    ret = simplifier_output_sites(self);
    if (ret == 0 && (options & TSK_SIMPLIFY_FILTER_POPULATIONS)) {
        ret = simplifier_finalise_population_references(self);
    }
    if (ret == 0 && (options & TSK_SIMPLIFY_FILTER_INDIVIDUALS)) {
        ret = simplifier_finalise_individual_references(self);
    }
out:
    return ret;
}
/* Sets edge_sort_offset to the index of the first output edge whose parent
 * has a time >= youngest_root_time, or to the number of edges if there is
 * no such edge. */
static void
simplifier_set_edge_sort_offset(simplifier_t *self, double youngest_root_time)
{
    const tsk_id_t *edge_parent = self->tables->edges.parent;
    const double *node_time = self->tables->nodes.time;
    const int64_t num_edges = (int64_t) self->tables->edges.num_rows;
    int64_t k = 0;

    /* Written as !(a >= b) rather than a < b so that the scan stops on
     * exactly the same edges whatever the time values are. */
    while (k < num_edges && !(node_time[edge_parent[k]] >= youngest_root_time)) {
        k++;
    }
    self->edge_sort_offset = k;
}
/* Sorts the output tables from a bookmark that places the edge start at
 * edge_sort_offset and the site and mutation starts at the ends of their
 * tables; every other bookmark field is zero. Requires edge_sort_offset to
 * have been set (>= 0). Returns the result of the sort. */
static int TSK_WARN_UNUSED
simplifier_sort_edges(simplifier_t *self)
{
    tsk_bookmark_t start;

    tsk_bug_assert(self->edge_sort_offset >= 0);

    memset(&start, 0, sizeof(start));
    start.edges = (tsk_size_t) self->edge_sort_offset;
    start.sites = self->tables->sites.num_rows;
    start.mutations = self->tables->mutations.num_rows;
    return tsk_table_collection_sort(self->tables, &start, 0);
}
/* Implements TSK_SIMPLIFY_KEEP_INPUT_ROOTS: every input node that still has
 * ancestry segments left is given an output node (if it has none), and an
 * edge is recorded from it to each segment's node that differs from it.
 * Mutations on the input node within those segments are mapped to the
 * output node.
 *
 * If any such node exists, edge_sort_offset is set from the smallest time
 * among them.
 *
 * Returns 0 on success or a negative error code. */
static int TSK_WARN_UNUSED
simplifier_insert_input_roots(simplifier_t *self)
{
    int ret = 0;
    tsk_id_t input_id, output_id;
    tsk_segment_t *x;
    tsk_size_t num_flushed_edges;
    double root_time;
    double youngest_root_time = DBL_MAX;

    for (input_id = 0; input_id < (tsk_id_t) self->input_tables.nodes.num_rows;
         input_id++) {
        x = self->ancestor_map_head[input_id];
        if (x != NULL) {
            output_id = self->node_id_map[input_id];
            if (output_id == TSK_NULL) {
                output_id = simplifier_record_node(self, input_id);
                if (output_id < 0) {
                    ret = (int) output_id;
                    goto out;
                }
            }
            /* Read the time through the table on every iteration rather than
             * through a pointer cached before the loop: rows are appended to
             * the output node table above, and column pointers can change
             * when rows are added. */
            root_time = self->tables->nodes.time[output_id];
            youngest_root_time = TSK_MIN(youngest_root_time, root_time);
            while (x != NULL) {
                if (x->node != output_id) {
                    ret = simplifier_record_edge(self, x->left, x->right, x->node);
                    if (ret != 0) {
                        goto out;
                    }
                    simplifier_map_mutations(
                        self, input_id, x->left, x->right, output_id);
                }
                x = x->next;
            }
            ret = simplifier_flush_edges(self, output_id, &num_flushed_edges);
            if (ret != 0) {
                goto out;
            }
        }
    }
    /* DBL_MAX means no node with remaining ancestry was found, so the
     * offset is left unset. */
    if (youngest_root_time != DBL_MAX) {
        simplifier_set_edge_sort_offset(self, youngest_root_time);
    }
out:
    return ret;
}
/* Runs the simplification: processes the input edges one parent at a time
 * (each run of consecutive edges with the same parent is handled together),
 * optionally inserts the input roots, flushes the remaining tables and, if
 * an edge sort offset was set, sorts the output.
 *
 * If node_map is not NULL it receives the input->output node ID map, one
 * entry per input node.
 *
 * Returns 0 on success or the first error encountered. */
static int TSK_WARN_UNUSED
simplifier_run(simplifier_t *self, tsk_id_t *node_map)
{
    int ret = 0;
    tsk_size_t k, run_start;
    const tsk_id_t *edge_parent = self->input_tables.edges.parent;
    const tsk_size_t num_edges = self->input_tables.edges.num_rows;

    if (num_edges > 0) {
        run_start = 0;
        for (k = 1; k < num_edges; k++) {
            if (edge_parent[k] == edge_parent[run_start]) {
                continue;
            }
            /* The parent changed at k: edges [run_start, k) are one run. */
            ret = simplifier_process_parent_edges(
                self, edge_parent[run_start], run_start, k);
            if (ret != 0) {
                goto out;
            }
            run_start = k;
        }
        /* The final run extends to the end of the table. */
        ret = simplifier_process_parent_edges(
            self, edge_parent[run_start], run_start, num_edges);
        if (ret != 0) {
            goto out;
        }
    }

    if (self->options & TSK_SIMPLIFY_KEEP_INPUT_ROOTS) {
        ret = simplifier_insert_input_roots(self);
        if (ret != 0) {
            goto out;
        }
    }

    ret = simplifier_flush_output(self);
    if (ret != 0) {
        goto out;
    }

    if (node_map != NULL) {
        /* Finally, output the new IDs for the nodes, if required. */
        tsk_memcpy(node_map, self->node_id_map,
            self->input_tables.nodes.num_rows * sizeof(tsk_id_t));
    }

    if (self->edge_sort_offset != TSK_NULL) {
        tsk_bug_assert(self->options & TSK_SIMPLIFY_KEEP_INPUT_ROOTS);
        ret = simplifier_sort_edges(self);
    }
out:
    return ret;
}
/*************************
* table_collection
*************************/
/* Record ordered by cmp_index_sort, which compares `first`, `second`,
 * `third` and `fourth` in that order and never looks at `index`.
 * NOTE(review): `index` presumably carries the row's original position
 * through the sort -- confirm at the call sites. */
typedef struct {
tsk_id_t index;
/* These are the sort keys in order */
double first;
double second;
tsk_id_t third;
tsk_id_t fourth;
} index_sort_t;
/* Three-way comparison of two index_sort_t records (qsort-style signature),
 * comparing the keys first, second, third, fourth in that order. Returns
 * -1, 0 or 1. A key decides the result only if it compares strictly less
 * or strictly greater; otherwise the next key is consulted. */
static int
cmp_index_sort(const void *a, const void *b)
{
    const index_sort_t *lhs = (const index_sort_t *) a;
    const index_sort_t *rhs = (const index_sort_t *) b;
    int ret;

    ret = (lhs->first > rhs->first) - (lhs->first < rhs->first);
    if (ret != 0) {
        return ret;
    }
    ret = (lhs->second > rhs->second) - (lhs->second < rhs->second);
    if (ret != 0) {
        return ret;
    }
    ret = (lhs->third > rhs->third) - (lhs->third < rhs->third);
    if (ret != 0) {
        return ret;
    }
    return (lhs->fourth > rhs->fourth) - (lhs->fourth < rhs->fourth);
}
/* Runs check_offsets on the following ragged columns, in this order: node
 * metadata; site ancestral_state and metadata; mutation derived_state and
 * metadata; individual metadata; provenance timestamp and record.
 *
 * Stops at the first failure and returns its error code; returns 0 if
 * every check passes. */
static int
tsk_table_collection_check_offsets(const tsk_table_collection_t *self)
{
    int ret;

    ret = check_offsets(self->nodes.num_rows, self->nodes.metadata_offset,
        self->nodes.metadata_length, true);
    if (ret == 0) {
        ret = check_offsets(self->sites.num_rows, self->sites.ancestral_state_offset,
            self->sites.ancestral_state_length, true);
    }
    if (ret == 0) {
        ret = check_offsets(self->sites.num_rows, self->sites.metadata_offset,
            self->sites.metadata_length, true);
    }
    if (ret == 0) {
        ret = check_offsets(self->mutations.num_rows,
            self->mutations.derived_state_offset,
            self->mutations.derived_state_length, true);
    }
    if (ret == 0) {
        ret = check_offsets(self->mutations.num_rows, self->mutations.metadata_offset,
            self->mutations.metadata_length, true);
    }
    if (ret == 0) {
        ret = check_offsets(self->individuals.num_rows,
            self->individuals.metadata_offset, self->individuals.metadata_length,
            true);
    }
    if (ret == 0) {
        ret = check_offsets(self->provenances.num_rows,
            self->provenances.timestamp_offset, self->provenances.timestamp_length,
            true);
    }
    if (ret == 0) {
        ret = check_offsets(self->provenances.num_rows,
            self->provenances.record_offset, self->provenances.record_length, true);
    }
    return ret;
}
/* Validates every row of the node table: the time must be finite, and the
 * population and individual references must each be TSK_NULL or a valid row
 * index. The population check is skipped when TSK_NO_CHECK_POPULATION_REFS
 * is set in options.
 *
 * Returns 0 if all rows pass, otherwise TSK_ERR_TIME_NONFINITE,
 * TSK_ERR_POPULATION_OUT_OF_BOUNDS or TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS for
 * the first failing row. */
static int
tsk_table_collection_check_node_integrity(
    const tsk_table_collection_t *self, tsk_flags_t options)
{
    int ret = 0;
    tsk_size_t k;
    tsk_id_t pop, ind;
    const tsk_id_t num_populations = (tsk_id_t) self->populations.num_rows;
    const tsk_id_t num_individuals = (tsk_id_t) self->individuals.num_rows;
    const bool skip_population_refs = !!(options & TSK_NO_CHECK_POPULATION_REFS);

    for (k = 0; k < self->nodes.num_rows; k++) {
        if (!tsk_isfinite(self->nodes.time[k])) {
            ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);
            goto out;
        }
        if (!skip_population_refs) {
            pop = self->nodes.population[k];
            if (pop < TSK_NULL || pop >= num_populations) {
                ret = tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);
                goto out;
            }
        }
        ind = self->nodes.individual[k];
        if (ind < TSK_NULL || ind >= num_individuals) {
            ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
            goto out;
        }
    }
out:
    return ret;
}
/* Validate every row of the edge table.
 *
 * For each edge this checks, in order: parent and child are non-null and
 * lie in [0, num_nodes); left and right are finite; left >= 0;
 * right <= sequence_length; left < right; and time[child] < time[parent].
 *
 * When TSK_CHECK_EDGE_ORDERING is set in options, it additionally checks
 * that edges are sorted by (time[parent], parent, child, left), that no
 * (parent, child, left) key appears on two consecutive rows, and that all
 * edges for a given parent are adjacent.
 *
 * Returns 0 on success; otherwise the value produced by tsk_trace_error for
 * the first violated condition, or for TSK_ERR_NO_MEMORY if the scratch
 * array cannot be allocated. */
static int
tsk_table_collection_check_edge_integrity(
const tsk_table_collection_t *self, tsk_flags_t options)
{
int ret = 0;
tsk_size_t j;
tsk_id_t parent, last_parent, child, last_child;
double left, last_left, right;
const double *time = self->nodes.time;
const double L = self->sequence_length;
const tsk_edge_table_t edges = self->edges;
const tsk_id_t num_nodes = (tsk_id_t) self->nodes.num_rows;
const bool check_ordering = !!(options & TSK_CHECK_EDGE_ORDERING);
/* parent_seen[u] is set once the run of edges for parent u has ended;
 * it is only allocated when ordering checks are requested. */
bool *parent_seen = NULL;
if (check_ordering) {
parent_seen = tsk_calloc((tsk_size_t) num_nodes, sizeof(*parent_seen));
if (parent_seen == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
}
/* Just keeping compiler happy; these values don't matter. */
last_left = 0;
last_parent = 0;
last_child = 0;
for (j = 0; j < edges.num_rows; j++) {
parent = edges.parent[j];
child = edges.child[j];
left = edges.left[j];
right = edges.right[j];
/* Node ID integrity */
/* The null checks come before the range checks so that a null ID
 * is reported with its own, more specific error. */
if (parent == TSK_NULL) {
ret = tsk_trace_error(TSK_ERR_NULL_PARENT);
goto out;
}
if (parent < 0 || parent >= num_nodes) {
ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
goto out;
}
if (child == TSK_NULL) {
ret = tsk_trace_error(TSK_ERR_NULL_CHILD);
goto out;
}
if (child < 0 || child >= num_nodes) {
ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
goto out;
}
/* Spatial requirements for edges */
if (!(tsk_isfinite(left) && tsk_isfinite(right))) {
ret = tsk_trace_error(TSK_ERR_GENOME_COORDS_NONFINITE);
goto out;
}
if (left < 0) {
ret = tsk_trace_error(TSK_ERR_LEFT_LESS_ZERO);
goto out;
}
if (right > L) {
ret = tsk_trace_error(TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);
goto out;
}
if (left >= right) {
ret = tsk_trace_error(TSK_ERR_BAD_EDGE_INTERVAL);
goto out;
}
/* time[child] must be < time[parent] */
/* Indexing time[] is safe here because both IDs were range-checked
 * against num_nodes above. */
if (time[child] >= time[parent]) {
ret = tsk_trace_error(TSK_ERR_BAD_NODE_TIME_ORDERING);
goto out;
}
if (check_ordering) {
/* A parent whose run was already closed has reappeared. */
if (parent_seen[parent]) {
ret = tsk_trace_error(TSK_ERR_EDGES_NONCONTIGUOUS_PARENTS);
goto out;
}
if (j > 0) {
/* Input data must be sorted by (time[parent], parent, child, left). */
if (time[parent] < time[last_parent]) {
ret = tsk_trace_error(TSK_ERR_EDGES_NOT_SORTED_PARENT_TIME);
goto out;
}
if (time[parent] == time[last_parent]) {
if (parent == last_parent) {
if (child < last_child) {
ret = tsk_trace_error(TSK_ERR_EDGES_NOT_SORTED_CHILD);
goto out;
}
if (child == last_child) {
if (left == last_left) {
ret = tsk_trace_error(TSK_ERR_DUPLICATE_EDGES);
goto out;
} else if (left < last_left) {
ret = tsk_trace_error(TSK_ERR_EDGES_NOT_SORTED_LEFT);
goto out;
}
}
} else {
/* Same time but a different parent: the previous parent's
 * run is over. When the time strictly increases no mark is
 * made; a later reappearance of that parent would then
 * require a decrease in parent time somewhere, which the
 * check above rejects. */
parent_seen[last_parent] = true;
}
}
}
/* The previous-edge state is only maintained (and only read) when
 * ordering checks are enabled. */
last_parent = parent;
last_child = child;
last_left = left;
}
}
out:
tsk_safe_free(parent_seen);
return ret;
}
/* Validate every row of the site table.
 *
 * Each position must be finite and lie in [0, sequence_length). When
 * TSK_CHECK_SITE_DUPLICATES is set, a position equal to that of the
 * preceding row is rejected; when TSK_CHECK_SITE_ORDERING is set, a
 * position smaller than that of the preceding row is rejected.
 *
 * Returns 0 if all rows pass, otherwise the error for the first failing
 * row. */
static int TSK_WARN_UNUSED
tsk_table_collection_check_site_integrity(
    const tsk_table_collection_t *self, tsk_flags_t options)
{
    const tsk_site_table_t *site_table = &self->sites;
    const double seq_len = self->sequence_length;
    const bool reject_duplicates = (options & TSK_CHECK_SITE_DUPLICATES) != 0;
    const bool require_sorted = (options & TSK_CHECK_SITE_ORDERING) != 0;
    double current, previous;
    tsk_size_t row;

    for (row = 0; row < site_table->num_rows; row++) {
        current = site_table->position[row];
        /* Spatial requirements: finite and within [0, L). */
        if (!tsk_isfinite(current) || current < 0 || current >= seq_len) {
            return tsk_trace_error(TSK_ERR_BAD_SITE_POSITION);
        }
        if (row == 0) {
            /* Nothing to compare the first row against. */
            continue;
        }
        previous = site_table->position[row - 1];
        if (reject_duplicates && previous == current) {
            return tsk_trace_error(TSK_ERR_DUPLICATE_SITE_POSITION);
        }
        if (require_sorted && previous > current) {
            return tsk_trace_error(TSK_ERR_UNSORTED_SITES);
        }
    }
    return 0;
}
/* Validate every row of the mutation table.
 *
 * For each mutation this checks, in order:
 *  - site and node are valid row IDs in their tables;
 *  - parent is TSK_NULL or a valid mutation ID, and is not the mutation
 *    itself;
 *  - a known time is finite and not younger than the time of the
 *    mutation's node;
 *  - known and unknown times are not mixed within a run of consecutive
 *    rows that share a site;
 *  - a non-null parent is at the same site, and a known time is not older
 *    than the parent's time.
 *
 * When TSK_CHECK_MUTATION_ORDERING is set in options it also checks that
 * site IDs are non-decreasing, that parents are listed before their
 * children, and that known times are non-increasing within a site.
 *
 * Returns 0 if all rows pass, otherwise the error for the first failing
 * row. */
static int TSK_WARN_UNUSED
tsk_table_collection_check_mutation_integrity(
    const tsk_table_collection_t *self, tsk_flags_t options)
{
    int ret = 0;
    tsk_size_t j;
    tsk_id_t parent_mut;
    double mutation_time;
    double last_known_time = INFINITY;
    const tsk_mutation_table_t mutations = self->mutations;
    const tsk_id_t num_nodes = (tsk_id_t) self->nodes.num_rows;
    const tsk_id_t num_sites = (tsk_id_t) self->sites.num_rows;
    const tsk_id_t num_mutations = (tsk_id_t) self->mutations.num_rows;
    const double *node_time = self->nodes.time;
    const bool check_mutation_ordering = !!(options & TSK_CHECK_MUTATION_ORDERING);
    bool unknown_time;
    /* Per-site tallies. These use the same type as the row counter so that
     * they can never exceed their range: each is incremented at most once
     * per row, and is therefore bounded by mutations.num_rows. */
    tsk_size_t num_known_times = 0;
    tsk_size_t num_unknown_times = 0;

    for (j = 0; j < mutations.num_rows; j++) {
        /* Basic reference integrity */
        if (mutations.site[j] < 0 || mutations.site[j] >= num_sites) {
            ret = tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);
            goto out;
        }
        if (mutations.node[j] < 0 || mutations.node[j] >= num_nodes) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }

        /* Integrity check for mutation parent */
        parent_mut = mutations.parent[j];
        if (parent_mut < TSK_NULL || parent_mut >= num_mutations) {
            ret = tsk_trace_error(TSK_ERR_MUTATION_OUT_OF_BOUNDS);
            goto out;
        }
        if (parent_mut == (tsk_id_t) j) {
            ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_EQUAL);
            goto out;
        }

        /* Check that time is finite and not more recent than node time */
        mutation_time = mutations.time[j];
        unknown_time = tsk_is_unknown_time(mutation_time);
        if (!unknown_time) {
            if (!tsk_isfinite(mutation_time)) {
                ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);
                goto out;
            }
            if (mutation_time < node_time[mutations.node[j]]) {
                ret = tsk_trace_error(TSK_ERR_MUTATION_TIME_YOUNGER_THAN_NODE);
                goto out;
            }
        }

        /* Reset the per-site state when reaching a new site */
        if (j > 0 && mutations.site[j - 1] != mutations.site[j]) {
            last_known_time = INFINITY;
            num_known_times = 0;
            num_unknown_times = 0;
        }

        /* Check known/unknown times are not both present on a site */
        if (unknown_time) {
            num_unknown_times++;
        } else {
            num_known_times++;
        }
        if ((num_unknown_times > 0) && (num_known_times > 0)) {
            ret = tsk_trace_error(TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN);
            goto out;
        }

        /* Check parent site agrees */
        if (parent_mut != TSK_NULL) {
            if (mutations.site[parent_mut] != mutations.site[j]) {
                ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_DIFFERENT_SITE);
                goto out;
            }
            /* If this mutation time is known, then the parent time
             * must also be, or else the
             * TSK_ERR_MUTATION_TIME_HAS_BOTH_KNOWN_AND_UNKNOWN check
             * above will fail. */
            if (!unknown_time && mutation_time > mutations.time[parent_mut]) {
                ret = tsk_trace_error(TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_MUTATION);
                goto out;
            }
        }

        if (check_mutation_ordering) {
            /* Check site ordering */
            if (j > 0 && mutations.site[j - 1] > mutations.site[j]) {
                ret = tsk_trace_error(TSK_ERR_UNSORTED_MUTATIONS);
                goto out;
            }
            /* Check if parents are listed before their children */
            if (parent_mut != TSK_NULL && parent_mut > (tsk_id_t) j) {
                ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
                goto out;
            }
            /* Check time ordering. We do this after the other checks above,
             * so that more specific errors trigger first */
            if (!unknown_time) {
                if (mutation_time > last_known_time) {
                    ret = tsk_trace_error(TSK_ERR_UNSORTED_MUTATIONS);
                    goto out;
                }
                last_known_time = mutation_time;
            }
        }
    }
out:
    return ret;
}
/* Validate every row of the migration table.
 *
 * For each migration this checks, in order: the node is a valid node ID;
 * source and dest are valid population IDs (skipped when
 * TSK_NO_CHECK_POPULATION_REFS is set); the time is finite; when
 * TSK_CHECK_MIGRATION_ORDERING is set, the time is not smaller than that
 * of the preceding row; and the interval satisfies
 * 0 <= left < right <= sequence_length with finite endpoints.
 *
 * Returns 0 if all rows pass, otherwise the error for the first failing
 * row. */
static int TSK_WARN_UNUSED
tsk_table_collection_check_migration_integrity(
    const tsk_table_collection_t *self, tsk_flags_t options)
{
    const tsk_migration_table_t *mig = &self->migrations;
    const double seq_len = self->sequence_length;
    const tsk_id_t node_limit = (tsk_id_t) self->nodes.num_rows;
    const tsk_id_t population_limit = (tsk_id_t) self->populations.num_rows;
    const bool validate_populations = (options & TSK_NO_CHECK_POPULATION_REFS) == 0;
    const bool require_time_sorted = (options & TSK_CHECK_MIGRATION_ORDERING) != 0;
    double lo, hi, when;
    tsk_size_t row;

    for (row = 0; row < mig->num_rows; row++) {
        if (mig->node[row] < 0 || mig->node[row] >= node_limit) {
            return tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
        }
        if (validate_populations) {
            if (mig->source[row] < 0 || mig->source[row] >= population_limit) {
                return tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);
            }
            if (mig->dest[row] < 0 || mig->dest[row] >= population_limit) {
                return tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);
            }
        }

        when = mig->time[row];
        if (!tsk_isfinite(when)) {
            return tsk_trace_error(TSK_ERR_TIME_NONFINITE);
        }
        if (require_time_sorted && row > 0 && mig->time[row - 1] > when) {
            return tsk_trace_error(TSK_ERR_UNSORTED_MIGRATIONS);
        }

        /* Spatial requirements */
        /* TODO it's a bit misleading to use the edge-specific errors here. */
        lo = mig->left[row];
        hi = mig->right[row];
        if (!tsk_isfinite(lo) || !tsk_isfinite(hi)) {
            return tsk_trace_error(TSK_ERR_GENOME_COORDS_NONFINITE);
        }
        if (lo < 0) {
            return tsk_trace_error(TSK_ERR_LEFT_LESS_ZERO);
        }
        if (hi > seq_len) {
            return tsk_trace_error(TSK_ERR_RIGHT_GREATER_SEQ_LENGTH);
        }
        if (lo >= hi) {
            return tsk_trace_error(TSK_ERR_BAD_EDGE_INTERVAL);
        }
    }
    return 0;
}
/* Validate the parent references of every row of the individual table.
 *
 * Each entry in an individual's ragged "parents" column must be TSK_NULL
 * or a valid individual ID, and must not be the individual itself. When
 * TSK_CHECK_INDIVIDUAL_ORDERING is set in options, every non-null parent
 * must also have a smaller ID than the individual that references it.
 *
 * Returns 0 if all rows pass, otherwise the error for the first failing
 * entry. */
static int TSK_WARN_UNUSED
tsk_table_collection_check_individual_integrity(
    const tsk_table_collection_t *self, tsk_flags_t options)
{
    const tsk_individual_table_t *ind = &self->individuals;
    const tsk_id_t num_individuals = (tsk_id_t) ind->num_rows;
    const bool check_individual_ordering
        = (options & TSK_CHECK_INDIVIDUAL_ORDERING) != 0;
    tsk_size_t row, entry;
    tsk_id_t parent_id;

    for (row = 0; row < (tsk_size_t) num_individuals; row++) {
        for (entry = ind->parents_offset[row]; entry < ind->parents_offset[row + 1];
             entry++) {
            parent_id = ind->parents[entry];
            if (parent_id == TSK_NULL) {
                /* A null parent is always acceptable: it is exempt from the
                 * bounds and ordering checks and can never equal a row ID. */
                continue;
            }
            /* Check parent references are valid */
            if (parent_id < 0 || parent_id >= num_individuals) {
                return tsk_trace_error(TSK_ERR_INDIVIDUAL_OUT_OF_BOUNDS);
            }
            /* Check no-one is their own parent */
            if (parent_id == (tsk_id_t) row) {
                return tsk_trace_error(TSK_ERR_INDIVIDUAL_SELF_PARENT);
            }
            /* Check parents are ordered */
            if (check_individual_ordering && parent_id >= (tsk_id_t) row) {
                return tsk_trace_error(TSK_ERR_UNSORTED_INDIVIDUALS);
            }
        }
    }
    return 0;
}
/* Sweep the trees from left to right using the edge insertion/removal
 * indexes and check that they describe a consistent sequence of trees.
 *
 * During the sweep this checks that:
 *  - every edge is inserted before it is removed and neither happens twice
 *    (TSK_ERR_TABLES_BAD_INDEXES);
 *  - no node is given a parent while it already has one
 *    (TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);
 *  - a mutation with a known time is strictly younger than the parent, in
 *    the current tree, of the mutation's node
 *    (TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE);
 *  - every tree has a strictly positive span (TSK_ERR_TABLES_BAD_INDEXES);
 *  - the number of trees does not exceed TSK_MAX_ID (TSK_ERR_TREE_OVERFLOW).
 *
 * The indexes must be present and sized to the edge table; this is
 * asserted rather than reported as an error. The other table columns are
 * read without bounds checks, so they are presumably validated by the
 * caller beforehand.
 *
 * Returns the number of trees on success, or the value produced by
 * tsk_trace_error on failure. */
static tsk_id_t TSK_WARN_UNUSED
tsk_table_collection_check_tree_integrity(const tsk_table_collection_t *self)
{
tsk_id_t ret = 0;
tsk_size_t j, k;
tsk_id_t e, u, site, mutation;
double tree_left, tree_right;
const double sequence_length = self->sequence_length;
const tsk_id_t num_sites = (tsk_id_t) self->sites.num_rows;
const tsk_id_t num_mutations = (tsk_id_t) self->mutations.num_rows;
const tsk_size_t num_edges = self->edges.num_rows;
const double *restrict site_position = self->sites.position;
const tsk_id_t *restrict mutation_site = self->mutations.site;
const tsk_id_t *restrict mutation_node = self->mutations.node;
const double *restrict mutation_time = self->mutations.time;
const double *restrict node_time = self->nodes.time;
/* I and O give the order in which edges are inserted and removed. */
const tsk_id_t *restrict I = self->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->indexes.edge_removal_order;
const double *restrict edge_right = self->edges.right;
const double *restrict edge_left = self->edges.left;
const tsk_id_t *restrict edge_child = self->edges.child;
const tsk_id_t *restrict edge_parent = self->edges.parent;
/* parent[u] is the parent of node u in the current tree. */
tsk_id_t *restrict parent = NULL;
/* used_edges[e] counts the events seen for edge e: 0 = not yet inserted,
 * 1 = inserted, 2 = removed. */
int8_t *restrict used_edges = NULL;
tsk_id_t num_trees = 0;
parent = tsk_malloc(self->nodes.num_rows * sizeof(*parent));
used_edges = tsk_malloc(num_edges * sizeof(*used_edges));
if (parent == NULL || used_edges == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Filling with 0xff bytes sets every entry to all-one bits; the
 * comparisons against TSK_NULL below rely on that being TSK_NULL. */
tsk_memset(parent, 0xff, self->nodes.num_rows * sizeof(*parent));
tsk_memset(used_edges, 0, num_edges * sizeof(*used_edges));
tree_left = 0;
num_trees = 0;
j = 0;
k = 0;
site = 0;
mutation = 0;
tsk_bug_assert(I != NULL && O != NULL);
tsk_bug_assert(self->indexes.num_edges == num_edges);
/* j indexes the insertion order and k the removal order. */
while (j < num_edges || tree_left < sequence_length) {
/* Remove the edges that end at the left of this tree. */
while (k < num_edges && edge_right[O[k]] == tree_left) {
e = O[k];
if (used_edges[e] != 1) {
ret = tsk_trace_error(TSK_ERR_TABLES_BAD_INDEXES);
goto out;
}
parent[edge_child[e]] = TSK_NULL;
used_edges[e]++;
k++;
}
/* Insert the edges that start at the left of this tree. */
while (j < num_edges && edge_left[I[j]] == tree_left) {
e = I[j];
if (used_edges[e] != 0) {
ret = tsk_trace_error(TSK_ERR_TABLES_BAD_INDEXES);
goto out;
}
used_edges[e]++;
u = edge_child[e];
if (parent[u] != TSK_NULL) {
ret = tsk_trace_error(TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);
goto out;
}
parent[u] = edge_parent[e];
j++;
}
/* The tree ends at the next insertion or removal coordinate, or at the
 * end of the sequence if there are none left. */
tree_right = sequence_length;
if (j < num_edges) {
tree_right = TSK_MIN(tree_right, edge_left[I[j]]);
}
if (k < num_edges) {
tree_right = TSK_MIN(tree_right, edge_right[O[k]]);
}
/* Visit the sites that fall on this tree and the mutations at each. */
while (site < num_sites && site_position[site] < tree_right) {
while (mutation < num_mutations && mutation_site[mutation] == site) {
if (!tsk_is_unknown_time(mutation_time[mutation])
&& parent[mutation_node[mutation]] != TSK_NULL
&& node_time[parent[mutation_node[mutation]]]
<= mutation_time[mutation]) {
ret = tsk_trace_error(TSK_ERR_MUTATION_TIME_OLDER_THAN_PARENT_NODE);
goto out;
}
mutation++;
}
site++;
}
/* A tree with no span means the sweep made no progress; fail rather
 * than continue with it. */
if (tree_right <= tree_left) {
ret = tsk_trace_error(TSK_ERR_TABLES_BAD_INDEXES);
goto out;
}
tree_left = tree_right;
/* This is technically possible; if we have 2**31 edges each defining
 * a single tree, and there's a gap between each of these edges we
 * would overflow this counter. */
if (num_trees == TSK_MAX_ID) {
ret = tsk_trace_error(TSK_ERR_TREE_OVERFLOW);
goto out;
}
num_trees++;
}
tsk_bug_assert(j == num_edges);
/* Any edges not yet removed must end exactly at the sequence length. */
while (k < num_edges) {
/* At this point it must be that used_edges[O[k]] == 1,
 * since otherwise we would have added a different edge twice,
 * and so hit the error above. */
e = O[k];
if (edge_right[e] != sequence_length) {
ret = tsk_trace_error(TSK_ERR_TABLES_BAD_INDEXES);
goto out;
}
used_edges[e]++;
k++;
}
ret = num_trees;
out:
/* Can't use tsk_safe_free because of restrict */
if (parent != NULL) {
free(parent);
}
if (used_edges != NULL) {
free(used_edges);
}
return ret;
}
/* Check that the table collection has indexes and that every entry of
 * both index arrays is a valid edge ID.
 *
 * Returns 0 on success, TSK_ERR_TABLES_NOT_INDEXED if
 * tsk_table_collection_has_index reports no index, or
 * TSK_ERR_EDGE_OUT_OF_BOUNDS for the first entry outside [0, num_edges). */
static int TSK_WARN_UNUSED
tsk_table_collection_check_index_integrity(const tsk_table_collection_t *self)
{
    const tsk_id_t edge_limit = (tsk_id_t) self->edges.num_rows;
    const tsk_id_t *insertion = self->indexes.edge_insertion_order;
    const tsk_id_t *removal = self->indexes.edge_removal_order;
    tsk_id_t pos, inserted, removed;

    if (!tsk_table_collection_has_index(self, 0)) {
        return tsk_trace_error(TSK_ERR_TABLES_NOT_INDEXED);
    }
    for (pos = 0; pos < edge_limit; pos++) {
        inserted = insertion[pos];
        if (inserted >= edge_limit || inserted < 0) {
            return tsk_trace_error(TSK_ERR_EDGE_OUT_OF_BOUNDS);
        }
        removed = removal[pos];
        if (removed >= edge_limit || removed < 0) {
            return tsk_trace_error(TSK_ERR_EDGE_OUT_OF_BOUNDS);
        }
    }
    return 0;
}
/* Compute the parent of every mutation and write it to mutation_parent.
 *
 * mutation_parent must have room for one tsk_id_t per row of the mutation
 * table; every entry is overwritten. The trees are swept from left to
 * right using the edge indexes, and for each site the parent of a mutation
 * is either the previous mutation listed at the same node, or else the
 * most recently listed mutation at the nearest ancestor that carries one.
 * Entries with no such mutation are left as TSK_NULL.
 *
 * Index arrays and node/site references are used without validation here,
 * so the caller is presumably expected to have checked them already.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY if scratch space cannot be
 * allocated, or TSK_ERR_MUTATION_PARENT_AFTER_CHILD if a computed parent
 * is listed after its child. */
static int TSK_WARN_UNUSED
tsk_table_collection_compute_mutation_parents_to_array(
const tsk_table_collection_t *self, tsk_id_t *mutation_parent)
{
int ret = 0;
const tsk_id_t *I, *O;
const tsk_edge_table_t edges = self->edges;
const tsk_node_table_t nodes = self->nodes;
const tsk_site_table_t sites = self->sites;
const tsk_mutation_table_t mutations = self->mutations;
const tsk_id_t M = (tsk_id_t) edges.num_rows;
tsk_id_t tj, tk;
/* parent[u] is the parent of node u in the current tree. */
tsk_id_t *parent = NULL;
/* bottom_mutation[u] is the most recently listed mutation at node u for
 * the site currently being processed, or TSK_NULL. */
tsk_id_t *bottom_mutation = NULL;
tsk_id_t u;
double left, right;
tsk_id_t site;
/* Using unsigned values here avoids potentially undefined behaviour */
tsk_size_t j, mutation, first_mutation;
parent = tsk_malloc(nodes.num_rows * sizeof(*parent));
bottom_mutation = tsk_malloc(nodes.num_rows * sizeof(*bottom_mutation));
if (parent == NULL || bottom_mutation == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Filling with 0xff bytes sets every entry to all-one bits; the
 * comparisons against TSK_NULL below rely on that being TSK_NULL. */
tsk_memset(parent, 0xff, nodes.num_rows * sizeof(*parent));
tsk_memset(bottom_mutation, 0xff, nodes.num_rows * sizeof(*bottom_mutation));
tsk_memset(mutation_parent, 0xff, self->mutations.num_rows * sizeof(tsk_id_t));
I = self->indexes.edge_insertion_order;
O = self->indexes.edge_removal_order;
tj = 0;
tk = 0;
site = 0;
mutation = 0;
left = 0;
/* tj indexes the insertion order and tk the removal order. */
while (tj < M || left < self->sequence_length) {
/* Remove the edges ending at the left of this tree ... */
while (tk < M && edges.right[O[tk]] == left) {
parent[edges.child[O[tk]]] = TSK_NULL;
tk++;
}
/* ... and insert the edges starting there. */
while (tj < M && edges.left[I[tj]] == left) {
parent[edges.child[I[tj]]] = edges.parent[I[tj]];
tj++;
}
/* The tree ends at the next insertion or removal coordinate, or at the
 * end of the sequence if there are none left. */
right = self->sequence_length;
if (tj < M) {
right = TSK_MIN(right, edges.left[I[tj]]);
}
if (tk < M) {
right = TSK_MIN(right, edges.right[O[tk]]);
}
/* Tree is now ready. We look at each site on this tree in turn */
while (site < (tsk_id_t) sites.num_rows && sites.position[site] < right) {
/* Create a mapping from mutations to nodes. If we see more than one
 * mutation at a node, the previously seen one must be the parent
 * of the current since we assume they are in order. */
first_mutation = mutation;
while (mutation < mutations.num_rows && mutations.site[mutation] == site) {
u = mutations.node[mutation];
if (bottom_mutation[u] != TSK_NULL) {
mutation_parent[mutation] = bottom_mutation[u];
}
bottom_mutation[u] = (tsk_id_t) mutation;
mutation++;
}
/* Make the common case of 1 mutation fast */
if (mutation > first_mutation + 1) {
/* If we have more than one mutation, compute the parent for each
 * one by traversing up the tree until we find a node that has a
 * mutation. */
for (j = first_mutation; j < mutation; j++) {
/* Mutations already assigned a parent at their own node are
 * left alone. */
if (mutation_parent[j] == TSK_NULL) {
u = parent[mutations.node[j]];
while (u != TSK_NULL && bottom_mutation[u] == TSK_NULL) {
u = parent[u];
}
if (u != TSK_NULL) {
mutation_parent[j] = bottom_mutation[u];
}
}
}
}
/* Reset the mapping for the next site */
for (j = first_mutation; j < mutation; j++) {
u = mutations.node[j];
bottom_mutation[u] = TSK_NULL;
/* Check that we haven't violated the sortedness property */
if (mutation_parent[j] > (tsk_id_t) j) {
ret = tsk_trace_error(TSK_ERR_MUTATION_PARENT_AFTER_CHILD);
goto out;
}
}
site++;
}
/* Move on to the next tree */
left = right;
}
out:
tsk_safe_free(parent);
tsk_safe_free(bottom_mutation);
return ret;
}
/* Check that the parent column of the mutation table matches the parents
 * computed by tsk_table_collection_compute_mutation_parents_to_array.
 *
 * Returns 0 when the table is empty or every stored parent agrees,
 * TSK_ERR_BAD_MUTATION_PARENT on the first mismatch, TSK_ERR_NO_MEMORY if
 * the scratch array cannot be allocated, or whatever error the
 * computation itself reports. */
static int TSK_WARN_UNUSED
tsk_table_collection_check_mutation_parents(const tsk_table_collection_t *self)
{
    int ret = 0;
    const tsk_mutation_table_t *mutations = &self->mutations;
    const tsk_size_t num_mutations = mutations->num_rows;
    tsk_id_t *computed = NULL;
    tsk_size_t row;

    if (num_mutations == 0) {
        /* Nothing to compare, and nothing has been allocated yet. */
        return 0;
    }

    computed = tsk_malloc(num_mutations * sizeof(*computed));
    if (computed == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_table_collection_compute_mutation_parents_to_array(self, computed);
    if (ret != 0) {
        goto out;
    }

    /* Advance to the first row whose stored parent disagrees, if any. */
    row = 0;
    while (row < num_mutations && mutations->parent[row] == computed[row]) {
        row++;
    }
    if (row < num_mutations) {
        ret = tsk_trace_error(TSK_ERR_BAD_MUTATION_PARENT);
    }
out:
    tsk_safe_free(computed);
    return ret;
}
/* Run the integrity checks selected by options on a table collection.
 *
 * The sequence length, column offsets and per-table checks (nodes, edges,
 * sites, mutations, migrations, individuals) are always run, in that
 * order. TSK_CHECK_INDEXES adds the index check and TSK_CHECK_TREES adds
 * the tree sweep. TSK_CHECK_MUTATION_PARENTS implies TSK_CHECK_TREES,
 * which in turn implies the edge, site, mutation and migration ordering
 * checks, the site duplicate check and the index check.
 *
 * Returns the first non-zero result of the unconditional checks or the
 * index check, or a negative result from the tree sweep. Otherwise, when
 * TSK_CHECK_TREES is in effect the result is the number of trees, unless
 * the mutation parent check fails, in which case its error is returned.
 * Without TSK_CHECK_TREES the result is 0. */
tsk_id_t TSK_WARN_UNUSED
tsk_table_collection_check_integrity(
    const tsk_table_collection_t *self, tsk_flags_t options)
{
    tsk_id_t ret = 0;
    int mut_ret = 0;
    /* Checking the trees implies these checks */
    const tsk_flags_t implied_by_trees
        = TSK_CHECK_EDGE_ORDERING | TSK_CHECK_SITE_ORDERING | TSK_CHECK_SITE_DUPLICATES
          | TSK_CHECK_MUTATION_ORDERING | TSK_CHECK_MIGRATION_ORDERING
          | TSK_CHECK_INDEXES;

    if (options & TSK_CHECK_MUTATION_PARENTS) {
        /* If we're checking mutation parents, we need to check the trees first */
        options |= TSK_CHECK_TREES;
    }
    if (options & TSK_CHECK_TREES) {
        options |= implied_by_trees;
    }

    if (self->sequence_length <= 0 || !tsk_isfinite(self->sequence_length)) {
        return tsk_trace_error(TSK_ERR_BAD_SEQUENCE_LENGTH);
    }

    ret = tsk_table_collection_check_offsets(self);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_table_collection_check_node_integrity(self, options);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_table_collection_check_edge_integrity(self, options);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_table_collection_check_site_integrity(self, options);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_table_collection_check_mutation_integrity(self, options);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_table_collection_check_migration_integrity(self, options);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_table_collection_check_individual_integrity(self, options);
    if (ret != 0) {
        return ret;
    }

    if (options & TSK_CHECK_INDEXES) {
        ret = tsk_table_collection_check_index_integrity(self);
        if (ret != 0) {
            return ret;
        }
    }

    if (!(options & TSK_CHECK_TREES)) {
        /* All requested checks passed; ret is 0 here. */
        return ret;
    }

    /* On success this leaves the number of trees in ret. */
    ret = tsk_table_collection_check_tree_integrity(self);
    if (ret < 0) {
        return ret;
    }
    /* This check requires tree integrity so do it last */
    if (options & TSK_CHECK_MUTATION_PARENTS) {
        mut_ret = tsk_table_collection_check_mutation_parents(self);
        if (mut_ret != 0) {
            return mut_ret;
        }
    }
    return ret;
}
/* Write a human-readable dump of the table collection to out.
 *
 * The dump consists of the sequence length, the metadata schema header,
 * the metadata and time_units strings (each between marker lines), and
 * then the state of each table in the order individuals, nodes, edges,
 * migrations, sites, mutations, populations, provenances. */
void
tsk_table_collection_print_state(const tsk_table_collection_t *self, FILE *out)
{
    /* The "%.*s" precision argument must be an int. */
    const int metadata_width = (int) self->metadata_length;
    const int time_units_width = (int) self->time_units_length;

    fputs("Table collection state\n", out);
    fprintf(out, "sequence_length = %f\n", self->sequence_length);

    write_metadata_schema_header(
        out, self->metadata_schema, self->metadata_schema_length);

    fputs("#metadata#\n", out);
    fprintf(out, "%.*s\n", metadata_width, self->metadata);
    fputs("#end#metadata\n", out);

    fputs("#time_units#\n", out);
    fprintf(out, "%.*s\n", time_units_width, self->time_units);
    fputs("#end#time_units\n", out);

    tsk_individual_table_print_state(&self->individuals, out);
    tsk_node_table_print_state(&self->nodes, out);
    tsk_edge_table_print_state(&self->edges, out);
    tsk_migration_table_print_state(&self->migrations, out);
    tsk_site_table_print_state(&self->sites, out);
    tsk_mutation_table_print_state(&self->mutations, out);
    tsk_population_table_print_state(&self->populations, out);
    tsk_provenance_table_print_state(&self->provenances, out);
}
/* Initialise a table collection.
 *
 * The structure is zeroed, time_units is set to TSK_TIME_UNITS_UNKNOWN,
 * and then each table and the reference sequence are initialised. If
 * TSK_TC_NO_EDGE_METADATA is set in options, the edge table is initialised
 * with TSK_TABLE_NO_METADATA; all other tables are initialised with no
 * options.
 *
 * Returns 0 on success or the first non-zero value returned by a
 * component initialiser. Nothing is released here on failure.
 * NOTE(review): presumably the caller is expected to call
 * tsk_table_collection_free in that case - confirm. */
int TSK_WARN_UNUSED
tsk_table_collection_init(tsk_table_collection_t *self, tsk_flags_t options)
{
    int ret;
    const tsk_flags_t edge_options
        = (options & TSK_TC_NO_EDGE_METADATA) ? TSK_TABLE_NO_METADATA : 0;

    tsk_memset(self, 0, sizeof(*self));

    /* Set default time_units value */
    ret = tsk_table_collection_set_time_units(
        self, TSK_TIME_UNITS_UNKNOWN, strlen(TSK_TIME_UNITS_UNKNOWN));
    if (ret != 0) {
        return ret;
    }
    ret = tsk_node_table_init(&self->nodes, 0);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_edge_table_init(&self->edges, edge_options);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_migration_table_init(&self->migrations, 0);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_site_table_init(&self->sites, 0);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_mutation_table_init(&self->mutations, 0);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_individual_table_init(&self->individuals, 0);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_population_table_init(&self->populations, 0);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_provenance_table_init(&self->provenances, 0);
    if (ret != 0) {
        return ret;
    }
    return tsk_reference_sequence_init(&self->reference_sequence, 0);
}
/* Release the memory held by a table collection: the top-level strings
 * and file UUID, the edge index arrays, the reference sequence and every
 * table. The structure itself is not freed and its pointers are not
 * cleared. Always returns 0. */
int
tsk_table_collection_free(tsk_table_collection_t *self)
{
    /* Buffers owned directly by the collection. */
    tsk_safe_free(self->metadata_schema);
    tsk_safe_free(self->metadata);
    tsk_safe_free(self->time_units);
    tsk_safe_free(self->file_uuid);
    tsk_safe_free(self->indexes.edge_removal_order);
    tsk_safe_free(self->indexes.edge_insertion_order);

    /* Component objects. */
    tsk_reference_sequence_free(&self->reference_sequence);
    tsk_provenance_table_free(&self->provenances);
    tsk_population_table_free(&self->populations);
    tsk_mutation_table_free(&self->mutations);
    tsk_site_table_free(&self->sites);
    tsk_migration_table_free(&self->migrations);
    tsk_edge_table_free(&self->edges);
    tsk_node_table_free(&self->nodes);
    tsk_individual_table_free(&self->individuals);
    return 0;
}
/* Compare two table collections for equality.
 *
 * The sequence length and time_units are always compared. The remaining
 * comparisons are controlled by options:
 *  - TSK_CMP_IGNORE_TABLES skips all tables, including provenance;
 *  - TSK_CMP_IGNORE_PROVENANCE skips only the provenance table;
 *  - TSK_CMP_IGNORE_TS_METADATA, or TSK_CMP_IGNORE_METADATA which implies
 *    it, skips the top-level metadata and metadata schema;
 *  - TSK_CMP_IGNORE_REFERENCE_SEQUENCE skips the reference sequence.
 * options is also forwarded to the per-table comparison functions.
 *
 * Returns true if every comparison that was performed found equality. */
bool
tsk_table_collection_equals(const tsk_table_collection_t *self,
    const tsk_table_collection_t *other, tsk_flags_t options)
{
    if (self->sequence_length != other->sequence_length) {
        return false;
    }
    if (self->time_units_length != other->time_units_length) {
        return false;
    }
    if (tsk_memcmp(self->time_units, other->time_units,
            self->time_units_length * sizeof(char))
        != 0) {
        return false;
    }

    if (!(options & TSK_CMP_IGNORE_TABLES)) {
        if (!tsk_individual_table_equals(
                &self->individuals, &other->individuals, options)) {
            return false;
        }
        if (!tsk_node_table_equals(&self->nodes, &other->nodes, options)) {
            return false;
        }
        if (!tsk_edge_table_equals(&self->edges, &other->edges, options)) {
            return false;
        }
        if (!tsk_migration_table_equals(
                &self->migrations, &other->migrations, options)) {
            return false;
        }
        if (!tsk_site_table_equals(&self->sites, &other->sites, options)) {
            return false;
        }
        if (!tsk_mutation_table_equals(&self->mutations, &other->mutations, options)) {
            return false;
        }
        if (!tsk_population_table_equals(
                &self->populations, &other->populations, options)) {
            return false;
        }
        /* TSK_CMP_IGNORE_TABLES implies TSK_CMP_IGNORE_PROVENANCE */
        if (!(options & TSK_CMP_IGNORE_PROVENANCE)) {
            if (!tsk_provenance_table_equals(
                    &self->provenances, &other->provenances, options)) {
                return false;
            }
        }
    }

    /* TSK_CMP_IGNORE_TS_METADATA is implied by TSK_CMP_IGNORE_METADATA */
    if (options & TSK_CMP_IGNORE_METADATA) {
        options |= TSK_CMP_IGNORE_TS_METADATA;
    }
    if (!(options & TSK_CMP_IGNORE_TS_METADATA)) {
        if (self->metadata_length != other->metadata_length) {
            return false;
        }
        if (self->metadata_schema_length != other->metadata_schema_length) {
            return false;
        }
        if (tsk_memcmp(
                self->metadata, other->metadata, self->metadata_length * sizeof(char))
            != 0) {
            return false;
        }
        if (tsk_memcmp(self->metadata_schema, other->metadata_schema,
                self->metadata_schema_length * sizeof(char))
            != 0) {
            return false;
        }
    }

    /* The reference sequence comparison receives the options as updated
     * above. */
    if (!(options & TSK_CMP_IGNORE_REFERENCE_SEQUENCE)) {
        return tsk_reference_sequence_equals(
            &self->reference_sequence, &other->reference_sequence, options);
    }
    return true;
}
/* Set the time_units string of the table collection from the given buffer
 * of time_units_length characters. The work is delegated to
 * replace_string, whose result is returned unchanged.
 * NOTE(review): presumably replace_string copies the input and returns 0
 * on success - confirm against its definition. */
int
tsk_table_collection_set_time_units(
    tsk_table_collection_t *self, const char *time_units, tsk_size_t time_units_length)
{
    int ret;

    ret = replace_string(
        &self->time_units, &self->time_units_length, time_units, time_units_length);
    return ret;
}
/* Set the top-level metadata of the table collection from the given
 * buffer of metadata_length characters. The work is delegated to
 * replace_string, whose result is returned unchanged.
 * NOTE(review): presumably replace_string copies the input and returns 0
 * on success - confirm against its definition. */
int
tsk_table_collection_set_metadata(
    tsk_table_collection_t *self, const char *metadata, tsk_size_t metadata_length)
{
    int ret;

    ret = replace_string(
        &self->metadata, &self->metadata_length, metadata, metadata_length);
    return ret;
}
/* Set the top-level metadata of the table collection from a caller-supplied
 * buffer of metadata_length characters. The work is delegated to
 * takeset_string, whose result is returned unchanged.
 * NOTE(review): presumably takeset_string adopts the buffer rather than
 * copying it, so the caller must not free it afterwards - confirm against
 * its definition. */
int
tsk_table_collection_takeset_metadata(
    tsk_table_collection_t *self, char *metadata, tsk_size_t metadata_length)
{
    int ret;

    ret = takeset_string(
        &self->metadata, &self->metadata_length, metadata, metadata_length);
    return ret;
}
/* Set the top-level metadata schema of the table collection from the given
 * buffer of metadata_schema_length characters. The work is delegated to
 * replace_string, whose result is returned unchanged.
 * NOTE(review): presumably replace_string copies the input and returns 0
 * on success - confirm against its definition. */
int
tsk_table_collection_set_metadata_schema(tsk_table_collection_t *self,
    const char *metadata_schema, tsk_size_t metadata_schema_length)
{
    int ret;

    ret = replace_string(&self->metadata_schema, &self->metadata_schema_length,
        metadata_schema, metadata_schema_length);
    return ret;
}
/* Replace the edge indexes with copies of the supplied arrays.
 *
 * Both input arrays are read as having one tsk_id_t per row of the edge
 * table. Any existing index is dropped first, so the inputs must not be
 * the collection's own index arrays. The inputs are not checked for NULL.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if either copy cannot be
 * allocated; in that case the collection is left without a usable index. */
int
tsk_table_collection_set_indexes(tsk_table_collection_t *self,
    tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order)
{
    const tsk_size_t num_bytes = self->edges.num_rows * sizeof(tsk_id_t);

    tsk_table_collection_drop_index(self, 0);

    /* Allocate both arrays before checking, so that a partial failure is
     * left for a later drop/free to clean up. */
    self->indexes.edge_insertion_order = tsk_malloc(num_bytes);
    self->indexes.edge_removal_order = tsk_malloc(num_bytes);
    if (self->indexes.edge_removal_order == NULL
        || self->indexes.edge_insertion_order == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }

    tsk_memcpy(self->indexes.edge_insertion_order, edge_insertion_order, num_bytes);
    tsk_memcpy(self->indexes.edge_removal_order, edge_removal_order, num_bytes);
    /* Record the edge count the index was built for. */
    self->indexes.num_edges = self->edges.num_rows;
    return 0;
}
/* Replace the edge indexes with the supplied arrays without copying them.
 *
 * The pointers are stored directly in the collection, which frees them
 * when the index is later dropped or the collection is freed. Any existing
 * index is dropped first. The arrays are recorded as covering the current
 * number of edge rows; their actual length is not checked.
 *
 * Returns 0 on success, or TSK_ERR_BAD_PARAM_VALUE (leaving the existing
 * index untouched) if either pointer is NULL. */
int
tsk_table_collection_takeset_indexes(tsk_table_collection_t *self,
    tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order)
{
    int ret = 0;

    if (edge_insertion_order != NULL && edge_removal_order != NULL) {
        tsk_table_collection_drop_index(self, 0);
        self->indexes.edge_insertion_order = edge_insertion_order;
        self->indexes.edge_removal_order = edge_removal_order;
        self->indexes.num_edges = self->edges.num_rows;
    } else {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
    }
    return ret;
}
/* Report whether the table collection currently has an edge index.
 *
 * An index is present when both index arrays are non-NULL and the edge
 * count recorded with the index equals the current number of edge rows.
 * The options argument is unused. */
bool
tsk_table_collection_has_index(
    const tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
{
    if (self->indexes.edge_insertion_order == NULL) {
        return false;
    }
    if (self->indexes.edge_removal_order == NULL) {
        return false;
    }
    /* An index recorded for a different edge count is treated as absent. */
    return self->indexes.num_edges == self->edges.num_rows;
}
/* Report whether the table collection has a reference sequence, i.e.
 * whether tsk_reference_sequence_is_null is false for it. */
bool
tsk_table_collection_has_reference_sequence(const tsk_table_collection_t *self)
{
    const bool is_null = tsk_reference_sequence_is_null(&self->reference_sequence);

    return !is_null;
}
/* Discard the edge index of the table collection, if any.
 *
 * Both index arrays are freed and reset to NULL and the recorded edge
 * count is reset to 0. The options argument is unused. Always returns 0. */
int
tsk_table_collection_drop_index(
    tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
{
    tsk_safe_free(self->indexes.edge_insertion_order);
    self->indexes.edge_insertion_order = NULL;

    tsk_safe_free(self->indexes.edge_removal_order);
    self->indexes.edge_removal_order = NULL;

    self->indexes.num_edges = 0;
    return 0;
}
/* Build the edge insertion and removal indexes for the table collection.
 *
 * The tables are first checked with TSK_CHECK_EDGE_ORDERING; a non-zero
 * result is returned as-is and the existing index is left untouched.
 * Otherwise any existing index is dropped and two orderings of the edge
 * IDs are computed with qsort and cmp_index_sort:
 *  - insertion order: keyed on (left, time[parent], parent, child);
 *  - removal order: keyed on (right, -time[parent], -parent, -child).
 * The options argument is unused.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if an allocation fails, in
 * which case the collection is left without a usable index. */
int TSK_WARN_UNUSED
tsk_table_collection_build_index(
    tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret = TSK_ERR_GENERIC;
    tsk_id_t check_ret;
    tsk_size_t e;
    tsk_id_t parent;
    const double *time = self->nodes.time;
    const tsk_size_t num_edges = self->edges.num_rows;
    index_sort_t *keys = NULL;
    index_sort_t *key;

    /* For build indexes to make sense we must have referential integrity and
     * sorted edges */
    check_ret = tsk_table_collection_check_integrity(self, TSK_CHECK_EDGE_ORDERING);
    if (check_ret != 0) {
        ret = (int) check_ret;
        goto out;
    }

    tsk_table_collection_drop_index(self, 0);
    self->indexes.edge_insertion_order = tsk_malloc(num_edges * sizeof(tsk_id_t));
    self->indexes.edge_removal_order = tsk_malloc(num_edges * sizeof(tsk_id_t));
    keys = tsk_malloc(num_edges * sizeof(index_sort_t));
    if (keys == NULL || self->indexes.edge_insertion_order == NULL
        || self->indexes.edge_removal_order == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* sort by left and increasing time to give us the order in which
     * records should be inserted */
    for (e = 0; e < num_edges; e++) {
        key = &keys[e];
        parent = self->edges.parent[e];
        key->index = (tsk_id_t) e;
        key->first = self->edges.left[e];
        key->second = time[parent];
        key->third = parent;
        key->fourth = self->edges.child[e];
    }
    qsort(keys, (size_t) num_edges, sizeof(index_sort_t), cmp_index_sort);
    for (e = 0; e < num_edges; e++) {
        self->indexes.edge_insertion_order[e] = keys[e].index;
    }

    /* sort by right and decreasing parent time to give us the order in which
     * records should be removed. */
    for (e = 0; e < num_edges; e++) {
        key = &keys[e];
        parent = self->edges.parent[e];
        key->index = (tsk_id_t) e;
        key->first = self->edges.right[e];
        key->second = -time[parent];
        key->third = -parent;
        key->fourth = -self->edges.child[e];
    }
    qsort(keys, (size_t) num_edges, sizeof(index_sort_t), cmp_index_sort);
    for (e = 0; e < num_edges; e++) {
        self->indexes.edge_removal_order[e] = keys[e].index;
    }

    /* Record the edge count the index was built for. */
    self->indexes.num_edges = num_edges;
    ret = 0;
out:
    tsk_safe_free(keys);
    return ret;
}
/* Replace the stored file UUID.
 *
 * Any existing UUID is freed. If uuid is NULL the collection is left with
 * no UUID; otherwise exactly TSK_UUID_SIZE bytes are copied from uuid into
 * a new buffer and a terminating NUL is appended, so the input itself need
 * not be NUL-terminated.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if the buffer cannot be
 * allocated. */
static int TSK_WARN_UNUSED
tsk_table_collection_set_file_uuid(tsk_table_collection_t *self, const char *uuid)
{
    tsk_safe_free(self->file_uuid);
    self->file_uuid = NULL;

    if (uuid == NULL) {
        return 0;
    }

    /* Allow space for \0 so we can print it as a string */
    self->file_uuid = tsk_malloc(TSK_UUID_SIZE + 1);
    if (self->file_uuid == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    tsk_memcpy(self->file_uuid, uuid, TSK_UUID_SIZE);
    self->file_uuid[TSK_UUID_SIZE] = '\0';
    return 0;
}
/* Copy the contents of one table collection into another.
 *
 * Unless TSK_NO_INIT is set in options, dest is first initialised with
 * tsk_table_collection_init (which receives options). Every table, the
 * sequence length, the indexes (if self has them), time_units, metadata,
 * the metadata schema and the reference sequence are then copied. The
 * file UUID is copied only when TSK_COPY_FILE_UUID is set.
 *
 * Returns 0 on success or the first non-zero value returned by a
 * component operation; dest may be partially populated in that case. */
int TSK_WARN_UNUSED
tsk_table_collection_copy(const tsk_table_collection_t *self,
    tsk_table_collection_t *dest, tsk_flags_t options)
{
    int ret = 0;

    if (!(options & TSK_NO_INIT)) {
        ret = tsk_table_collection_init(dest, options);
        if (ret != 0) {
            return ret;
        }
    }

    /* The component tables were initialised above (or by the caller), so
     * they are always copied with TSK_NO_INIT. */
    ret = tsk_node_table_copy(&self->nodes, &dest->nodes, TSK_NO_INIT);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_edge_table_copy(&self->edges, &dest->edges, TSK_NO_INIT);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_migration_table_copy(&self->migrations, &dest->migrations, TSK_NO_INIT);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_site_table_copy(&self->sites, &dest->sites, TSK_NO_INIT);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_mutation_table_copy(&self->mutations, &dest->mutations, TSK_NO_INIT);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_individual_table_copy(&self->individuals, &dest->individuals, TSK_NO_INIT);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_population_table_copy(&self->populations, &dest->populations, TSK_NO_INIT);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_provenance_table_copy(&self->provenances, &dest->provenances, TSK_NO_INIT);
    if (ret != 0) {
        return ret;
    }

    dest->sequence_length = self->sequence_length;

    /* The edge table must already be copied at this point, because
     * tsk_table_collection_set_indexes sizes the index from dest's edges. */
    if (tsk_table_collection_has_index(self, 0)) {
        ret = tsk_table_collection_set_indexes(
            dest, self->indexes.edge_insertion_order, self->indexes.edge_removal_order);
        if (ret != 0) {
            return ret;
        }
    }

    ret = tsk_table_collection_set_time_units(
        dest, self->time_units, self->time_units_length);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_table_collection_set_metadata(dest, self->metadata, self->metadata_length);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_table_collection_set_metadata_schema(
        dest, self->metadata_schema, self->metadata_schema_length);
    if (ret != 0) {
        return ret;
    }
    ret = tsk_reference_sequence_copy(
        &self->reference_sequence, &dest->reference_sequence, options);
    if (ret != 0) {
        return ret;
    }

    if (options & TSK_COPY_FILE_UUID) {
        /* The UUID should only be generated on writing to a file (see the call
         * to generate_uuid in tsk_table_collection_write_format_data) and
         * no other writing access is supported. We only read the value from
         * the file, and raise an error if it's the wrong length there. Thus,
         * finding a UUID value of any other length here is undefined behaviour.
         */
        tsk_bug_assert(
            self->file_uuid == NULL || strlen(self->file_uuid) == TSK_UUID_SIZE);
        ret = tsk_table_collection_set_file_uuid(dest, self->file_uuid);
        if (ret != 0) {
            return ret;
        }
    }
    return ret;
}
/* Read the top-level (non-table) items of a tree sequence file from `store`
 * into `self`. The format name and major version are validated; the sequence
 * length and file UUID are required; time units, metadata and metadata schema
 * are read only when their keys are present in the store.
 *
 * Returns 0 on success or a tskit error code. A kastore "key not found" error
 * is reported as TSK_ERR_REQUIRED_COL_NOT_FOUND. */
static int TSK_WARN_UNUSED
tsk_table_collection_read_format_data(tsk_table_collection_t *self, kastore_t *store)
{
    int ret = 0;
    size_t count;
    uint32_t *file_version = NULL;
    int8_t *name = NULL;
    int8_t *file_uuid = NULL;
    double *seq_len = NULL;
    char *units = NULL;
    char *md = NULL;
    char *md_schema = NULL;
    size_t units_len, md_len, md_schema_len;

    /* TODO we could simplify this function quite a bit if we use the
     * read_table_properties infrastructure. We would need to add the
     * ability to have non-optional columns to that though. */

    /* Format name: must match exactly, both in length and content. */
    ret = kastore_gets_int8(store, "format/name", &name, &count);
    if (ret != 0) {
        ret = tsk_set_kas_error(ret);
        goto out;
    }
    if (count != TSK_FILE_FORMAT_NAME_LENGTH
        || tsk_memcmp(TSK_FILE_FORMAT_NAME, name, TSK_FILE_FORMAT_NAME_LENGTH) != 0) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }

    /* Format version: a (major, minor) pair; only the major number is checked. */
    ret = kastore_gets_uint32(store, "format/version", &file_version, &count);
    if (ret != 0) {
        ret = tsk_set_kas_error(ret);
        goto out;
    }
    if (count != 2) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    if (file_version[0] < TSK_FILE_FORMAT_VERSION_MAJOR) {
        ret = tsk_trace_error(TSK_ERR_FILE_VERSION_TOO_OLD);
        goto out;
    } else if (file_version[0] > TSK_FILE_FORMAT_VERSION_MAJOR) {
        ret = tsk_trace_error(TSK_ERR_FILE_VERSION_TOO_NEW);
        goto out;
    }

    /* Sequence length: a single, strictly positive double. */
    ret = kastore_gets_float64(store, "sequence_length", &seq_len, &count);
    if (ret != 0) {
        ret = tsk_set_kas_error(ret);
        goto out;
    }
    if (count != 1) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    if (seq_len[0] <= 0.0) {
        ret = tsk_trace_error(TSK_ERR_BAD_SEQUENCE_LENGTH);
        goto out;
    }
    self->sequence_length = seq_len[0];

    /* File UUID: must be exactly TSK_UUID_SIZE bytes. */
    ret = kastore_gets_int8(store, "uuid", &file_uuid, &count);
    if (ret != 0) {
        ret = tsk_set_kas_error(ret);
        goto out;
    }
    if (count != TSK_UUID_SIZE) {
        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
        goto out;
    }
    ret = tsk_table_collection_set_file_uuid(self, (const char *) file_uuid);
    if (ret != 0) {
        goto out;
    }

    /* Optional: time units (copied into the table collection). */
    ret = kastore_containss(store, "time_units");
    if (ret < 0) {
        ret = tsk_set_kas_error(ret);
        goto out;
    }
    if (ret == 1) {
        ret = kastore_gets_int8(store, "time_units", (int8_t **) &units, &units_len);
        if (ret != 0) {
            ret = tsk_set_kas_error(ret);
            goto out;
        }
        ret = tsk_table_collection_set_time_units(self, units, (tsk_size_t) units_len);
        if (ret != 0) {
            goto out;
        }
    }

    /* Optional: metadata. The buffer is handed over to the table collection,
     * so the local pointer is cleared to stop it being freed below. */
    ret = kastore_containss(store, "metadata");
    if (ret < 0) {
        ret = tsk_set_kas_error(ret);
        goto out;
    }
    if (ret == 1) {
        ret = kastore_gets_int8(store, "metadata", (int8_t **) &md, &md_len);
        if (ret != 0) {
            ret = tsk_set_kas_error(ret);
            goto out;
        }
        ret = tsk_table_collection_takeset_metadata(self, md, (tsk_size_t) md_len);
        if (ret != 0) {
            goto out;
        }
        md = NULL;
    }

    /* Optional: metadata schema (copied into the table collection). */
    ret = kastore_containss(store, "metadata_schema");
    if (ret < 0) {
        ret = tsk_set_kas_error(ret);
        goto out;
    }
    if (ret == 1) {
        ret = kastore_gets_int8(
            store, "metadata_schema", (int8_t **) &md_schema, &md_schema_len);
        if (ret != 0) {
            ret = tsk_set_kas_error(ret);
            goto out;
        }
        ret = tsk_table_collection_set_metadata_schema(
            self, md_schema, (tsk_size_t) md_schema_len);
        if (ret != 0) {
            goto out;
        }
    }
out:
    /* Translate kastore's missing-key error into the tskit equivalent. */
    if ((ret ^ (1 << TSK_KAS_ERR_BIT)) == KAS_ERR_KEY_NOT_FOUND) {
        ret = tsk_trace_error(TSK_ERR_REQUIRED_COL_NOT_FOUND);
    }
    /* Release every buffer still held locally. */
    tsk_safe_free(file_version);
    tsk_safe_free(name);
    tsk_safe_free(file_uuid);
    tsk_safe_free(seq_len);
    tsk_safe_free(units);
    tsk_safe_free(md_schema);
    tsk_safe_free(md);
    return ret;
}
/* Write the edge insertion/removal order index arrays to `store`, but only
 * when the table collection currently has an index. Returns 0 when there is
 * nothing to write, otherwise the result of write_table_cols. */
static int TSK_WARN_UNUSED
tsk_table_collection_dump_indexes(const tsk_table_collection_t *self, kastore_t *store,
    tsk_flags_t TSK_UNUSED(options))
{
    write_table_col_t index_cols[] = {
        { "indexes/edge_insertion_order", NULL, self->indexes.num_edges,
            TSK_ID_STORAGE_TYPE },
        { "indexes/edge_removal_order", NULL, self->indexes.num_edges,
            TSK_ID_STORAGE_TYPE },
        { .name = NULL },
    };

    if (!tsk_table_collection_has_index(self, 0)) {
        return 0;
    }
    /* The array pointers are only filled in once we know an index exists. */
    index_cols[0].array = self->indexes.edge_insertion_order;
    index_cols[1].array = self->indexes.edge_removal_order;
    return write_table_cols(store, index_cols, 0);
}
/* Read the optional edge index columns from `store`. Either both columns are
 * present or neither is; when present they must have one entry per edge row
 * and are handed over to the table collection. */
static int TSK_WARN_UNUSED
tsk_table_collection_load_indexes(tsk_table_collection_t *self, kastore_t *store)
{
    int ret = 0;
    tsk_id_t *insertion = NULL;
    tsk_id_t *removal = NULL;
    tsk_size_t index_len = TSK_NUM_ROWS_UNSET;
    read_table_col_t cols[] = {
        { "indexes/edge_insertion_order", (void **) &insertion, TSK_ID_STORAGE_TYPE,
            TSK_COL_OPTIONAL },
        { "indexes/edge_removal_order", (void **) &removal, TSK_ID_STORAGE_TYPE,
            TSK_COL_OPTIONAL },
        { .name = NULL },
    };

    ret = read_table_cols(store, &index_len, cols, 0);
    if (ret != 0) {
        goto out;
    }
    /* Exactly one of the two columns being present is an error. */
    if ((insertion == NULL) != (removal == NULL)) {
        ret = tsk_trace_error(TSK_ERR_BOTH_COLUMNS_REQUIRED);
        goto out;
    }
    if (insertion != NULL) {
        if (index_len != self->edges.num_rows) {
            ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
            goto out;
        }
        ret = tsk_table_collection_takeset_indexes(self, insertion, removal);
        if (ret != 0) {
            goto out;
        }
        /* The arrays now belong to the table collection; don't free them. */
        insertion = NULL;
        removal = NULL;
    }
out:
    tsk_safe_free(insertion);
    tsk_safe_free(removal);
    return ret;
}
/* Read the optional reference sequence properties (data, url, metadata and
 * metadata schema) from `store` and apply whichever are present to
 * self->reference_sequence. Data and metadata buffers are handed over with the
 * takeset functions; the metadata schema and url are passed to the set
 * functions and their buffers remain for the cleanup at `out`. */
static int
tsk_table_collection_load_reference_sequence(
    tsk_table_collection_t *self, kastore_t *store)
{
    int ret = 0;
    char *seq_data = NULL;
    char *seq_url = NULL;
    char *seq_md = NULL;
    char *seq_md_schema = NULL;
    tsk_size_t seq_data_len = 0, seq_url_len, seq_md_len, seq_md_schema_len;
    tsk_reference_sequence_t *ref = &self->reference_sequence;
    read_table_property_t properties[] = {
        { "reference_sequence/data", (void **) &seq_data, &seq_data_len, KAS_UINT8,
            TSK_COL_OPTIONAL },
        { "reference_sequence/url", (void **) &seq_url, &seq_url_len, KAS_UINT8,
            TSK_COL_OPTIONAL },
        { "reference_sequence/metadata", (void **) &seq_md, &seq_md_len, KAS_UINT8,
            TSK_COL_OPTIONAL },
        { "reference_sequence/metadata_schema", (void **) &seq_md_schema,
            &seq_md_schema_len, KAS_UINT8, TSK_COL_OPTIONAL },
        { .name = NULL },
    };

    ret = read_table_properties(store, properties, 0);
    if (ret != 0) {
        goto out;
    }
    if (seq_data != NULL) {
        ret = tsk_reference_sequence_takeset_data(
            ref, seq_data, (tsk_size_t) seq_data_len);
        if (ret != 0) {
            goto out;
        }
        /* Buffer handed over: clear so the cleanup below leaves it alone. */
        seq_data = NULL;
    }
    if (seq_md != NULL) {
        ret = tsk_reference_sequence_takeset_metadata(
            ref, seq_md, (tsk_size_t) seq_md_len);
        if (ret != 0) {
            goto out;
        }
        seq_md = NULL;
    }
    if (seq_md_schema != NULL) {
        ret = tsk_reference_sequence_set_metadata_schema(
            ref, seq_md_schema, (tsk_size_t) seq_md_schema_len);
        if (ret != 0) {
            goto out;
        }
    }
    if (seq_url != NULL) {
        ret = tsk_reference_sequence_set_url(ref, seq_url, (tsk_size_t) seq_url_len);
        if (ret != 0) {
            goto out;
        }
    }
out:
    free_read_table_mem(NULL, NULL, properties);
    return ret;
}
/* Load a table collection from an open stream into an already-initialised
 * `self`.
 *
 * options:
 *   TSK_LOAD_SKIP_TABLES             - do not read the tables or stored
 *                                      indexes; an index is built on the
 *                                      (unloaded) tables instead.
 *   TSK_LOAD_SKIP_REFERENCE_SEQUENCE - do not read the reference sequence.
 *
 * Returns 0 on success, TSK_ERR_EOF when the stream is at EOF before any
 * bytes of a store are read, or another tskit error code. All kastore
 * errors, including one from the final close, are converted with
 * tsk_set_kas_error so that raw kastore codes are never returned to callers. */
static int TSK_WARN_UNUSED
tsk_table_collection_loadf_inited(
    tsk_table_collection_t *self, FILE *file, tsk_flags_t options)
{
    int ret = 0;
    kastore_t store;
    int kas_flags = KAS_READ_ALL;

    /* When part of the file is being skipped, don't ask kastore to read
     * everything up front. */
    if ((options & TSK_LOAD_SKIP_TABLES)
        || (options & TSK_LOAD_SKIP_REFERENCE_SEQUENCE)) {
        kas_flags = 0;
    }
    kas_flags = kas_flags | KAS_GET_TAKES_OWNERSHIP;
    ret = kastore_openf(&store, file, "r", kas_flags);
    if (ret != 0) {
        if (ret == KAS_ERR_EOF) {
            /* KAS_ERR_EOF means that we tried to read a store from the stream
             * and we hit EOF immediately without reading any bytes. We signal
             * this back to the client, which allows it to read an indefinite
             * number of stores from a stream */
            ret = tsk_trace_error(TSK_ERR_EOF);
        } else {
            ret = tsk_set_kas_error(ret);
        }
        goto out;
    }
    ret = tsk_table_collection_read_format_data(self, &store);
    if (ret != 0) {
        goto out;
    }
    if (!(options & TSK_LOAD_SKIP_TABLES)) {
        ret = tsk_node_table_load(&self->nodes, &store);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_edge_table_load(&self->edges, &store);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_site_table_load(&self->sites, &store);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_mutation_table_load(&self->mutations, &store);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_migration_table_load(&self->migrations, &store);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_individual_table_load(&self->individuals, &store);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_population_table_load(&self->populations, &store);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_provenance_table_load(&self->provenances, &store);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_table_collection_load_indexes(self, &store);
        if (ret != 0) {
            goto out;
        }
    } else {
        ret = tsk_table_collection_build_index(self, 0);
        if (ret != 0) {
            goto out;
        }
    }
    if (!(options & TSK_LOAD_SKIP_REFERENCE_SEQUENCE)) {
        ret = tsk_table_collection_load_reference_sequence(self, &store);
        if (ret != 0) {
            goto out;
        }
    }
    ret = kastore_close(&store);
    if (ret != 0) {
        /* Convert to a tskit error: raw kastore codes share numeric values
         * with unrelated tskit error codes. */
        ret = tsk_set_kas_error(ret);
        goto out;
    }
out:
    /* If we're exiting on an error, we ignore any further errors that might come
     * from kastore. In the nominal case, closing an already-closed store is a
     * safe noop */
    kastore_close(&store);
    return ret;
}
/* Load a table collection from an open stream. Unless TSK_NO_INIT is set in
 * `options`, `self` is initialised first. Returns 0 on success or a tskit
 * error code. */
int TSK_WARN_UNUSED
tsk_table_collection_loadf(tsk_table_collection_t *self, FILE *file, tsk_flags_t options)
{
    int ret;

    if (!(options & TSK_NO_INIT)) {
        ret = tsk_table_collection_init(self, options);
        if (ret != 0) {
            return ret;
        }
    }
    return tsk_table_collection_loadf_inited(self, file, options);
}
/* Load a table collection from the file at `filename`. Unless TSK_NO_INIT is
 * set in `options`, `self` is initialised first.
 *
 * Returns 0 on success, TSK_ERR_IO if the file cannot be opened or closed, or
 * the error code from initialisation or loading. */
int TSK_WARN_UNUSED
tsk_table_collection_load(
    tsk_table_collection_t *self, const char *filename, tsk_flags_t options)
{
    int ret = 0;
    int close_err;
    FILE *file = NULL;

    if (!(options & TSK_NO_INIT)) {
        ret = tsk_table_collection_init(self, options);
        if (ret != 0) {
            goto out;
        }
    }
    file = fopen(filename, "rb");
    if (file == NULL) {
        ret = tsk_trace_error(TSK_ERR_IO);
        goto out;
    }
    ret = tsk_table_collection_loadf_inited(self, file, options);
    if (ret != 0) {
        goto out;
    }
    close_err = fclose(file);
    /* The stream is disassociated from the file whether or not fclose
     * succeeds, so forget the pointer before handling the error; closing it
     * again at `out` would be undefined behaviour. */
    file = NULL;
    if (close_err != 0) {
        ret = tsk_trace_error(TSK_ERR_IO);
        goto out;
    }
out:
    if (file != NULL) {
        /* Ignore any additional errors we might get when closing the file
         * in error conditions */
        fclose(file);
    }
    return ret;
}
/* Write the reference sequence properties (data, url, metadata and metadata
 * schema) to `store`, but only when the table collection has a reference
 * sequence. Returns 0 when there is nothing to write, otherwise the result of
 * write_table_cols. */
static int TSK_WARN_UNUSED
tsk_table_collection_dump_reference_sequence(const tsk_table_collection_t *self,
    kastore_t *store, tsk_flags_t TSK_UNUSED(options))
{
    const tsk_reference_sequence_t *refseq = &self->reference_sequence;
    write_table_col_t refseq_cols[] = {
        { "reference_sequence/data", (void *) refseq->data, refseq->data_length,
            KAS_UINT8 },
        { "reference_sequence/url", (void *) refseq->url, refseq->url_length,
            KAS_UINT8 },
        { "reference_sequence/metadata", (void *) refseq->metadata,
            refseq->metadata_length, KAS_UINT8 },
        { "reference_sequence/metadata_schema", (void *) refseq->metadata_schema,
            refseq->metadata_schema_length, KAS_UINT8 },
        { .name = NULL },
    };

    if (!tsk_table_collection_has_reference_sequence(self)) {
        return 0;
    }
    return write_table_cols(store, refseq_cols, 0);
}
/* Write the table collection to the file at `filename`, creating or
 * truncating it.
 *
 * Returns 0 on success, TSK_ERR_IO if the file cannot be opened or closed, or
 * the error code from tsk_table_collection_dumpf. If an error occurs after
 * the file has been opened, the file is removed so that no partial output is
 * left behind. */
int TSK_WARN_UNUSED
tsk_table_collection_dump(
    const tsk_table_collection_t *self, const char *filename, tsk_flags_t options)
{
    int ret = 0;
    int close_err;
    bool file_opened = false;
    FILE *file = fopen(filename, "wb");

    if (file == NULL) {
        ret = tsk_trace_error(TSK_ERR_IO);
        goto out;
    }
    file_opened = true;
    ret = tsk_table_collection_dumpf(self, file, options);
    if (ret != 0) {
        goto out;
    }
    close_err = fclose(file);
    /* The stream is disassociated from the file whether or not fclose
     * succeeds, so forget the pointer before handling the error; closing it
     * again at `out` would be undefined behaviour. */
    file = NULL;
    if (close_err != 0) {
        ret = tsk_trace_error(TSK_ERR_IO);
        goto out;
    }
out:
    if (file != NULL) {
        /* Ignore any additional errors we might get when closing the file
         * in error conditions */
        fclose(file);
    }
    if (ret != 0 && file_opened) {
        /* If an error occurred make sure that the filename is removed */
        remove(filename);
    }
    return ret;
}
/* Write the table collection to an open stream as a kastore. A fresh UUID is
 * generated for every write. The format data is written first, followed by
 * each table, the indexes and the reference sequence. Returns 0 on success or
 * a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_dumpf(
    const tsk_table_collection_t *self, FILE *file, tsk_flags_t options)
{
    int ret = 0;
    kastore_t store;
    /* Must include space for trailing null. */
    char file_uuid[TSK_UUID_SIZE + 1];
    uint32_t file_version[]
        = { TSK_FILE_FORMAT_VERSION_MAJOR, TSK_FILE_FORMAT_VERSION_MINOR };
    write_table_col_t format_cols[] = {
        { "format/name", (const void *) &TSK_FILE_FORMAT_NAME,
            TSK_FILE_FORMAT_NAME_LENGTH, KAS_INT8 },
        { "format/version", (const void *) file_version, 2, KAS_UINT32 },
        { "sequence_length", (const void *) &self->sequence_length, 1, KAS_FLOAT64 },
        { "uuid", (void *) file_uuid, TSK_UUID_SIZE, KAS_INT8 },
        { "time_units", (void *) self->time_units, self->time_units_length, KAS_INT8 },
        { "metadata", (void *) self->metadata, self->metadata_length, KAS_INT8 },
        { "metadata_schema", (void *) self->metadata_schema,
            self->metadata_schema_length, KAS_INT8 },
        { .name = NULL },
    };

    /* Zero the store so that closing it on an early error path is safe. */
    tsk_memset(&store, 0, sizeof(store));
    ret = kastore_openf(&store, file, "w", 0);
    if (ret != 0) {
        ret = tsk_set_kas_error(ret);
        goto out;
    }

    /* Format data; the uuid column points at file_uuid, filled in here. */
    ret = tsk_generate_uuid(file_uuid, 0);
    if (ret != 0) {
        goto out;
    }
    ret = write_table_cols(&store, format_cols, options);
    if (ret != 0) {
        goto out;
    }

    /* All of these functions will set the kas_error internally, so we don't have
     * to modify the return value. */
    ret = tsk_node_table_dump(&self->nodes, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_edge_table_dump(&self->edges, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_site_table_dump(&self->sites, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_migration_table_dump(&self->migrations, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_mutation_table_dump(&self->mutations, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_individual_table_dump(&self->individuals, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_population_table_dump(&self->populations, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_provenance_table_dump(&self->provenances, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_table_collection_dump_indexes(self, &store, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_table_collection_dump_reference_sequence(self, &store, options);
    if (ret != 0) {
        goto out;
    }

    ret = kastore_close(&store);
    if (ret != 0) {
        ret = tsk_set_kas_error(ret);
    }
out:
    /* On any failure make sure the store is closed; it's safe to close a
     * kastore twice. */
    if (ret != 0) {
        kastore_close(&store);
    }
    return ret;
}
/* Simplify the tables in place with respect to `samples`.
 *
 * samples/num_samples: the sample node IDs; when `samples` is NULL every node
 *     flagged TSK_NODE_IS_SAMPLE is used and `num_samples` is ignored.
 * options: TSK_SIMPLIFY_KEEP_UNARY and TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS
 *     may not be combined; TSK_DEBUG prints the simplifier state.
 * node_map: passed through to simplifier_run.
 *
 * Edge metadata is not supported. The indexes are dropped on success.
 * Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_simplify(tsk_table_collection_t *self, const tsk_id_t *samples,
    tsk_size_t num_samples, tsk_flags_t options, tsk_id_t *node_map)
{
    int ret = 0;
    simplifier_t simplifier;
    tsk_id_t *flagged_samples = NULL;
    tsk_id_t node;
    const tsk_id_t num_nodes = (tsk_id_t) self->nodes.num_rows;

    /* Avoid calling to simplifier_free with uninit'd memory on error branches */
    tsk_memset(&simplifier, 0, sizeof(simplifier_t));

    if ((options & TSK_SIMPLIFY_KEEP_UNARY)
        && (options & TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS)) {
        ret = tsk_trace_error(TSK_ERR_KEEP_UNARY_MUTUALLY_EXCLUSIVE);
        goto out;
    }
    /* For now we don't bother with edge metadata, but it can easily be
     * implemented. */
    if (self->edges.metadata_length > 0) {
        ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);
        goto out;
    }

    if (samples == NULL) {
        /* No explicit samples: collect every node carrying the sample flag. */
        flagged_samples
            = tsk_malloc(self->nodes.num_rows * sizeof(*flagged_samples));
        if (flagged_samples == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        num_samples = 0;
        for (node = 0; node < num_nodes; node++) {
            if ((self->nodes.flags[node] & TSK_NODE_IS_SAMPLE) != 0) {
                flagged_samples[num_samples++] = node;
            }
        }
        samples = flagged_samples;
    }

    ret = simplifier_init(&simplifier, samples, num_samples, self, options);
    if (ret != 0) {
        goto out;
    }
    ret = simplifier_run(&simplifier, node_map);
    if (ret != 0) {
        goto out;
    }
    if ((options & TSK_DEBUG) != 0) {
        simplifier_print_state(&simplifier, tsk_get_debug_stream());
    }
    /* The indexes are invalidated now so drop them */
    ret = tsk_table_collection_drop_index(self, 0);
out:
    simplifier_free(&simplifier);
    tsk_safe_free(flagged_samples);
    return ret;
}
/* Run the ancestor mapper over this table collection for the given `samples`
 * and `ancestors`, writing into the edge table `result`. Edge metadata is not
 * supported. Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_link_ancestors(tsk_table_collection_t *self, tsk_id_t *samples,
    tsk_size_t num_samples, tsk_id_t *ancestors, tsk_size_t num_ancestors,
    tsk_flags_t TSK_UNUSED(options), tsk_edge_table_t *result)
{
    int ret = 0;
    ancestor_mapper_t mapper;

    /* Zero the mapper so ancestor_mapper_free is safe on every exit path. */
    tsk_memset(&mapper, 0, sizeof(ancestor_mapper_t));

    if (self->edges.metadata_length > 0) {
        ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);
        goto out;
    }
    ret = ancestor_mapper_init(
        &mapper, samples, num_samples, ancestors, num_ancestors, self, result);
    if (ret != 0) {
        goto out;
    }
    ret = ancestor_mapper_run(&mapper);
out:
    ancestor_mapper_free(&mapper);
    return ret;
}
/* Run the IBD finder in "within" mode for the given `samples`, storing the
 * segments in `result` (which is initialised here).
 *
 * min_span/max_time are passed through to tsk_ibd_finder_init. With TSK_DEBUG
 * set in `options` the finder state is printed to the debug stream.
 * Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_ibd_within(const tsk_table_collection_t *self,
    tsk_identity_segments_t *result, const tsk_id_t *samples, tsk_size_t num_samples,
    double min_span, double max_time, tsk_flags_t options)
{
    int ret = 0;
    tsk_ibd_finder_t ibd_finder;

    /* Zero the finder up front: if tsk_identity_segments_init fails we jump
     * to `out` before tsk_ibd_finder_init has run, and tsk_ibd_finder_free
     * must not see uninitialised memory. */
    tsk_memset(&ibd_finder, 0, sizeof(ibd_finder));

    ret = tsk_identity_segments_init(result, self->nodes.num_rows, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_ibd_finder_init(&ibd_finder, self, result, min_span, max_time);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_ibd_finder_init_within(&ibd_finder, samples, num_samples);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_ibd_finder_run(&ibd_finder);
    if (ret != 0) {
        goto out;
    }
    if (!!(options & TSK_DEBUG)) {
        tsk_ibd_finder_print_state(&ibd_finder, tsk_get_debug_stream());
    }
out:
    tsk_ibd_finder_free(&ibd_finder);
    return ret;
}
/* Run the IBD finder in "between" mode for the given sample sets, storing the
 * segments in `result` (which is initialised here).
 *
 * num_sample_sets, sample_set_sizes and sample_sets are passed through to
 * tsk_ibd_finder_init_between; min_span/max_time to tsk_ibd_finder_init.
 * With TSK_DEBUG set in `options` the finder state is printed to the debug
 * stream. Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_ibd_between(const tsk_table_collection_t *self,
    tsk_identity_segments_t *result, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, double min_span,
    double max_time, tsk_flags_t options)
{
    int ret = 0;
    tsk_ibd_finder_t ibd_finder;

    /* Zero the finder up front: if tsk_identity_segments_init fails we jump
     * to `out` before tsk_ibd_finder_init has run, and tsk_ibd_finder_free
     * must not see uninitialised memory. */
    tsk_memset(&ibd_finder, 0, sizeof(ibd_finder));

    ret = tsk_identity_segments_init(result, self->nodes.num_rows, options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_ibd_finder_init(&ibd_finder, self, result, min_span, max_time);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_ibd_finder_init_between(
        &ibd_finder, num_sample_sets, sample_set_sizes, sample_sets);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_ibd_finder_run(&ibd_finder);
    if (ret != 0) {
        goto out;
    }
    if (!!(options & TSK_DEBUG)) {
        tsk_ibd_finder_print_state(&ibd_finder, tsk_get_debug_stream());
    }
out:
    tsk_ibd_finder_free(&ibd_finder);
    return ret;
}
/* Sort the tables by creating a table sorter with `options` and running it
 * from the bookmark `start`. Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_sort(
    tsk_table_collection_t *self, const tsk_bookmark_t *start, tsk_flags_t options)
{
    tsk_table_sorter_t sorter;
    int ret = tsk_table_sorter_init(&sorter, self, options);

    if (ret == 0) {
        ret = tsk_table_sorter_run(&sorter, start);
    }
    /* The sorter is freed whether or not init/run succeeded. */
    tsk_table_sorter_free(&sorter);
    return ret;
}
/* Put the tables into canonical form: subset to all nodes in their existing
 * order, then sort using the mutation and individual sort functions set on
 * the sorter below. Only TSK_SUBSET_KEEP_UNREFERENCED is forwarded from
 * `options` to the subset step. Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_canonicalise(tsk_table_collection_t *self, tsk_flags_t options)
{
    int ret = 0;
    tsk_id_t node;
    tsk_id_t *all_nodes = NULL;
    tsk_table_sorter_t sorter;
    const tsk_flags_t subset_options = options & TSK_SUBSET_KEEP_UNREFERENCED;

    ret = tsk_table_sorter_init(&sorter, self, 0);
    if (ret != 0) {
        goto out;
    }
    sorter.sort_mutations = tsk_table_sorter_sort_mutations;
    sorter.sort_individuals = tsk_table_sorter_sort_individuals_canonical;

    /* Identity list of node IDs: 0, 1, ..., num_rows - 1. */
    all_nodes = tsk_malloc(self->nodes.num_rows * sizeof(*all_nodes));
    if (all_nodes == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    for (node = 0; node < (tsk_id_t) self->nodes.num_rows; node++) {
        all_nodes[node] = node;
    }

    ret = tsk_table_collection_subset(
        self, all_nodes, self->nodes.num_rows, subset_options);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_table_sorter_run(&sorter, NULL);
out:
    tsk_safe_free(all_nodes);
    tsk_table_sorter_free(&sorter);
    return ret;
}
/*
 * Remove any sites with duplicate positions, retaining only the *first*
 * one. Assumes the tables have been sorted, throwing an error if not.
 *
 * Mutations that referred to a removed site are remapped to the retained
 * site at the same position. Returns 0 on success or a tskit error code.
 */
int TSK_WARN_UNUSED
tsk_table_collection_deduplicate_sites(
    tsk_table_collection_t *self, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    tsk_id_t ret_id;
    tsk_size_t j;
    /* Map of old site IDs to new site IDs. */
    tsk_id_t *site_id_map = NULL;
    tsk_site_table_t copy;
    tsk_site_t row, last_row;

    /* Early exit if there's 0 rows. We don't exit early for one row because
     * we would then skip error checking, making the semantics inconsistent. */
    if (self->sites.num_rows == 0) {
        return 0;
    }

    /* Must allocate the site table first for tsk_site_table_free to be safe */
    ret = tsk_site_table_copy(&self->sites, &copy, 0);
    if (ret != 0) {
        goto out;
    }
    ret_id = tsk_table_collection_check_integrity(self, TSK_CHECK_SITE_ORDERING);
    if (ret_id != 0) {
        ret = (int) ret_id;
        goto out;
    }
    site_id_map = tsk_malloc(copy.num_rows * sizeof(*site_id_map));
    if (site_id_map == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_site_table_clear(&self->sites);
    if (ret != 0) {
        goto out;
    }

    /* Rebuild the site table from the copy, adding a row only when the
     * position differs from the previous row's. The initial -1 is a sentinel
     * for "no previous row". */
    last_row.position = -1;
    site_id_map[0] = 0;
    for (j = 0; j < copy.num_rows; j++) {
        tsk_site_table_get_row_unsafe(&copy, (tsk_id_t) j, &row);
        if (row.position != last_row.position) {
            ret_id
                = tsk_site_table_add_row(&self->sites, row.position, row.ancestral_state,
                    row.ancestral_state_length, row.metadata, row.metadata_length);
            if (ret_id < 0) {
                ret = (int) ret_id;
                goto out;
            }
        }
        /* Old site j maps to the most recently added (retained) site. */
        site_id_map[j] = (tsk_id_t) self->sites.num_rows - 1;
        last_row = row;
    }

    if (self->sites.num_rows < copy.num_rows) {
        // Remap sites in the mutation table
        // (but only if there's been any changed sites)
        for (j = 0; j < self->mutations.num_rows; j++) {
            self->mutations.site[j] = site_id_map[self->mutations.site[j]];
        }
    }
    ret = 0;
out:
    tsk_site_table_free(&copy);
    tsk_safe_free(site_id_map);
    return ret;
}
/* Compute the parent column of the mutation table in place.
 *
 * Unless TSK_NO_CHECK_INTEGRITY is set in `options`, the existing parent
 * column is first blanked to TSK_NULL and the tables are checked with
 * TSK_CHECK_TREES. If any later step fails, the original parent values are
 * restored from a backup. Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_compute_mutation_parents(
    tsk_table_collection_t *self, tsk_flags_t options)
{
    int ret = 0;
    tsk_mutation_table_t *muts = &self->mutations;
    tsk_id_t *saved_parents = NULL;
    bool have_backup = false;
    const bool run_checks = !(options & TSK_NO_CHECK_INTEGRITY);

    if (run_checks && muts->num_rows > 0) {
        /* We need to wipe the parent column before computing, as otherwise invalid
         * parents can cause integrity checks to fail. We take a copy to restore on
         * error */
        saved_parents = tsk_malloc(muts->num_rows * sizeof(*saved_parents));
        if (saved_parents == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        tsk_memcpy(
            saved_parents, muts->parent, muts->num_rows * sizeof(*saved_parents));
        /* All-ones bytes set every parent to TSK_NULL */
        tsk_memset(muts->parent, 0xff, muts->num_rows * sizeof(*muts->parent));
        have_backup = true;
    }
    if (run_checks) {
        /* Only a negative (error) result matters here; any non-negative value
         * is overwritten by the call below. */
        ret = (int) tsk_table_collection_check_integrity(self, TSK_CHECK_TREES);
        if (ret < 0) {
            goto out;
        }
    }

    ret = tsk_table_collection_compute_mutation_parents_to_array(self, muts->parent);
out:
    if (ret != 0 && have_backup) {
        tsk_memcpy(
            muts->parent, saved_parents, muts->num_rows * sizeof(*saved_parents));
    }
    tsk_safe_free(saved_parents);
    return ret;
}
/* Assign a time to every mutation in place, then re-sort the tables if the
 * new times have broken the mutation ordering.
 *
 * All mutation times are first reset to TSK_UNKNOWN_TIME and the tables are
 * checked with TSK_CHECK_TREES. The trees are then swept left to right using
 * the edge insertion/removal indexes. A mutation on a node with no parent in
 * the current tree gets the node's time. Otherwise, if n mutations at a site
 * sit on the same node, the k-th of them (k = 1..n in table order) gets
 * parent_time - (parent_time - node_time) * k / (n + 1).
 *
 * `random` is reserved for future use and must be NULL, otherwise
 * TSK_ERR_BAD_PARAM_VALUE is returned. Returns 0 on success or a tskit error
 * code. */
int TSK_WARN_UNUSED
tsk_table_collection_compute_mutation_times(
tsk_table_collection_t *self, double *random, tsk_flags_t TSK_UNUSED(options))
{
int ret = 0;
tsk_id_t num_trees;
const tsk_id_t *restrict I = self->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->indexes.edge_removal_order;
/* These are shallow struct copies: the column pointers still refer to the
 * arrays owned by self, so writes through mutations.time modify self. */
const tsk_edge_table_t edges = self->edges;
const tsk_node_table_t nodes = self->nodes;
const tsk_site_table_t sites = self->sites;
const tsk_mutation_table_t mutations = self->mutations;
const tsk_id_t M = (tsk_id_t) edges.num_rows;
tsk_id_t tj, tk;
tsk_id_t *parent = NULL;
double *numerator = NULL;
double *denominator = NULL;
tsk_id_t u;
double left, right, parent_time;
tsk_id_t site;
/* Using unsigned values here avoids potentially undefined behaviour */
tsk_size_t j, mutation, first_mutation;
/* NOTE(review): presumably the third field is the edges position, so that the
 * sort below skips every edge - confirm against the tsk_bookmark_t layout. */
tsk_bookmark_t skip_edges = { 0, 0, self->edges.num_rows, 0, 0, 0, 0, 0 };
/* The random param is for future usage */
if (random != NULL) {
ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
goto out;
}
/* First set the times to TSK_UNKNOWN_TIME so that check will succeed */
for (j = 0; j < mutations.num_rows; j++) {
mutations.time[j] = TSK_UNKNOWN_TIME;
}
/* TSK_CHECK_MUTATION_PARENTS isn't needed here as we're not using the parents */
num_trees = tsk_table_collection_check_integrity(self, TSK_CHECK_TREES);
if (num_trees < 0) {
ret = (int) num_trees;
goto out;
}
/* Per-node working arrays: parent[] is the current tree; numerator[] and
 * denominator[] are per-site counters that are reset after every site. */
parent = tsk_malloc(nodes.num_rows * sizeof(*parent));
numerator = tsk_malloc(nodes.num_rows * sizeof(*numerator));
denominator = tsk_malloc(nodes.num_rows * sizeof(*denominator));
if (parent == NULL || numerator == NULL || denominator == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* All-ones bytes set every parent to TSK_NULL */
tsk_memset(parent, 0xff, nodes.num_rows * sizeof(*parent));
tsk_memset(numerator, 0, nodes.num_rows * sizeof(*numerator));
tsk_memset(denominator, 0, nodes.num_rows * sizeof(*denominator));
tj = 0;
tk = 0;
site = 0;
mutation = 0;
left = 0;
while (tj < M || left < self->sequence_length) {
/* Remove the edges that end at the left coordinate of this tree... */
while (tk < M && edges.right[O[tk]] == left) {
parent[edges.child[O[tk]]] = TSK_NULL;
tk++;
}
/* ...and insert the edges that start there. */
while (tj < M && edges.left[I[tj]] == left) {
parent[edges.child[I[tj]]] = edges.parent[I[tj]];
tj++;
}
/* The tree extends to the next edge start or end, or the sequence end. */
right = self->sequence_length;
if (tj < M) {
right = TSK_MIN(right, edges.left[I[tj]]);
}
if (tk < M) {
right = TSK_MIN(right, edges.right[O[tk]]);
}
/* Tree is now ready. We look at each site on this tree in turn */
while (site < (tsk_id_t) sites.num_rows && sites.position[site] < right) {
first_mutation = mutation;
/* Count how many mutations each edge has to get our
denominator */
while (mutation < mutations.num_rows && mutations.site[mutation] == site) {
denominator[mutations.node[mutation]]++;
mutation++;
}
/* Go over the mutations again assigning times. As the sorting
requirements guarantee that parents are before children, we assign
oldest first */
for (j = first_mutation; j < mutation; j++) {
u = mutations.node[j];
numerator[u]++;
if (parent[u] == TSK_NULL) {
/* This mutation is above a root */
mutations.time[j] = nodes.time[u];
} else {
parent_time = nodes.time[parent[u]];
mutations.time[j] = parent_time
- (parent_time - nodes.time[u]) * numerator[u]
/ (denominator[u] + 1);
}
}
/* Reset the book-keeping for the next site */
for (j = first_mutation; j < mutation; j++) {
u = mutations.node[j];
numerator[u] = 0;
denominator[u] = 0;
}
site++;
}
/* Move on to the next tree */
left = right;
}
/* Now that mutations have times their sort order may have been invalidated, so
 * re-sort. Safe to cast the result to an int here because we're not counting
 * trees. */
ret = (int) tsk_table_collection_check_integrity(self, TSK_CHECK_MUTATION_ORDERING);
if (ret == TSK_ERR_UNSORTED_MUTATIONS) {
ret = tsk_table_collection_sort(self, &skip_edges, 0);
if (ret != 0) {
goto out;
}
} else if (ret < 0) {
goto out;
}
out:
tsk_safe_free(parent);
tsk_safe_free(numerator);
tsk_safe_free(denominator);
return ret;
}
/* Delete edges, mutations and migrations that are older than `time`.
 *
 * - An edge is kept when its parent node's time is <= `time`.
 * - A mutation is kept when its time is < `time`; for a mutation with unknown
 *   time, the time of its node is used instead. Parent references of the kept
 *   mutations are remapped, becoming TSK_NULL when the parent was deleted.
 * - A migration is kept when its time is < `time`.
 *
 * Each table is rebuilt by copying it, clearing the original and re-adding
 * the kept rows. Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_delete_older(
    tsk_table_collection_t *self, double time, tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;
    tsk_id_t k, new_id, old_parent;
    tsk_id_t *new_mutation_id = NULL;
    double effective_time;
    const double *restrict node_time = self->nodes.time;
    tsk_edge_t edge_row;
    tsk_mutation_t mutation_row;
    tsk_migration_t migration_row;
    tsk_edge_table_t old_edges;
    tsk_mutation_table_t old_mutations;
    tsk_migration_table_t old_migrations;

    /* Zero the scratch tables so they can be freed on every exit path. */
    memset(&old_edges, 0, sizeof(old_edges));
    memset(&old_mutations, 0, sizeof(old_mutations));
    memset(&old_migrations, 0, sizeof(old_migrations));

    /* --- Edges --- */
    ret = tsk_edge_table_copy(&self->edges, &old_edges, 0);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_edge_table_clear(&self->edges);
    if (ret != 0) {
        goto out;
    }
    for (k = 0; k < (tsk_id_t) old_edges.num_rows; k++) {
        tsk_edge_table_get_row_unsafe(&old_edges, k, &edge_row);
        if (!(node_time[edge_row.parent] <= time)) {
            continue;
        }
        new_id = tsk_edge_table_add_row(&self->edges, edge_row.left, edge_row.right,
            edge_row.parent, edge_row.child, edge_row.metadata,
            edge_row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
    }
    /* Calling x_table_free multiple times is safe, so get rid of the
     * extra edge table memory as soon as we can. */
    tsk_edge_table_free(&old_edges);

    /* --- Mutations --- */
    new_mutation_id
        = tsk_malloc(self->mutations.num_rows * sizeof(*new_mutation_id));
    if (new_mutation_id == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_mutation_table_copy(&self->mutations, &old_mutations, 0);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_mutation_table_clear(&self->mutations);
    if (ret != 0) {
        goto out;
    }
    for (k = 0; k < (tsk_id_t) old_mutations.num_rows; k++) {
        tsk_mutation_table_get_row_unsafe(&old_mutations, k, &mutation_row);
        if (tsk_is_unknown_time(mutation_row.time)) {
            effective_time = node_time[mutation_row.node];
        } else {
            effective_time = mutation_row.time;
        }
        /* Deleted unless proven otherwise. */
        new_mutation_id[k] = TSK_NULL;
        if (!(effective_time < time)) {
            continue;
        }
        new_id = tsk_mutation_table_add_row(&self->mutations, mutation_row.site,
            mutation_row.node, mutation_row.parent, mutation_row.time,
            mutation_row.derived_state, mutation_row.derived_state_length,
            mutation_row.metadata, mutation_row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
        new_mutation_id[k] = new_id;
    }
    tsk_mutation_table_free(&old_mutations);
    /* The kept rows still carry old parent IDs; translate them. */
    for (k = 0; k < (tsk_id_t) self->mutations.num_rows; k++) {
        old_parent = self->mutations.parent[k];
        if (old_parent != TSK_NULL) {
            self->mutations.parent[k] = new_mutation_id[old_parent];
        }
    }

    /* --- Migrations --- */
    ret = tsk_migration_table_copy(&self->migrations, &old_migrations, 0);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_migration_table_clear(&self->migrations);
    if (ret != 0) {
        goto out;
    }
    for (k = 0; k < (tsk_id_t) old_migrations.num_rows; k++) {
        tsk_migration_table_get_row_unsafe(&old_migrations, k, &migration_row);
        if (!(migration_row.time < time)) {
            continue;
        }
        new_id = tsk_migration_table_add_row(&self->migrations, migration_row.left,
            migration_row.right, migration_row.node, migration_row.source,
            migration_row.dest, migration_row.time, migration_row.metadata,
            migration_row.metadata_length);
        if (new_id < 0) {
            ret = (int) new_id;
            goto out;
        }
    }
    tsk_migration_table_free(&old_migrations);
out:
    tsk_edge_table_free(&old_edges);
    tsk_mutation_table_free(&old_mutations);
    tsk_migration_table_free(&old_migrations);
    tsk_safe_free(new_mutation_id);
    return ret;
}
/* Record the current number of rows of each table in `position`.
 * Always returns 0. */
int
tsk_table_collection_record_num_rows(
    const tsk_table_collection_t *self, tsk_bookmark_t *position)
{
    /* Node-related tables */
    position->nodes = self->nodes.num_rows;
    position->individuals = self->individuals.num_rows;
    position->populations = self->populations.num_rows;
    /* Edge and migration tables */
    position->edges = self->edges.num_rows;
    position->migrations = self->migrations.num_rows;
    /* Site and mutation tables */
    position->sites = self->sites.num_rows;
    position->mutations = self->mutations.num_rows;
    /* Provenance */
    position->provenances = self->provenances.num_rows;
    return 0;
}
/* Drop the indexes and truncate every table to the row counts recorded in
 * `position`. Processing stops at the first step that fails and its error
 * code is returned; 0 is returned when every step succeeds. */
int TSK_WARN_UNUSED
tsk_table_collection_truncate(tsk_table_collection_t *tables, tsk_bookmark_t *position)
{
    int ret = tsk_table_collection_drop_index(tables, 0);

    if (ret == 0) {
        ret = tsk_individual_table_truncate(
            &tables->individuals, position->individuals);
    }
    if (ret == 0) {
        ret = tsk_node_table_truncate(&tables->nodes, position->nodes);
    }
    if (ret == 0) {
        ret = tsk_edge_table_truncate(&tables->edges, position->edges);
    }
    if (ret == 0) {
        ret = tsk_migration_table_truncate(&tables->migrations, position->migrations);
    }
    if (ret == 0) {
        ret = tsk_site_table_truncate(&tables->sites, position->sites);
    }
    if (ret == 0) {
        ret = tsk_mutation_table_truncate(&tables->mutations, position->mutations);
    }
    if (ret == 0) {
        ret = tsk_population_table_truncate(
            &tables->populations, position->populations);
    }
    if (ret == 0) {
        ret = tsk_provenance_table_truncate(
            &tables->provenances, position->provenances);
    }
    return ret;
}
/* Remove all rows from the tables.
 *
 * options:
 *   TSK_CLEAR_PROVENANCE             - also empty the provenance table
 *                                      (otherwise its rows are retained).
 *   TSK_CLEAR_METADATA_SCHEMAS       - reset the metadata schema of the
 *                                      individual, node, edge, migration,
 *                                      site, mutation and population tables
 *                                      to the empty string.
 *   TSK_CLEAR_TS_METADATA_AND_SCHEMA - reset the top-level metadata and
 *                                      metadata schema to the empty string.
 *
 * Returns 0 on success or a tskit error code. */
int TSK_WARN_UNUSED
tsk_table_collection_clear(tsk_table_collection_t *self, tsk_flags_t options)
{
    int ret = 0;
    /* Every count defaults to zero; only provenances may be retained. */
    tsk_bookmark_t keep = { .provenances = 0 };

    if (!(options & TSK_CLEAR_PROVENANCE)) {
        keep.provenances = self->provenances.num_rows;
    }
    ret = tsk_table_collection_truncate(self, &keep);
    if (ret != 0) {
        goto out;
    }

    if (options & TSK_CLEAR_METADATA_SCHEMAS) {
        ret = tsk_individual_table_set_metadata_schema(&self->individuals, "", 0);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_node_table_set_metadata_schema(&self->nodes, "", 0);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_edge_table_set_metadata_schema(&self->edges, "", 0);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_migration_table_set_metadata_schema(&self->migrations, "", 0);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_site_table_set_metadata_schema(&self->sites, "", 0);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_mutation_table_set_metadata_schema(&self->mutations, "", 0);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_population_table_set_metadata_schema(&self->populations, "", 0);
        if (ret != 0) {
            goto out;
        }
    }

    if (options & TSK_CLEAR_TS_METADATA_AND_SCHEMA) {
        ret = tsk_table_collection_set_metadata(self, "", 0);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_table_collection_set_metadata_schema(self, "", 0);
    }
out:
    return ret;
}
/*
 * Append node `node_id` of `other` to `self->nodes`, first adding the
 * individual and population rows it refers to if they are not mapped yet.
 *
 * `individual_map`, `population_map` and `node_map` are indexed by IDs in
 * `other` and hold the matching ID in `self` (TSK_NULL while unmapped). Every
 * row added here is recorded in the relevant map.
 *
 * A newly added individual keeps the parent IDs it had in `other`; remapping
 * those is left to the caller.
 *
 * When `add_populations` is false the node's population ID is used unchanged
 * and recorded in `population_map` as mapping to itself.
 *
 * Returns a negative error code on failure; callers treat any non-negative
 * value as success.
 */
static int
tsk_table_collection_add_and_remap_node(tsk_table_collection_t *self,
    const tsk_table_collection_t *other, tsk_id_t node_id, tsk_id_t *individual_map,
    tsk_id_t *population_map, tsk_id_t *node_map, bool add_populations)
{
    int ret = 0;
    tsk_id_t ret_id, new_ind, new_pop;
    tsk_node_t node;
    tsk_individual_t ind;
    tsk_population_t pop;

    ret = tsk_node_table_get_row(&other->nodes, node_id, &node);
    if (ret < 0) {
        goto out;
    }

    new_ind = TSK_NULL;
    if (node.individual != TSK_NULL) {
        if (individual_map[node.individual] == TSK_NULL) {
            ret = tsk_individual_table_get_row(
                &other->individuals, node.individual, &ind);
            if (ret < 0) {
                goto out;
            }
            ret_id = tsk_individual_table_add_row(&self->individuals, ind.flags,
                ind.location, ind.location_length, ind.parents, ind.parents_length,
                ind.metadata, ind.metadata_length);
            /* The status of add_row is carried by ret_id, not ret. Testing ret
             * here would let a failed add slip through and store the error
             * value in individual_map. */
            if (ret_id < 0) {
                ret = (int) ret_id;
                goto out;
            }
            individual_map[node.individual] = ret_id;
        }
        new_ind = individual_map[node.individual];
    }

    new_pop = TSK_NULL;
    if (node.population != TSK_NULL) {
        // keep same pops if add_populations is False
        if (!add_populations) {
            population_map[node.population] = node.population;
        }
        if (population_map[node.population] == TSK_NULL) {
            ret = tsk_population_table_get_row(
                &other->populations, node.population, &pop);
            if (ret < 0) {
                goto out;
            }
            ret_id = tsk_population_table_add_row(
                &self->populations, pop.metadata, pop.metadata_length);
            if (ret_id < 0) {
                ret = (int) ret_id;
                goto out;
            }
            population_map[node.population] = ret_id;
        }
        new_pop = population_map[node.population];
    }

    ret_id = tsk_node_table_add_row(&self->nodes, node.flags, node.time, new_pop,
        new_ind, node.metadata, node.metadata_length);
    if (ret_id < 0) {
        ret = (int) ret_id;
        goto out;
    }
    node_map[node.id] = ret_id;
out:
    return ret;
}
/*
 * Rebuild `self`, in place, so that it holds only the `num_nodes` nodes whose
 * IDs are listed in `nodes`, together with the rows that refer to them.
 *
 * A copy of `self` is taken, `self` is cleared, and rows are then added back
 * from the copy:
 *  - individuals referenced by a listed node (every individual when
 *    TSK_SUBSET_KEEP_UNREFERENCED is set), in their original relative order,
 *    with parent IDs remapped and parents that were not kept removed;
 *  - the listed nodes, added to the emptied node table in the order given,
 *    plus any population they reference. With
 *    TSK_SUBSET_NO_CHANGE_POPULATIONS the population table is instead copied
 *    back unchanged and population IDs are not remapped;
 *  - with TSK_SUBSET_KEEP_UNREFERENCED, populations not referenced by a
 *    listed node are appended after the referenced ones;
 *  - edges whose parent and child are both listed;
 *  - sites that carry a mutation on a listed node (every site when
 *    TSK_SUBSET_KEEP_UNREFERENCED is set), in original order, and the
 *    mutations on listed nodes with site, node and parent IDs remapped.
 *
 * Collections containing migrations are rejected with
 * TSK_ERR_MIGRATIONS_NOT_SUPPORTED.
 *
 * Returns 0 on success or an error code. Any error raised after the call to
 * tsk_table_collection_clear below leaves `self` only partially rebuilt.
 */
int TSK_WARN_UNUSED
tsk_table_collection_subset(tsk_table_collection_t *self, const tsk_id_t *nodes,
tsk_size_t num_nodes, tsk_flags_t options)
{
int ret = 0;
tsk_id_t ret_id, j, k, parent_ind, new_parent, new_child, new_node, site_id;
tsk_size_t num_parents;
tsk_individual_t ind;
tsk_edge_t edge;
/* Each map is indexed by an ID in the copy (`tables`) and holds the new ID
 * in `self`, or TSK_NULL if the row is not kept. */
tsk_id_t *node_map = NULL;
tsk_id_t *individual_map = NULL;
tsk_id_t *population_map = NULL;
tsk_id_t *site_map = NULL;
tsk_id_t *mutation_map = NULL;
tsk_table_collection_t tables;
tsk_population_t pop;
tsk_site_t site;
tsk_mutation_t mut;
bool keep_unreferenced = !!(options & TSK_SUBSET_KEEP_UNREFERENCED);
bool no_change_populations = !!(options & TSK_SUBSET_NO_CHANGE_POPULATIONS);
/* Work from a private copy so that `self` can be emptied and refilled. */
ret = tsk_table_collection_copy(self, &tables, 0);
if (ret != 0) {
goto out;
}
/* Not calling TSK_CHECK_TREES so casting to int is safe */
ret = (int) tsk_table_collection_check_integrity(self, 0);
if (ret != 0) {
goto out;
}
/* Options 0: none of the TSK_CLEAR_* extras are requested. */
ret = tsk_table_collection_clear(self, 0);
if (ret != 0) {
goto out;
}
node_map = tsk_malloc(tables.nodes.num_rows * sizeof(*node_map));
individual_map = tsk_malloc(tables.individuals.num_rows * sizeof(*individual_map));
population_map = tsk_malloc(tables.populations.num_rows * sizeof(*population_map));
site_map = tsk_malloc(tables.sites.num_rows * sizeof(*site_map));
mutation_map = tsk_malloc(tables.mutations.num_rows * sizeof(*mutation_map));
if (node_map == NULL || individual_map == NULL || population_map == NULL
|| site_map == NULL || mutation_map == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Fill every map with all-ones bytes; the code below tests entries against
 * TSK_NULL, so this presumably relies on TSK_NULL being -1. */
tsk_memset(node_map, 0xff, tables.nodes.num_rows * sizeof(*node_map));
tsk_memset(
individual_map, 0xff, tables.individuals.num_rows * sizeof(*individual_map));
tsk_memset(
population_map, 0xff, tables.populations.num_rows * sizeof(*population_map));
tsk_memset(site_map, 0xff, tables.sites.num_rows * sizeof(*site_map));
tsk_memset(mutation_map, 0xff, tables.mutations.num_rows * sizeof(*mutation_map));
if (no_change_populations) {
/* Put the whole population table back and use the identity map, so no
 * population row is added or renumbered later on. */
ret = tsk_population_table_copy(
&tables.populations, &self->populations, TSK_NO_INIT);
if (ret < 0) {
goto out;
}
for (k = 0; k < (tsk_id_t) tables.populations.num_rows; k++) {
population_map[k] = k;
}
}
// First do individuals so they stay in the same order.
// So we can remap individual parents and not rely on sortedness,
// we first check who to keep; then build the individual map, and
// finally populate the tables.
if (keep_unreferenced) {
for (k = 0; k < (tsk_id_t) tables.individuals.num_rows; k++) {
// put a non-NULL value here; fill in the actual order next
individual_map[k] = 0;
}
} else {
/* Mark only the individuals of the listed nodes. The node IDs are
 * bounds-checked here because they index the node table directly. */
for (k = 0; k < (tsk_id_t) num_nodes; k++) {
if (nodes[k] < 0 || nodes[k] >= (tsk_id_t) tables.nodes.num_rows) {
ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
goto out;
}
j = tables.nodes.individual[nodes[k]];
if (j != TSK_NULL) {
individual_map[j] = 0;
}
}
}
/* Replace the placeholder marks with consecutive new IDs, in original order. */
j = 0;
for (k = 0; k < (tsk_id_t) tables.individuals.num_rows; k++) {
if (individual_map[k] != TSK_NULL) {
individual_map[k] = j;
j++;
}
}
/* Add the kept individuals, compacting each parents list so that it holds
 * only TSK_NULL entries and parents that were themselves kept. */
for (k = 0; k < (tsk_id_t) tables.individuals.num_rows; k++) {
if (individual_map[k] != TSK_NULL) {
tsk_individual_table_get_row_unsafe(&tables.individuals, k, &ind);
num_parents = 0;
for (j = 0; j < (tsk_id_t) ind.parents_length; j++) {
parent_ind = ind.parents[j];
new_parent = parent_ind;
if (parent_ind != TSK_NULL) {
new_parent = individual_map[parent_ind];
}
if ((parent_ind == TSK_NULL) || (new_parent != TSK_NULL)) {
/* Beware: this modifies the parents column of tables.individuals
* in-place! But it's OK as we don't use it again. */
ind.parents[num_parents] = new_parent;
num_parents++;
}
}
ret_id = tsk_individual_table_add_row(&self->individuals, ind.flags,
ind.location, ind.location_length, ind.parents, num_parents,
ind.metadata, ind.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
/* The row must land at the ID assigned in the numbering pass above. */
tsk_bug_assert(individual_map[k] == ret_id);
}
}
// Nodes and populations
/* Individuals are all mapped already, so the helper only adds node rows and,
 * where population_map is still TSK_NULL, population rows. */
for (k = 0; k < (tsk_id_t) num_nodes; k++) {
ret = tsk_table_collection_add_and_remap_node(
self, &tables, nodes[k], individual_map, population_map, node_map, true);
if (ret < 0) {
goto out;
}
}
/* TODO: Subset the migrations table. We would need to make sure
* that we don't remove populations that are referenced, so it would
* need to be done before the next code block. */
if (tables.migrations.num_rows != 0) {
ret = tsk_trace_error(TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
goto out;
}
if (keep_unreferenced) {
// Keep unused populations
/* population_map is not updated for these rows; it is not read again. */
for (k = 0; k < (tsk_id_t) tables.populations.num_rows; k++) {
if (population_map[k] == TSK_NULL) {
tsk_population_table_get_row_unsafe(&tables.populations, k, &pop);
ret_id = tsk_population_table_add_row(
&self->populations, pop.metadata, pop.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
}
}
}
// Edges
/* Keep an edge only when both of its endpoints were kept. */
for (k = 0; k < (tsk_id_t) tables.edges.num_rows; k++) {
tsk_edge_table_get_row_unsafe(&tables.edges, k, &edge);
new_parent = node_map[edge.parent];
new_child = node_map[edge.child];
if ((new_parent != TSK_NULL) && (new_child != TSK_NULL)) {
ret_id = tsk_edge_table_add_row(&self->edges, edge.left, edge.right,
new_parent, new_child, edge.metadata, edge.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
}
}
// Mutations and sites
// Make a first pass through to build the mutation_map so that
// mutation parent can be remapped even if the table is not in order.
j = 0;
for (k = 0; k < (tsk_id_t) tables.mutations.num_rows; k++) {
if (node_map[tables.mutations.node[k]] != TSK_NULL) {
mutation_map[k] = j;
j++;
site_id = tables.mutations.site[k];
if (site_map[site_id] == TSK_NULL) {
// Insert a temporary non-NULL value
site_map[site_id] = 1;
}
}
}
// Keep retained sites in their original order
j = 0;
for (k = 0; k < (tsk_id_t) tables.sites.num_rows; k++) {
if (keep_unreferenced || site_map[k] != TSK_NULL) {
tsk_site_table_get_row_unsafe(&tables.sites, k, &site);
ret_id = tsk_site_table_add_row(&self->sites, site.position,
site.ancestral_state, site.ancestral_state_length, site.metadata,
site.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
/* Overwrite the temporary mark with the site's real new ID. */
site_map[k] = j;
j++;
}
}
/* Second pass over mutations: add the kept ones using the finished maps. A
 * parent that was not kept maps to TSK_NULL. */
for (k = 0; k < (tsk_id_t) tables.mutations.num_rows; k++) {
tsk_mutation_table_get_row_unsafe(&tables.mutations, k, &mut);
new_node = node_map[mut.node];
if (new_node != TSK_NULL) {
new_parent = TSK_NULL;
if (mut.parent != TSK_NULL) {
new_parent = mutation_map[mut.parent];
}
ret_id = tsk_mutation_table_add_row(&self->mutations, site_map[mut.site],
new_node, new_parent, mut.time, mut.derived_state,
mut.derived_state_length, mut.metadata, mut.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
tsk_bug_assert(mutation_map[mut.id] == ret_id);
}
/* NOTE(review): within this loop ret is only assigned immediately before
 * a goto, and every earlier negative value has already jumped to out, so
 * this check looks redundant. */
if (ret < 0) {
goto out;
}
}
ret = 0;
out:
tsk_safe_free(node_map);
tsk_safe_free(individual_map);
tsk_safe_free(population_map);
tsk_safe_free(site_map);
tsk_safe_free(mutation_map);
tsk_table_collection_free(&tables);
return ret;
}
/*
 * Check that `self` and `other` agree on the nodes they share.
 *
 * `other_node_mapping[k]` is the ID in `self` of node k of `other`, or
 * TSK_NULL when node k is not shared; `num_shared_nodes` is the number of
 * non-TSK_NULL entries. Copies of both collections are subset to the shared
 * nodes, canonicalised and compared, ignoring top-level metadata, provenance
 * and the reference sequence.
 *
 * Returns 0 when the two subsets are equal, TSK_ERR_UNION_DIFF_HISTORIES when
 * they differ, or the error reported by a failing step.
 */
static int
tsk_check_subset_equality(tsk_table_collection_t *self,
    const tsk_table_collection_t *other, const tsk_id_t *other_node_mapping,
    tsk_size_t num_shared_nodes)
{
    int ret = 0;
    tsk_id_t other_id;
    tsk_id_t num_found = 0;
    tsk_id_t *shared_in_self = NULL;
    tsk_id_t *shared_in_other = NULL;
    tsk_table_collection_t self_subset;
    tsk_table_collection_t other_subset;

    /* Zero both collections up front so the frees at `out` are safe even if
     * we bail out before either copy has been made. */
    tsk_memset(&self_subset, 0, sizeof(self_subset));
    tsk_memset(&other_subset, 0, sizeof(other_subset));

    shared_in_self = tsk_malloc(num_shared_nodes * sizeof(*shared_in_self));
    shared_in_other = tsk_malloc(num_shared_nodes * sizeof(*shared_in_other));
    if (shared_in_self == NULL || shared_in_other == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Collect the shared nodes as two parallel ID lists, one per collection. */
    for (other_id = 0; other_id < (tsk_id_t) other->nodes.num_rows; other_id++) {
        if (other_node_mapping[other_id] == TSK_NULL) {
            continue;
        }
        shared_in_self[num_found] = other_node_mapping[other_id];
        shared_in_other[num_found] = other_id;
        num_found++;
    }

    /* Each step runs only if everything before it returned 0. */
    ret = tsk_table_collection_copy(self, &self_subset, 0);
    if (ret == 0) {
        ret = tsk_table_collection_copy(other, &other_subset, 0);
    }
    if (ret == 0) {
        ret = tsk_table_collection_subset(
            &self_subset, shared_in_self, num_shared_nodes, 0);
    }
    if (ret == 0) {
        ret = tsk_table_collection_subset(
            &other_subset, shared_in_other, num_shared_nodes, 0);
    }
    if (ret == 0) {
        ret = tsk_table_collection_canonicalise(&self_subset, 0);
    }
    if (ret == 0) {
        ret = tsk_table_collection_canonicalise(&other_subset, 0);
    }
    if (ret == 0
        && !tsk_table_collection_equals(&self_subset, &other_subset,
            TSK_CMP_IGNORE_TS_METADATA | TSK_CMP_IGNORE_PROVENANCE
                | TSK_CMP_IGNORE_REFERENCE_SEQUENCE)) {
        ret = tsk_trace_error(TSK_ERR_UNION_DIFF_HISTORIES);
    }
out:
    tsk_table_collection_free(&self_subset);
    tsk_table_collection_free(&other_subset);
    tsk_safe_free(shared_in_other);
    tsk_safe_free(shared_in_self);
    return ret;
}
/*
 * Add to `self`, in place, the parts of `other` that it does not already
 * share with `self`.
 *
 * `other_node_mapping` has one entry per node of `other`: the ID of the
 * equivalent node in `self`, or TSK_NULL if the node is new and must be
 * added. An entry below TSK_NULL or not less than the number of nodes in
 * `self` gives TSK_ERR_UNION_BAD_MAP.
 *
 * Option bits:
 *   TSK_UNION_NO_CHECK_SHARED  skip the check that the shared portions match;
 *   TSK_UNION_NO_ADD_POP       new nodes keep the population IDs they had in
 *                              `other` instead of new population rows being
 *                              added;
 *   TSK_UNION_ALL_EDGES        add every edge of `other`, not only those with
 *                              a new parent or child;
 *   TSK_UNION_ALL_MUTATIONS    add every site and mutation of `other`, not
 *                              only mutations on new nodes.
 *
 * After the rows are appended `self` is sorted, its sites deduplicated, sorted
 * again, indexed, and its mutation parents recomputed.
 *
 * Collections containing migrations are rejected with
 * TSK_ERR_MIGRATIONS_NOT_SUPPORTED.
 *
 * Returns 0 on success or an error code.
 */
int TSK_WARN_UNUSED
tsk_table_collection_union(tsk_table_collection_t *self,
const tsk_table_collection_t *other, const tsk_id_t *other_node_mapping,
tsk_flags_t options)
{
int ret = 0;
tsk_id_t ret_id, k, i, new_parent, new_child;
tsk_size_t num_shared_nodes = 0;
/* Row count before anything is added: the first new individual gets this ID. */
tsk_size_t num_individuals_self = self->individuals.num_rows;
tsk_edge_t edge;
tsk_mutation_t mut;
tsk_site_t site;
tsk_id_t *node_map = NULL;
tsk_id_t *individual_map = NULL;
tsk_id_t *population_map = NULL;
tsk_id_t *site_map = NULL;
bool add_populations = !(options & TSK_UNION_NO_ADD_POP);
bool check_shared_portion = !(options & TSK_UNION_NO_CHECK_SHARED);
bool all_edges = !!(options & TSK_UNION_ALL_EDGES);
bool all_mutations = !!(options & TSK_UNION_ALL_MUTATIONS);
/* Not calling TSK_CHECK_TREES so casting to int is safe */
ret = (int) tsk_table_collection_check_integrity(self, 0);
if (ret != 0) {
goto out;
}
ret = (int) tsk_table_collection_check_integrity(other, 0);
if (ret != 0) {
goto out;
}
/* Validate the mapping and count the shared (non-TSK_NULL) entries. */
for (k = 0; k < (tsk_id_t) other->nodes.num_rows; k++) {
if (other_node_mapping[k] >= (tsk_id_t) self->nodes.num_rows
|| other_node_mapping[k] < TSK_NULL) {
ret = tsk_trace_error(TSK_ERR_UNION_BAD_MAP);
goto out;
}
if (other_node_mapping[k] != TSK_NULL) {
num_shared_nodes++;
}
}
if (check_shared_portion) {
ret = tsk_check_subset_equality(
self, other, other_node_mapping, num_shared_nodes);
if (ret != 0) {
goto out;
}
}
// Maps relating the IDs in other to the new IDs in self.
node_map = tsk_malloc(other->nodes.num_rows * sizeof(*node_map));
individual_map = tsk_malloc(other->individuals.num_rows * sizeof(*individual_map));
population_map = tsk_malloc(other->populations.num_rows * sizeof(*population_map));
site_map = tsk_malloc(other->sites.num_rows * sizeof(*site_map));
if (node_map == NULL || individual_map == NULL || population_map == NULL
|| site_map == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Fill every map with all-ones bytes; entries are tested against TSK_NULL
 * below, so this presumably relies on TSK_NULL being -1. */
tsk_memset(node_map, 0xff, other->nodes.num_rows * sizeof(*node_map));
tsk_memset(
individual_map, 0xff, other->individuals.num_rows * sizeof(*individual_map));
tsk_memset(
population_map, 0xff, other->populations.num_rows * sizeof(*population_map));
tsk_memset(site_map, 0xff, other->sites.num_rows * sizeof(*site_map));
/* We have to map the individuals who are linked to nodes in the intersection first
as otherwise an individual linked to one node in the intersection and one in
`other` would be duplicated. We assume that the individual in `self` takes
priority.
*/
for (k = 0; k < (tsk_id_t) other->nodes.num_rows; k++) {
if (other_node_mapping[k] != TSK_NULL
&& other->nodes.individual[k] != TSK_NULL) {
individual_map[other->nodes.individual[k]]
= self->nodes.individual[other_node_mapping[k]];
}
}
// nodes, individuals, populations
/* Shared nodes take the caller's mapping; every other node is appended along
 * with any individual and population rows it needs. */
for (k = 0; k < (tsk_id_t) other->nodes.num_rows; k++) {
if (other_node_mapping[k] != TSK_NULL) {
node_map[k] = other_node_mapping[k];
} else {
ret = tsk_table_collection_add_and_remap_node(self, other, k, individual_map,
population_map, node_map, add_populations);
if (ret < 0) {
goto out;
}
}
}
/* Now we know the full individual map we can remap the parents of the new
* individuals*/
/* The loop starts at the first parents entry of the first added individual, so
 * the parents of individuals already in `self` are left alone. */
for (k = (tsk_id_t) self->individuals.parents_offset[num_individuals_self];
k < (tsk_id_t) self->individuals.parents_length; k++) {
if (self->individuals.parents[k] != TSK_NULL) {
self->individuals.parents[k] = individual_map[self->individuals.parents[k]];
}
}
// edges
/* Unless all_edges is set, an edge between two shared nodes is skipped. */
for (k = 0; k < (tsk_id_t) other->edges.num_rows; k++) {
tsk_edge_table_get_row_unsafe(&other->edges, k, &edge);
if (all_edges || (other_node_mapping[edge.parent] == TSK_NULL)
|| (other_node_mapping[edge.child] == TSK_NULL)) {
new_parent = node_map[edge.parent];
new_child = node_map[edge.child];
ret_id = tsk_edge_table_add_row(&self->edges, edge.left, edge.right,
new_parent, new_child, edge.metadata, edge.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
}
}
// sites
// first do the "disjoint" (all_mutations) case, where we just add all sites;
// otherwise we want to just add sites for new mutations
if (all_mutations) {
for (k = 0; k < (tsk_id_t) other->sites.num_rows; k++) {
tsk_site_table_get_row_unsafe(&other->sites, k, &site);
ret_id = tsk_site_table_add_row(&self->sites, site.position,
site.ancestral_state, site.ancestral_state_length, site.metadata,
site.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
site_map[site.id] = ret_id;
}
}
// mutations (and maybe sites)
/* `i` walks the mutation table once, advancing only while mutation i belongs
 * to the current site. NOTE(review): this visits every mutation only if the
 * mutations of `other` are grouped by site in site-table order; the integrity
 * checks above are called with no options — confirm that ordering is
 * guaranteed by callers. */
i = 0;
for (k = 0; k < (tsk_id_t) other->sites.num_rows; k++) {
tsk_site_table_get_row_unsafe(&other->sites, k, &site);
while ((i < (tsk_id_t) other->mutations.num_rows)
&& (other->mutations.site[i] == site.id)) {
tsk_mutation_table_get_row_unsafe(&other->mutations, i, &mut);
if (all_mutations || (other_node_mapping[mut.node] == TSK_NULL)) {
/* Add the site lazily, the first time one of its mutations is kept. */
if (site_map[site.id] == TSK_NULL) {
ret_id = tsk_site_table_add_row(&self->sites, site.position,
site.ancestral_state, site.ancestral_state_length, site.metadata,
site.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
site_map[site.id] = ret_id;
}
// the parents will be recomputed later
new_parent = TSK_NULL;
ret_id = tsk_mutation_table_add_row(&self->mutations, site_map[site.id],
node_map[mut.node], new_parent, mut.time, mut.derived_state,
mut.derived_state_length, mut.metadata, mut.metadata_length);
if (ret_id < 0) {
ret = (int) ret_id;
goto out;
}
}
i++;
}
}
/* TODO: Union of the Migrations Table. The only hindrance to performing the
* union operation on Migrations Tables is that tsk_table_collection_sort
* does not sort migrations by time, and instead throws an error. */
/* NOTE(review): this check runs after rows have already been appended to
 * `self`, so on this error `self` is returned modified. */
if (self->migrations.num_rows != 0 || other->migrations.num_rows != 0) {
ret = tsk_trace_error(TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
goto out;
}
// sorting, deduplicating, and computing parents
ret = tsk_table_collection_sort(self, 0, 0);
if (ret < 0) {
goto out;
}
ret = tsk_table_collection_deduplicate_sites(self, 0);
if (ret < 0) {
goto out;
}
// need to sort again since after deduplicating sites, mutations
// may not be sorted by time within sites
ret = tsk_table_collection_sort(self, 0, 0);
if (ret < 0) {
goto out;
}
ret = tsk_table_collection_build_index(self, 0);
if (ret < 0) {
goto out;
}
ret = tsk_table_collection_compute_mutation_parents(self, 0);
if (ret < 0) {
goto out;
}
out:
tsk_safe_free(node_map);
tsk_safe_free(individual_map);
tsk_safe_free(population_map);
tsk_safe_free(site_map);
return ret;
}
/*
 * qsort comparator for tsk_edge_t: orders by parent, then child, then left
 * coordinate. Returns -1, 0 or 1.
 */
static int
cmp_edge_cl(const void *a, const void *b)
{
    const tsk_edge_t *lhs = (const tsk_edge_t *) a;
    const tsk_edge_t *rhs = (const tsk_edge_t *) b;

    if (lhs->parent != rhs->parent) {
        return lhs->parent < rhs->parent ? -1 : 1;
    }
    if (lhs->child != rhs->child) {
        return lhs->child < rhs->child ? -1 : 1;
    }
    /* Keep the subtraction form for the floating-point key so that unordered
     * values still compare as equal. */
    return (lhs->left > rhs->left) - (lhs->left < rhs->left);
}
/* Squash the edges in the specified array in place: consecutive edges with
 * the same parent and child whose intervals abut (right of one equals left of
 * the next) are merged into a single edge.
 *
 * The array is first sorted with cmp_edge_cl, so the output edges are ordered
 * by (parent, child, left). Only the left, right, parent and child members of
 * the output edges are written; other members keep whatever the sorted array
 * held at that position.
 *
 * On success the first *num_output_edges entries of `edges` hold the result
 * and 0 is returned. With fewer than two edges the input is returned
 * untouched. Errors (in which case *num_output_edges is not set and the array
 * may have been reordered and partly overwritten):
 *   TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA  an edge has metadata;
 *   TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN  two edges with the same parent
 *                                             and child overlap.
 */
int TSK_WARN_UNUSED
tsk_squash_edges(tsk_edge_t *edges, tsk_size_t num_edges, tsk_size_t *num_output_edges)
{
    int ret = 0;
    tsk_size_t j, k, l;

    if (num_edges < 2) {
        *num_output_edges = num_edges;
        return ret;
    }

    qsort(edges, (size_t) num_edges, sizeof(tsk_edge_t), cmp_edge_cl);

    /* j is the first edge of the run being merged, l the next output slot. */
    j = 0;
    l = 0;
    for (k = 1; k < num_edges; k++) {
        if (edges[k - 1].metadata_length > 0) {
            ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);
            goto out;
        }
        /* Check for overlapping edges. */
        if (edges[k - 1].parent == edges[k].parent
            && edges[k - 1].child == edges[k].child
            && edges[k - 1].right > edges[k].left) {
            ret = tsk_trace_error(TSK_ERR_BAD_EDGES_CONTRADICTORY_CHILDREN);
            goto out;
        }
        /* Add squashed edge. */
        if (edges[k - 1].parent != edges[k].parent || edges[k - 1].right != edges[k].left
            || edges[j].child != edges[k].child) {
            edges[l].left = edges[j].left;
            edges[l].right = edges[k - 1].right;
            edges[l].parent = edges[j].parent;
            edges[l].child = edges[j].child;
            j = k;
            l++;
        }
    }
    /* The loop only tested edges[0 .. num_edges - 2] for metadata; apply the
     * same check to the final edge (k == num_edges here) so it cannot slip
     * through unchecked. */
    if (edges[k - 1].metadata_length > 0) {
        ret = tsk_trace_error(TSK_ERR_CANT_PROCESS_EDGES_WITH_METADATA);
        goto out;
    }
    /* Flush the last run. */
    edges[l].left = edges[j].left;
    edges[l].right = edges[k - 1].right;
    edges[l].parent = edges[j].parent;
    edges[l].child = edges[j].child;
    *num_output_edges = (tsk_size_t) l + 1;
out:
    return ret;
}
================================================
FILE: c/tskit/tables.h
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
* Copyright (c) 2017-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* @file tables.h
* @brief Tskit Tables API.
*/
#ifndef TSK_TABLES_H
#define TSK_TABLES_H
#ifdef __cplusplus
extern "C" {
#endif
#include
#include
#include
#include
#include
/****************************************************************************/
/* Definitions for the basic objects */
/****************************************************************************/
/**
@brief A single individual defined by a row in the individual table.
@rst
See the :ref:`data model <sec_data_model_definitions>` section for the definition of
an individual and its properties.
@endrst
*/
typedef struct {
/** @brief Non-negative ID value corresponding to table row. */
tsk_id_t id;
/** @brief Bitwise flags. */
tsk_flags_t flags;
/** @brief Spatial location. The number of dimensions is defined by
* ``location_length``. */
const double *location;
/** @brief Number of spatial dimensions. */
tsk_size_t location_length;
/** @brief IDs of the parents. The number of parents is given by
* ``parents_length``. */
/* Unlike the other array members this pointer is not const:
 * tsk_table_collection_subset rewrites parent IDs through it. */
tsk_id_t *parents;
/** @brief Number of parents. */
tsk_size_t parents_length;
/** @brief Metadata. */
const char *metadata;
/** @brief Size of the metadata in bytes. */
tsk_size_t metadata_length;
/** @brief An array of the nodes associated with this individual. */
const tsk_id_t *nodes;
/** @brief The number of nodes associated with this individual. */
tsk_size_t nodes_length;
} tsk_individual_t;
/**
@brief A single node defined by a row in the node table.
@rst
See the :ref:`data model <sec_data_model_definitions>` section for the definition of
a node and its properties.
@endrst
*/
typedef struct {
/** @brief Non-negative ID value corresponding to table row. */
tsk_id_t id;
/** @brief Bitwise flags. */
tsk_flags_t flags;
/** @brief Time. */
double time;
/** @brief Population ID, or TSK_NULL if the node has no population. */
tsk_id_t population;
/** @brief Individual ID, or TSK_NULL if the node has no individual. */
tsk_id_t individual;
/** @brief Metadata. */
const char *metadata;
/** @brief Size of the metadata in bytes. */
tsk_size_t metadata_length;
} tsk_node_t;
/**
@brief A single edge defined by a row in the edge table.
@rst
See the :ref:`data model <sec_data_model_definitions>` section for the definition of
an edge and its properties.
@endrst
*/
typedef struct {
/** @brief Non-negative ID value corresponding to table row. */
tsk_id_t id;
/** @brief Parent node ID. */
tsk_id_t parent;
/** @brief Child node ID. */
tsk_id_t child;
/** @brief Left coordinate. */
double left;
/** @brief Right coordinate. */
double right;
/** @brief Metadata. */
const char *metadata;
/** @brief Size of the metadata in bytes. */
tsk_size_t metadata_length;
} tsk_edge_t;
/**
@brief A single mutation defined by a row in the mutation table.
@rst
See the :ref:`data model <sec_data_model_definitions>` section for the definition of
a mutation and its properties.
@endrst
*/
typedef struct {
/** @brief Non-negative ID value corresponding to table row. */
tsk_id_t id;
/** @brief Site ID. */
tsk_id_t site;
/** @brief Node ID. */
tsk_id_t node;
/** @brief Parent mutation ID, or TSK_NULL if the mutation has no parent. */
tsk_id_t parent;
/** @brief Mutation time. */
double time;
/** @brief Derived state. */
const char *derived_state;
/** @brief Size of the derived state in bytes. */
tsk_size_t derived_state_length;
/** @brief Metadata. */
const char *metadata;
/** @brief Size of the metadata in bytes. */
tsk_size_t metadata_length;
/** @brief The ID of the edge that this mutation lies on, or TSK_NULL
if there is no corresponding edge. */
tsk_id_t edge;
/** @brief Inherited state. */
const char *inherited_state;
/** @brief Size of the inherited state in bytes. */
tsk_size_t inherited_state_length;
} tsk_mutation_t;
/**
@brief A single site defined by a row in the site table.
@rst
See the :ref:`data model <sec_data_model_definitions>` section for the definition of
a site and its properties.
@endrst
*/
typedef struct {
/** @brief Non-negative ID value corresponding to table row. */
tsk_id_t id;
/** @brief Position coordinate. */
double position;
/** @brief Ancestral state. */
const char *ancestral_state;
/** @brief Ancestral state length in bytes. */
tsk_size_t ancestral_state_length;
/** @brief Metadata. */
const char *metadata;
/** @brief Metadata length in bytes. */
tsk_size_t metadata_length;
/** @brief An array of this site's mutations. */
const tsk_mutation_t *mutations;
/** @brief The number of mutations at this site. */
tsk_size_t mutations_length;
} tsk_site_t;
/**
@brief A single migration defined by a row in the migration table.
@rst
See the :ref:`data model <sec_data_model_definitions>` section for the definition of
a migration and its properties.
@endrst
*/
typedef struct {
/** @brief Non-negative ID value corresponding to table row. */
tsk_id_t id;
/** @brief Source population ID. */
tsk_id_t source;
/** @brief Destination population ID. */
tsk_id_t dest;
/** @brief Node ID. */
tsk_id_t node;
/** @brief Left coordinate. */
double left;
/** @brief Right coordinate. */
double right;
/** @brief Time. */
double time;
/** @brief Metadata. */
const char *metadata;
/** @brief Size of the metadata in bytes. */
tsk_size_t metadata_length;
} tsk_migration_t;
/**
@brief A single population defined by a row in the population table.
@rst
See the :ref:`data model <sec_data_model_definitions>` section for the definition of
a population and its properties.
@endrst
*/
typedef struct {
/** @brief Non-negative ID value corresponding to table row. */
tsk_id_t id;
/** @brief Metadata. */
const char *metadata;
/** @brief Metadata length in bytes. */
tsk_size_t metadata_length;
} tsk_population_t;
/**
@brief A single provenance defined by a row in the provenance table.
@rst
See the :ref:`data model <sec_data_model_definitions>` section for the definition of
a provenance object and its properties. See the :ref:`sec_provenance` section
for more information on how provenance records should be structured.
@endrst
*/
typedef struct {
/** @brief Non-negative ID value corresponding to table row. */
tsk_id_t id;
/** @brief The timestamp. */
const char *timestamp;
/** @brief The timestamp length in bytes. */
tsk_size_t timestamp_length;
/** @brief The record. */
const char *record;
/** @brief The record length in bytes. */
tsk_size_t record_length;
} tsk_provenance_t;
/****************************************************************************/
/* Table definitions */
/****************************************************************************/
/**
@brief The individual table.
@rst
See the individual :ref:`table definition <sec_individual_table_definition>` for
details of the columns in this table.
@endrst
*/
typedef struct {
/** @brief The number of rows in this table. */
tsk_size_t num_rows;
/* NOTE(review): the max_* and max_*_increment members throughout this struct
 * are not described in this header; presumably they track the allocated
 * capacity of a column and its growth step — confirm in tables.c. */
tsk_size_t max_rows;
tsk_size_t max_rows_increment;
/** @brief The total length of the location column. */
tsk_size_t location_length;
tsk_size_t max_location_length;
tsk_size_t max_location_length_increment;
/** @brief The total length of the parents column. */
tsk_size_t parents_length;
tsk_size_t max_parents_length;
tsk_size_t max_parents_length_increment;
/** @brief The total length of the metadata column. */
tsk_size_t metadata_length;
tsk_size_t max_metadata_length;
tsk_size_t max_metadata_length_increment;
/* Length of metadata_schema (presumably in bytes — confirm). */
tsk_size_t metadata_schema_length;
/** @brief The flags column. */
tsk_flags_t *flags;
/** @brief The location column. */
double *location;
/** @brief The location_offset column. */
tsk_size_t *location_offset;
/** @brief The parents column. */
tsk_id_t *parents;
/** @brief The parents_offset column. */
/* parents_offset[j] is the index in `parents` at which row j's parents start;
 * tsk_table_collection_union uses it this way. */
tsk_size_t *parents_offset;
/** @brief The metadata column. */
char *metadata;
/** @brief The metadata_offset column. */
tsk_size_t *metadata_offset;
/** @brief The metadata schema */
char *metadata_schema;
} tsk_individual_table_t;
/**
@brief The node table.
@rst
See the node :ref:`table definition <sec_node_table_definition>` for
details of the columns in this table.
@endrst
*/
typedef struct {
/** @brief The number of rows in this table. */
tsk_size_t num_rows;
/* NOTE(review): the max_* and max_*_increment members are not described in
 * this header; presumably they track the allocated capacity of a column and
 * its growth step — confirm in tables.c. */
tsk_size_t max_rows;
tsk_size_t max_rows_increment;
/** @brief The total length of the metadata column. */
tsk_size_t metadata_length;
tsk_size_t max_metadata_length;
tsk_size_t max_metadata_length_increment;
/* Length of metadata_schema (presumably in bytes — confirm). */
tsk_size_t metadata_schema_length;
/** @brief The flags column. */
tsk_flags_t *flags;
/** @brief The time column. */
double *time;
/** @brief The population column. */
tsk_id_t *population;
/** @brief The individual column. */
tsk_id_t *individual;
/** @brief The metadata column. */
char *metadata;
/** @brief The metadata_offset column. */
tsk_size_t *metadata_offset;
/** @brief The metadata schema */
char *metadata_schema;
} tsk_node_table_t;
/**
@brief The edge table.
@rst
See the edge :ref:`table definition <sec_edge_table_definition>` for
details of the columns in this table.
@endrst
*/
typedef struct {
/** @brief The number of rows in this table. */
tsk_size_t num_rows;
/* NOTE(review): the max_* and max_*_increment members are not described in
 * this header; presumably they track the allocated capacity of a column and
 * its growth step — confirm in tables.c. */
tsk_size_t max_rows;
tsk_size_t max_rows_increment;
/** @brief The total length of the metadata column. */
tsk_size_t metadata_length;
tsk_size_t max_metadata_length;
tsk_size_t max_metadata_length_increment;
/* Length of metadata_schema (presumably in bytes — confirm). */
tsk_size_t metadata_schema_length;
/** @brief The left column. */
double *left;
/** @brief The right column. */
double *right;
/** @brief The parent column. */
tsk_id_t *parent;
/** @brief The child column. */
tsk_id_t *child;
/** @brief The metadata column. */
char *metadata;
/** @brief The metadata_offset column. */
tsk_size_t *metadata_offset;
/** @brief The metadata schema */
char *metadata_schema;
/** @brief Flags for this table */
tsk_flags_t options;
} tsk_edge_table_t;
/**
@brief The migration table.
@rst
See the migration :ref:`table definition <sec_migration_table_definition>` for
details of the columns in this table.
@endrst
*/
typedef struct {
/** @brief The number of rows in this table. */
tsk_size_t num_rows;
/* NOTE(review): the max_* and max_*_increment members are not described in
 * this header; presumably they track the allocated capacity of a column and
 * its growth step — confirm in tables.c. */
tsk_size_t max_rows;
tsk_size_t max_rows_increment;
/** @brief The total length of the metadata column. */
tsk_size_t metadata_length;
tsk_size_t max_metadata_length;
tsk_size_t max_metadata_length_increment;
/* Length of metadata_schema (presumably in bytes — confirm). */
tsk_size_t metadata_schema_length;
/** @brief The source column. */
tsk_id_t *source;
/** @brief The dest column. */
tsk_id_t *dest;
/** @brief The node column. */
tsk_id_t *node;
/** @brief The left column. */
double *left;
/** @brief The right column. */
double *right;
/** @brief The time column. */
double *time;
/** @brief The metadata column. */
char *metadata;
/** @brief The metadata_offset column. */
tsk_size_t *metadata_offset;
/** @brief The metadata schema */
char *metadata_schema;
} tsk_migration_table_t;
/**
@brief The site table.
@rst
See the site :ref:`table definition <sec_site_table_definition>` for
details of the columns in this table.
@endrst
*/
typedef struct {
/** @brief The number of rows in this table. */
tsk_size_t num_rows;
/* NOTE(review): the max_* and max_*_increment members are not described in
 * this header; presumably they track the allocated capacity of a column and
 * its growth step — confirm in tables.c. */
tsk_size_t max_rows;
tsk_size_t max_rows_increment;
/* Presumably the total length of the ancestral_state column, by analogy with
 * metadata_length below — confirm. */
tsk_size_t ancestral_state_length;
tsk_size_t max_ancestral_state_length;
tsk_size_t max_ancestral_state_length_increment;
/** @brief The total length of the metadata column. */
tsk_size_t metadata_length;
tsk_size_t max_metadata_length;
tsk_size_t max_metadata_length_increment;
/* Length of metadata_schema (presumably in bytes — confirm). */
tsk_size_t metadata_schema_length;
/** @brief The position column. */
double *position;
/** @brief The ancestral_state column. */
char *ancestral_state;
/** @brief The ancestral_state_offset column. */
tsk_size_t *ancestral_state_offset;
/** @brief The metadata column. */
char *metadata;
/** @brief The metadata_offset column. */
tsk_size_t *metadata_offset;
/** @brief The metadata schema */
char *metadata_schema;
} tsk_site_table_t;
/**
@brief The mutation table.
@rst
See the mutation :ref:`table definition <sec_mutation_table_definition>` for
details of the columns in this table.
@endrst
*/
typedef struct {
/** @brief The number of rows in this table. */
tsk_size_t num_rows;
/* NOTE(review): the max_* and max_*_increment members are not described in
 * this header; presumably they track the allocated capacity of a column and
 * its growth step — confirm in tables.c. */
tsk_size_t max_rows;
tsk_size_t max_rows_increment;
/* Presumably the total length of the derived_state column, by analogy with
 * metadata_length below — confirm. */
tsk_size_t derived_state_length;
tsk_size_t max_derived_state_length;
tsk_size_t max_derived_state_length_increment;
/** @brief The total length of the metadata column. */
tsk_size_t metadata_length;
tsk_size_t max_metadata_length;
tsk_size_t max_metadata_length_increment;
/* Length of metadata_schema (presumably in bytes — confirm). */
tsk_size_t metadata_schema_length;
/** @brief The node column. */
tsk_id_t *node;
/** @brief The site column. */
tsk_id_t *site;
/** @brief The parent column. */
tsk_id_t *parent;
/** @brief The time column. */
double *time;
/** @brief The derived_state column. */
char *derived_state;
/** @brief The derived_state_offset column. */
tsk_size_t *derived_state_offset;
/** @brief The metadata column. */
char *metadata;
/** @brief The metadata_offset column. */
tsk_size_t *metadata_offset;
/** @brief The metadata schema */
char *metadata_schema;
} tsk_mutation_table_t;
/**
@brief The population table.
@rst
See the population :ref:`table definition <sec_population_table_definition>` for
details of the columns in this table.
@endrst
*/
typedef struct {
/** @brief The number of rows in this table. */
tsk_size_t num_rows;
/* NOTE(review): the max_* and max_*_increment members are not described in
 * this header; presumably they track the allocated capacity of a column and
 * its growth step — confirm in tables.c. */
tsk_size_t max_rows;
tsk_size_t max_rows_increment;
/** @brief The total length of the metadata column. */
tsk_size_t metadata_length;
tsk_size_t max_metadata_length;
tsk_size_t max_metadata_length_increment;
/* Length of metadata_schema (presumably in bytes — confirm). */
tsk_size_t metadata_schema_length;
/** @brief The metadata column. */
char *metadata;
/** @brief The metadata_offset column. */
tsk_size_t *metadata_offset;
/** @brief The metadata schema */
char *metadata_schema;
} tsk_population_table_t;
/**
@brief The provenance table.
@rst
See the provenance :ref:`table definition <sec_provenance_table_definition>` for
details of the columns in this table.
@endrst
*/
typedef struct {
/** @brief The number of rows in this table. */
tsk_size_t num_rows;
/* Row-capacity bookkeeping. NOTE(review): presumably max_rows is the number
 * of rows currently allocated and max_rows_increment the pre-allocation
 * step (zero selecting the default doubling strategy) -- confirm against
 * tables.c. */
tsk_size_t max_rows;
tsk_size_t max_rows_increment;
/** @brief The total length of the timestamp column. */
tsk_size_t timestamp_length;
/* Capacity bookkeeping for the timestamp column (see the note on max_rows). */
tsk_size_t max_timestamp_length;
tsk_size_t max_timestamp_length_increment;
/** @brief The total length of the record column. */
tsk_size_t record_length;
/* Capacity bookkeeping for the record column (see the note on max_rows). */
tsk_size_t max_record_length;
tsk_size_t max_record_length_increment;
/** @brief The timestamp column. */
char *timestamp;
/** @brief The timestamp_offset column. */
tsk_size_t *timestamp_offset;
/** @brief The record column. */
char *record;
/** @brief The record_offset column. */
tsk_size_t *record_offset;
} tsk_provenance_table_t;
/* The reference sequence, embedded in tsk_table_collection_t as its
 * reference_sequence member. Each char buffer (data, url, metadata,
 * metadata_schema) is paired with a tsk_size_t field holding its length.
 * NOTE(review): the buffers are presumably owned by this struct and not
 * necessarily NUL-terminated -- confirm against tables.c. */
typedef struct {
char *data;
tsk_size_t data_length;
char *url;
tsk_size_t url_length;
char *metadata;
tsk_size_t metadata_length;
char *metadata_schema;
tsk_size_t metadata_schema_length;
} tsk_reference_sequence_t;
/**
@brief A collection of tables defining the data for a tree sequence.
*/
typedef struct {
/** @brief The sequence length defining the tree sequence's coordinate space */
double sequence_length;
/* The file UUID. It is only carried over by tsk_table_collection_copy when
 * the TSK_COPY_FILE_UUID option is given. */
char *file_uuid;
/** @brief The units of the time dimension */
char *time_units;
/* Length of the time_units buffer. */
tsk_size_t time_units_length;
/** @brief The tree-sequence metadata */
char *metadata;
/* Length of the metadata buffer. */
tsk_size_t metadata_length;
/** @brief The metadata schema */
char *metadata_schema;
/* Length of the metadata_schema buffer. */
tsk_size_t metadata_schema_length;
/* The reference sequence. Loading it can be skipped with
 * TSK_LOAD_SKIP_REFERENCE_SEQUENCE and comparing it with
 * TSK_CMP_IGNORE_REFERENCE_SEQUENCE. */
tsk_reference_sequence_t reference_sequence;
/** @brief The individual table */
tsk_individual_table_t individuals;
/** @brief The node table */
tsk_node_table_t nodes;
/** @brief The edge table */
tsk_edge_table_t edges;
/** @brief The migration table */
tsk_migration_table_t migrations;
/** @brief The site table */
tsk_site_table_t sites;
/** @brief The mutation table */
tsk_mutation_table_t mutations;
/** @brief The population table */
tsk_population_table_t populations;
/** @brief The provenance table */
tsk_provenance_table_t provenances;
/* The table indexes: two arrays of edge IDs plus an edge count. Their
 * existence and the validity of the edge references they contain is what
 * TSK_CHECK_INDEXES verifies. NOTE(review): presumably num_edges is the
 * length of both arrays and the arrays give the order in which edges are
 * inserted/removed when moving along the sequence -- confirm. */
struct {
tsk_id_t *edge_insertion_order;
tsk_id_t *edge_removal_order;
tsk_size_t num_edges;
} indexes;
} tsk_table_collection_t;
/**
@brief A bookmark recording the position of all the tables in a table collection.
*/
typedef struct {
/* One tsk_size_t position per table: the eight members below mirror, by
 * name, the eight table members of tsk_table_collection_t. */
/** @brief The position in the individual table. */
tsk_size_t individuals;
/** @brief The position in the node table. */
tsk_size_t nodes;
/** @brief The position in the edge table. */
tsk_size_t edges;
/** @brief The position in the migration table. */
tsk_size_t migrations;
/** @brief The position in the site table. */
tsk_size_t sites;
/** @brief The position in the mutation table. */
tsk_size_t mutations;
/** @brief The position in the population table. */
tsk_size_t populations;
/** @brief The position in the provenance table. */
tsk_size_t provenances;
} tsk_bookmark_t;
/**
@brief Low-level table sorting method.
*/
typedef struct _tsk_table_sorter_t {
/** @brief The input tables that are being sorted. */
tsk_table_collection_t *tables;
/* Each callback below receives the sorter itself as ``self`` and returns an
 * int. sort_edges additionally receives ``start``; NOTE(review): presumably
 * the first edge row to be sorted -- confirm against tables.c. */
/** @brief The edge sorting function. If set to NULL, edges are not sorted. */
int (*sort_edges)(struct _tsk_table_sorter_t *self, tsk_size_t start);
/** @brief The mutation sorting function. */
int (*sort_mutations)(struct _tsk_table_sorter_t *self);
/** @brief The individual sorting function. */
int (*sort_individuals)(struct _tsk_table_sorter_t *self);
/** @brief An opaque pointer for use by client code */
void *user_data;
/** @brief Mapping from input site IDs to output site IDs */
tsk_id_t *site_id_map;
} tsk_table_sorter_t;
/* Structs for IBD finding.
* TODO: document properly
* */
/* Note for tskit developers: it's perhaps a bit confusing/pointless to
* have the tsk_identity_segment_t struct as well as the internal tsk_segment_t
* struct (which is identical). However, we may want to implement either
* segment type differently in future, and since the tsk_identity_segment_t
* is part of the public API we want to allow the freedom for the different
* structures to evolve over time */
/* A single identity segment: a pair of coordinates (left, right) and a node
 * ID. The next pointer chains segments into a singly linked list, whose ends
 * are tracked by tsk_identity_segment_list_t. */
typedef struct _tsk_identity_segment_t {
double left;
double right;
struct _tsk_identity_segment_t *next;
tsk_id_t node;
} tsk_identity_segment_t;
/* A linked list of tsk_identity_segment_t values, tracked by its head and
 * tail pointers. NOTE(review): presumably num_segments is the number of
 * segments in the list and total_span the sum of their (right - left)
 * lengths -- confirm against tables.c. */
typedef struct {
tsk_size_t num_segments;
double total_span;
tsk_identity_segment_t *head;
tsk_identity_segment_t *tail;
} tsk_identity_segment_list_t;
/* Result container for IBD finding. NOTE(review): presumably pair_map maps
 * node pairs to their segment lists, heap is the allocator backing those
 * lists, num_segments/total_span are totals over all pairs, and the two
 * bool flags select how much detail is retained -- confirm against
 * tables.c before relying on any of this. */
typedef struct {
tsk_size_t num_nodes;
tsk_avl_tree_int_t pair_map;
tsk_size_t num_segments;
double total_span;
tsk_blkalloc_t heap;
bool store_segments;
bool store_pairs;
} tsk_identity_segments_t;
/* Diff iterator. */
/* A node of a doubly linked list of edges: holds one tsk_edge_t by value
 * together with pointers to the next and previous nodes. */
typedef struct _tsk_edge_list_node_t {
tsk_edge_t edge;
struct _tsk_edge_list_node_t *next;
struct _tsk_edge_list_node_t *prev;
} tsk_edge_list_node_t;
/* A list of tsk_edge_list_node_t values, tracked by its head and tail
 * pointers. */
typedef struct {
tsk_edge_list_node_t *head;
tsk_edge_list_node_t *tail;
} tsk_edge_list_t;
/****************************************************************************/
/* Common function options */
/****************************************************************************/
/**
@defgroup API_FLAGS_SIMPLIFY_GROUP :c:func:`tsk_table_collection_simplify` and
:c:func:`tsk_treeseq_simplify` specific flags.
@{
*/
/** Remove sites from the output if there are no mutations that reference them. */
#define TSK_SIMPLIFY_FILTER_SITES (1 << 0)
/** Remove populations from the output if there are no nodes or migrations that
reference them. */
#define TSK_SIMPLIFY_FILTER_POPULATIONS (1 << 1)
/** Remove individuals from the output if there are no nodes that reference them. */
#define TSK_SIMPLIFY_FILTER_INDIVIDUALS (1 << 2)
/** Do not remove nodes from the output if there are no edges that reference
them and do not reorder nodes so that the samples are nodes 0 to num_samples - 1.
Note that this flag is negated compared to other filtering options because
the default behaviour is to filter unreferenced nodes and reorder to put samples
first.
*/
#define TSK_SIMPLIFY_NO_FILTER_NODES (1 << 7)
/**
Do not update the sample status of nodes as a result of simplification.
*/
#define TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS (1 << 8)
/**
Reduce the topological information in the tables to the minimum necessary to
represent the trees that contain sites. If there are zero sites this will
result in zero output edges. When the number of sites is greater than zero,
every tree in the output tree sequence will contain at least one site.
For a given site, the topology of the tree containing that site will be
identical (up to node ID remapping) to the topology of the corresponding tree
in the input.
*/
#define TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY (1 << 3)
/**
By default simplify removes unary nodes (i.e., nodes with exactly one child)
along the path from samples to root. If this option is specified such unary
nodes will be preserved in the output.
*/
#define TSK_SIMPLIFY_KEEP_UNARY (1 << 4)
/**
By default simplify removes all topology ancestral to the MRCAs of the samples.
This option inserts edges from these MRCAs back to the roots of the input
trees.
*/
#define TSK_SIMPLIFY_KEEP_INPUT_ROOTS (1 << 5)
/**
@rst
This acts like :c:macro:`TSK_SIMPLIFY_KEEP_UNARY` (and is mutually exclusive with that
flag). It keeps unary nodes, but only if the unary node is referenced from an individual.
@endrst
*/
#define TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS (1 << 6)
/** @} */
/**
@defgroup API_FLAGS_SUBSET_GROUP :c:func:`tsk_table_collection_subset` specific flags.
@{
*/
/** If this flag is provided, the population table will not be changed in any way. */
#define TSK_SUBSET_NO_CHANGE_POPULATIONS (1 << 0)
/**
@rst
If this flag is provided, then unreferenced sites, individuals, and populations
will not be removed. If so, the site and individual tables will not be changed,
and (unless :c:macro:`TSK_SUBSET_NO_CHANGE_POPULATIONS` is also provided) unreferenced
populations will be placed last, in their original order.
@endrst
*/
#define TSK_SUBSET_KEEP_UNREFERENCED (1 << 1)
/** @} */
/**
@defgroup API_FLAGS_CHECK_INTEGRITY_GROUP :c:func:`tsk_table_collection_check_integrity`
specific flags.
@{
*/
/** Check edge ordering constraints for a tree sequence. */
#define TSK_CHECK_EDGE_ORDERING (1 << 0)
/** Check that sites are in non-decreasing position order. */
#define TSK_CHECK_SITE_ORDERING (1 << 1)
/** Check for any duplicate site positions. */
#define TSK_CHECK_SITE_DUPLICATES (1 << 2)
/**
Check constraints on the ordering of mutations. Any non-null
mutation parents and known times are checked for ordering
constraints.
*/
#define TSK_CHECK_MUTATION_ORDERING (1 << 3)
/** Check individual parents are before children, where specified. */
#define TSK_CHECK_INDIVIDUAL_ORDERING (1 << 4)
/** Check migrations are ordered by time. */
#define TSK_CHECK_MIGRATION_ORDERING (1 << 5)
/** Check that the table indexes exist, and contain valid edge references. */
#define TSK_CHECK_INDEXES (1 << 6)
/**
All checks needed to define a valid tree sequence. Note that
this implies all of the above checks.
*/
#define TSK_CHECK_TREES (1 << 7)
/**
Check mutation parents are consistent with topology.
Implies TSK_CHECK_TREES.
*/
#define TSK_CHECK_MUTATION_PARENTS (1 << 8)
/* Leave room for more positive check flags */
/**
Do not check integrity of references to populations. This
can be safely combined with the other checks.
*/
#define TSK_NO_CHECK_POPULATION_REFS (1 << 12)
/** @} */
/**
@defgroup API_FLAGS_LOAD_INIT_GROUP Flags used by load and init methods.
@{
*/
/* These flags are for table collection load or init, or used as
 * flags on table collection or individual tables.
 * As flags are passed through from load to init they share a namespace. */
/** Skip reading tables, and only load top-level information. */
#define TSK_LOAD_SKIP_TABLES (1 << 0)
/** Do not load reference sequence. */
#define TSK_LOAD_SKIP_REFERENCE_SEQUENCE (1 << 1)
/**
@rst
Do not allocate space to store metadata in this table. Operations
attempting to add non-empty metadata to the table will fail
with error TSK_ERR_METADATA_DISABLED.
@endrst
*/
#define TSK_TABLE_NO_METADATA (1 << 2)
/**
@rst
Do not allocate space to store metadata in the edge table. Operations
attempting to add non-empty metadata to the edge table will fail
with error TSK_ERR_METADATA_DISABLED.
@endrst
*/
#define TSK_TC_NO_EDGE_METADATA (1 << 3)
/** @} */
/* Flags for dump tables */
/* We may not want to document this flag, but it's useful for testing
 * so we put it high up in the bit space, below the common options */
#define TSK_DUMP_FORCE_OFFSET_64 (1 << 27)
/**
@defgroup API_FLAGS_COPY_GROUP Flags used by :c:func:`tsk_table_collection_copy`.
@{
*/
/** Copy the file uuid, by default this is not copied. */
#define TSK_COPY_FILE_UUID (1 << 0)
/** @} */
/**
@defgroup API_FLAGS_UNION_GROUP Flags used by :c:func:`tsk_table_collection_union`.
@{
*/
/**
By default, union checks that the portion of shared history between
``self`` and ``other``, as implied by ``other_node_mapping``, are indeed
equivalent. It does so by subsetting both ``self`` and ``other`` on the
equivalent nodes specified in ``other_node_mapping``, and then checking for
equality of the subsets. If this option is specified, this check is skipped.
*/
#define TSK_UNION_NO_CHECK_SHARED (1 << 0)
/**
By default, all nodes new to ``self`` are assigned new populations. If this
option is specified, nodes that are added to ``self`` will retain the
population IDs they have in ``other``.
*/
#define TSK_UNION_NO_ADD_POP (1 << 1)
/**
By default, union only adds edges adjacent to a newly added node;
this option adds all edges.
*/
#define TSK_UNION_ALL_EDGES (1 << 2)
/**
By default, union only adds mutations on newly added edges, and
sites for those mutations; this option adds all mutations and all sites.
*/
#define TSK_UNION_ALL_MUTATIONS (1 << 3)
/** @} */
/**
@defgroup API_FLAGS_CMP_GROUP Flags used by :c:func:`tsk_table_collection_equals`.
@{
*/
/**
Do not include the top-level tree sequence metadata and metadata schemas
in the comparison.
*/
#define TSK_CMP_IGNORE_TS_METADATA (1 << 0)
/** Do not include the provenance table in comparison. */
#define TSK_CMP_IGNORE_PROVENANCE (1 << 1)
/**
@rst
Do not include metadata when comparing the table collections.
This includes both the top-level tree sequence metadata as well as the
metadata for each of the tables (i.e., :c:macro:`TSK_CMP_IGNORE_TS_METADATA` is implied).
All metadata schemas are also ignored.
@endrst
*/
#define TSK_CMP_IGNORE_METADATA (1 << 2)
/**
@rst
Do not include the timestamp information when comparing the provenance
tables. This has no effect if :c:macro:`TSK_CMP_IGNORE_PROVENANCE` is specified.
@endrst
*/
#define TSK_CMP_IGNORE_TIMESTAMPS (1 << 3)
/**
Do not include any tables in the comparison, thus comparing only the
top-level information of the table collections being compared.
*/
#define TSK_CMP_IGNORE_TABLES (1 << 4)
/** Do not include the reference sequence in the comparison. */
#define TSK_CMP_IGNORE_REFERENCE_SEQUENCE (1 << 5)
/** @} */
/**
@defgroup API_FLAGS_CLEAR_GROUP Flags used by :c:func:`tsk_table_collection_clear`.
@{
*/
/** Additionally clear the table metadata schemas. */
#define TSK_CLEAR_METADATA_SCHEMAS (1 << 0)
/** Additionally clear the tree-sequence metadata and schema. */
#define TSK_CLEAR_TS_METADATA_AND_SCHEMA (1 << 1)
/** Additionally clear the provenance table. */
#define TSK_CLEAR_PROVENANCE (1 << 2)
/** @} */
/* For the edge diff iterator */
#define TSK_INCLUDE_TERMINAL (1 << 0)
/** @brief Value returned by seeking methods when they have successfully
seeked to a non-null tree.
@ingroup TREE_API_SEEKING_GROUP
*/
#define TSK_TREE_OK 1
/****************************************************************************/
/* Function signatures */
/****************************************************************************/
/**
@defgroup INDIVIDUAL_TABLE_API_GROUP Individual table API.
@{
*/
/**
@brief Initialises the table by allocating the internal memory.
@rst
This must be called before any operations are performed on the table.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
@endrst
@param self A pointer to an uninitialised tsk_individual_table_t object.
@param options Allocation time options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_init(tsk_individual_table_t *self, tsk_flags_t options);
/**
@brief Free the internal memory for the specified table.
@param self A pointer to an initialised tsk_individual_table_t object.
@return Always returns 0.
*/
int tsk_individual_table_free(tsk_individual_table_t *self);
/**
@brief Adds a row to this individual table.
@rst
Add a new individual with the specified ``flags``, ``location``, ``parents`` and
``metadata`` to the table. Copies of the ``location``, ``parents`` and ``metadata``
parameters are taken immediately. See the :ref:`table definition
<sec_individual_table_definition>` for details of the columns in this table.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param flags The bitwise flags for the new individual.
@param location A pointer to a double array representing the spatial location
of the new individual. Can be ``NULL`` if ``location_length`` is 0.
@param location_length The number of dimensions in the location's position.
Note this is the number of elements in the corresponding double array,
not the number of bytes.
@param parents A pointer to a ``tsk_id_t`` array representing the parents
of the new individual. Can be ``NULL`` if ``parents_length`` is 0.
@param parents_length The number of parents.
Note this is the number of elements in the corresponding ``tsk_id_t`` array,
not the number of bytes.
@param metadata The metadata to be associated with the new individual. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return the ID of the newly added individual on success,
or a negative value on failure.
*/
tsk_id_t tsk_individual_table_add_row(tsk_individual_table_t *self, tsk_flags_t flags,
const double *location, tsk_size_t location_length, const tsk_id_t *parents,
tsk_size_t parents_length, const char *metadata, tsk_size_t metadata_length);
/**
@brief Updates the row at the specified index.
@rst
Rewrite the row at the specified index in this table to use the specified
values. Copies of the ``location``, ``parents`` and ``metadata``
parameters are taken immediately. See the :ref:`table definition
<sec_individual_table_definition>` for details of the columns in this table.
.. warning::
Because of the way that ragged columns are encoded, this method requires a
full rewrite of the internal column memory in worst case, and would
therefore be inefficient for bulk updates for such columns. However, if the
sizes of all ragged column values are unchanged in the updated row, this
method is guaranteed to only update the memory for the row in question.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param index The row to update.
@param flags The bitwise flags for the individual.
@param location A pointer to a double array representing the spatial location
of the individual. Can be ``NULL`` if ``location_length`` is 0.
@param location_length The number of dimensions in the location's position.
Note this is the number of elements in the corresponding double array,
not the number of bytes.
@param parents A pointer to a ``tsk_id_t`` array representing the parents
of the individual. Can be ``NULL`` if ``parents_length`` is 0.
@param parents_length The number of parents.
Note this is the number of elements in the corresponding ``tsk_id_t`` array,
not the number of bytes.
@param metadata The metadata to be associated with the individual. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_update_row(tsk_individual_table_t *self, tsk_id_t index,
tsk_flags_t flags, const double *location, tsk_size_t location_length,
const tsk_id_t *parents, tsk_size_t parents_length, const char *metadata,
tsk_size_t metadata_length);
/**
@brief Clears this table, setting the number of rows to zero.
@rst
No memory is freed as a result of this operation; please use
:c:func:`tsk_individual_table_free` to free the table's internal resources. Note that the
metadata schema is not cleared.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_clear(tsk_individual_table_t *self);
/**
@brief Truncates this table so that only the first num_rows are retained.
@param self A pointer to a tsk_individual_table_t object.
@param num_rows The number of rows to retain in the table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_truncate(tsk_individual_table_t *self, tsk_size_t num_rows);
/**
@brief Extends this table by appending rows copied from another table.
@rst
Appends the rows at the specified indexes from the table ``other`` to the end of this
table. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append
the first ``num_rows`` from ``other`` to this table. Note that metadata is copied as-is
and is not checked for compatibility with any existing schema on this table.
@endrst
@param self A pointer to a tsk_individual_table_t object where rows are to be added.
@param other A pointer to a tsk_individual_table_t object where rows are copied from.
@param num_rows The number of rows from ``other`` to append to this table.
@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the
first ``num_rows`` of ``other`` are used.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_extend(tsk_individual_table_t *self,
const tsk_individual_table_t *other, tsk_size_t num_rows,
const tsk_id_t *row_indexes, tsk_flags_t options);
/**
@brief Subset this table by keeping rows according to a boolean mask.
@rst
Deletes rows from this table and optionally returns the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep`` such that for each row ``j`` if
``keep[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep`` must be an array of at least ``num_rows``
:c:type:`bool` values.
If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.
The values in the ``parents`` column are updated according to this map, so that
reference integrity within the table is maintained. As a consequence of this,
the values in the ``parents`` column for kept rows are bounds-checked and an
error raised if they are not valid. Rows that are deleted are not checked for
parent ID integrity.
If an attempt is made to delete rows that are referred to by the ``parents``
column of rows that are retained, an error is raised.
These error conditions are checked before any alterations to the table are
made.
.. warning::
C++ users need to be careful to specify the correct type when
passing in values for the ``keep`` array,
using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,
as the latter may not be the correct size.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param keep Array of boolean flags describing whether a particular
row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping between new
and old IDs. If NULL, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_keep_rows(tsk_individual_table_t *self, const tsk_bool_t *keep,
tsk_flags_t options, tsk_id_t *id_map);
/**
@brief Returns true if the data in the specified table is identical to the data
in this table.
@rst
**Options**
Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) tables are
considered equal if they are byte-wise identical in all columns,
and their metadata schemas are byte-wise identical.
- :c:macro:`TSK_CMP_IGNORE_METADATA`
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param other A pointer to a tsk_individual_table_t object.
@param options Bitwise comparison options.
@return Return true if the specified table is equal to this table.
*/
bool tsk_individual_table_equals(const tsk_individual_table_t *self,
const tsk_individual_table_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table into the specified destination.
@rst
By default the method initialises the specified destination table. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
Indexes that are present are also copied to the destination table.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param dest A pointer to a tsk_individual_table_t object. If the TSK_NO_INIT
option is specified, this must be an initialised individual table. If not, it must be an
uninitialised individual table.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_copy(const tsk_individual_table_t *self,
tsk_individual_table_t *dest, tsk_flags_t options);
/**
@brief Get the row at the specified index.
@rst
Updates the specified individual struct to reflect the values in the specified row.
Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param index The requested table row.
@param row A pointer to a tsk_individual_t struct that is updated to reflect the
values in the specified row.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_get_row(
const tsk_individual_table_t *self, tsk_id_t index, tsk_individual_t *row);
/**
@brief Set the metadata schema
@rst
Copies the metadata schema string to this table, replacing any existing.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param metadata_schema A pointer to a char array.
@param metadata_schema_length The size of the metadata schema in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_set_metadata_schema(tsk_individual_table_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
/**
@brief Print out the state of this table to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_individual_table_t object.
@param out The stream to write the summary to.
*/
void tsk_individual_table_print_state(const tsk_individual_table_t *self, FILE *out);
/**
@brief Replace this table's data by copying from a set of column arrays
@rst
Clears the data columns of this table and then copies column data from the specified
set of arrays. The supplied arrays should all contain data on the same number of rows.
The metadata schema is not affected.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param flags The array of tsk_flags_t flag values to be copied.
@param location The array of double location values to be copied.
@param location_offset The array of tsk_size_t location offset values to be copied.
@param parents The array of tsk_id_t parent values to be copied.
@param parents_offset The array of tsk_size_t parent offset values to be copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_set_columns(tsk_individual_table_t *self, tsk_size_t num_rows,
const tsk_flags_t *flags, const double *location, const tsk_size_t *location_offset,
const tsk_id_t *parents, const tsk_size_t *parents_offset, const char *metadata,
const tsk_size_t *metadata_offset);
/**
@brief Extends this table by copying from a set of column arrays
@rst
Copies column data from the specified set of arrays to create new rows at the end of the
table. The supplied arrays should all contain data on the same number of rows. The
metadata schema is not affected.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param flags The array of tsk_flags_t flag values to be copied.
@param location The array of double location values to be copied.
@param location_offset The array of tsk_size_t location offset values to be copied.
@param parents The array of tsk_id_t parent values to be copied.
@param parents_offset The array of tsk_size_t parent offset values to be copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_append_columns(tsk_individual_table_t *self,
tsk_size_t num_rows, const tsk_flags_t *flags, const double *location,
const tsk_size_t *location_offset, const tsk_id_t *parents,
const tsk_size_t *parents_offset, const char *metadata,
const tsk_size_t *metadata_offset);
/**
@brief Controls the pre-allocation strategy for this table
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param max_rows_increment The number of rows to pre-allocate, or zero for the default
doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_set_max_rows_increment(
tsk_individual_table_t *self, tsk_size_t max_rows_increment);
/**
@brief Controls the pre-allocation strategy for the metadata column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for
the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_set_max_metadata_length_increment(
tsk_individual_table_t *self, tsk_size_t max_metadata_length_increment);
/**
@brief Controls the pre-allocation strategy for the location column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param max_location_length_increment The number of bytes to pre-allocate, or zero for
the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_set_max_location_length_increment(
tsk_individual_table_t *self, tsk_size_t max_location_length_increment);
/**
@brief Controls the pre-allocation strategy for the parents column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_individual_table_t object.
@param max_parents_length_increment The number of bytes to pre-allocate, or zero for
the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_individual_table_set_max_parents_length_increment(
tsk_individual_table_t *self, tsk_size_t max_parents_length_increment);
/** @} */
/* Undocumented methods */
int tsk_individual_table_dump_text(const tsk_individual_table_t *self, FILE *out);
/**
@defgroup NODE_TABLE_API_GROUP Node table API.
@{
*/
/**
@brief Initialises the table by allocating the internal memory.
@rst
This must be called before any operations are performed on the table.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
@endrst
@param self A pointer to an uninitialised tsk_node_table_t object.
@param options Allocation time options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_init(tsk_node_table_t *self, tsk_flags_t options);
/**
@brief Frees the internal memory for the specified table.

@param self A pointer to an initialised tsk_node_table_t object.
@return Always returns 0.
*/
int tsk_node_table_free(tsk_node_table_t *self);
/**
@brief Adds a row to this node table.

@rst
Add a new node with the specified ``flags``, ``time``, ``population``,
``individual`` and ``metadata`` to the table. A copy of the ``metadata`` parameter
is taken immediately. See the :ref:`table definition <sec_node_table_definition>`
for details of the columns in this table.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param flags The bitwise flags for the new node.
@param time The time for the new node.
@param population The population for the new node. Set to TSK_NULL if not
    known.
@param individual The individual for the new node. Set to TSK_NULL if not
    known.
@param metadata The metadata to be associated with the new node. This
    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return the ID of the newly added node on success,
    or a negative value on failure.
*/
tsk_id_t tsk_node_table_add_row(tsk_node_table_t *self, tsk_flags_t flags, double time,
tsk_id_t population, tsk_id_t individual, const char *metadata,
tsk_size_t metadata_length);
/**
@brief Updates the row at the specified index.

@rst
Rewrite the row at the specified index in this table to use the specified
values. A copy of the ``metadata`` parameter is taken immediately. See the
:ref:`table definition <sec_node_table_definition>` for details of the columns
in this table.

.. warning::
    Because of the way that ragged columns are encoded, this method requires a
    full rewrite of the internal column memory in the worst case, and would
    therefore be inefficient for bulk updates for such columns. However, if the
    sizes of all ragged column values are unchanged in the updated row, this
    method is guaranteed to only update the memory for the row in question.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param index The row to update.
@param flags The bitwise flags for the node.
@param time The time for the node.
@param population The population for the node. Set to TSK_NULL if not known.
@param individual The individual for the node. Set to TSK_NULL if not known.
@param metadata The metadata to be associated with the node. This
    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_update_row(tsk_node_table_t *self, tsk_id_t index, tsk_flags_t flags,
double time, tsk_id_t population, tsk_id_t individual, const char *metadata,
tsk_size_t metadata_length);
/**
@brief Clears this table, setting the number of rows to zero.

@rst
No memory is freed as a result of this operation; please use
:c:func:`tsk_node_table_free` to free the table's internal resources. Note that the
metadata schema is not cleared.
@endrst

@param self A pointer to a tsk_node_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_clear(tsk_node_table_t *self);
/**
@brief Truncates this table so that only the first ``num_rows`` rows are retained.

@param self A pointer to a tsk_node_table_t object.
@param num_rows The number of rows to retain in the table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_truncate(tsk_node_table_t *self, tsk_size_t num_rows);
/**
@brief Extends this table by appending rows copied from another table.

@rst
Appends the rows at the specified indexes from the table ``other`` to the end of this
table. Row indexes can be repeated and in any order. If ``row_indexes`` is ``NULL``,
append the first ``num_rows`` from ``other`` to this table. Note that metadata is copied
as-is and is not checked for compatibility with any existing schema on this table.
@endrst

@param self A pointer to a tsk_node_table_t object where rows are to be added.
@param other A pointer to a tsk_node_table_t object where rows are copied from.
@param num_rows The number of rows from ``other`` to append to this table.
@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the
    first ``num_rows`` of ``other`` are used.
@param options Bitwise option flags. Currently unused; should be
    set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_extend(tsk_node_table_t *self, const tsk_node_table_t *other,
tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options);
/**
@brief Subset this table by keeping rows according to a boolean mask.

@rst
Deletes rows from this table and optionally returns the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep`` such that for each row ``j`` if
``keep[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep`` must be an array of at least ``num_rows``
:c:type:`tsk_bool_t` values.

If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.

.. warning::
    C++ users need to be careful to specify the correct type when
    passing in values for the ``keep`` array,
    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,
    as the latter may not be the correct size.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param keep Array of boolean flags describing whether a particular
    row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
    set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping between new
    and old IDs. If ``NULL``, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_keep_rows(tsk_node_table_t *self, const tsk_bool_t *keep,
tsk_flags_t options, tsk_id_t *id_map);
/**
@brief Returns true if the data in the specified table is identical to the data
    in this table.

@rst

**Options**

Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) tables are
considered equal if they are byte-wise identical in all columns,
and their metadata schemas are byte-wise identical.

- :c:macro:`TSK_CMP_IGNORE_METADATA`
@endrst

@param self A pointer to a tsk_node_table_t object.
@param other A pointer to a tsk_node_table_t object.
@param options Bitwise comparison options.
@return Return true if the specified table is equal to this table.
*/
bool tsk_node_table_equals(
const tsk_node_table_t *self, const tsk_node_table_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table into the specified destination.

@rst
By default the method initialises the specified destination table. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param dest A pointer to a tsk_node_table_t object. If the TSK_NO_INIT option
    is specified, this must be an initialised node table. If not, it must
    be an uninitialised node table.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_copy(
const tsk_node_table_t *self, tsk_node_table_t *dest, tsk_flags_t options);
/**
@brief Get the row at the specified index.

@rst
Updates the specified node struct to reflect the values in the specified row.
Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param index The requested table row.
@param row A pointer to a tsk_node_t struct that is updated to reflect the
    values in the specified row.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_get_row(
const tsk_node_table_t *self, tsk_id_t index, tsk_node_t *row);
/**
@brief Set the metadata schema.

@rst
Copies the metadata schema string to this table, replacing any existing schema.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param metadata_schema A pointer to a char array.
@param metadata_schema_length The size of the metadata schema in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_set_metadata_schema(tsk_node_table_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
/**
@brief Print out the state of this table to the specified stream.

This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.

@param self A pointer to a tsk_node_table_t object.
@param out The stream to write the summary to.
*/
void tsk_node_table_print_state(const tsk_node_table_t *self, FILE *out);
/**
@brief Replace this table's data by copying from a set of column arrays.

@rst
Clears the data columns of this table and then copies column data from the specified
set of arrays. The supplied arrays should all contain data on the same number of rows.
The metadata schema is not affected.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param flags The array of tsk_flags_t values to be copied.
@param time The array of double time values to be copied.
@param population The array of tsk_id_t population values to be copied.
@param individual The array of tsk_id_t individual values to be copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_set_columns(tsk_node_table_t *self, tsk_size_t num_rows,
const tsk_flags_t *flags, const double *time, const tsk_id_t *population,
const tsk_id_t *individual, const char *metadata, const tsk_size_t *metadata_offset);
/**
@brief Extends this table by copying from a set of column arrays.

@rst
Copies column data from the specified set of arrays to create new rows at the end of the
table. The supplied arrays should all contain data on the same number of rows. The
metadata schema is not affected.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param flags The array of tsk_flags_t values to be copied.
@param time The array of double time values to be copied.
@param population The array of tsk_id_t population values to be copied.
@param individual The array of tsk_id_t individual values to be copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_append_columns(tsk_node_table_t *self, tsk_size_t num_rows,
const tsk_flags_t *flags, const double *time, const tsk_id_t *population,
const tsk_id_t *individual, const char *metadata, const tsk_size_t *metadata_offset);
/**
@brief Controls the pre-allocation strategy for this table.

@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param max_rows_increment The number of rows to pre-allocate, or zero for the default
    doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_set_max_rows_increment(
tsk_node_table_t *self, tsk_size_t max_rows_increment);
/**
@brief Controls the pre-allocation strategy for the metadata column.

@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst

@param self A pointer to a tsk_node_table_t object.
@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for
    the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_node_table_set_max_metadata_length_increment(
tsk_node_table_t *self, tsk_size_t max_metadata_length_increment);
/** @} */
/* Undocumented methods */
/* NOTE(review): deliberately left out of the generated API documentation.
 * Judging only by the name and signature, this presumably writes a text dump
 * of the table to ``out`` and returns an int status code; confirm against the
 * implementation before relying on either. */
int tsk_node_table_dump_text(const tsk_node_table_t *self, FILE *out);
/**
@defgroup EDGE_TABLE_API_GROUP Edge table API.
@{
*/
/**
@brief Initialises the table by allocating the internal memory.

@rst
This must be called before any operations are performed on the table.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.

**Options**

Options can be specified by providing one or more of the following bitwise
flags:

- :c:macro:`TSK_TABLE_NO_METADATA`
@endrst

@param self A pointer to an uninitialised tsk_edge_table_t object.
@param options Allocation time options.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_init(tsk_edge_table_t *self, tsk_flags_t options);
/**
@brief Frees the internal memory for the specified table.

@param self A pointer to an initialised tsk_edge_table_t object.
@return Always returns 0.
*/
int tsk_edge_table_free(tsk_edge_table_t *self);
/**
@brief Adds a row to this edge table.

@rst
Add a new edge with the specified ``left``, ``right``, ``parent``, ``child`` and
``metadata`` to the table. See the :ref:`table definition <sec_edge_table_definition>`
for details of the columns in this table.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param left The left coordinate for the new edge.
@param right The right coordinate for the new edge.
@param parent The parent node for the new edge.
@param child The child node for the new edge.
@param metadata The metadata to be associated with the new edge. This
    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return the ID of the newly added edge on success,
    or a negative value on failure.
*/
tsk_id_t tsk_edge_table_add_row(tsk_edge_table_t *self, double left, double right,
tsk_id_t parent, tsk_id_t child, const char *metadata, tsk_size_t metadata_length);
/**
@brief Updates the row at the specified index.

@rst
Rewrite the row at the specified index in this table to use the specified
values. A copy of the ``metadata`` parameter is taken immediately. See the
:ref:`table definition <sec_edge_table_definition>` for details of the columns
in this table.

.. warning::
    Because of the way that ragged columns are encoded, this method requires a
    full rewrite of the internal column memory in the worst case, and would
    therefore be inefficient for bulk updates for such columns. However, if the
    sizes of all ragged column values are unchanged in the updated row, this
    method is guaranteed to only update the memory for the row in question.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param index The row to update.
@param left The left coordinate for the edge.
@param right The right coordinate for the edge.
@param parent The parent node for the edge.
@param child The child node for the edge.
@param metadata The metadata to be associated with the edge. This
    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_update_row(tsk_edge_table_t *self, tsk_id_t index, double left,
double right, tsk_id_t parent, tsk_id_t child, const char *metadata,
tsk_size_t metadata_length);
/**
@brief Clears this table, setting the number of rows to zero.

@rst
No memory is freed as a result of this operation; please use
:c:func:`tsk_edge_table_free` to free the table's internal resources. Note that the
metadata schema is not cleared.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_clear(tsk_edge_table_t *self);
/**
@brief Truncates this table so that only the first ``num_rows`` rows are retained.

@param self A pointer to a tsk_edge_table_t object.
@param num_rows The number of rows to retain in the table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_truncate(tsk_edge_table_t *self, tsk_size_t num_rows);
/**
@brief Extends this table by appending rows copied from another table.

@rst
Appends the rows at the specified indexes from the table ``other`` to the end of this
table. Row indexes can be repeated and in any order. If ``row_indexes`` is ``NULL``,
append the first ``num_rows`` from ``other`` to this table. Note that metadata is copied
as-is and is not checked for compatibility with any existing schema on this table.
@endrst

@param self A pointer to a tsk_edge_table_t object where rows are to be added.
@param other A pointer to a tsk_edge_table_t object where rows are copied from.
@param num_rows The number of rows from ``other`` to append to this table.
@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the
    first ``num_rows`` of ``other`` are used.
@param options Bitwise option flags. Currently unused; should be
    set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_extend(tsk_edge_table_t *self, const tsk_edge_table_t *other,
tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options);
/**
@brief Subset this table by keeping rows according to a boolean mask.

@rst
Deletes rows from this table and optionally returns the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep`` such that for each row ``j`` if
``keep[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep`` must be an array of at least ``num_rows``
:c:type:`tsk_bool_t` values.

If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.

.. warning::
    C++ users need to be careful to specify the correct type when
    passing in values for the ``keep`` array,
    using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,
    as the latter may not be the correct size.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param keep Array of boolean flags describing whether a particular
    row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
    set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping between new
    and old IDs. If ``NULL``, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_keep_rows(tsk_edge_table_t *self, const tsk_bool_t *keep,
tsk_flags_t options, tsk_id_t *id_map);
/**
@brief Returns true if the data in the specified table is identical to the data
    in this table.

@rst

**Options**

Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) tables are
considered equal if they are byte-wise identical in all columns,
and their metadata schemas are byte-wise identical.

- :c:macro:`TSK_CMP_IGNORE_METADATA`
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param other A pointer to a tsk_edge_table_t object.
@param options Bitwise comparison options.
@return Return true if the specified table is equal to this table.
*/
bool tsk_edge_table_equals(
const tsk_edge_table_t *self, const tsk_edge_table_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table into the specified destination.

@rst
By default the method initialises the specified destination table. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param dest A pointer to a tsk_edge_table_t object. If the TSK_NO_INIT option
    is specified, this must be an initialised edge table. If not, it must
    be an uninitialised edge table.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_copy(
const tsk_edge_table_t *self, tsk_edge_table_t *dest, tsk_flags_t options);
/**
@brief Get the row at the specified index.

@rst
Updates the specified edge struct to reflect the values in the specified row.
Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param index The requested table row.
@param row A pointer to a tsk_edge_t struct that is updated to reflect the
    values in the specified row.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_get_row(
const tsk_edge_table_t *self, tsk_id_t index, tsk_edge_t *row);
/**
@brief Set the metadata schema.

@rst
Copies the metadata schema string to this table, replacing any existing schema.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param metadata_schema A pointer to a char array.
@param metadata_schema_length The size of the metadata schema in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_set_metadata_schema(tsk_edge_table_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
/**
@brief Print out the state of this table to the specified stream.

This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.

@param self A pointer to a tsk_edge_table_t object.
@param out The stream to write the summary to.
*/
void tsk_edge_table_print_state(const tsk_edge_table_t *self, FILE *out);
/**
@brief Replace this table's data by copying from a set of column arrays.

@rst
Clears the data columns of this table and then copies column data from the specified
set of arrays. The supplied arrays should all contain data on the same number of rows.
The metadata schema is not affected.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param left The array of double left values to be copied.
@param right The array of double right values to be copied.
@param parent The array of tsk_id_t parent values to be copied.
@param child The array of tsk_id_t child values to be copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_set_columns(tsk_edge_table_t *self, tsk_size_t num_rows,
const double *left, const double *right, const tsk_id_t *parent,
const tsk_id_t *child, const char *metadata, const tsk_size_t *metadata_offset);
/**
@brief Extends this table by copying from a set of column arrays.

@rst
Copies column data from the specified set of arrays to create new rows at the end of the
table. The supplied arrays should all contain data on the same number of rows. The
metadata schema is not affected.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param left The array of double left values to be copied.
@param right The array of double right values to be copied.
@param parent The array of tsk_id_t parent values to be copied.
@param child The array of tsk_id_t child values to be copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_append_columns(tsk_edge_table_t *self, tsk_size_t num_rows,
const double *left, const double *right, const tsk_id_t *parent,
const tsk_id_t *child, const char *metadata, const tsk_size_t *metadata_offset);
/**
@brief Controls the pre-allocation strategy for this table.

@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param max_rows_increment The number of rows to pre-allocate, or zero for the default
    doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_set_max_rows_increment(
tsk_edge_table_t *self, tsk_size_t max_rows_increment);
/**
@brief Controls the pre-allocation strategy for the metadata column.

@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for
    the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_set_max_metadata_length_increment(
tsk_edge_table_t *self, tsk_size_t max_metadata_length_increment);
/**
@brief Squash adjacent edges in-place.

@rst
Sorts, then condenses the table into the smallest possible number of rows by
combining any adjacent edges. A pair of edges is said to be `adjacent` if
they have the same parent and child nodes, and if the left coordinate of
one of the edges is equal to the right coordinate of the other edge.
This process is performed in-place so that any set of adjacent edges is
replaced by a single edge. The new edge will have the same parent and child
node, a left coordinate equal to the smallest left coordinate in the set,
and a right coordinate equal to the largest right coordinate in the set.
The new edge table will be sorted in the canonical order (P, C, L, R).

.. note::
    This method will fail if any edges have non-empty metadata.
@endrst

@param self A pointer to a tsk_edge_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_edge_table_squash(tsk_edge_table_t *self);
/** @} */
/* Undocumented methods */
/* NOTE(review): deliberately left out of the generated API documentation.
 * Judging only by the name and signature, this presumably writes a text dump
 * of the table to ``out`` and returns an int status code; confirm against the
 * implementation before relying on either. */
int tsk_edge_table_dump_text(const tsk_edge_table_t *self, FILE *out);
/**
@defgroup MIGRATION_TABLE_API_GROUP Migration table API.
@{
*/
/**
@brief Initialises the table by allocating the internal memory.

@rst
This must be called before any operations are performed on the table.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
@endrst

@param self A pointer to an uninitialised tsk_migration_table_t object.
@param options Allocation time options. Currently unused; should be
    set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_init(tsk_migration_table_t *self, tsk_flags_t options);
/**
@brief Frees the internal memory for the specified table.

@param self A pointer to an initialised tsk_migration_table_t object.
@return Always returns 0.
*/
int tsk_migration_table_free(tsk_migration_table_t *self);
/**
@brief Adds a row to this migration table.

@rst
Add a new migration with the specified ``left``, ``right``, ``node``,
``source``, ``dest``, ``time`` and ``metadata`` to the table.
See the :ref:`table definition <sec_migration_table_definition>`
for details of the columns in this table.
@endrst

@param self A pointer to a tsk_migration_table_t object.
@param left The left coordinate for the new migration.
@param right The right coordinate for the new migration.
@param node The node ID for the new migration.
@param source The source population ID for the new migration.
@param dest The destination population ID for the new migration.
@param time The time for the new migration.
@param metadata The metadata to be associated with the new migration. This
    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return the ID of the newly added migration on success,
    or a negative value on failure.
*/
tsk_id_t tsk_migration_table_add_row(tsk_migration_table_t *self, double left,
double right, tsk_id_t node, tsk_id_t source, tsk_id_t dest, double time,
const char *metadata, tsk_size_t metadata_length);
/**
@brief Updates the row at the specified index.

@rst
Rewrite the row at the specified index in this table to use the specified
values. A copy of the ``metadata`` parameter is taken immediately. See the
:ref:`table definition <sec_migration_table_definition>` for details of the columns
in this table.

.. warning::
    Because of the way that ragged columns are encoded, this method requires a
    full rewrite of the internal column memory in the worst case, and would
    therefore be inefficient for bulk updates for such columns. However, if the
    sizes of all ragged column values are unchanged in the updated row, this
    method is guaranteed to only update the memory for the row in question.
@endrst

@param self A pointer to a tsk_migration_table_t object.
@param index The row to update.
@param left The left coordinate for the migration.
@param right The right coordinate for the migration.
@param node The node ID for the migration.
@param source The source population ID for the migration.
@param dest The destination population ID for the migration.
@param time The time for the migration.
@param metadata The metadata to be associated with the migration. This
    is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_update_row(tsk_migration_table_t *self, tsk_id_t index,
double left, double right, tsk_id_t node, tsk_id_t source, tsk_id_t dest,
double time, const char *metadata, tsk_size_t metadata_length);
/**
@brief Clears this table, setting the number of rows to zero.

@rst
No memory is freed as a result of this operation; please use
:c:func:`tsk_migration_table_free` to free the table's internal resources. Note that the
metadata schema is not cleared.
@endrst

@param self A pointer to a tsk_migration_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_clear(tsk_migration_table_t *self);
/**
@brief Truncates this table so that only the first ``num_rows`` rows are retained.

@param self A pointer to a tsk_migration_table_t object.
@param num_rows The number of rows to retain in the table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_truncate(tsk_migration_table_t *self, tsk_size_t num_rows);
/**
@brief Extends this table by appending rows copied from another table.

@rst
Appends the rows at the specified indexes from the table ``other`` to the end of this
table. Row indexes can be repeated and in any order. If ``row_indexes`` is ``NULL``,
append the first ``num_rows`` from ``other`` to this table. Note that metadata is copied
as-is and is not checked for compatibility with any existing schema on this table.
@endrst

@param self A pointer to a tsk_migration_table_t object where rows are to be added.
@param other A pointer to a tsk_migration_table_t object where rows are copied from.
@param num_rows The number of rows from ``other`` to append to this table.
@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the
    first ``num_rows`` of ``other`` are used.
@param options Bitwise option flags. Currently unused; should be
    set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_extend(tsk_migration_table_t *self,
const tsk_migration_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes,
tsk_flags_t options);
/**
@brief Subset this table by keeping rows according to a boolean mask.
@rst
Deletes rows from this table and optionally returns the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep`` such that for each row ``j`` if
``keep[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep`` must be an array of at least ``num_rows``
:c:type:`bool` values.
If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.
.. warning::
C++ users need to be careful to specify the correct type when
passing in values for the ``keep`` array,
using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,
as the latter may not be the correct size.
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param keep Array of boolean flags describing whether a particular
row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping between new
and old IDs. If NULL, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_keep_rows(tsk_migration_table_t *self, const tsk_bool_t *keep,
tsk_flags_t options, tsk_id_t *id_map);
/**
@brief Returns true if the data in the specified table is identical to the data
in this table.
@rst
**Options**
Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) tables are
considered equal if they are byte-wise identical in all columns,
and their metadata schemas are byte-wise identical.
- :c:macro:`TSK_CMP_IGNORE_METADATA`
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param other A pointer to a tsk_migration_table_t object.
@param options Bitwise comparison options.
@return Return true if the specified table is equal to this table.
*/
bool tsk_migration_table_equals(const tsk_migration_table_t *self,
const tsk_migration_table_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table into the specified destination.
@rst
By default the method initialises the specified destination table. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param dest A pointer to a tsk_migration_table_t object. If the TSK_NO_INIT
option is specified, this must be an initialised migration table. If not, it must be an
uninitialised migration table.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_copy(
const tsk_migration_table_t *self, tsk_migration_table_t *dest, tsk_flags_t options);
/**
@brief Get the row at the specified index.
@rst
Updates the specified migration struct to reflect the values in the specified row.
Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param index The requested table row.
@param row A pointer to a tsk_migration_t struct that is updated to reflect the
values in the specified row.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_get_row(
const tsk_migration_table_t *self, tsk_id_t index, tsk_migration_t *row);
/**
@brief Set the metadata schema
@rst
Copies the metadata schema string to this table, replacing any existing.
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param metadata_schema A pointer to a char array.
@param metadata_schema_length The size of the metadata schema in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_set_metadata_schema(tsk_migration_table_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
/**
@brief Print out the state of this table to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_migration_table_t object.
@param out The stream to write the summary to.
*/
void tsk_migration_table_print_state(const tsk_migration_table_t *self, FILE *out);
/**
@brief Replace this table's data by copying from a set of column arrays
@rst
Clears the data columns of this table and then copies column data from the specified
set of arrays. The supplied arrays should all contain data on the same number of rows.
The metadata schema is not affected.
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param left The array of double left values to be copied.
@param right The array of double right values to be copied.
@param node The array of tsk_id_t node values to be copied.
@param source The array of tsk_id_t source values to be copied.
@param dest The array of tsk_id_t dest values to be copied.
@param time The array of double time values to be copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_set_columns(tsk_migration_table_t *self, tsk_size_t num_rows,
const double *left, const double *right, const tsk_id_t *node,
const tsk_id_t *source, const tsk_id_t *dest, const double *time,
const char *metadata, const tsk_size_t *metadata_offset);
/**
@brief Extends this table by copying from a set of column arrays
@rst
Copies column data from the specified set of arrays to create new rows at the end of the
table. The supplied arrays should all contain data on the same number of rows. The
metadata schema is not affected.
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param left The array of double left values to be copied.
@param right The array of double right values to be copied.
@param node The array of tsk_id_t node values to be copied.
@param source The array of tsk_id_t source values to be copied.
@param dest The array of tsk_id_t dest values to be copied.
@param time The array of double time values to be copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_append_columns(tsk_migration_table_t *self, tsk_size_t num_rows,
const double *left, const double *right, const tsk_id_t *node,
const tsk_id_t *source, const tsk_id_t *dest, const double *time,
const char *metadata, const tsk_size_t *metadata_offset);
/**
@brief Controls the pre-allocation strategy for this table
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param max_rows_increment The number of rows to pre-allocate, or zero for the default
doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_set_max_rows_increment(
tsk_migration_table_t *self, tsk_size_t max_rows_increment);
/**
@brief Controls the pre-allocation strategy for the metadata column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_migration_table_t object.
@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for
the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_migration_table_set_max_metadata_length_increment(
tsk_migration_table_t *self, tsk_size_t max_metadata_length_increment);
/** @} */
/* Undocumented methods */
int tsk_migration_table_dump_text(const tsk_migration_table_t *self, FILE *out);
/**
@defgroup SITE_TABLE_API_GROUP Site table API.
@{
*/
/**
@brief Initialises the table by allocating the internal memory.
@rst
This must be called before any operations are performed on the table.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
@endrst
@param self A pointer to an uninitialised tsk_site_table_t object.
@param options Allocation time options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_init(tsk_site_table_t *self, tsk_flags_t options);
/**
@brief Free the internal memory for the specified table.
@param self A pointer to an initialised tsk_site_table_t object.
@return Always returns 0.
*/
int tsk_site_table_free(tsk_site_table_t *self);
/**
@brief Adds a row to this site table.
@rst
Add a new site with the specified ``position``, ``ancestral_state``
and ``metadata`` to the table. Copies of ``ancestral_state`` and ``metadata``
are immediately taken. See the :ref:`table definition <sec_site_table_definition>`
for details of the columns in this table.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param position The position coordinate for the new site.
@param ancestral_state The ancestral_state for the new site.
@param ancestral_state_length The length of the ancestral_state in bytes.
@param metadata The metadata to be associated with the new site. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return the ID of the newly added site on success,
or a negative value on failure.
*/
tsk_id_t tsk_site_table_add_row(tsk_site_table_t *self, double position,
const char *ancestral_state, tsk_size_t ancestral_state_length, const char *metadata,
tsk_size_t metadata_length);
/**
@brief Updates the row at the specified index.
@rst
Rewrite the row at the specified index in this table to use the specified
values. Copies of the ``ancestral_state`` and ``metadata`` parameters are taken
immediately. See the :ref:`table definition <sec_site_table_definition>` for
details of the columns in this table.
.. warning::
Because of the way that ragged columns are encoded, this method requires a
full rewrite of the internal column memory in the worst case, and would
therefore be inefficient for bulk updates for such columns. However, if the
sizes of all ragged column values are unchanged in the updated row, this
method is guaranteed to only update the memory for the row in question.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param index The row to update.
@param position The position coordinate for the site.
@param ancestral_state The ancestral_state for the site.
@param ancestral_state_length The length of the ancestral_state in bytes.
@param metadata The metadata to be associated with the site. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_update_row(tsk_site_table_t *self, tsk_id_t index, double position,
const char *ancestral_state, tsk_size_t ancestral_state_length, const char *metadata,
tsk_size_t metadata_length);
/**
@brief Clears this table, setting the number of rows to zero.
@rst
No memory is freed as a result of this operation; please use
:c:func:`tsk_site_table_free` to free the table's internal resources. Note that the
metadata schema is not cleared.
@endrst
@param self A pointer to a tsk_site_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_clear(tsk_site_table_t *self);
/**
@brief Truncates this table so that only the first num_rows are retained.
@param self A pointer to a tsk_site_table_t object.
@param num_rows The number of rows to retain in the table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_truncate(tsk_site_table_t *self, tsk_size_t num_rows);
/**
@brief Extends this table by appending rows copied from another table.
@rst
Appends the rows at the specified indexes from the table ``other`` to the end of this
table. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append
the first ``num_rows`` from ``other`` to this table. Note that metadata is copied as-is
and is not checked for compatibility with any existing schema on this table.
@endrst
@param self A pointer to a tsk_site_table_t object where rows are to be added.
@param other A pointer to a tsk_site_table_t object where rows are copied from.
@param num_rows The number of rows from ``other`` to append to this table.
@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the
first ``num_rows`` of ``other`` are used.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_extend(tsk_site_table_t *self, const tsk_site_table_t *other,
tsk_size_t num_rows, const tsk_id_t *row_indexes, tsk_flags_t options);
/**
@brief Subset this table by keeping rows according to a boolean mask.
@rst
Deletes rows from this table and optionally returns the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep`` such that for each row ``j`` if
``keep[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep`` must be an array of at least ``num_rows``
:c:type:`bool` values.
If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.
.. warning::
C++ users need to be careful to specify the correct type when
passing in values for the ``keep`` array,
using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,
as the latter may not be the correct size.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param keep Array of boolean flags describing whether a particular
row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping between new
and old IDs. If NULL, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_keep_rows(tsk_site_table_t *self, const tsk_bool_t *keep,
tsk_flags_t options, tsk_id_t *id_map);
/**
@brief Returns true if the data in the specified table is identical to the data
in this table.
@rst
**Options**
Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) tables are
considered equal if they are byte-wise identical in all columns,
and their metadata schemas are byte-wise identical.
- :c:macro:`TSK_CMP_IGNORE_METADATA`
@endrst
@param self A pointer to a tsk_site_table_t object.
@param other A pointer to a tsk_site_table_t object.
@param options Bitwise comparison options.
@return Return true if the specified table is equal to this table.
*/
bool tsk_site_table_equals(
const tsk_site_table_t *self, const tsk_site_table_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table into the specified destination.
@rst
By default the method initialises the specified destination table. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param dest A pointer to a tsk_site_table_t object. If the TSK_NO_INIT option
is specified, this must be an initialised site table. If not, it must
be an uninitialised site table.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_copy(
const tsk_site_table_t *self, tsk_site_table_t *dest, tsk_flags_t options);
/**
@brief Get the row at the specified index.
@rst
Updates the specified site struct to reflect the values in the specified row.
This function always sets the ``mutations`` and ``mutations_length``
fields in the parameter :c:struct:`tsk_site_t` to ``NULL`` and ``0`` respectively.
To get access to the mutations for a particular site, please use the
tree sequence method, :c:func:`tsk_treeseq_get_site`.
Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param index The requested table row.
@param row A pointer to a tsk_site_t struct that is updated to reflect the
values in the specified row.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_get_row(
const tsk_site_table_t *self, tsk_id_t index, tsk_site_t *row);
/**
@brief Set the metadata schema
@rst
Copies the metadata schema string to this table, replacing any existing.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param metadata_schema A pointer to a char array.
@param metadata_schema_length The size of the metadata schema in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_set_metadata_schema(tsk_site_table_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
/**
@brief Print out the state of this table to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_site_table_t object.
@param out The stream to write the summary to.
*/
void tsk_site_table_print_state(const tsk_site_table_t *self, FILE *out);
/**
@brief Replace this table's data by copying from a set of column arrays
@rst
Clears the data columns of this table and then copies column data from the specified
set of arrays. The supplied arrays should all contain data on the same number of rows.
The metadata schema is not affected.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param position The array of double position values to be copied.
@param ancestral_state The array of char ancestral state values to be copied.
@param ancestral_state_offset The array of tsk_size_t ancestral state offset values to be
copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_set_columns(tsk_site_table_t *self, tsk_size_t num_rows,
const double *position, const char *ancestral_state,
const tsk_size_t *ancestral_state_offset, const char *metadata,
const tsk_size_t *metadata_offset);
/**
@brief Extends this table by copying from a set of column arrays
@rst
Copies column data from the specified set of arrays to create new rows at the end of the
table. The supplied arrays should all contain data on the same number of rows. The
metadata schema is not affected.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param position The array of double position values to be copied.
@param ancestral_state The array of char ancestral state values to be copied.
@param ancestral_state_offset The array of tsk_size_t ancestral state offset values to be
copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_append_columns(tsk_site_table_t *self, tsk_size_t num_rows,
const double *position, const char *ancestral_state,
const tsk_size_t *ancestral_state_offset, const char *metadata,
const tsk_size_t *metadata_offset);
/**
@brief Controls the pre-allocation strategy for this table
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param max_rows_increment The number of rows to pre-allocate, or zero for the default
doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_set_max_rows_increment(
tsk_site_table_t *self, tsk_size_t max_rows_increment);
/**
@brief Controls the pre-allocation strategy for the metadata column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for
the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_set_max_metadata_length_increment(
tsk_site_table_t *self, tsk_size_t max_metadata_length_increment);
/**
@brief Controls the pre-allocation strategy for the ancestral_state column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_site_table_t object.
@param max_ancestral_state_length_increment The number of bytes to pre-allocate, or zero
for the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_site_table_set_max_ancestral_state_length_increment(
tsk_site_table_t *self, tsk_size_t max_ancestral_state_length_increment);
/** @} */
/* Undocumented methods */
int tsk_site_table_dump_text(const tsk_site_table_t *self, FILE *out);
/**
@defgroup MUTATION_TABLE_API_GROUP Mutation table API.
@{
*/
/**
@brief Initialises the table by allocating the internal memory.
@rst
This must be called before any operations are performed on the table.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
@endrst
@param self A pointer to an uninitialised tsk_mutation_table_t object.
@param options Allocation time options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_init(tsk_mutation_table_t *self, tsk_flags_t options);
/**
@brief Free the internal memory for the specified table.
@param self A pointer to an initialised tsk_mutation_table_t object.
@return Always returns 0.
*/
int tsk_mutation_table_free(tsk_mutation_table_t *self);
/**
@brief Adds a row to this mutation table.
@rst
Add a new mutation with the specified ``site``, ``node``, ``parent``, ``time``,
``derived_state`` and ``metadata`` to the table. Copies of ``derived_state`` and
``metadata`` are immediately taken. See the
:ref:`table definition <sec_mutation_table_definition>`
for details of the columns in this table.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param site The site ID for the new mutation.
@param node The ID of the node this mutation occurs over.
@param parent The ID of the parent mutation.
@param time The time of the mutation.
@param derived_state The derived_state for the new mutation.
@param derived_state_length The length of the derived_state in bytes.
@param metadata The metadata to be associated with the new mutation. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return the ID of the newly added mutation on success,
or a negative value on failure.
*/
tsk_id_t tsk_mutation_table_add_row(tsk_mutation_table_t *self, tsk_id_t site,
tsk_id_t node, tsk_id_t parent, double time, const char *derived_state,
tsk_size_t derived_state_length, const char *metadata, tsk_size_t metadata_length);
/**
@brief Updates the row at the specified index.
@rst
Rewrite the row at the specified index in this table to use the specified
values. Copies of the ``derived_state`` and ``metadata`` parameters are taken
immediately. See the :ref:`table definition <sec_mutation_table_definition>` for
details of the columns in this table.
.. warning::
Because of the way that ragged columns are encoded, this method requires a
full rewrite of the internal column memory in the worst case, and would
therefore be inefficient for bulk updates for such columns. However, if the
sizes of all ragged column values are unchanged in the updated row, this
method is guaranteed to only update the memory for the row in question.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param index The row to update.
@param site The site ID for the mutation.
@param node The ID of the node this mutation occurs over.
@param parent The ID of the parent mutation.
@param time The time of the mutation.
@param derived_state The derived_state for the mutation.
@param derived_state_length The length of the derived_state in bytes.
@param metadata The metadata to be associated with the mutation. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_update_row(tsk_mutation_table_t *self, tsk_id_t index,
tsk_id_t site, tsk_id_t node, tsk_id_t parent, double time,
const char *derived_state, tsk_size_t derived_state_length, const char *metadata,
tsk_size_t metadata_length);
/**
@brief Clears this table, setting the number of rows to zero.
@rst
No memory is freed as a result of this operation; please use
:c:func:`tsk_mutation_table_free` to free the table's internal resources. Note that the
metadata schema is not cleared.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_clear(tsk_mutation_table_t *self);
/**
@brief Truncates this table so that only the first num_rows are retained.
@param self A pointer to a tsk_mutation_table_t object.
@param num_rows The number of rows to retain in the table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_truncate(tsk_mutation_table_t *self, tsk_size_t num_rows);
/**
@brief Extends this table by appending rows copied from another table.
@rst
Appends the rows at the specified indexes from the table ``other`` to the end of this
table. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append
the first ``num_rows`` from ``other`` to this table. Note that metadata is copied as-is
and is not checked for compatibility with any existing schema on this table.
@endrst
@param self A pointer to a tsk_mutation_table_t object where rows are to be added.
@param other A pointer to a tsk_mutation_table_t object where rows are copied from.
@param num_rows The number of rows from ``other`` to append to this table.
@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the
first ``num_rows`` of ``other`` are used.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_extend(tsk_mutation_table_t *self,
const tsk_mutation_table_t *other, tsk_size_t num_rows, const tsk_id_t *row_indexes,
tsk_flags_t options);
/**
@brief Subset this table by keeping rows according to a boolean mask.
@rst
Deletes rows from this table and optionally returns the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep`` such that for each row ``j`` if
``keep[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep`` must be an array of at least ``num_rows``
:c:type:`bool` values.
If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.
The values in the ``parent`` column are updated according to this map, so that
reference integrity within the table is maintained. As a consequence of this,
the values in the ``parent`` column for kept rows are bounds-checked and an
error raised if they are not valid. Rows that are deleted are not checked for
parent ID integrity.
If an attempt is made to delete rows that are referred to by the ``parent``
column of rows that are retained, an error is raised.
These error conditions are checked before any alterations to the table are
made.
.. warning::
C++ users need to be careful to specify the correct type when
passing in values for the ``keep`` array,
using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,
as the latter may not be the correct size.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param keep Array of boolean flags describing whether a particular
row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping between new
and old IDs. If NULL, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_keep_rows(tsk_mutation_table_t *self, const tsk_bool_t *keep,
tsk_flags_t options, tsk_id_t *id_map);
/**
@brief Returns true if the data in the specified table is identical to the data
in this table.
@rst
**Options**
Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) tables are
considered equal if they are byte-wise identical in all columns,
and their metadata schemas are byte-wise identical.
- :c:macro:`TSK_CMP_IGNORE_METADATA`
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param other A pointer to a tsk_mutation_table_t object.
@param options Bitwise comparison options.
@return Return true if the specified table is equal to this table.
*/
bool tsk_mutation_table_equals(const tsk_mutation_table_t *self,
const tsk_mutation_table_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table into the specified destination.
@rst
By default the method initialises the specified destination table. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param dest A pointer to a tsk_mutation_table_t object. If the TSK_NO_INIT
option is specified, this must be an initialised mutation table. If not, it must be an
uninitialised mutation table.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_copy(
const tsk_mutation_table_t *self, tsk_mutation_table_t *dest, tsk_flags_t options);
/**
@brief Get the row at the specified index.
@rst
Updates the specified mutation struct to reflect the values in the specified row.
This function always sets the ``edge`` field in parameter
:c:struct:`tsk_mutation_t` to ``TSK_NULL``. To determine the ID of
the edge associated with a particular mutation, please use the
tree sequence method, :c:func:`tsk_treeseq_get_mutation`.
Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param index The requested table row.
@param row A pointer to a tsk_mutation_t struct that is updated to reflect the
values in the specified row.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_get_row(
const tsk_mutation_table_t *self, tsk_id_t index, tsk_mutation_t *row);
/**
@brief Set the metadata schema
@rst
Copies the metadata schema string to this table, replacing any existing.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param metadata_schema A pointer to a char array.
@param metadata_schema_length The size of the metadata schema in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_set_metadata_schema(tsk_mutation_table_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
/**
@brief Print out the state of this table to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_mutation_table_t object.
@param out The stream to write the summary to.
*/
void tsk_mutation_table_print_state(const tsk_mutation_table_t *self, FILE *out);
/**
@brief Replace this table's data by copying from a set of column arrays
@rst
Clears the data columns of this table and then copies column data from the specified
set of arrays. The supplied arrays should all contain data on the same number of rows.
The metadata schema is not affected.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param site The array of tsk_id_t site values to be copied.
@param node The array of tsk_id_t node values to be copied.
@param parent The array of tsk_id_t parent values to be copied.
@param time The array of double time values to be copied.
@param derived_state The array of char derived_state values to be copied.
@param derived_state_offset The array of tsk_size_t derived state offset values to be
copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_set_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,
const tsk_id_t *site, const tsk_id_t *node, const tsk_id_t *parent,
const double *time, const char *derived_state,
const tsk_size_t *derived_state_offset, const char *metadata,
const tsk_size_t *metadata_offset);
/**
@brief Extends this table by copying from a set of column arrays
@rst
Copies column data from the specified set of arrays to create new rows at the end of the
table. The supplied arrays should all contain data on the same number of rows. The
metadata schema is not affected.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param site The array of tsk_id_t site values to be copied.
@param node The array of tsk_id_t node values to be copied.
@param parent The array of tsk_id_t parent values to be copied.
@param time The array of double time values to be copied.
@param derived_state The array of char derived_state values to be copied.
@param derived_state_offset The array of tsk_size_t derived state offset values to be
copied.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_append_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,
const tsk_id_t *site, const tsk_id_t *node, const tsk_id_t *parent,
const double *time, const char *derived_state,
const tsk_size_t *derived_state_offset, const char *metadata,
const tsk_size_t *metadata_offset);
/**
@brief Controls the pre-allocation strategy for this table
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param max_rows_increment The number of rows to pre-allocate, or zero for the default
doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_set_max_rows_increment(
tsk_mutation_table_t *self, tsk_size_t max_rows_increment);
/**
@brief Controls the pre-allocation strategy for the metadata column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for
the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_set_max_metadata_length_increment(
tsk_mutation_table_t *self, tsk_size_t max_metadata_length_increment);
/**
@brief Controls the pre-allocation strategy for the derived_state column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_mutation_table_t object.
@param max_derived_state_length_increment The number of bytes to pre-allocate, or zero
for the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_mutation_table_set_max_derived_state_length_increment(
tsk_mutation_table_t *self, tsk_size_t max_derived_state_length_increment);
/** @} */
/* Undocumented methods */
int tsk_mutation_table_dump_text(const tsk_mutation_table_t *self, FILE *out);
/**
@defgroup POPULATION_TABLE_API_GROUP Population table API.
@{
*/
/**
@brief Initialises the table by allocating the internal memory.
@rst
This must be called before any operations are performed on the table.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
@endrst
@param self A pointer to an uninitialised tsk_population_table_t object.
@param options Allocation time options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_init(tsk_population_table_t *self, tsk_flags_t options);
/**
@brief Free the internal memory for the specified table.
@param self A pointer to an initialised tsk_population_table_t object.
@return Always returns 0.
*/
int tsk_population_table_free(tsk_population_table_t *self);
/**
@brief Adds a row to this population table.
@rst
Add a new population with the specified ``metadata`` to the table. A copy of the
``metadata`` is immediately taken. See the :ref:`table definition
<sec_population_table_definition>` for details of the columns in this table.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param metadata The metadata to be associated with the new population. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return the ID of the newly added population on success,
or a negative value on failure.
*/
tsk_id_t tsk_population_table_add_row(
tsk_population_table_t *self, const char *metadata, tsk_size_t metadata_length);
/**
@brief Updates the row at the specified index.
@rst
Rewrite the row at the specified index in this table to use the specified
values. A copy of the ``metadata`` parameter is taken immediately. See the
:ref:`table definition <sec_population_table_definition>` for details of the
columns in this table.
.. warning::
Because of the way that ragged columns are encoded, this method requires a
full rewrite of the internal column memory in worst case, and would
therefore be inefficient for bulk updates for such columns. However, if the
sizes of all ragged column values are unchanged in the updated row, this
method is guaranteed to only update the memory for the row in question.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param index The row to update.
@param metadata The metadata to be associated with the population. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``metadata_length`` is 0.
@param metadata_length The size of the metadata array in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_update_row(tsk_population_table_t *self, tsk_id_t index,
const char *metadata, tsk_size_t metadata_length);
/**
@brief Clears this table, setting the number of rows to zero.
@rst
No memory is freed as a result of this operation; please use
:c:func:`tsk_population_table_free` to free the table's internal resources. Note that the
metadata schema is not cleared.
@endrst
@param self A pointer to a tsk_population_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_clear(tsk_population_table_t *self);
/**
@brief Truncates this table so that only the first num_rows are retained.
@param self A pointer to a tsk_population_table_t object.
@param num_rows The number of rows to retain in the table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_truncate(tsk_population_table_t *self, tsk_size_t num_rows);
/**
@brief Extends this table by appending rows copied from another table.
@rst
Appends the rows at the specified indexes from the table ``other`` to the end of this
table. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append
the first ``num_rows`` from ``other`` to this table. Note that metadata is copied as-is
and is not checked for compatibility with any existing schema on this table.
@endrst
@param self A pointer to a tsk_population_table_t object where rows are to be added.
@param other A pointer to a tsk_population_table_t object where rows are copied from.
@param num_rows The number of rows from ``other`` to append to this table.
@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the
first ``num_rows`` of ``other`` are used.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_extend(tsk_population_table_t *self,
const tsk_population_table_t *other, tsk_size_t num_rows,
const tsk_id_t *row_indexes, tsk_flags_t options);
/**
@brief Subset this table by keeping rows according to a boolean mask.
@rst
Deletes rows from this table and optionally return the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep`` such that for each row ``j`` if
``keep[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep`` must be an array of at least ``num_rows``
:c:type:`tsk_bool_t` values.
If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.
.. warning::
C++ users need to be careful to specify the correct type when
passing in values for the ``keep`` array,
using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,
as the latter may not be the correct size.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param keep Array of boolean flags describing whether a particular
row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping between new
and old IDs. If NULL, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_keep_rows(tsk_population_table_t *self, const tsk_bool_t *keep,
tsk_flags_t options, tsk_id_t *id_map);
/**
@brief Returns true if the data in the specified table is identical to the data
in this table.
@rst
**Options**
Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) tables are
considered equal if they are byte-wise identical in all columns,
and their metadata schemas are byte-wise identical.
- :c:macro:`TSK_CMP_IGNORE_METADATA`
Do not include metadata in the comparison. Note that as metadata is the
only column in the population table, two population tables are considered
equal if they have the same number of rows if this flag is specified.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param other A pointer to a tsk_population_table_t object.
@param options Bitwise comparison options.
@return Return true if the specified table is equal to this table.
*/
bool tsk_population_table_equals(const tsk_population_table_t *self,
const tsk_population_table_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table into the specified destination.
@rst
By default the method initialises the specified destination table. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param dest A pointer to a tsk_population_table_t object. If the TSK_NO_INIT
option is specified, this must be an initialised population table. If not, it must be an
uninitialised population table.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_copy(const tsk_population_table_t *self,
tsk_population_table_t *dest, tsk_flags_t options);
/**
@brief Get the row at the specified index.
@rst
Updates the specified population struct to reflect the values in the specified row.
Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param index The requested table row.
@param row A pointer to a tsk_population_t struct that is updated to reflect the
values in the specified row.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_get_row(
const tsk_population_table_t *self, tsk_id_t index, tsk_population_t *row);
/**
@brief Set the metadata schema
@rst
Copies the metadata schema string to this table, replacing any existing.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param metadata_schema A pointer to a char array.
@param metadata_schema_length The size of the metadata schema in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_set_metadata_schema(tsk_population_table_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
/**
@brief Print out the state of this table to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_population_table_t object.
@param out The stream to write the summary to.
*/
void tsk_population_table_print_state(const tsk_population_table_t *self, FILE *out);
/**
@brief Replace this table's data by copying from a set of column arrays
@rst
Clears the data columns of this table and then copies column data from the specified
set of arrays. The supplied arrays should all contain data on the same number of rows.
The metadata schema is not affected.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_set_columns(tsk_population_table_t *self, tsk_size_t num_rows,
const char *metadata, const tsk_size_t *metadata_offset);
/**
@brief Extends this table by copying from a set of column arrays
@rst
Copies column data from the specified set of arrays to create new rows at the end of the
table. The supplied arrays should all contain data on the same number of rows. The
metadata schema is not affected.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param metadata The array of char metadata values to be copied.
@param metadata_offset The array of tsk_size_t metadata offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_append_columns(tsk_population_table_t *self,
tsk_size_t num_rows, const char *metadata, const tsk_size_t *metadata_offset);
/**
@brief Controls the pre-allocation strategy for this table
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param max_rows_increment The number of rows to pre-allocate, or zero for the default
doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_set_max_rows_increment(
tsk_population_table_t *self, tsk_size_t max_rows_increment);
/**
@brief Controls the pre-allocation strategy for the metadata column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_population_table_t object.
@param max_metadata_length_increment The number of bytes to pre-allocate, or zero for
the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_population_table_set_max_metadata_length_increment(
tsk_population_table_t *self, tsk_size_t max_metadata_length_increment);
/** @} */
/* Undocumented methods */
int tsk_population_table_dump_text(const tsk_population_table_t *self, FILE *out);
/**
@defgroup PROVENANCE_TABLE_API_GROUP Provenance table API.
@{
*/
/**
@brief Initialises the table by allocating the internal memory.
@rst
This must be called before any operations are performed on the table.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
@endrst
@param self A pointer to an uninitialised tsk_provenance_table_t object.
@param options Allocation time options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_init(tsk_provenance_table_t *self, tsk_flags_t options);
/**
@brief Free the internal memory for the specified table.
@param self A pointer to an initialised tsk_provenance_table_t object.
@return Always returns 0.
*/
int tsk_provenance_table_free(tsk_provenance_table_t *self);
/**
@brief Adds a row to this provenance table.
@rst
Add a new provenance with the specified ``timestamp`` and ``record`` to the table.
Copies of the ``timestamp`` and ``record`` are immediately taken.
See the :ref:`table definition <sec_provenance_table_definition>`
for details of the columns in this table.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param timestamp The timestamp to be associated with the new provenance. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``timestamp_length`` is 0.
@param timestamp_length The size of the timestamp array in bytes.
@param record The record to be associated with the new provenance. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``record_length`` is 0.
@param record_length The size of the record array in bytes.
@return Return the ID of the newly added provenance on success,
or a negative value on failure.
*/
tsk_id_t tsk_provenance_table_add_row(tsk_provenance_table_t *self,
const char *timestamp, tsk_size_t timestamp_length, const char *record,
tsk_size_t record_length);
/**
@brief Updates the row at the specified index.
@rst
Rewrite the row at the specified index in this table to use the specified
values. Copies of the ``timestamp`` and ``record`` parameters are taken
immediately. See the :ref:`table definition <sec_provenance_table_definition>`
for details of the columns in this table.
.. warning::
Because of the way that ragged columns are encoded, this method requires a
full rewrite of the internal column memory in worst case, and would
therefore be inefficient for bulk updates for such columns. However, if the
sizes of all ragged column values are unchanged in the updated row, this
method is guaranteed to only update the memory for the row in question.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param index The row to update.
@param timestamp The timestamp to be associated with the provenance. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``timestamp_length`` is 0.
@param timestamp_length The size of the timestamp array in bytes.
@param record The record to be associated with the provenance. This
is a pointer to arbitrary memory. Can be ``NULL`` if ``record_length`` is 0.
@param record_length The size of the record array in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_update_row(tsk_provenance_table_t *self, tsk_id_t index,
const char *timestamp, tsk_size_t timestamp_length, const char *record,
tsk_size_t record_length);
/**
@brief Clears this table, setting the number of rows to zero.
@rst
No memory is freed as a result of this operation; please use
:c:func:`tsk_provenance_table_free` to free the table's internal resources.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_clear(tsk_provenance_table_t *self);
/**
@brief Truncates this table so that only the first num_rows are retained.
@param self A pointer to a tsk_provenance_table_t object.
@param num_rows The number of rows to retain in the table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_truncate(tsk_provenance_table_t *self, tsk_size_t num_rows);
/**
@brief Extends this table by appending rows copied from another table.
@rst
Appends the rows at the specified indexes from the table ``other`` to the end of this
table. Row indexes can be repeated and in any order. If ``row_indexes`` is NULL, append
the first ``num_rows`` from ``other`` to this table.
@endrst
@param self A pointer to a tsk_provenance_table_t object where rows are to be added.
@param other A pointer to a tsk_provenance_table_t object where rows are copied from.
@param num_rows The number of rows from ``other`` to append to this table.
@param row_indexes Array of row indexes in ``other``. If ``NULL`` is passed then the
first ``num_rows`` of ``other`` are used.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_extend(tsk_provenance_table_t *self,
const tsk_provenance_table_t *other, tsk_size_t num_rows,
const tsk_id_t *row_indexes, tsk_flags_t options);
/**
@brief Subset this table by keeping rows according to a boolean mask.
@rst
Deletes rows from this table and optionally return the mapping from IDs in
the current table to the updated table. Rows are kept or deleted according to
the specified boolean array ``keep`` such that for each row ``j`` if
``keep[j]`` is false (zero) the row is deleted, and otherwise the row is
retained. Thus, ``keep`` must be an array of at least ``num_rows``
:c:type:`tsk_bool_t` values.
If the ``id_map`` argument is non-null, this array will be updated to represent
the mapping between IDs before and after row deletion. For row ``j``,
``id_map[j]`` will contain the new ID for row ``j`` if it is retained, or
:c:macro:`TSK_NULL` if the row has been removed. Thus, ``id_map`` must be an
array of at least ``num_rows`` :c:type:`tsk_id_t` values.
.. warning::
C++ users need to be careful to specify the correct type when
passing in values for the ``keep`` array,
using ``std::vector<tsk_bool_t>`` and not ``std::vector<bool>``,
as the latter may not be the correct size.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param keep Array of boolean flags describing whether a particular
row should be kept or not. Must be at least ``num_rows`` long.
@param options Bitwise option flags. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@param id_map An array in which to store the mapping between new
and old IDs. If NULL, this will be ignored.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_keep_rows(tsk_provenance_table_t *self, const tsk_bool_t *keep,
tsk_flags_t options, tsk_id_t *id_map);
/**
@brief Returns true if the data in the specified table is identical to the data
in this table.
@rst
**Options**
Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) tables are
considered equal if they are byte-wise identical in all columns.
- :c:macro:`TSK_CMP_IGNORE_TIMESTAMPS`
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param other A pointer to a tsk_provenance_table_t object.
@param options Bitwise comparison options.
@return Return true if the specified table is equal to this table.
*/
bool tsk_provenance_table_equals(const tsk_provenance_table_t *self,
const tsk_provenance_table_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table into the specified destination.
@rst
By default the method initialises the specified destination table. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param dest A pointer to a tsk_provenance_table_t object. If the TSK_NO_INIT
option is specified, this must be an initialised provenance table. If not, it must be an
uninitialised provenance table.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_copy(const tsk_provenance_table_t *self,
tsk_provenance_table_t *dest, tsk_flags_t options);
/**
@brief Get the row at the specified index.
@rst
Updates the specified provenance struct to reflect the values in the specified row.
Pointers to memory within this struct are handled by the table and should **not**
be freed by client code. These pointers are guaranteed to be valid until the
next operation that modifies the table (e.g., by adding a new row), but not afterwards.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param index The requested table row.
@param row A pointer to a tsk_provenance_t struct that is updated to reflect the
values in the specified row.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_get_row(
const tsk_provenance_table_t *self, tsk_id_t index, tsk_provenance_t *row);
/**
@brief Print out the state of this table to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_provenance_table_t object.
@param out The stream to write the summary to.
*/
void tsk_provenance_table_print_state(const tsk_provenance_table_t *self, FILE *out);
/**
@brief Replace this table's data by copying from a set of column arrays
@rst
Clears the data columns of this table and then copies column data from the specified
set of arrays. The supplied arrays should all contain data on the same number of rows.
The metadata schema is not affected.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param timestamp The array of char timestamp values to be copied.
@param timestamp_offset The array of tsk_size_t timestamp offset values to be copied.
@param record The array of char record values to be copied.
@param record_offset The array of tsk_size_t record offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_set_columns(tsk_provenance_table_t *self, tsk_size_t num_rows,
const char *timestamp, const tsk_size_t *timestamp_offset, const char *record,
const tsk_size_t *record_offset);
/**
@brief Extends this table by copying from a set of column arrays
@rst
Copies column data from the specified set of arrays to create new rows at the end of the
table. The supplied arrays should all contain data on the same number of rows. The
metadata schema is not affected.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param num_rows The number of rows to copy from the specified arrays.
@param timestamp The array of char timestamp values to be copied.
@param timestamp_offset The array of tsk_size_t timestamp offset values to be copied.
@param record The array of char record values to be copied.
@param record_offset The array of tsk_size_t record offset values to be copied.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_append_columns(tsk_provenance_table_t *self,
tsk_size_t num_rows, const char *timestamp, const tsk_size_t *timestamp_offset,
const char *record, const tsk_size_t *record_offset);
/**
@brief Controls the pre-allocation strategy for this table
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param max_rows_increment The number of rows to pre-allocate, or zero for the default
doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_set_max_rows_increment(
tsk_provenance_table_t *self, tsk_size_t max_rows_increment);
/**
@brief Controls the pre-allocation strategy for the timestamp column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param max_timestamp_length_increment The number of bytes to pre-allocate, or zero for
the default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_set_max_timestamp_length_increment(
tsk_provenance_table_t *self, tsk_size_t max_timestamp_length_increment);
/**
@brief Controls the pre-allocation strategy for the record column
@rst
Set a fixed pre-allocation size, or use the default doubling strategy.
See :ref:`sec_c_api_memory_allocation_strategy` for details on the default
pre-allocation strategy.
@endrst
@param self A pointer to a tsk_provenance_table_t object.
@param max_record_length_increment The number of bytes to pre-allocate, or zero for the
default doubling strategy.
@return Return 0 on success or a negative value on failure.
*/
int tsk_provenance_table_set_max_record_length_increment(
tsk_provenance_table_t *self, tsk_size_t max_record_length_increment);
/** @} */
/* Undocumented methods */
/* NOTE(review): not part of the documented API. Judging by the name and
 * signature this presumably writes a text rendering of the provenance table
 * to the stream ``out`` and returns an int status code; the output format and
 * error values are not specified in this header -- confirm against the
 * definition before relying on them. The table is taken by const pointer. */
int tsk_provenance_table_dump_text(const tsk_provenance_table_t *self, FILE *out);
/****************************************************************************/
/* Table collection .*/
/****************************************************************************/
/**
@defgroup TABLE_COLLECTION_API_GROUP Table collection API.
@{
*/
/**
@brief Initialises the table collection by allocating the internal memory
and initialising all the constituent tables.
@rst
This must be called before any operations are performed on the table
collection. See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
**Options**
Options can be specified by providing bitwise flags:
- :c:macro:`TSK_TC_NO_EDGE_METADATA`
@endrst
@param self A pointer to an uninitialised tsk_table_collection_t object.
@param options Allocation time options as above.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_init(tsk_table_collection_t *self, tsk_flags_t options);
/**
@brief Free the internal memory for the specified table collection.
@param self A pointer to an initialised tsk_table_collection_t object.
@return Always returns 0.
*/
int tsk_table_collection_free(tsk_table_collection_t *self);
/**
@brief Clears data tables (and optionally provenances and metadata) in
this table collection.
@rst
By default this operation clears all tables except the provenance table, retaining
table metadata schemas and the tree-sequence level metadata and schema.
No memory is freed as a result of this operation; please use
:c:func:`tsk_table_collection_free` to free internal resources.
**Options**
Options can be specified by providing one or more of the following bitwise
flags:
- :c:macro:`TSK_CLEAR_PROVENANCE`
- :c:macro:`TSK_CLEAR_METADATA_SCHEMAS`
- :c:macro:`TSK_CLEAR_TS_METADATA_AND_SCHEMA`
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param options Bitwise clearing options.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_clear(tsk_table_collection_t *self, tsk_flags_t options);
/**
@brief Returns true if the data in the specified table collection is equal
to the data in this table collection.
@rst
Returns true if the two table collections are equal. The indexes are
not considered as these are derived from the tables. We also do not
consider the ``file_uuid``, since it is a property of the file that the set
of tables is stored in.
**Options**
Options to control the comparison can be specified by providing one or
more of the following bitwise flags. By default (options=0) two table
collections are considered equal if all of the tables are byte-wise
identical, and the sequence lengths, metadata and metadata schemas
of the two table collections are identical.
- :c:macro:`TSK_CMP_IGNORE_PROVENANCE`
- :c:macro:`TSK_CMP_IGNORE_METADATA`
- :c:macro:`TSK_CMP_IGNORE_TS_METADATA`
- :c:macro:`TSK_CMP_IGNORE_TIMESTAMPS`
- :c:macro:`TSK_CMP_IGNORE_TABLES`
- :c:macro:`TSK_CMP_IGNORE_REFERENCE_SEQUENCE`
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param other A pointer to a tsk_table_collection_t object.
@param options Bitwise comparison options.
@return Return true if the specified table collection is equal to this table collection.
*/
bool tsk_table_collection_equals(const tsk_table_collection_t *self,
const tsk_table_collection_t *other, tsk_flags_t options);
/**
@brief Copies the state of this table collection into the specified destination.
@rst
By default the method initialises the specified destination table collection. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
**Options**
Options can be specified by providing bitwise flags:
:c:macro:`TSK_COPY_FILE_UUID`
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param dest A pointer to a tsk_table_collection_t object. If the TSK_NO_INIT
option is specified, this must be an initialised table collection. If not, it must be an
uninitialised table collection.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_copy(const tsk_table_collection_t *self,
tsk_table_collection_t *dest, tsk_flags_t options);
/**
@brief Print out the state of this table collection to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_table_collection_t object.
@param out The stream to write the summary to.
*/
void tsk_table_collection_print_state(const tsk_table_collection_t *self, FILE *out);
/**
@brief Load a table collection from a file path.
@rst
Loads the data from the specified file into this table collection.
By default, the table collection is also initialised.
The resources allocated must be freed using
:c:func:`tsk_table_collection_free` even in error conditions.
If the :c:macro:`TSK_NO_INIT` option is set, the table collection is
not initialised, allowing an already initialised table collection to
be overwritten with the data from a file.
If the file contains multiple table collections, this function will load
the first. Please see :c:func:`tsk_table_collection_loadf` for details
on how to sequentially load table collections from a stream.
If the :c:macro:`TSK_LOAD_SKIP_TABLES` option is set, only the non-table information from
the table collection will be read, leaving all tables with zero rows and no
metadata or schema.
If the :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE` option is set, the table collection is
read without loading the reference sequence.
**Options**
Options can be specified by providing one or more of the following bitwise
flags:
- :c:macro:`TSK_NO_INIT`
- :c:macro:`TSK_LOAD_SKIP_TABLES`
- :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE`
**Examples**
.. code-block:: c
int ret;
tsk_table_collection_t tables;
ret = tsk_table_collection_load(&tables, "data.trees", 0);
if (ret != 0) {
fprintf(stderr, "Load error:%s\n", tsk_strerror(ret));
exit(EXIT_FAILURE);
}
@endrst
@param self A pointer to an uninitialised tsk_table_collection_t object
if the TSK_NO_INIT option is not set (default), or an initialised
tsk_table_collection_t otherwise.
@param filename A NULL terminated string containing the filename.
@param options Bitwise options. See above for details.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_load(
tsk_table_collection_t *self, const char *filename, tsk_flags_t options);
/**
@brief Load a table collection from a stream.
@rst
Loads a tables definition from the specified file stream to this table
collection. By default, the table collection is also initialised.
The resources allocated must be freed using
:c:func:`tsk_table_collection_free` even in error conditions.
If the :c:macro:`TSK_NO_INIT` option is set, the table collection is
not initialised, allowing an already initialised table collection to
be overwritten with the data from a file.
The stream can be an arbitrary file descriptor, for example a network socket.
No seek operations are performed.
If the stream contains multiple table collection definitions, this function
will load the next table collection from the stream. If the stream contains no
more table collection definitions the error value :c:macro:`TSK_ERR_EOF` will
be returned. Note that EOF is only returned in the case where zero bytes are
read from the stream --- malformed files or other errors will result in
different error conditions. Please see the
:ref:`sec_c_api_examples_file_streaming` section for an example of how to
sequentially load tree sequences from a stream.
Please note that this streaming behaviour is not supported if the
:c:macro:`TSK_LOAD_SKIP_TABLES` or :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE` option is
set. If the :c:macro:`TSK_LOAD_SKIP_TABLES` option is set, only the non-table information
from the table collection will be read, leaving all tables with zero rows and no metadata
or schema. If the :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE` option is set, the table
collection is read without loading the reference sequence. When attempting to read from a
stream with multiple table collection definitions and either of these two options set,
the requested information from the first table collection will be read on the first call
to :c:func:`tsk_table_collection_loadf`, with subsequent calls leading to errors.
**Options**
Options can be specified by providing one or more of the following bitwise
flags:
- :c:macro:`TSK_NO_INIT`
- :c:macro:`TSK_LOAD_SKIP_TABLES`
- :c:macro:`TSK_LOAD_SKIP_REFERENCE_SEQUENCE`
@endrst
@param self A pointer to an uninitialised tsk_table_collection_t object
if the TSK_NO_INIT option is not set (default), or an initialised
tsk_table_collection_t otherwise.
@param file A FILE stream opened in an appropriate mode for reading (e.g.
"r", "r+" or "w+") positioned at the beginning of a table collection
definition.
@param options Bitwise options. See above for details.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_loadf(
tsk_table_collection_t *self, FILE *file, tsk_flags_t options);
/**
@brief Write a table collection to file.
@rst
Writes the data from this table collection to the specified file.
If an error occurs the file path is deleted, ensuring that only complete
and well formed files will be written.
**Examples**
.. code-block:: c
int ret;
tsk_table_collection_t tables;
ret = tsk_table_collection_init(&tables, 0);
error_check(ret);
tables.sequence_length = 1.0;
// Write out the empty tree sequence
ret = tsk_table_collection_dump(&tables, "empty.trees", 0);
error_check(ret);
@endrst
@param self A pointer to an initialised tsk_table_collection_t object.
@param filename A NULL terminated string containing the filename.
@param options Bitwise options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_dump(
const tsk_table_collection_t *self, const char *filename, tsk_flags_t options);
/**
@brief Write a table collection to a stream.
@rst
Writes the data from this table collection to the specified FILE stream.
Semantics are identical to :c:func:`tsk_table_collection_dump`.
Please see the :ref:`sec_c_api_examples_file_streaming` section for an example
of how to sequentially dump and load tree sequences from a stream.
@endrst
@param self A pointer to an initialised tsk_table_collection_t object.
@param file A FILE stream opened in an appropriate mode for writing (e.g.
"w", "a", "r+" or "w+").
@param options Bitwise options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_dumpf(
const tsk_table_collection_t *self, FILE *file, tsk_flags_t options);
/**
@brief Record the number of rows in each table in the specified tsk_bookmark_t object.
@param self A pointer to an initialised tsk_table_collection_t object.
@param bookmark A pointer to a tsk_bookmark_t which is updated to contain the number of
rows in all tables.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_record_num_rows(
const tsk_table_collection_t *self, tsk_bookmark_t *bookmark);
/**
@brief Truncates the tables in this table collection according to the specified bookmark.
@rst
Truncate the tables in this collection so that each one has the number
of rows specified in the parameter :c:type:`tsk_bookmark_t`. Use the
:c:func:`tsk_table_collection_record_num_rows` function to record the
number of rows for each table in a table collection at a particular time.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param bookmark The number of rows to retain in each table.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_truncate(
tsk_table_collection_t *self, tsk_bookmark_t *bookmark);
/**
@brief Sorts the tables in this collection.
@rst
Some of the tables in a table collection must satisfy specific sortedness requirements
in order to define a :ref:`valid tree sequence <sec_valid_tree_sequence_requirements>`.
This method sorts the ``edge``, ``site``, ``mutation`` and ``individual`` tables such
that these requirements are guaranteed to be fulfilled. The ``node``, ``population``
and ``provenance`` tables do not have any sortedness requirements, and are therefore
ignored by this method.
.. note:: The current implementation **may** sort in such a way that exceeds
these requirements, but this behaviour should not be relied upon and later
versions may weaken the level of sortedness. However, the method does **guarantee**
that the resulting tables describe a valid tree sequence.
.. warning:: Sorting migrations is currently not supported and an error will be raised
if a table collection containing a non-empty migration table is specified.
The specified :c:type:`tsk_bookmark_t` allows us to specify a start position
for sorting in each of the tables; rows before this value are assumed to already be
in sorted order and this information is used to make sorting more efficient.
Positions in tables that are not sorted (``node``, ``population``
and ``provenance``) are ignored and can be set to arbitrary values.
.. warning:: The current implementation only supports specifying a start
position for the ``edge`` table and in a limited form for the
``site``, ``mutation`` and ``individual`` tables. Specifying a non-zero
``migration`` start position results in an error. The start positions for the
``site``, ``mutation`` and ``individual`` tables can either be 0 or the length of the
respective tables, allowing these tables to either be fully sorted, or not sorted at
all.
The table collection will always be unindexed after sort successfully completes.
For more control over the sorting process, see the :ref:`sec_c_api_low_level_sorting`
section.
**Options**
Options can be specified by providing one or more of the following bitwise
flags:
:c:macro:`TSK_NO_CHECK_INTEGRITY`
Do not run integrity checks using
:c:func:`tsk_table_collection_check_integrity` before sorting,
potentially leading to a small reduction in execution time. This
performance optimisation should not be used unless the calling code can
guarantee reference integrity within the table collection. References
to rows not in the table or bad offsets will result in undefined
behaviour.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param start The position to begin sorting in each table; all rows less than this
position must fulfill the tree sequence sortedness requirements. If this is
NULL, sort all rows.
@param options Sort options.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_sort(
tsk_table_collection_t *self, const tsk_bookmark_t *start, tsk_flags_t options);
/**
@brief Sorts the individual table in this collection.
@rst
Sorts the individual table in place, so that parents come before children,
and the parent column is remapped as required. Node references to individuals
are also updated.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param options Sort options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_individual_topological_sort(
tsk_table_collection_t *self, tsk_flags_t options);
/**
@brief Puts the tables into canonical form.
@rst
Put tables into canonical form such that randomly reshuffled tables
are guaranteed to always be sorted in the same order, and redundant
information is removed. The canonical sorting exceeds the usual
tree sequence sortedness requirements.
**Options**:
Options can be specified by providing one or more of the following bitwise
flags:
- :c:macro:`TSK_SUBSET_KEEP_UNREFERENCED`
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param options Bitwise option flags; see above for the available flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_canonicalise(tsk_table_collection_t *self, tsk_flags_t options);
/**
@brief Simplify the tables to remove redundant information.
@rst
Simplification transforms the tables to remove redundancy and canonicalise
tree sequence data. See the :ref:`simplification <sec_simplification>` tutorial for
more details.
A mapping from the node IDs in the table before simplification to their equivalent
values after simplification can be obtained via the ``node_map`` argument. If this
is non NULL, ``node_map[u]`` will contain the new ID for node ``u`` after simplification,
or :c:macro:`TSK_NULL` if the node has been removed. Thus, ``node_map`` must be an array
of at least ``self->nodes.num_rows`` :c:type:`tsk_id_t` values.
If the `TSK_SIMPLIFY_NO_FILTER_NODES` option is specified, the node table will be
unaltered except for changing the sample status of nodes (but see the
`TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS` option below) and to update references
to other tables that may have changed as a result of filtering (see below).
The ``node_map`` (if specified) will always be the identity mapping, such that
``node_map[u] == u`` for all nodes. Note also that the order of the list of
samples is not important in this case.
When a table is not filtered (i.e., if the `TSK_SIMPLIFY_NO_FILTER_NODES`
option is provided or the `TSK_SIMPLIFY_FILTER_SITES`,
`TSK_SIMPLIFY_FILTER_POPULATIONS` or `TSK_SIMPLIFY_FILTER_INDIVIDUALS`
options are *not* provided) the corresponding table is modified as
little as possible, and all pointers are guaranteed to remain valid
after simplification. The only changes made to an unfiltered table are
to update any references to tables that may have changed (for example,
remapping population IDs in the node table if
`TSK_SIMPLIFY_FILTER_POPULATIONS` was specified) or altering the
sample status flag of nodes.
.. note:: It is possible for populations and individuals to be filtered
even if `TSK_SIMPLIFY_NO_FILTER_NODES` is specified because there
may be entirely unreferenced entities in the input tables, which
are not affected by whether we filter nodes or not.
By default, the node sample flags are updated by unsetting the
:c:macro:`TSK_NODE_IS_SAMPLE` flag for all nodes and subsequently setting it
for the nodes provided as input to this function. The
`TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS` option will prevent this from occurring,
making it the responsibility of calling code to keep track of the ultimate
sample status of nodes. Using this option in conjunction with
`TSK_SIMPLIFY_NO_FILTER_NODES` (and without the
`TSK_SIMPLIFY_FILTER_POPULATIONS` and `TSK_SIMPLIFY_FILTER_INDIVIDUALS`
options) guarantees that the node table will not be written to during the
lifetime of this function.
The table collection will always be unindexed after simplify successfully completes.
.. note:: Migrations are currently not supported by simplify, and an error will
be raised if we attempt to call simplify on a table collection with greater
than zero migrations. See `<https://github.com/tskit-dev/tskit/issues/20>`_
**Options**:
Options can be specified by providing one or more of the following bitwise
flags:
- :c:macro:`TSK_SIMPLIFY_FILTER_SITES`
- :c:macro:`TSK_SIMPLIFY_FILTER_POPULATIONS`
- :c:macro:`TSK_SIMPLIFY_FILTER_INDIVIDUALS`
- :c:macro:`TSK_SIMPLIFY_NO_FILTER_NODES`
- :c:macro:`TSK_SIMPLIFY_NO_UPDATE_SAMPLE_FLAGS`
- :c:macro:`TSK_SIMPLIFY_REDUCE_TO_SITE_TOPOLOGY`
- :c:macro:`TSK_SIMPLIFY_KEEP_UNARY`
- :c:macro:`TSK_SIMPLIFY_KEEP_INPUT_ROOTS`
- :c:macro:`TSK_SIMPLIFY_KEEP_UNARY_IN_INDIVIDUALS`
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param samples Either NULL or an array of num_samples distinct and valid node IDs.
If non-null the nodes in this array will be marked as samples in the output.
If NULL, the num_samples parameter is ignored and the samples in the output
will be the same as the samples in the input. This is equivalent to populating
the samples array with all of the sample nodes in the input in increasing
order of ID.
@param num_samples The number of node IDs in the input samples array. Ignored
if the samples array is NULL.
@param options Simplify options; see above for the available bitwise flags.
For the default behaviour, a value of 0 should be provided.
@param node_map If not NULL, this array will be filled to define the mapping
between nodes IDs in the table collection before and after simplification.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_simplify(tsk_table_collection_t *self, const tsk_id_t *samples,
tsk_size_t num_samples, tsk_flags_t options, tsk_id_t *node_map);
/**
@brief Subsets and reorders a table collection according to an array of nodes.
@rst
Reduces the table collection to contain only the entries referring to
the provided list of nodes, with nodes reordered according to the order
they appear in the ``nodes`` argument. Specifically, this subsets and reorders
each of the tables as follows (but see options, below):
1. Nodes: if in the list of nodes, and in the order provided.
2. Individuals: if referred to by a retained node.
3. Populations: if referred to by a retained node, and in the order first seen
when traversing the list of retained nodes.
4. Edges: if both parent and child are retained nodes.
5. Mutations: if the mutation's node is a retained node.
6. Sites: if any mutations remain at the site after removing mutations.
Retained individuals, edges, mutations, and sites appear in the same
order as in the original tables. Note that only the information *directly*
associated with the provided nodes is retained - for instance,
subsetting to nodes=[A, B] does not retain nodes ancestral to A and B,
and only retains the individuals A and B are in, and not their parents.
This function does *not* require the tables to be sorted.
.. note:: Migrations are currently not supported by subset, and an error will
be raised if we attempt to call subset on a table collection with greater
than zero migrations.
**Options**:
Options can be specified by providing one or more of the following bitwise
flags:
- :c:macro:`TSK_SUBSET_NO_CHANGE_POPULATIONS`
- :c:macro:`TSK_SUBSET_KEEP_UNREFERENCED`
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param nodes An array of num_nodes valid node IDs.
@param num_nodes The number of node IDs in the input nodes array.
@param options Bitwise option flags.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_subset(tsk_table_collection_t *self, const tsk_id_t *nodes,
tsk_size_t num_nodes, tsk_flags_t options);
/**
@brief Forms the node-wise union of two table collections.
@rst
Expands this table collection by adding the non-shared portions of another table
collection to itself. The ``other_node_mapping`` encodes which nodes in ``other`` are
equivalent to a node in ``self``. The positions in the ``other_node_mapping`` array
correspond to node ids in ``other``, and the elements encode the equivalent
node id in ``self`` or :c:macro:`TSK_NULL` if the node is exclusive to ``other``. Nodes
that are exclusive to ``other`` are added to ``self``, along with:
1. Individuals which are new to ``self``.
2. Edges whose parent or child are new to ``self``.
3. Sites which were not present in ``self``.
4. Mutations whose nodes are new to ``self``.
By default, populations of newly added nodes are assumed to be new populations,
and added to the population table as well.
The behavior can be changed by the flags ``TSK_UNION_ALL_EDGES`` and
``TSK_UNION_ALL_MUTATIONS``, which will (respectively) add *all* edges
or *all* sites and mutations instead.
This operation will also sort the resulting tables, so the tables may change
even if nothing new is added, if the original tables were not sorted.
.. note:: Migrations are currently not supported by union, and an error will
be raised if we attempt to call union on a table collection with migrations.
**Options**:
Options can be specified by providing one or more of the following bitwise
flags:
- :c:macro:`TSK_UNION_NO_CHECK_SHARED`
- :c:macro:`TSK_UNION_NO_ADD_POP`
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param other A pointer to a tsk_table_collection_t object.
@param other_node_mapping An array of node IDs that relate nodes in other to nodes in
self: the k-th element of other_node_mapping should be the index of the equivalent
node in self, or TSK_NULL if the node is not present in self (in which case it
will be added to self).
@param options Union options; see above for the available bitwise flags.
For the default behaviour, a value of 0 should be provided.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_union(tsk_table_collection_t *self,
const tsk_table_collection_t *other, const tsk_id_t *other_node_mapping,
tsk_flags_t options);
/**
@brief Set the time_units
@rst
Copies the time_units string to this table collection, replacing any existing value.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param time_units A pointer to a char array.
@param time_units_length The size of the time units string in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_set_time_units(
tsk_table_collection_t *self, const char *time_units, tsk_size_t time_units_length);
/**
@brief Set the metadata
@rst
Copies the metadata string to this table collection, replacing any existing value.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param metadata A pointer to a char array.
@param metadata_length The size of the metadata in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_set_metadata(
tsk_table_collection_t *self, const char *metadata, tsk_size_t metadata_length);
/**
@brief Set the metadata schema
@rst
Copies the metadata schema string to this table collection, replacing any existing value.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param metadata_schema A pointer to a char array.
@param metadata_schema_length The size of the metadata schema in bytes.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_set_metadata_schema(tsk_table_collection_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
/**
@brief Returns true if this table collection is indexed.
@rst
This method returns true if the table collection has an index
for the edge table. It guarantees that the index exists, and that
it is for the same number of edges that are in the edge table. It
does *not* guarantee that the index is valid (i.e., if the rows
in the edge table have been permuted in some way since the index was built).
See the :ref:`sec_c_api_table_indexes` section for details on the index
life-cycle.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param options Bitwise options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return true if there is an index present for this table collection.
*/
bool tsk_table_collection_has_index(
const tsk_table_collection_t *self, tsk_flags_t options);
/**
@brief Deletes the indexes for this table collection.
@rst
Unconditionally drop the indexes that may be present for this table collection. It
is not an error to call this method on an unindexed table collection.
See the :ref:`sec_c_api_table_indexes` section for details on the index
life-cycle.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param options Bitwise options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Always returns 0.
*/
int tsk_table_collection_drop_index(tsk_table_collection_t *self, tsk_flags_t options);
/**
@brief Builds indexes for this table collection.
@rst
Builds the tree traversal :ref:`indexes <sec_table_indexes>` for this table
collection. Any existing index is first dropped using
:c:func:`tsk_table_collection_drop_index`. See the
:ref:`sec_c_api_table_indexes` section for details on the index life-cycle.
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param options Bitwise options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_collection_build_index(tsk_table_collection_t *self, tsk_flags_t options);
/**
@brief Runs integrity checks on this table collection.
@rst
Checks the integrity of this table collection. The default checks (i.e., with
options = 0) guarantee the integrity of memory and entity references within the
table collection. All positions along the genome are checked
to see if they are finite values and within the required bounds. Time values
are checked to see if they are finite or marked as unknown.
Consistency of the direction of inheritance is also checked: whether
parents are more recent than children, mutations are not more recent
than their nodes or their mutation parents, etcetera.
To check if a set of tables fulfills the :ref:`requirements
<sec_valid_tree_sequence_requirements>` needed for a valid tree sequence, use
the :c:macro:`TSK_CHECK_TREES` option. When this method is called with
:c:macro:`TSK_CHECK_TREES`, the number of trees in the tree sequence is returned. Thus,
to check for errors client code should verify that the return value is less than zero.
All other options will return zero on success and a negative value on failure.
More fine-grained checks can be achieved using bitwise combinations of the
other options.
**Options**:
Options can be specified by providing one or more of the following bitwise
flags:
- :c:macro:`TSK_CHECK_EDGE_ORDERING`
- :c:macro:`TSK_CHECK_SITE_ORDERING`
- :c:macro:`TSK_CHECK_SITE_DUPLICATES`
- :c:macro:`TSK_CHECK_MUTATION_ORDERING`
- :c:macro:`TSK_CHECK_INDIVIDUAL_ORDERING`
- :c:macro:`TSK_CHECK_MIGRATION_ORDERING`
- :c:macro:`TSK_CHECK_INDEXES`
- :c:macro:`TSK_CHECK_TREES`
- :c:macro:`TSK_NO_CHECK_POPULATION_REFS`
@endrst
@param self A pointer to a tsk_table_collection_t object.
@param options Bitwise options.
@return Return a negative error value if any problems are detected
in the tree sequence. If the TSK_CHECK_TREES option is provided,
the number of trees in the tree sequence will be returned, on
success.
*/
tsk_id_t tsk_table_collection_check_integrity(
const tsk_table_collection_t *self, tsk_flags_t options);
/** @} */
/* Undocumented methods */
/* Flags for ibd_segments */
/* Two independent single-bit option flags (bits 0 and 1), so they can be
 * OR-ed together. NOTE(review): presumably they select what the IBD result
 * object retains (per-pair data and/or the segments themselves) -- confirm
 * against the implementation. */
#define TSK_IBD_STORE_PAIRS (1 << 0)
#define TSK_IBD_STORE_SEGMENTS (1 << 1)
/* TODO be systematic about where "result" should be in the params
* list, different here and in link_ancestors. */
/* FIXME the order of num_samples and samples needs to be reversed in within.
* This should be done as part of documenting, I guess. */
int tsk_table_collection_ibd_within(const tsk_table_collection_t *self,
tsk_identity_segments_t *result, const tsk_id_t *samples, tsk_size_t num_samples,
double min_span, double max_time, tsk_flags_t options);
int tsk_table_collection_ibd_between(const tsk_table_collection_t *self,
tsk_identity_segments_t *result, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, double min_span,
double max_time, tsk_flags_t options);
int tsk_table_collection_link_ancestors(tsk_table_collection_t *self, tsk_id_t *samples,
tsk_size_t num_samples, tsk_id_t *ancestors, tsk_size_t num_ancestors,
tsk_flags_t options, tsk_edge_table_t *result);
int tsk_table_collection_deduplicate_sites(
tsk_table_collection_t *tables, tsk_flags_t options);
int tsk_table_collection_compute_mutation_parents(
tsk_table_collection_t *self, tsk_flags_t options);
int tsk_table_collection_compute_mutation_times(
tsk_table_collection_t *self, double *random, tsk_flags_t options);
int tsk_table_collection_delete_older(
tsk_table_collection_t *self, double time, tsk_flags_t options);
int tsk_table_collection_set_indexes(tsk_table_collection_t *self,
tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order);
int tsk_table_collection_takeset_metadata(
tsk_table_collection_t *self, char *metadata, tsk_size_t metadata_length);
int tsk_table_collection_takeset_indexes(tsk_table_collection_t *self,
tsk_id_t *edge_insertion_order, tsk_id_t *edge_removal_order);
int tsk_individual_table_takeset_columns(tsk_individual_table_t *self,
tsk_size_t num_rows, tsk_flags_t *flags, double *location,
tsk_size_t *location_offset, tsk_id_t *parents, tsk_size_t *parents_offset,
char *metadata, tsk_size_t *metadata_offset);
int tsk_node_table_takeset_columns(tsk_node_table_t *self, tsk_size_t num_rows,
tsk_flags_t *flags, double *time, tsk_id_t *population, tsk_id_t *individual,
char *metadata, tsk_size_t *metadata_offset);
int tsk_edge_table_takeset_columns(tsk_edge_table_t *self, tsk_size_t num_rows,
double *left, double *right, tsk_id_t *parent, tsk_id_t *child, char *metadata,
tsk_size_t *metadata_offset);
int tsk_migration_table_takeset_columns(tsk_migration_table_t *self, tsk_size_t num_rows,
double *left, double *right, tsk_id_t *node, tsk_id_t *source, tsk_id_t *dest,
double *time, char *metadata, tsk_size_t *metadata_offset);
int tsk_site_table_takeset_columns(tsk_site_table_t *self, tsk_size_t num_rows,
double *position, char *ancestral_state, tsk_size_t *ancestral_state_offset,
char *metadata, tsk_size_t *metadata_offset);
int tsk_mutation_table_takeset_columns(tsk_mutation_table_t *self, tsk_size_t num_rows,
tsk_id_t *site, tsk_id_t *node, tsk_id_t *parent, double *time, char *derived_state,
tsk_size_t *derived_state_offset, char *metadata, tsk_size_t *metadata_offset);
int tsk_population_table_takeset_columns(tsk_population_table_t *self,
tsk_size_t num_rows, char *metadata, tsk_size_t *metadata_offset);
int tsk_provenance_table_takeset_columns(tsk_provenance_table_t *self,
tsk_size_t num_rows, char *timestamp, tsk_size_t *timestamp_offset, char *record,
tsk_size_t *record_offset);
bool tsk_table_collection_has_reference_sequence(const tsk_table_collection_t *self);
int tsk_reference_sequence_init(tsk_reference_sequence_t *self, tsk_flags_t options);
int tsk_reference_sequence_free(tsk_reference_sequence_t *self);
bool tsk_reference_sequence_is_null(const tsk_reference_sequence_t *self);
bool tsk_reference_sequence_equals(const tsk_reference_sequence_t *self,
const tsk_reference_sequence_t *other, tsk_flags_t options);
int tsk_reference_sequence_copy(const tsk_reference_sequence_t *self,
tsk_reference_sequence_t *dest, tsk_flags_t options);
int tsk_reference_sequence_set_data(
tsk_reference_sequence_t *self, const char *data, tsk_size_t data_length);
int tsk_reference_sequence_set_url(
tsk_reference_sequence_t *self, const char *url, tsk_size_t url_length);
int tsk_reference_sequence_set_metadata(
tsk_reference_sequence_t *self, const char *metadata, tsk_size_t metadata_length);
int tsk_reference_sequence_set_metadata_schema(tsk_reference_sequence_t *self,
const char *metadata_schema, tsk_size_t metadata_schema_length);
int tsk_reference_sequence_takeset_data(
tsk_reference_sequence_t *self, char *data, tsk_size_t data_length);
int tsk_reference_sequence_takeset_metadata(
tsk_reference_sequence_t *self, char *metadata, tsk_size_t metadata_length);
/**
@defgroup TABLE_SORTER_API_GROUP Low-level table sorter API.
@{
*/
/* NOTE: We use the "struct _tsk_table_sorter_t" form here
 * rather than the usual tsk_table_sorter_t alias because
* of problems with Doxygen. This was the only way I could
* get it to work - ideally, we'd use the usual typedefs
* to avoid confusing people.
*/
/**
@brief Initialises the memory for the sorter object.
@rst
This must be called before any operations are performed on the
table sorter and initialises all fields. The ``edge_sort`` function
is set to the default method using qsort. The ``user_data``
field is set to NULL.
This method supports the same options as
:c:func:`tsk_table_collection_sort`.
@endrst
@param self A pointer to an uninitialised tsk_table_sorter_t object.
@param tables The table collection to sort.
@param options Sorting options.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_sorter_init(struct _tsk_table_sorter_t *self,
tsk_table_collection_t *tables, tsk_flags_t options);
/**
@brief Runs the sort using the configured functions.
@rst
Runs the sorting process:
1. Drop the table indexes.
2. If the ``sort_edges`` function pointer is not NULL, run it. The
first parameter to the called function will be a pointer to this
table_sorter_t object. The second parameter will be the value
``start.edges``. This specifies the offset at which sorting should
start in the edge table. This offset is guaranteed to be within the
bounds of the edge table.
3. Sort the site table, building the mapping between site IDs in the
current and sorted tables.
4. Sort the mutation table, using the ``sort_mutations`` pointer.
If an error occurs during the execution of a user-supplied
sorting function a non-zero value must be returned. This value
will then be returned by ``tsk_table_sorter_run``. The error
return value should be chosen to avoid conflicts with tskit error
codes.
See :c:func:`tsk_table_collection_sort` for details on the ``start`` parameter.
@endrst
@param self A pointer to a tsk_table_sorter_t object.
@param start The position in the tables at which sorting starts.
@return Return 0 on success or a negative value on failure.
*/
int tsk_table_sorter_run(struct _tsk_table_sorter_t *self, const tsk_bookmark_t *start);
/**
@brief Free the internal memory for the specified table sorter.
@param self A pointer to an initialised tsk_table_sorter_t object.
@return Always returns 0.
*/
int tsk_table_sorter_free(struct _tsk_table_sorter_t *self);
/** @} */
int tsk_squash_edges(
tsk_edge_t *edges, tsk_size_t num_edges, tsk_size_t *num_output_edges);
/* IBD segments API. This is experimental and the interface may change. */
tsk_size_t tsk_identity_segments_get_num_segments(const tsk_identity_segments_t *self);
double tsk_identity_segments_get_total_span(const tsk_identity_segments_t *self);
tsk_size_t tsk_identity_segments_get_num_pairs(const tsk_identity_segments_t *self);
int tsk_identity_segments_get_keys(
const tsk_identity_segments_t *result, tsk_id_t *pairs);
int tsk_identity_segments_get_items(const tsk_identity_segments_t *self, tsk_id_t *pairs,
tsk_identity_segment_list_t **lists);
int tsk_identity_segments_get(const tsk_identity_segments_t *self, tsk_id_t a,
tsk_id_t b, tsk_identity_segment_list_t **ret_list);
void tsk_identity_segments_print_state(tsk_identity_segments_t *self, FILE *out);
int tsk_identity_segments_free(tsk_identity_segments_t *self);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: c/tskit/trees.c
================================================
/*
* MIT License
*
* Copyright (c) 2019-2025 Tskit Developers
* Copyright (c) 2015-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
#include
#include
#include
#include
#include
#include
#include
#include
static inline bool
is_discrete(double x)
{
return trunc(x) == x;
}
/* ======================================================== *
* tree sequence
* ======================================================== */
/* Debug consistency check. Walks the sites of every tree in order and asserts
 * that site IDs are consecutive starting from zero, and that every mutation
 * attached to a site refers back to that site. */
static void
tsk_treeseq_check_state(const tsk_treeseq_t *self)
{
    tsk_size_t tree, s, m;
    tsk_id_t expected_site_id = 0;
    const tsk_site_t *site;

    for (tree = 0; tree < self->num_trees; tree++) {
        for (s = 0; s < self->tree_sites_length[tree]; s++) {
            site = &self->tree_sites[tree][s];
            tsk_bug_assert(site->id == expected_site_id);
            expected_site_id++;
            for (m = 0; m < site->mutations_length; m++) {
                tsk_bug_assert(site->mutations[m].site == site->id);
            }
        }
    }
}
void
tsk_treeseq_print_state(const tsk_treeseq_t *self, FILE *out)
{
tsk_size_t j;
tsk_size_t k, l, m;
tsk_site_t site;
fprintf(out, "tree_sequence state\n");
fprintf(out, "num_trees = %lld\n", (long long) self->num_trees);
fprintf(out, "samples = (%lld)\n", (long long) self->num_samples);
for (j = 0; j < self->num_samples; j++) {
fprintf(out, "\t%lld\n", (long long) self->samples[j]);
}
tsk_table_collection_print_state(self->tables, out);
fprintf(out, "tree_sites = \n");
for (j = 0; j < self->num_trees; j++) {
fprintf(out, "tree %lld\t%lld sites\n", (long long) j,
(long long) self->tree_sites_length[j]);
for (k = 0; k < self->tree_sites_length[j]; k++) {
site = self->tree_sites[j][k];
fprintf(out, "\tsite %lld pos = %f ancestral state = ", (long long) site.id,
site.position);
for (l = 0; l < site.ancestral_state_length; l++) {
fprintf(out, "%c", site.ancestral_state[l]);
}
fprintf(out, " %lld mutations\n", (long long) site.mutations_length);
for (l = 0; l < site.mutations_length; l++) {
fprintf(out, "\t\tmutation %lld node = %lld derived_state = ",
(long long) site.mutations[l].id,
(long long) site.mutations[l].node);
for (m = 0; m < site.mutations[l].derived_state_length; m++) {
fprintf(out, "%c", site.mutations[l].derived_state[m]);
}
fprintf(out, "\n");
}
}
}
tsk_treeseq_check_state(self);
}
/* Releases everything held by the tree sequence: the table collection (both its
 * contents and the struct itself) and all the arrays built during
 * initialisation. Always returns 0. */
int
tsk_treeseq_free(tsk_treeseq_t *self)
{
    /* The table collection: free its contents first, then the struct */
    if (self->tables != NULL) {
        tsk_table_collection_free(self->tables);
    }
    tsk_safe_free(self->tables);

    /* Sample bookkeeping */
    tsk_safe_free(self->samples);
    tsk_safe_free(self->sample_index_map);

    /* Per-tree arrays */
    tsk_safe_free(self->breakpoints);
    tsk_safe_free(self->tree_sites);
    tsk_safe_free(self->tree_sites_length);
    tsk_safe_free(self->tree_sites_mem);

    /* Per-site mutation arrays */
    tsk_safe_free(self->site_mutations_mem);
    tsk_safe_free(self->site_mutations_length);
    tsk_safe_free(self->site_mutations);

    /* Per-individual node arrays */
    tsk_safe_free(self->individual_nodes_mem);
    tsk_safe_free(self->individual_nodes_length);
    tsk_safe_free(self->individual_nodes);

    return 0;
}
/* Builds the site and mutation caches: one tsk_mutation_t per mutation row, a
 * per-site view (pointer and count) into that mutation array, and one tsk_site_t
 * per site row. Also clears ``discrete_genome`` if any site position is not an
 * integer. Returns 0 on success or a negative error code. */
static int
tsk_treeseq_init_sites(tsk_treeseq_t *self)
{
    int ret = 0;
    tsk_id_t site_id, mutation_id;
    tsk_size_t next_free = 0;
    bool all_positions_discrete = true;
    const tsk_size_t num_sites = self->tables->sites.num_rows;
    const tsk_size_t num_mutations = self->tables->mutations.num_rows;
    const double *restrict site_position = self->tables->sites.position;
    const tsk_id_t *restrict mutation_site = self->tables->mutations.site;

    self->site_mutations_mem
        = tsk_malloc(num_mutations * sizeof(*self->site_mutations_mem));
    self->site_mutations_length
        = tsk_malloc(num_sites * sizeof(*self->site_mutations_length));
    self->site_mutations = tsk_malloc(num_sites * sizeof(*self->site_mutations));
    self->tree_sites_mem = tsk_malloc(num_sites * sizeof(*self->tree_sites_mem));
    if (self->tree_sites_mem == NULL || self->site_mutations == NULL
        || self->site_mutations_length == NULL || self->site_mutations_mem == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Materialise every mutation row first; the sites below point into this array */
    for (mutation_id = 0; mutation_id < (tsk_id_t) num_mutations; mutation_id++) {
        ret = tsk_treeseq_get_mutation(
            self, mutation_id, &self->site_mutations_mem[mutation_id]);
        if (ret != 0) {
            goto out;
        }
    }

    mutation_id = 0;
    for (site_id = 0; site_id < (tsk_id_t) num_sites; site_id++) {
        if (!is_discrete(site_position[site_id])) {
            all_positions_discrete = false;
        }
        self->site_mutations[site_id] = &self->site_mutations_mem[next_free];
        self->site_mutations_length[site_id] = 0;
        /* Consume the run of consecutive mutations that belong to this site */
        for (; mutation_id < (tsk_id_t) num_mutations
               && mutation_site[mutation_id] == site_id;
             mutation_id++) {
            self->site_mutations_length[site_id]++;
            next_free++;
        }
        /* Must follow the bookkeeping above for this site */
        ret = tsk_treeseq_get_site(self, site_id, &self->tree_sites_mem[site_id]);
        if (ret != 0) {
            goto out;
        }
    }
    if (!all_positions_discrete) {
        self->discrete_genome = false;
    }
out:
    return ret;
}
/* Builds the per-individual node lists.
 *
 * For every individual this records how many nodes refer to it
 * (``individual_nodes_length``) and a pointer (``individual_nodes``) into one
 * shared buffer (``individual_nodes_mem``) holding the IDs of those nodes in
 * increasing node ID order. All allocations hold at least one element so that
 * empty tables still produce non-NULL arrays.
 *
 * Node ``individual`` references are used as array indexes without a bounds
 * check here; NOTE(review): this presumably relies on the integrity check run
 * by the caller before this function - confirm.
 *
 * Returns 0 on success or a TSK_ERR_NO_MEMORY error if an allocation fails.
 */
static int
tsk_treeseq_init_individuals(tsk_treeseq_t *self)
{
    int ret = 0;
    tsk_id_t node;
    tsk_id_t ind;
    tsk_size_t offset = 0;
    tsk_size_t total_node_refs = 0;
    tsk_size_t *node_count = NULL;
    tsk_id_t *node_array;
    const tsk_size_t num_inds = self->tables->individuals.num_rows;
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const tsk_id_t *restrict node_individual = self->tables->nodes.individual;

    /* First find number of nodes per individual */
    self->individual_nodes_length
        = tsk_calloc(TSK_MAX(1, num_inds), sizeof(*self->individual_nodes_length));
    node_count = tsk_calloc(TSK_MAX(1, num_inds), sizeof(*node_count));
    if (self->individual_nodes_length == NULL || node_count == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    for (node = 0; node < (tsk_id_t) num_nodes; node++) {
        ind = node_individual[node];
        if (ind != TSK_NULL) {
            self->individual_nodes_length[ind]++;
            total_node_refs++;
        }
    }

    /* Size each buffer from the pointer's own element type: the shared buffer
     * stores node IDs, not whole node structs. */
    self->individual_nodes_mem = tsk_malloc(
        TSK_MAX(1, total_node_refs) * sizeof(*self->individual_nodes_mem));
    self->individual_nodes
        = tsk_malloc(TSK_MAX(1, num_inds) * sizeof(*self->individual_nodes));
    if (self->individual_nodes_mem == NULL || self->individual_nodes == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Carve the shared buffer into one contiguous slice per individual */
    for (ind = 0; ind < (tsk_id_t) num_inds; ind++) {
        self->individual_nodes[ind] = self->individual_nodes_mem + offset;
        offset += self->individual_nodes_length[ind];
    }
    /* Now fill in the node IDs; node_count[ind] is the next free slot within
     * the slice belonging to individual ind. */
    for (node = 0; node < (tsk_id_t) num_nodes; node++) {
        ind = node_individual[node];
        if (ind != TSK_NULL) {
            node_array = self->individual_nodes[ind];
            tsk_bug_assert(node_array - self->individual_nodes_mem
                           < (tsk_id_t) (total_node_refs - node_count[ind]));
            node_array[node_count[ind]] = node;
            node_count[ind] += 1;
        }
    }
out:
    tsk_safe_free(node_count);
    return ret;
}
/* Initialises memory associated with the trees.
 *
 * Sweeps the edges from left to right (using the edge insertion and removal
 * indexes) to locate the tree boundaries and fills in the per-tree arrays
 * ``breakpoints``, ``tree_sites`` and ``tree_sites_length``. Along the way it
 * sets, for every mutation in ``site_mutations_mem``, the edge above the
 * mutation's node in the tree that contains its site, and the state the
 * mutation inherits. Clears ``discrete_genome`` if any breakpoint is not an
 * integer.
 *
 * Reads ``self->num_trees``, ``self->tree_sites_mem`` and
 * ``self->site_mutations_mem``, so these must be populated before the call.
 * Returns 0 on success or a TSK_ERR_NO_MEMORY error if an allocation fails.
 */
static int
tsk_treeseq_init_trees(tsk_treeseq_t *self)
{
int ret = TSK_ERR_GENERIC;
tsk_size_t j, k, tree_index;
tsk_id_t site_id, edge_id, mutation_id;
double tree_left, tree_right;
const double sequence_length = self->tables->sequence_length;
const tsk_id_t num_sites = (tsk_id_t) self->tables->sites.num_rows;
const tsk_id_t num_mutations = (tsk_id_t) self->tables->mutations.num_rows;
const tsk_size_t num_edges = self->tables->edges.num_rows;
const tsk_size_t num_nodes = self->tables->nodes.num_rows;
const double *restrict site_position = self->tables->sites.position;
const tsk_id_t *restrict mutation_site = self->tables->mutations.site;
const tsk_id_t *restrict mutation_parent = self->tables->mutations.parent;
const char *restrict sites_ancestral_state = self->tables->sites.ancestral_state;
const tsk_size_t *restrict sites_ancestral_state_offset
= self->tables->sites.ancestral_state_offset;
const char *restrict mutations_derived_state = self->tables->mutations.derived_state;
const tsk_size_t *restrict mutations_derived_state_offset
= self->tables->mutations.derived_state_offset;
/* I and O are the edge IDs in insertion and removal order respectively */
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
const double *restrict edge_right = self->tables->edges.right;
const double *restrict edge_left = self->tables->edges.left;
const tsk_id_t *restrict edge_child = self->tables->edges.child;
/* One extra slot: breakpoints also stores the right end of the last tree */
tsk_size_t num_trees_alloc = self->num_trees + 1;
bool discrete_breakpoints = true;
/* For each node, the ID of the edge in the current tree that has the node as
 * its child, or TSK_NULL if there is none */
tsk_id_t *node_edge_map = tsk_malloc(num_nodes * sizeof(*node_edge_map));
tsk_mutation_t *mutation;
tsk_id_t parent_id;
self->tree_sites_length
= tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites_length));
self->tree_sites = tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites));
self->breakpoints = tsk_malloc(num_trees_alloc * sizeof(*self->breakpoints));
if (node_edge_map == NULL || self->tree_sites == NULL
|| self->tree_sites_length == NULL || self->breakpoints == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
tsk_memset(
self->tree_sites_length, 0, self->num_trees * sizeof(*self->tree_sites_length));
tsk_memset(self->tree_sites, 0, self->num_trees * sizeof(*self->tree_sites));
/* Bytewise fill; NOTE(review): assumes TSK_NULL is -1 so that every entry
 * ends up equal to TSK_NULL - confirm */
tsk_memset(node_edge_map, TSK_NULL, num_nodes * sizeof(*node_edge_map));
tree_left = 0;
tree_right = sequence_length;
tree_index = 0;
site_id = 0;
mutation_id = 0;
/* j indexes the insertion order, k the removal order */
j = 0;
k = 0;
while (j < num_edges || tree_left < sequence_length) {
discrete_breakpoints = discrete_breakpoints && is_discrete(tree_left);
self->breakpoints[tree_index] = tree_left;
/* Remove the edges that end at the left boundary of this tree ... */
while (k < num_edges && edge_right[O[k]] == tree_left) {
edge_id = O[k];
node_edge_map[edge_child[edge_id]] = TSK_NULL;
k++;
}
/* ... and insert the ones that start there */
while (j < num_edges && edge_left[I[j]] == tree_left) {
edge_id = I[j];
node_edge_map[edge_child[edge_id]] = edge_id;
j++;
}
/* The tree ends at the nearest upcoming edge start or end, or at the end
 * of the sequence */
tree_right = sequence_length;
if (j < num_edges) {
tree_right = TSK_MIN(tree_right, edge_left[I[j]]);
}
if (k < num_edges) {
tree_right = TSK_MIN(tree_right, edge_right[O[k]]);
}
/* Every not-yet-assigned site left of tree_right belongs to this tree */
self->tree_sites[tree_index] = self->tree_sites_mem + site_id;
while (site_id < num_sites && site_position[site_id] < tree_right) {
self->tree_sites_length[tree_index]++;
while (
mutation_id < num_mutations && mutation_site[mutation_id] == site_id) {
mutation = self->site_mutations_mem + mutation_id;
/* The edge above the mutation's node in the current tree */
mutation->edge = node_edge_map[mutation->node];
/* Compute inherited state */
if (mutation_parent[mutation_id] == TSK_NULL) {
/* No parent: inherited state is the site's ancestral state */
mutation->inherited_state
= sites_ancestral_state + sites_ancestral_state_offset[site_id];
mutation->inherited_state_length
= sites_ancestral_state_offset[site_id + 1]
- sites_ancestral_state_offset[site_id];
} else {
/* Has parent: inherited state is parent's derived state */
parent_id = mutation_parent[mutation_id];
mutation->inherited_state
= mutations_derived_state
+ mutations_derived_state_offset[parent_id];
mutation->inherited_state_length
= mutations_derived_state_offset[parent_id + 1]
- mutations_derived_state_offset[parent_id];
}
mutation_id++;
}
site_id++;
}
tree_left = tree_right;
tree_index++;
}
/* Every site must have been assigned, and the sweep must have produced
 * exactly the number of trees that was computed beforehand */
tsk_bug_assert(site_id == num_sites);
tsk_bug_assert(tree_index == self->num_trees);
/* Final breakpoint: the right end of the last tree */
self->breakpoints[tree_index] = tree_right;
discrete_breakpoints = discrete_breakpoints && is_discrete(tree_right);
self->discrete_genome = self->discrete_genome && discrete_breakpoints;
ret = 0;
out:
tsk_safe_free(node_edge_map);
return ret;
}
/* Scans the migration table and clears ``discrete_genome`` if any migration
 * coordinate is not an integer, and ``discrete_time`` if any migration time is
 * neither an integer nor the unknown-time marker. */
static void
tsk_treeseq_init_migrations(tsk_treeseq_t *self)
{
    tsk_size_t row;
    bool coords_discrete = true;
    bool times_discrete = true;
    const tsk_size_t num_rows = self->tables->migrations.num_rows;
    const double *restrict left = self->tables->migrations.left;
    const double *restrict right = self->tables->migrations.right;
    const double *restrict time = self->tables->migrations.time;

    for (row = 0; row < num_rows; row++) {
        if (!is_discrete(left[row]) || !is_discrete(right[row])) {
            coords_discrete = false;
        }
        if (!is_discrete(time[row]) && !tsk_is_unknown_time(time[row])) {
            times_discrete = false;
        }
    }
    if (!coords_discrete) {
        self->discrete_genome = false;
    }
    if (!times_discrete) {
        self->discrete_time = false;
    }
}
/* Scans the mutation times. Mutations whose time is marked unknown are ignored;
 * every other time widens the [min_time, max_time] range of the tree sequence
 * and clears ``discrete_time`` if it is not an integer. */
static void
tsk_treeseq_init_mutations(tsk_treeseq_t *self)
{
    tsk_size_t row;
    bool times_discrete = true;
    const tsk_size_t num_rows = self->tables->mutations.num_rows;
    const double *restrict time = self->tables->mutations.time;

    for (row = 0; row < num_rows; row++) {
        if (tsk_is_unknown_time(time[row])) {
            continue;
        }
        if (!is_discrete(time[row])) {
            times_discrete = false;
        }
        self->min_time = TSK_MIN(self->min_time, time[row]);
        self->max_time = TSK_MAX(self->max_time, time[row]);
    }
    if (!times_discrete) {
        self->discrete_time = false;
    }
}
/* Builds the sample bookkeeping from the node table: counts the nodes flagged
 * TSK_NODE_IS_SAMPLE, lists their IDs in ``samples`` (in increasing node ID
 * order) and fills ``sample_index_map`` with each sample's index into that list,
 * or -1 for non-sample nodes. Also folds the node times into ``discrete_time``,
 * ``min_time`` and ``max_time``. Returns 0 on success or a TSK_ERR_NO_MEMORY
 * error if an allocation fails. */
static int
tsk_treeseq_init_nodes(tsk_treeseq_t *self)
{
    int ret = 0;
    tsk_size_t u, sample_index;
    bool times_discrete = true;
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const tsk_flags_t *restrict node_flags = self->tables->nodes.flags;
    const double *restrict node_time = self->tables->nodes.time;

    /* Determine the sample size */
    self->num_samples = 0;
    for (u = 0; u < num_nodes; u++) {
        if ((node_flags[u] & TSK_NODE_IS_SAMPLE) != 0) {
            self->num_samples++;
        }
    }

    /* TODO raise an error if < 2 samples?? */
    self->samples = tsk_malloc(self->num_samples * sizeof(tsk_id_t));
    self->sample_index_map = tsk_malloc(num_nodes * sizeof(tsk_id_t));
    if (self->sample_index_map == NULL || self->samples == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    sample_index = 0;
    for (u = 0; u < num_nodes; u++) {
        if ((node_flags[u] & TSK_NODE_IS_SAMPLE) != 0) {
            self->samples[sample_index] = (tsk_id_t) u;
            self->sample_index_map[u] = (tsk_id_t) sample_index;
            sample_index++;
        } else {
            self->sample_index_map[u] = -1;
        }
    }
    tsk_bug_assert(sample_index == self->num_samples);

    /* Times marked as unknown take no part in the time statistics */
    for (u = 0; u < num_nodes; u++) {
        if (tsk_is_unknown_time(node_time[u])) {
            continue;
        }
        if (!is_discrete(node_time[u])) {
            times_discrete = false;
        }
        self->min_time = TSK_MIN(self->min_time, node_time[u]);
        self->max_time = TSK_MAX(self->max_time, node_time[u]);
    }
    if (!times_discrete) {
        self->discrete_time = false;
    }
out:
    return ret;
}
/* Initialises a tree sequence from a table collection.
 *
 * With TSK_TAKE_OWNERSHIP the tree sequence adopts ``tables`` directly (this is
 * refused for collections whose edge table has no metadata); otherwise it works
 * on a private copy. TSK_TS_INIT_BUILD_INDEXES builds the table indexes first,
 * and TSK_TS_INIT_COMPUTE_MUTATION_PARENTS recomputes mutation parents instead
 * of validating the existing ones. The tables are integrity-checked before any
 * derived state is built.
 *
 * Returns 0 on success or a negative error code. ``self`` is zeroed on entry and
 * ``self->tables`` is set as soon as ownership is established, so the caller can
 * clean up with tsk_treeseq_free after a failure.
 */
int TSK_WARN_UNUSED
tsk_treeseq_init(
    tsk_treeseq_t *self, tsk_table_collection_t *tables, tsk_flags_t options)
{
    int ret = 0;
    tsk_id_t num_trees;
    tsk_flags_t check_options = TSK_CHECK_TREES;
    const bool compute_parents
        = (options & TSK_TS_INIT_COMPUTE_MUTATION_PARENTS) != 0;
    const size_t uncalibrated_length = strlen(TSK_TIME_UNITS_UNCALIBRATED);

    tsk_memset(self, 0, sizeof(*self));

    if (!(options & TSK_TAKE_OWNERSHIP)) {
        self->tables = tsk_malloc(sizeof(*self->tables));
        if (self->tables == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        /* Note that this copy reinstates metadata for a table collection with
         * TSK_TC_NO_EDGE_METADATA. Otherwise a table without metadata would
         * crash tsk_diff_iter_next. */
        ret = tsk_table_collection_copy(tables, self->tables, TSK_COPY_FILE_UUID);
        if (ret != 0) {
            goto out;
        }
    } else {
        /* Adopt the pointer before validating, so ownership is taken even when
         * we go on to fail */
        self->tables = tables;
        if (tables->edges.options & TSK_TABLE_NO_METADATA) {
            ret = tsk_trace_error(TSK_ERR_CANT_TAKE_OWNERSHIP_NO_EDGE_METADATA);
            goto out;
        }
    }

    if (options & TSK_TS_INIT_BUILD_INDEXES) {
        ret = tsk_table_collection_build_index(self->tables, 0);
        if (ret != 0) {
            goto out;
        }
    }

    /* Existing mutation parents only need validating when we are not about to
     * recompute them */
    if (!compute_parents) {
        check_options |= TSK_CHECK_MUTATION_PARENTS;
    }
    num_trees = tsk_table_collection_check_integrity(self->tables, check_options);
    if (num_trees < 0) {
        ret = (int) num_trees;
        goto out;
    }
    if (compute_parents) {
        /* As tsk_table_collection_compute_mutation_parents performs an
           integrity check, and we don't wish to do that twice, the check above
           stands in for it */
        ret = tsk_table_collection_compute_mutation_parents(
            self->tables, TSK_NO_CHECK_INTEGRITY);
        if (ret != 0) {
            goto out;
        }
    }

    self->num_trees = (tsk_size_t) num_trees;
    self->discrete_genome = true;
    self->discrete_time = true;
    self->min_time = INFINITY;
    self->max_time = -INFINITY;

    ret = tsk_treeseq_init_nodes(self);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_treeseq_init_sites(self);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_treeseq_init_individuals(self);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_treeseq_init_trees(self);
    if (ret != 0) {
        goto out;
    }
    tsk_treeseq_init_migrations(self);
    tsk_treeseq_init_mutations(self);

    /* The time units string is length-delimited, so compare lengths first */
    if (tsk_treeseq_get_time_units_length(self) == uncalibrated_length
        && strncmp(tsk_treeseq_get_time_units(self), TSK_TIME_UNITS_UNCALIBRATED,
               uncalibrated_length)
               == 0) {
        self->time_uncalibrated = true;
    }
out:
    return ret;
}
/* Copies the table collection underlying this tree sequence into ``tables``.
 * ``options`` is passed unchanged to tsk_table_collection_copy, whose return
 * value is returned. */
int TSK_WARN_UNUSED
tsk_treeseq_copy_tables(
const tsk_treeseq_t *self, tsk_table_collection_t *tables, tsk_flags_t options)
{
return tsk_table_collection_copy(self->tables, tables, options);
}
/* Loads a tree sequence from the file at ``filename``.
 *
 * The tables are read into a freshly allocated collection (``options`` is passed
 * on to tsk_table_collection_load) which the tree sequence then takes ownership
 * of. Returns 0 on success or a negative error code; ``self`` is zeroed up front
 * so it is in a well-defined state whichever way the load fails. */
int TSK_WARN_UNUSED
tsk_treeseq_load(tsk_treeseq_t *self, const char *filename, tsk_flags_t options)
{
    int ret;
    tsk_table_collection_t *tables;

    /* Need to make sure that we're zero'd out in case of error */
    tsk_memset(self, 0, sizeof(*self));

    tables = malloc(sizeof(*tables));
    if (tables == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_table_collection_load(tables, filename, options);
    if (ret != 0) {
        /* Still ours at this point, so release it here */
        tsk_table_collection_free(tables);
        tsk_safe_free(tables);
        goto out;
    }
    /* TSK_TAKE_OWNERSHIP takes immediate ownership of the tables, regardless
     * of error conditions. */
    ret = tsk_treeseq_init(self, tables, TSK_TAKE_OWNERSHIP);
out:
    return ret;
}
/* Loads a tree sequence from the open stream ``file``.
 *
 * The tables are read into a freshly allocated collection (``options`` is passed
 * on to tsk_table_collection_loadf) which the tree sequence then takes ownership
 * of. Returns 0 on success or a negative error code; ``self`` is zeroed up front
 * so it is in a well-defined state whichever way the load fails. */
int TSK_WARN_UNUSED
tsk_treeseq_loadf(tsk_treeseq_t *self, FILE *file, tsk_flags_t options)
{
    int ret;
    tsk_table_collection_t *tables;

    /* Need to make sure that we're zero'd out in case of error */
    tsk_memset(self, 0, sizeof(*self));

    tables = malloc(sizeof(*tables));
    if (tables == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_table_collection_loadf(tables, file, options);
    if (ret != 0) {
        /* Still ours at this point, so release it here */
        tsk_table_collection_free(tables);
        tsk_safe_free(tables);
        goto out;
    }
    /* TSK_TAKE_OWNERSHIP takes immediate ownership of the tables, regardless
     * of error conditions. */
    ret = tsk_treeseq_init(self, tables, TSK_TAKE_OWNERSHIP);
out:
    return ret;
}
/* Writes the underlying table collection to the file at ``filename``.
 * ``options`` is passed unchanged to tsk_table_collection_dump, whose return
 * value is returned. */
int TSK_WARN_UNUSED
tsk_treeseq_dump(const tsk_treeseq_t *self, const char *filename, tsk_flags_t options)
{
return tsk_table_collection_dump(self->tables, filename, options);
}
/* Writes the underlying table collection to the open stream ``file``.
 * ``options`` is passed unchanged to tsk_table_collection_dumpf, whose return
 * value is returned. */
int TSK_WARN_UNUSED
tsk_treeseq_dumpf(const tsk_treeseq_t *self, FILE *file, tsk_flags_t options)
{
return tsk_table_collection_dumpf(self->tables, file, options);
}
/* Simple attribute getters */

/* Returns the top-level metadata stored in the underlying table collection.
 * The pointer refers to memory owned by the tables; its length is given by
 * tsk_treeseq_get_metadata_length. */
const char *
tsk_treeseq_get_metadata(const tsk_treeseq_t *self)
{
return self->tables->metadata;
}
/* Returns the length of the top-level metadata. */
tsk_size_t
tsk_treeseq_get_metadata_length(const tsk_treeseq_t *self)
{
return self->tables->metadata_length;
}
/* Returns the top-level metadata schema stored in the underlying table
 * collection; its length is given by tsk_treeseq_get_metadata_schema_length. */
const char *
tsk_treeseq_get_metadata_schema(const tsk_treeseq_t *self)
{
return self->tables->metadata_schema;
}
/* Returns the length of the top-level metadata schema. */
tsk_size_t
tsk_treeseq_get_metadata_schema_length(const tsk_treeseq_t *self)
{
return self->tables->metadata_schema_length;
}
/* Returns the time units string stored in the underlying table collection.
 * Use it together with tsk_treeseq_get_time_units_length, as the code in this
 * file does when comparing it. */
const char *
tsk_treeseq_get_time_units(const tsk_treeseq_t *self)
{
return self->tables->time_units;
}
/* Returns the length of the time units string. */
tsk_size_t
tsk_treeseq_get_time_units_length(const tsk_treeseq_t *self)
{
return self->tables->time_units_length;
}
/* Returns the sequence length recorded in the underlying table collection. */
double
tsk_treeseq_get_sequence_length(const tsk_treeseq_t *self)
{
return self->tables->sequence_length;
}
/* Returns the file UUID recorded in the underlying table collection.
 * NOTE(review): presumably NULL when the tables did not come from a file -
 * confirm against the table collection code. */
const char *
tsk_treeseq_get_file_uuid(const tsk_treeseq_t *self)
{
return self->tables->file_uuid;
}
/* Returns the number of nodes flagged as samples, as counted when the tree
 * sequence was initialised. */
tsk_size_t
tsk_treeseq_get_num_samples(const tsk_treeseq_t *self)
{
return self->num_samples;
}
/* Returns the number of rows in the node table. */
tsk_size_t
tsk_treeseq_get_num_nodes(const tsk_treeseq_t *self)
{
return self->tables->nodes.num_rows;
}
/* Returns the number of rows in the edge table. */
tsk_size_t
tsk_treeseq_get_num_edges(const tsk_treeseq_t *self)
{
return self->tables->edges.num_rows;
}
/* Returns the number of rows in the migration table. */
tsk_size_t
tsk_treeseq_get_num_migrations(const tsk_treeseq_t *self)
{
return self->tables->migrations.num_rows;
}
/* Returns the number of rows in the site table. */
tsk_size_t
tsk_treeseq_get_num_sites(const tsk_treeseq_t *self)
{
return self->tables->sites.num_rows;
}
/* Returns the number of rows in the mutation table. */
tsk_size_t
tsk_treeseq_get_num_mutations(const tsk_treeseq_t *self)
{
return self->tables->mutations.num_rows;
}
/* Returns the number of rows in the population table. */
tsk_size_t
tsk_treeseq_get_num_populations(const tsk_treeseq_t *self)
{
return self->tables->populations.num_rows;
}
/* Returns the number of rows in the individual table. */
tsk_size_t
tsk_treeseq_get_num_individuals(const tsk_treeseq_t *self)
{
return self->tables->individuals.num_rows;
}
/* Returns the number of rows in the provenance table. */
tsk_size_t
tsk_treeseq_get_num_provenances(const tsk_treeseq_t *self)
{
return self->tables->provenances.num_rows;
}
/* Returns the number of trees, as reported by the integrity check run when the
 * tree sequence was initialised. */
tsk_size_t
tsk_treeseq_get_num_trees(const tsk_treeseq_t *self)
{
return self->num_trees;
}
/* Returns the array of tree breakpoints, owned by the tree sequence. It holds
 * num_trees + 1 values: the left coordinate of each tree followed by the right
 * coordinate of the last one. */
const double *
tsk_treeseq_get_breakpoints(const tsk_treeseq_t *self)
{
return self->breakpoints;
}
/* Returns the array of sample node IDs, owned by the tree sequence. It holds
 * num_samples values in increasing node ID order. */
const tsk_id_t *
tsk_treeseq_get_samples(const tsk_treeseq_t *self)
{
return self->samples;
}
/* Returns the node-to-sample-index map, owned by the tree sequence. It has one
 * entry per node: the node's index in the samples array, or -1 if the node is
 * not a sample. */
const tsk_id_t *
tsk_treeseq_get_sample_index_map(const tsk_treeseq_t *self)
{
return self->sample_index_map;
}
/* Returns true if ``u`` is a valid node ID whose flags include
 * TSK_NODE_IS_SAMPLE. Out-of-range IDs (including negative ones) yield false
 * rather than an error. */
bool
tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u)
{
    const tsk_id_t num_nodes = (tsk_id_t) self->tables->nodes.num_rows;

    if (u < 0 || u >= num_nodes) {
        return false;
    }
    return (self->tables->nodes.flags[u] & TSK_NODE_IS_SAMPLE) != 0;
}
/* Returns true if every coordinate examined at initialisation (site positions,
 * tree breakpoints and migration left/right values) was an integer. */
bool
tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self)
{
return self->discrete_genome;
}
/* Returns true if every node, mutation and migration time examined at
 * initialisation was either an integer or marked as unknown. */
bool
tsk_treeseq_get_discrete_time(const tsk_treeseq_t *self)
{
return self->discrete_time;
}
/* Returns the smallest node or mutation time that is not marked as unknown.
 * The value is INFINITY when there is no such time. */
double
tsk_treeseq_get_min_time(const tsk_treeseq_t *self)
{
return self->min_time;
}
/* Returns the largest node or mutation time that is not marked as unknown.
 * The value is -INFINITY when there is no such time. */
double
tsk_treeseq_get_max_time(const tsk_treeseq_t *self)
{
return self->max_time;
}
/* Reports whether the underlying table collection has a reference sequence,
 * as determined by tsk_table_collection_has_reference_sequence. */
bool
tsk_treeseq_has_reference_sequence(const tsk_treeseq_t *self)
{
return tsk_table_collection_has_reference_sequence(self->tables);
}
/* Computes the population of every individual from the populations of its nodes.
 *
 * ``output`` must have room for one tsk_id_t per individual. Individuals with no
 * nodes are left as TSK_NULL; otherwise the entry is the population shared by
 * all of the individual's nodes. Returns 0 on success, or a
 * TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH error as soon as an individual is found
 * whose nodes disagree (``output`` is then only partially filled in). */
int
tsk_treeseq_get_individuals_population(const tsk_treeseq_t *self, tsk_id_t *output)
{
    int ret = 0;
    tsk_size_t row, n;
    tsk_individual_t individual;
    tsk_id_t common_pop, node_pop;
    /* Marks "no node seen yet" */
    const tsk_id_t unset = -2;
    const tsk_size_t num_individuals = self->tables->individuals.num_rows;
    const tsk_id_t *node_population = self->tables->nodes.population;

    tsk_memset(output, TSK_NULL, num_individuals * sizeof(*output));

    for (row = 0; row < num_individuals; row++) {
        ret = tsk_treeseq_get_individual(self, (tsk_id_t) row, &individual);
        tsk_bug_assert(ret == 0);
        if (individual.nodes_length == 0) {
            /* No nodes: keep the TSK_NULL written above */
            continue;
        }
        common_pop = unset;
        for (n = 0; n < individual.nodes_length; n++) {
            node_pop = node_population[individual.nodes[n]];
            if (common_pop == unset) {
                common_pop = node_pop;
            } else if (common_pop != node_pop) {
                ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_POPULATION_MISMATCH);
                goto out;
            }
        }
        output[individual.id] = common_pop;
    }
out:
    return ret;
}
int
tsk_treeseq_get_individuals_time(const tsk_treeseq_t *self, double *output)
{
int ret = 0;
tsk_size_t i, j;
tsk_individual_t ind;
double ind_time;
const double *node_time = self->tables->nodes.time;
const tsk_size_t num_individuals = self->tables->individuals.num_rows;
for (i = 0; i < num_individuals; i++) {
ret = tsk_treeseq_get_individual(self, (tsk_id_t) i, &ind);
tsk_bug_assert(ret == 0);
/* the default is UNKNOWN_TIME, but nodes cannot have
* UNKNOWN _TIME so this is safe. */
ind_time = TSK_UNKNOWN_TIME;
for (j = 0; j < ind.nodes_length; j++) {
if (j == 0) {
ind_time = node_time[ind.nodes[j]];
} else if (ind_time != node_time[ind.nodes[j]]) {
ret = tsk_trace_error(TSK_ERR_INDIVIDUAL_TIME_MISMATCH);
goto out;
}
}
output[ind.id] = ind_time;
}
out:
return ret;
}
/* Stats functions */
/* Evaluates to a pointer to the first element of row ``row`` in a flattened,
 * row-major 2D array whose rows each hold ``row_len`` elements. Every argument
 * is parenthesised so that any expression (e.g. a conditional) can be passed
 * safely; both indexes are converted to size_t before being multiplied. */
#define GET_2D_ROW(array, row_len, row)                                                 \
    ((array) + (((size_t) (row_len)) * (size_t) (row)))
/* Returns a pointer to the row for node ``u`` in window ``window_index`` of a
 * flattened array laid out as [window][node][output_dim]. */
static inline double *
GET_3D_ROW(double *base, tsk_size_t num_nodes, tsk_size_t output_dim,
    tsk_size_t window_index, tsk_id_t u)
{
    const tsk_size_t window_stride = num_nodes * output_dim;
    const tsk_size_t node_offset = ((tsk_size_t) u) * output_dim;

    return &base[window_index * window_stride + node_offset];
}
/* Adds ``value`` to one element of an n-dimensional array stored flat in
 * row-major order (last dimension varies fastest). ``shape`` and ``coordinate``
 * both have ``n`` entries; every coordinate is asserted to lie within the
 * corresponding dimension. */
static inline void
increment_nd_array_value(double *array, tsk_size_t n, const tsk_size_t *shape,
    const tsk_size_t *coordinate, double value)
{
    tsk_size_t flat_index = 0;
    tsk_size_t stride = 1;
    int axis;

    /* Walk from the last axis to the first, growing the stride as we go */
    axis = (int) n - 1;
    while (axis >= 0) {
        tsk_bug_assert(coordinate[axis] < shape[axis]);
        flat_index += coordinate[axis] * stride;
        stride *= shape[axis];
        axis--;
    }
    array[flat_index] += value;
}
/* Computes, for each focal node, the span-weighted proportions of its nearest
 * genealogical neighbours that belong to each reference set.
 *
 * For every tree and every focal node, the code walks up from the focal node
 * to the first ancestor that has more reference-set descendants than the focal
 * node itself contributes (1 if the focal node is in a reference set, else 0).
 * The counts of that ancestor's reference descendants per set, excluding the
 * focal node, are divided by their total and weighted by the tree's span.
 * After all trees are processed each row is divided by the total span over
 * which such an ancestor existed for that focal node.
 *
 * focal / num_focal: the focal node IDs.
 * reference_sets / reference_set_size / num_reference_sets: the reference
 *   sets, one array of node IDs per set; a node may appear at most once across
 *   all sets.
 * options: unused.
 * ret_array: output, row-major with num_focal rows of num_reference_sets
 *   columns; zeroed here before accumulation.
 *
 * Returns 0 on success, or TSK_ERR_BAD_PARAM_VALUE (bad number of reference
 * sets), TSK_ERR_NO_MEMORY, TSK_ERR_NODE_OUT_OF_BOUNDS (bad focal or
 * reference node) or TSK_ERR_DUPLICATE_SAMPLE (node repeated in the
 * reference sets).
 */
/* TODO flatten the reference sets input here and follow the same pattern used
 * in diversity, divergence, etc. */
int TSK_WARN_UNUSED
tsk_treeseq_genealogical_nearest_neighbours(const tsk_treeseq_t *self,
const tsk_id_t *focal, tsk_size_t num_focal, const tsk_id_t *const *reference_sets,
const tsk_size_t *reference_set_size, tsk_size_t num_reference_sets,
tsk_flags_t TSK_UNUSED(options), double *ret_array)
{
int ret = 0;
tsk_id_t u, v, p;
tsk_size_t j;
/* TODO It's probably not worth bothering with the int16_t here. */
int16_t k, focal_reference_set;
/* We use the K'th element of the array for the total. */
const int16_t K = (int16_t) (num_reference_sets + 1);
tsk_size_t num_nodes = self->tables->nodes.num_rows;
const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
const double *restrict edge_left = self->tables->edges.left;
const double *restrict edge_right = self->tables->edges.right;
const tsk_id_t *restrict edge_parent = self->tables->edges.parent;
const tsk_id_t *restrict edge_child = self->tables->edges.child;
const double sequence_length = self->tables->sequence_length;
tsk_id_t tj, tk, h;
double left, right, *A_row, scale, tree_length;
/* parent[u]: parent of u in the current tree (TSK_NULL if none). */
tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));
/* length[j]: total span over which focal[j] had a usable ancestor. */
double *restrict length = tsk_calloc(num_focal, sizeof(*length));
/* ref_count: num_nodes rows of K columns. Column k < K - 1 counts the
 * reference nodes of set k at or below the node in the current tree;
 * column K - 1 holds the total over all sets. */
uint32_t *restrict ref_count
= tsk_calloc(((tsk_size_t) K) * num_nodes, sizeof(*ref_count));
/* reference_set_map[u]: index of the reference set containing u, or -1. */
int16_t *restrict reference_set_map
= tsk_malloc(num_nodes * sizeof(*reference_set_map));
uint32_t *restrict row = NULL;
uint32_t *restrict child_row = NULL;
uint32_t total, delta;
/* Set indexes and K are held in int16_t, so at most INT16_MAX - 1
 * reference sets are supported (and at least one is required). */
if (num_reference_sets == 0 || num_reference_sets > (INT16_MAX - 1)) {
/* TODO: more specific error */
ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
goto out;
}
if (parent == NULL || ref_count == NULL || reference_set_map == NULL
|| length == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Filling with 0xff bytes sets every entry to -1 (TSK_NULL). */
tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));
tsk_memset(reference_set_map, 0xff, num_nodes * sizeof(*reference_set_map));
tsk_memset(ret_array, 0, num_focal * num_reference_sets * sizeof(*ret_array));
total = 0; /* keep the compiler happy */
/* Set the initial conditions and check the input. */
for (k = 0; k < (int16_t) num_reference_sets; k++) {
for (j = 0; j < reference_set_size[k]; j++) {
u = reference_sets[k][j];
if (u < 0 || u >= (tsk_id_t) num_nodes) {
ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
goto out;
}
if (reference_set_map[u] != TSK_NULL) {
/* FIXME Technically inaccurate here: duplicate focal not sample */
ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
goto out;
}
reference_set_map[u] = k;
row = GET_2D_ROW(ref_count, K, u);
row[k] = 1;
/* Also set the count for the total among all sets */
row[K - 1] = 1;
}
}
/* Focal nodes only need to be valid node IDs; they may or may not belong
 * to a reference set. */
for (j = 0; j < num_focal; j++) {
u = focal[j];
if (u < 0 || u >= (tsk_id_t) num_nodes) {
ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
goto out;
}
}
/* Iterate over the trees */
tj = 0;
tk = 0;
left = 0;
while (tj < num_edges || left < sequence_length) {
/* Remove edges ending at `left`: detach the child and subtract its
 * counts from every remaining ancestor. */
while (tk < num_edges && edge_right[O[tk]] == left) {
h = O[tk];
tk++;
u = edge_child[h];
v = edge_parent[h];
parent[u] = TSK_NULL;
child_row = GET_2D_ROW(ref_count, K, u);
while (v != TSK_NULL) {
row = GET_2D_ROW(ref_count, K, v);
for (k = 0; k < K; k++) {
row[k] -= child_row[k];
}
v = parent[v];
}
}
/* Insert edges starting at `left`: attach the child and add its counts
 * to every ancestor. */
while (tj < num_edges && edge_left[I[tj]] == left) {
h = I[tj];
tj++;
u = edge_child[h];
v = edge_parent[h];
parent[u] = v;
child_row = GET_2D_ROW(ref_count, K, u);
while (v != TSK_NULL) {
row = GET_2D_ROW(ref_count, K, v);
for (k = 0; k < K; k++) {
row[k] += child_row[k];
}
v = parent[v];
}
}
/* The current tree extends to the next edge start or end, or to the end
 * of the sequence. */
right = sequence_length;
if (tj < num_edges) {
right = TSK_MIN(right, edge_left[I[tj]]);
}
if (tk < num_edges) {
right = TSK_MIN(right, edge_right[O[tk]]);
}
tree_length = right - left;
/* Process this tree */
for (j = 0; j < num_focal; j++) {
u = focal[j];
focal_reference_set = reference_set_map[u];
/* delta is the focal node's own contribution to the totals. */
delta = focal_reference_set != -1;
p = u;
/* Climb until an ancestor has a reference descendant other than u. */
while (p != TSK_NULL) {
row = GET_2D_ROW(ref_count, K, p);
total = row[K - 1];
if (total > delta) {
break;
}
p = parent[p];
}
if (p != TSK_NULL) {
length[j] += tree_length;
scale = tree_length / (total - delta);
A_row = GET_2D_ROW(ret_array, num_reference_sets, j);
for (k = 0; k < K - 1; k++) {
A_row[k] += row[k] * scale;
}
if (focal_reference_set != -1) {
/* Remove the contribution for the reference set u belongs to and
 * insert the correct value. The long-hand version is
 * A_row[k] = A_row[k] - row[k] * scale + (row[k] - 1) * scale;
 * which cancels to give: */
A_row[focal_reference_set] -= scale;
}
}
}
/* Move on to the next tree */
left = right;
}
/* Divide by the accumulated length for each node to normalise */
for (j = 0; j < num_focal; j++) {
A_row = GET_2D_ROW(ret_array, num_reference_sets, j);
if (length[j] > 0) {
for (k = 0; k < K - 1; k++) {
A_row[k] /= length[j];
}
}
}
out:
/* These are freed with plain free() rather than the safe-free macro because
 * the pointers are restrict-qualified (the original note refers to the macro
 * by its old name, msp_safe_free). */
if (parent != NULL) {
free(parent);
}
if (ref_count != NULL) {
free(ref_count);
}
if (reference_set_map != NULL) {
free(reference_set_map);
}
if (length != NULL) {
free(length);
}
return ret;
}
/* Computes, for every node and every reference set, the span-weighted mean
 * number of nodes from that set found at or below the node.
 *
 * The average for a node is taken only over the genomic span in which the
 * node has at least one reference node (from any set) at or below it; nodes
 * that never do are left with all-zero rows.
 *
 * reference_sets / reference_set_size / num_reference_sets: the reference
 *   sets, one array of node IDs per set.
 * options: unused.
 * ret_array: output, row-major with one row per node (node table size) and
 *   num_reference_sets columns; zeroed here before accumulation.
 *
 * Returns 0 on success, or TSK_ERR_BAD_PARAM_VALUE (bad number of reference
 * sets), TSK_ERR_NO_MEMORY or TSK_ERR_NODE_OUT_OF_BOUNDS.
 */
int TSK_WARN_UNUSED
tsk_treeseq_mean_descendants(const tsk_treeseq_t *self,
const tsk_id_t *const *reference_sets, const tsk_size_t *reference_set_size,
tsk_size_t num_reference_sets, tsk_flags_t TSK_UNUSED(options), double *ret_array)
{
int ret = 0;
tsk_id_t u, v;
tsk_size_t j;
int32_t k;
/* We use the K'th element of the array for the total. */
const int32_t K = (int32_t) (num_reference_sets + 1);
tsk_size_t num_nodes = self->tables->nodes.num_rows;
const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
const double *restrict edge_left = self->tables->edges.left;
const double *restrict edge_right = self->tables->edges.right;
const tsk_id_t *restrict edge_parent = self->tables->edges.parent;
const tsk_id_t *restrict edge_child = self->tables->edges.child;
const double sequence_length = self->tables->sequence_length;
tsk_id_t tj, tk, h;
double left, right, length, *restrict C_row;
/* parent[u]: parent of u in the current tree (TSK_NULL if none). */
tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));
/* ref_count: num_nodes rows of K columns; column k < K - 1 counts the
 * reference nodes of set k at or below the node, column K - 1 is the total. */
uint32_t *restrict ref_count
= tsk_calloc(num_nodes * ((size_t) K), sizeof(*ref_count));
/* last_update[v]: position up to which v's counts have been accumulated
 * into ret_array. */
double *restrict last_update = tsk_calloc(num_nodes, sizeof(*last_update));
/* total_length[v]: span accumulated so far while v had a non-zero total. */
double *restrict total_length = tsk_calloc(num_nodes, sizeof(*total_length));
uint32_t *restrict row, *restrict child_row;
/* K and the set index are int32_t, hence the upper bound. */
if (num_reference_sets == 0 || num_reference_sets > (INT32_MAX - 1)) {
/* TODO: more specific error */
ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
goto out;
}
if (parent == NULL || ref_count == NULL || last_update == NULL
|| total_length == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* TODO add check for duplicate values in the reference sets */
/* Filling with 0xff bytes sets every parent to -1 (TSK_NULL). */
tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));
tsk_memset(ret_array, 0, num_nodes * num_reference_sets * sizeof(*ret_array));
/* Set the initial conditions and check the input. */
for (k = 0; k < (int32_t) num_reference_sets; k++) {
for (j = 0; j < reference_set_size[k]; j++) {
u = reference_sets[k][j];
if (u < 0 || u >= (tsk_id_t) num_nodes) {
ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
goto out;
}
row = GET_2D_ROW(ref_count, K, u);
row[k] = 1;
/* Also set the count for the total among all sets */
row[K - 1] = 1;
}
}
/* Iterate over the trees */
tj = 0;
tk = 0;
left = 0;
while (tj < num_edges || left < sequence_length) {
/* Remove edges ending at `left`. Before an ancestor's counts change,
 * flush the contribution of its current counts over the interval
 * [last_update[v], left) into the result. */
while (tk < num_edges && edge_right[O[tk]] == left) {
h = O[tk];
tk++;
u = edge_child[h];
v = edge_parent[h];
parent[u] = TSK_NULL;
child_row = GET_2D_ROW(ref_count, K, u);
while (v != TSK_NULL) {
row = GET_2D_ROW(ref_count, K, v);
if (last_update[v] != left) {
if (row[K - 1] > 0) {
length = left - last_update[v];
C_row = GET_2D_ROW(ret_array, num_reference_sets, v);
for (k = 0; k < (int32_t) num_reference_sets; k++) {
C_row[k] += length * row[k];
}
total_length[v] += length;
}
last_update[v] = left;
}
for (k = 0; k < K; k++) {
row[k] -= child_row[k];
}
v = parent[v];
}
}
/* Insert edges starting at `left`, flushing each ancestor in the same
 * way before adding the child's counts to it. */
while (tj < num_edges && edge_left[I[tj]] == left) {
h = I[tj];
tj++;
u = edge_child[h];
v = edge_parent[h];
parent[u] = v;
child_row = GET_2D_ROW(ref_count, K, u);
while (v != TSK_NULL) {
row = GET_2D_ROW(ref_count, K, v);
if (last_update[v] != left) {
if (row[K - 1] > 0) {
length = left - last_update[v];
C_row = GET_2D_ROW(ret_array, num_reference_sets, v);
for (k = 0; k < (int32_t) num_reference_sets; k++) {
C_row[k] += length * row[k];
}
total_length[v] += length;
}
last_update[v] = left;
}
for (k = 0; k < K; k++) {
row[k] += child_row[k];
}
v = parent[v];
}
}
/* Advance to the next position at which an edge starts or ends. */
right = sequence_length;
if (tj < num_edges) {
right = TSK_MIN(right, edge_left[I[tj]]);
}
if (tk < num_edges) {
right = TSK_MIN(right, edge_right[O[tk]]);
}
left = right;
}
/* Add the stats for the last tree and divide by the total length that
 * each node was an ancestor to > 0 of the reference nodes. */
for (v = 0; v < (tsk_id_t) num_nodes; v++) {
row = GET_2D_ROW(ref_count, K, v);
C_row = GET_2D_ROW(ret_array, num_reference_sets, v);
if (row[K - 1] > 0) {
length = sequence_length - last_update[v];
total_length[v] += length;
for (k = 0; k < (int32_t) num_reference_sets; k++) {
C_row[k] += length * row[k];
}
}
if (total_length[v] > 0) {
length = total_length[v];
for (k = 0; k < (int32_t) num_reference_sets; k++) {
C_row[k] /= length;
}
}
}
out:
/* These are freed with plain free() rather than the safe-free macro because
 * the pointers are restrict-qualified (the original note refers to the macro
 * by its old name, msp_safe_free). */
if (parent != NULL) {
free(parent);
}
if (ref_count != NULL) {
free(ref_count);
}
if (last_update != NULL) {
free(last_update);
}
if (total_length != NULL) {
free(total_length);
}
return ret;
}
/***********************************
* General stats framework
***********************************/
/* Option bit for tsk_treeseq_check_windows: when set, the windows must start
 * at exactly 0 and end at exactly the sequence length. */
#define TSK_REQUIRE_FULL_SPAN 1
/* Validates the breakpoints of a set of genomic windows.
 *
 * `windows` holds num_windows + 1 breakpoints. There must be at least one
 * window and the breakpoints must be strictly increasing. With
 * TSK_REQUIRE_FULL_SPAN set in `options` the first breakpoint must be 0 and
 * the last must equal the sequence length; otherwise they only have to lie
 * within [0, sequence_length].
 *
 * Returns 0 if the windows are valid, TSK_ERR_BAD_NUM_WINDOWS if there are
 * no windows and TSK_ERR_BAD_WINDOWS for any other violation. */
static int
tsk_treeseq_check_windows(const tsk_treeseq_t *self, tsk_size_t num_windows,
    const double *windows, tsk_flags_t options)
{
    int ret = 0;
    int bad_bounds;
    tsk_size_t w;

    if (num_windows < 1) {
        ret = tsk_trace_error(TSK_ERR_BAD_NUM_WINDOWS);
        goto out;
    }
    if (options & TSK_REQUIRE_FULL_SPAN) {
        /* TODO the general stat code currently requires that we include the
         * entire tree sequence span. This should be relaxed, so hopefully
         * this branch (and the option) can be removed at some point */
        bad_bounds = windows[0] != 0
                     || windows[num_windows] != self->tables->sequence_length;
    } else {
        bad_bounds = windows[0] < 0
                     || windows[num_windows] > self->tables->sequence_length;
    }
    if (bad_bounds) {
        ret = tsk_trace_error(TSK_ERR_BAD_WINDOWS);
        goto out;
    }
    /* Each breakpoint must be strictly less than its successor. */
    for (w = 1; w <= num_windows; w++) {
        if (windows[w - 1] >= windows[w]) {
            ret = tsk_trace_error(TSK_ERR_BAD_WINDOWS);
            goto out;
        }
    }
out:
    return ret;
}
/* Validates the breakpoints of a set of time windows.
 *
 * `windows` holds num_windows + 1 breakpoints. There must be at least one
 * window, the first breakpoint must be 0 and the breakpoints must be
 * strictly increasing.
 *
 * This does not check the last window ends at infinity, which is required
 * for some time window functions.
 *
 * Returns 0 if the windows are valid, TSK_ERR_BAD_TIME_WINDOWS_DIM if there
 * are no windows and TSK_ERR_BAD_TIME_WINDOWS otherwise. Errors are routed
 * through tsk_trace_error, as in tsk_treeseq_check_windows, so that error
 * tracing also covers this function. */
static int
tsk_treeseq_check_time_windows(tsk_size_t num_windows, const double *windows)
{
    int ret = 0;
    tsk_size_t j;

    if (num_windows < 1) {
        ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS_DIM);
        goto out;
    }
    if (windows[0] != 0.0) {
        ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS);
        goto out;
    }
    for (j = 0; j < num_windows; j++) {
        if (windows[j] >= windows[j + 1]) {
            ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS);
            goto out;
        }
    }
out:
    return ret;
}
/* TODO the helpers in this group take their arguments in differing orders;
 * make them more consistent. */
/* Adds sign * (row `source` of X) to row `dest` of X, where X is a flattened
 * array with state_dim values per node. */
static inline void
update_state(double *X, tsk_size_t state_dim, tsk_id_t dest, tsk_id_t source, int sign)
{
    const double *from = GET_2D_ROW(X, state_dim, source);
    double *to = GET_2D_ROW(X, state_dim, dest);
    tsk_size_t col = 0;

    while (col < state_dim) {
        to[col] += sign * from[col];
        col++;
    }
}
/* Recomputes the summary row of node u by applying the summary function f
 * to the node's state row. X has state_dim values per node and node_summary
 * has result_dim values per node. Returns whatever f returns. */
static inline int
update_node_summary(tsk_id_t u, tsk_size_t result_dim, double *node_summary, double *X,
    tsk_size_t state_dim, general_stat_func_t *f, void *f_params)
{
    return f(state_dim, GET_2D_ROW(X, state_dim, u), result_dim,
        GET_2D_ROW(node_summary, result_dim, u), f_params);
}
/* Adds sign * branch_length[u] * summary[u][m] to running_sum[m] for each of
 * the result_dim components m. `summary` has result_dim values per node. */
static inline void
update_running_sum(tsk_id_t u, double sign, const double *restrict branch_length,
    const double *summary, tsk_size_t result_dim, double *running_sum)
{
    const double weight = sign * branch_length[u];
    const double *node_row = GET_2D_ROW(summary, result_dim, u);
    tsk_size_t col = 0;

    while (col < result_dim) {
        running_sum[col] += weight * node_row[col];
        col++;
    }
}
/* Computes a branch-mode general statistic.
 *
 * Each node carries a state vector (state_dim values) equal to the sum of the
 * weights of the samples at or below it in the current tree, and a summary
 * vector (result_dim values) obtained by applying f to that state. For each
 * window, the result accumulates
 *     sum over nodes u of branch_length[u] * summary[u]
 * multiplied by the length of the overlap between the tree and the window.
 * No span normalisation is done here.
 *
 * sample_weights: num_samples rows of state_dim values.
 * windows: num_windows + 1 breakpoints; the loop below relies on them
 *   covering the whole sequence.
 * result: output, num_windows rows of result_dim values; zeroed here.
 *
 * Returns 0 on success, TSK_ERR_TIME_UNCALIBRATED if the tree sequence is
 * marked time-uncalibrated and TSK_STAT_ALLOW_TIME_UNCALIBRATED is not set,
 * TSK_ERR_NO_MEMORY, or any non-zero value returned by f.
 */
static int
tsk_treeseq_branch_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
    const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,
    void *f_params, tsk_size_t num_windows, const double *windows, tsk_flags_t options,
    double *result)
{
    int ret = 0;
    tsk_id_t u, v;
    tsk_size_t j, k, window_index;
    tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;
    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
    const double *restrict edge_left = self->tables->edges.left;
    const double *restrict edge_right = self->tables->edges.right;
    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;
    const tsk_id_t *restrict edge_child = self->tables->edges.child;
    const double *restrict time = self->tables->nodes.time;
    const double sequence_length = self->tables->sequence_length;
    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));
    double *restrict branch_length = tsk_calloc(num_nodes, sizeof(*branch_length));
    tsk_id_t tj, tk, h;
    double t_left, t_right, w_left, w_right, left, right, scale;
    const double *weight_u;
    double *state_u, *result_row, *summary_u;
    double *state = tsk_calloc(num_nodes * state_dim, sizeof(*state));
    double *summary = tsk_calloc(num_nodes * result_dim, sizeof(*summary));
    double *running_sum = tsk_calloc(result_dim, sizeof(*running_sum));
    double *zero_state = tsk_calloc(state_dim, sizeof(*zero_state));
    /* Size taken from the pointer being allocated (previously this used
     * sizeof(*zero_state), which only worked because both are double). */
    double *zero_summary = tsk_calloc(result_dim, sizeof(*zero_summary));

    if (self->time_uncalibrated && !(options & TSK_STAT_ALLOW_TIME_UNCALIBRATED)) {
        ret = tsk_trace_error(TSK_ERR_TIME_UNCALIBRATED);
        goto out;
    }

    if (parent == NULL || branch_length == NULL || state == NULL || running_sum == NULL
        || summary == NULL || zero_state == NULL || zero_summary == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* Filling with 0xff bytes sets every parent to -1 (TSK_NULL). */
    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));

    /* If f is not strict, we may need to set conditions for non-sample nodes as well. */
    ret = f(state_dim, zero_state, result_dim, zero_summary, f_params);
    if (ret != 0) {
        goto out;
    }
    for (j = 0; j < num_nodes; j++) { // we could skip this if zero_summary is zero
        summary_u = GET_2D_ROW(summary, result_dim, j);
        tsk_memcpy(summary_u, zero_summary, result_dim * sizeof(*zero_summary));
    }
    /* Set the initial conditions */
    for (j = 0; j < self->num_samples; j++) {
        u = self->samples[j];
        state_u = GET_2D_ROW(state, state_dim, u);
        weight_u = GET_2D_ROW(sample_weights, state_dim, j);
        tsk_memcpy(state_u, weight_u, state_dim * sizeof(*state_u));
        summary_u = GET_2D_ROW(summary, result_dim, u);
        ret = f(state_dim, state_u, result_dim, summary_u, f_params);
        if (ret != 0) {
            goto out;
        }
    }
    tsk_memset(result, 0, num_windows * result_dim * sizeof(*result));

    /* Iterate over the trees */
    tj = 0;
    tk = 0;
    t_left = 0;
    window_index = 0;
    while (tj < num_edges || t_left < sequence_length) {
        /* Remove edges ending at t_left. The running sum is kept consistent
         * by subtracting a node's term before its state or branch length
         * changes and adding the new term back afterwards. */
        while (tk < num_edges && edge_right[O[tk]] == t_left) {
            h = O[tk];
            tk++;

            u = edge_child[h];
            update_running_sum(u, -1, branch_length, summary, result_dim, running_sum);
            parent[u] = TSK_NULL;
            branch_length[u] = 0;

            u = edge_parent[h];
            while (u != TSK_NULL) {
                update_running_sum(
                    u, -1, branch_length, summary, result_dim, running_sum);
                update_state(state, state_dim, u, edge_child[h], -1);
                ret = update_node_summary(
                    u, result_dim, summary, state, state_dim, f, f_params);
                if (ret != 0) {
                    goto out;
                }
                update_running_sum(
                    u, +1, branch_length, summary, result_dim, running_sum);
                u = parent[u];
            }
        }

        /* Insert edges starting at t_left, with the same bookkeeping. */
        while (tj < num_edges && edge_left[I[tj]] == t_left) {
            h = I[tj];
            tj++;

            u = edge_child[h];
            v = edge_parent[h];
            parent[u] = v;
            branch_length[u] = time[v] - time[u];
            update_running_sum(u, +1, branch_length, summary, result_dim, running_sum);

            u = v;
            while (u != TSK_NULL) {
                update_running_sum(
                    u, -1, branch_length, summary, result_dim, running_sum);
                update_state(state, state_dim, u, edge_child[h], +1);
                ret = update_node_summary(
                    u, result_dim, summary, state, state_dim, f, f_params);
                if (ret != 0) {
                    goto out;
                }
                update_running_sum(
                    u, +1, branch_length, summary, result_dim, running_sum);
                u = parent[u];
            }
        }

        /* The current tree extends to the next edge start or end, or to the
         * end of the sequence. */
        t_right = sequence_length;
        if (tj < num_edges) {
            t_right = TSK_MIN(t_right, edge_left[I[tj]]);
        }
        if (tk < num_edges) {
            t_right = TSK_MIN(t_right, edge_right[O[tk]]);
        }

        /* Credit every window overlapping [t_left, t_right) with the running
         * sum times the length of the overlap. */
        while (windows[window_index] < t_right) {
            tsk_bug_assert(window_index < num_windows);
            w_left = windows[window_index];
            w_right = windows[window_index + 1];
            left = TSK_MAX(t_left, w_left);
            right = TSK_MIN(t_right, w_right);
            scale = (right - left);
            tsk_bug_assert(scale > 0);
            result_row = GET_2D_ROW(result, result_dim, window_index);
            for (k = 0; k < result_dim; k++) {
                result_row[k] += running_sum[k] * scale;
            }

            if (w_right <= t_right) {
                window_index++;
            } else {
                /* This window extends past the end of the current tree, so
                 * it is updated again when the next tree is processed. */
                break;
            }
        }
        /* Move to the next tree */
        t_left = t_right;
    }
    tsk_bug_assert(window_index == num_windows);
out:
    /* parent and branch_length are restrict-qualified, so they are released
     * with plain free() rather than the tsk_safe_free macro. */
    if (parent != NULL) {
        free(parent);
    }
    if (branch_length != NULL) {
        free(branch_length);
    }
    tsk_safe_free(state);
    tsk_safe_free(summary);
    tsk_safe_free(running_sum);
    tsk_safe_free(zero_state);
    tsk_safe_free(zero_summary);
    return ret;
}
static int
get_allele_weights(const tsk_site_t *site, const double *state, tsk_size_t state_dim,
const double *total_weight, tsk_size_t *ret_num_alleles, double **ret_allele_states)
{
int ret = 0;
tsk_size_t k;
tsk_mutation_t mutation, parent_mut;
tsk_size_t mutation_index, allele, num_alleles, alt_allele_length;
/* The allele table */
tsk_size_t max_alleles = site->mutations_length + 1;
const char **alleles = tsk_malloc(max_alleles * sizeof(*alleles));
tsk_size_t *allele_lengths = tsk_calloc(max_alleles, sizeof(*allele_lengths));
double *allele_states = tsk_calloc(max_alleles * state_dim, sizeof(*allele_states));
double *allele_row;
const double *state_row;
const char *alt_allele;
if (alleles == NULL || allele_lengths == NULL || allele_states == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
tsk_bug_assert(state != NULL);
alleles[0] = site->ancestral_state;
allele_lengths[0] = site->ancestral_state_length;
tsk_memcpy(allele_states, total_weight, state_dim * sizeof(*allele_states));
num_alleles = 1;
for (mutation_index = 0; mutation_index < site->mutations_length; mutation_index++) {
mutation = site->mutations[mutation_index];
/* Compute the allele index for this derived state value. */
allele = 0;
while (allele < num_alleles) {
if (mutation.derived_state_length == allele_lengths[allele]
&& tsk_memcmp(
mutation.derived_state, alleles[allele], allele_lengths[allele])
== 0) {
break;
}
allele++;
}
if (allele == num_alleles) {
tsk_bug_assert(allele < max_alleles);
alleles[allele] = mutation.derived_state;
allele_lengths[allele] = mutation.derived_state_length;
num_alleles++;
}
/* Add the state for the the mutation's node to this allele */
state_row = GET_2D_ROW(state, state_dim, mutation.node);
allele_row = GET_2D_ROW(allele_states, state_dim, allele);
for (k = 0; k < state_dim; k++) {
allele_row[k] += state_row[k];
}
/* Get the index for the alternate allele that we must subtract from */
alt_allele = site->ancestral_state;
alt_allele_length = site->ancestral_state_length;
if (mutation.parent != TSK_NULL) {
parent_mut = site->mutations[mutation.parent - site->mutations[0].id];
alt_allele = parent_mut.derived_state;
alt_allele_length = parent_mut.derived_state_length;
}
allele = 0;
while (allele < num_alleles) {
if (alt_allele_length == allele_lengths[allele]
&& tsk_memcmp(alt_allele, alleles[allele], allele_lengths[allele])
== 0) {
break;
}
allele++;
}
tsk_bug_assert(allele < num_alleles);
allele_row = GET_2D_ROW(allele_states, state_dim, allele);
for (k = 0; k < state_dim; k++) {
allele_row[k] -= state_row[k];
}
}
*ret_num_alleles = num_alleles;
*ret_allele_states = allele_states;
allele_states = NULL;
out:
tsk_safe_free(alleles);
tsk_safe_free(allele_lengths);
tsk_safe_free(allele_states);
return ret;
}
/* Computes the value of a site-mode general statistic at a single site.
 *
 * The per-allele weights are obtained from get_allele_weights, the summary
 * function f is applied to each allele's weight vector, and the outputs are
 * summed into `result` (result_dim values, zeroed first). When `polarised`
 * is true the ancestral allele (index 0) is skipped.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or any non-zero value returned by
 * get_allele_weights or f.
 */
static int
compute_general_stat_site_result(tsk_site_t *site, double *state, tsk_size_t state_dim,
    tsk_size_t result_dim, general_stat_func_t *f, void *f_params, double *total_weight,
    bool polarised, double *result)
{
    int ret = 0;
    tsk_size_t k;
    tsk_size_t allele, num_alleles;
    /* Must start as NULL: it is freed at `out`, which is also reached before
     * get_allele_weights has had the chance to assign it. */
    double *allele_states = NULL;
    double *result_tmp = tsk_calloc(result_dim, sizeof(*result_tmp));

    if (result_tmp == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    tsk_memset(result, 0, result_dim * sizeof(*result));

    ret = get_allele_weights(
        site, state, state_dim, total_weight, &num_alleles, &allele_states);
    if (ret != 0) {
        goto out;
    }
    /* Sum over the allele weights. Skip the ancestral state if this is a
     * polarised stat */
    for (allele = polarised ? 1 : 0; allele < num_alleles; allele++) {
        ret = f(state_dim, GET_2D_ROW(allele_states, state_dim, allele), result_dim,
            result_tmp, f_params);
        if (ret != 0) {
            goto out;
        }
        for (k = 0; k < result_dim; k++) {
            result[k] += result_tmp[k];
        }
    }
out:
    tsk_safe_free(result_tmp);
    tsk_safe_free(allele_states);
    return ret;
}
/* Computes a site-mode general statistic.
 *
 * Each node carries a state vector (state_dim values) equal to the sum of the
 * weights of the samples at or below it in the current tree. For every site
 * of every tree, compute_general_stat_site_result turns those states into a
 * per-site value (result_dim values), which is added to the row of the window
 * containing the site's position. No span normalisation is done here.
 *
 * sample_weights: num_samples rows of state_dim values.
 * windows: num_windows + 1 breakpoints.
 * options: only TSK_STAT_POLARISED is examined here.
 * result: output, num_windows rows of result_dim values; zeroed here.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or any non-zero value returned by
 * compute_general_stat_site_result.
 */
static int
tsk_treeseq_site_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,
void *f_params, tsk_size_t num_windows, const double *windows, tsk_flags_t options,
double *result)
{
int ret = 0;
tsk_id_t u, v;
tsk_size_t j, k, tree_site, tree_index, window_index;
tsk_size_t num_nodes = self->tables->nodes.num_rows;
const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
const double *restrict edge_left = self->tables->edges.left;
const double *restrict edge_right = self->tables->edges.right;
const tsk_id_t *restrict edge_parent = self->tables->edges.parent;
const tsk_id_t *restrict edge_child = self->tables->edges.child;
const double sequence_length = self->tables->sequence_length;
/* parent[u]: parent of u in the current tree (TSK_NULL if none). */
tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));
tsk_site_t *site;
tsk_id_t tj, tk, h;
double t_left, t_right;
const double *weight_u;
double *state_u, *result_row;
double *state = tsk_calloc(num_nodes * state_dim, sizeof(*state));
/* total_weight[k]: sum of column k of sample_weights over all samples. */
double *total_weight = tsk_calloc(state_dim, sizeof(*total_weight));
/* site_result: scratch buffer for the value at a single site. */
double *site_result = tsk_calloc(result_dim, sizeof(*site_result));
bool polarised = false;
if (parent == NULL || state == NULL || total_weight == NULL || site_result == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Filling with 0xff bytes sets every parent to -1 (TSK_NULL). */
tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));
if (options & TSK_STAT_POLARISED) {
polarised = true;
}
/* Set the initial conditions */
for (j = 0; j < self->num_samples; j++) {
u = self->samples[j];
state_u = GET_2D_ROW(state, state_dim, u);
weight_u = GET_2D_ROW(sample_weights, state_dim, j);
tsk_memcpy(state_u, weight_u, state_dim * sizeof(*state_u));
for (k = 0; k < state_dim; k++) {
total_weight[k] += weight_u[k];
}
}
tsk_memset(result, 0, num_windows * result_dim * sizeof(*result));
/* Iterate over the trees */
tj = 0;
tk = 0;
t_left = 0;
tree_index = 0;
window_index = 0;
while (tj < num_edges || t_left < sequence_length) {
/* Remove edges ending at t_left: subtract the child's state from all of
 * its ancestors, then detach it. */
while (tk < num_edges && edge_right[O[tk]] == t_left) {
h = O[tk];
tk++;
u = edge_child[h];
v = edge_parent[h];
while (v != TSK_NULL) {
update_state(state, state_dim, v, u, -1);
v = parent[v];
}
parent[u] = TSK_NULL;
}
/* Insert edges starting at t_left: attach the child and add its state
 * to all of its ancestors. */
while (tj < num_edges && edge_left[I[tj]] == t_left) {
h = I[tj];
tj++;
u = edge_child[h];
v = edge_parent[h];
parent[u] = v;
while (v != TSK_NULL) {
update_state(state, state_dim, v, u, +1);
v = parent[v];
}
}
/* The current tree extends to the next edge start or end, or to the end
 * of the sequence. */
t_right = sequence_length;
if (tj < num_edges) {
t_right = TSK_MIN(t_right, edge_left[I[tj]]);
}
if (tk < num_edges) {
t_right = TSK_MIN(t_right, edge_right[O[tk]]);
}
/* Update the sites */
for (tree_site = 0; tree_site < self->tree_sites_length[tree_index];
tree_site++) {
site = self->tree_sites[tree_index] + tree_site;
ret = compute_general_stat_site_result(site, state, state_dim, result_dim, f,
f_params, total_weight, polarised, site_result);
if (ret != 0) {
goto out;
}
/* Advance to the window containing this site. window_index never moves
 * backwards, so this relies on sites being visited in increasing
 * position order. */
while (windows[window_index + 1] <= site->position) {
window_index++;
tsk_bug_assert(window_index < num_windows);
}
tsk_bug_assert(windows[window_index] <= site->position);
tsk_bug_assert(site->position < windows[window_index + 1]);
result_row = GET_2D_ROW(result, result_dim, window_index);
for (k = 0; k < result_dim; k++) {
result_row[k] += site_result[k];
}
}
tree_index++;
t_left = t_right;
}
out:
/* parent is restrict-qualified, so it is released with plain free() rather
 * than the safe-free macro (the original note calls it msp_safe_free). */
if (parent != NULL) {
free(parent);
}
tsk_safe_free(state);
tsk_safe_free(total_weight);
tsk_safe_free(site_result);
return ret;
}
/* Adds multiplier * source[i] to dest[i] for each of the first `length`
 * elements. */
static inline void
increment_row(tsk_size_t length, double multiplier, double *source, double *dest)
{
    tsk_size_t i = 0;

    while (i < length) {
        dest[i] += multiplier * source[i];
        i++;
    }
}
/* Computes a node-mode general statistic.
 *
 * Each node carries a state vector (state_dim values) equal to the sum of the
 * weights of the samples at or below it in the current tree, and a summary
 * vector (result_dim values) obtained by applying f to that state. For each
 * window and node the result accumulates the node's summary multiplied by the
 * length of sequence over which that summary held. Accumulation is lazy:
 * last_update[v] records how far along the sequence node v's summary has
 * already been added, and a node is flushed only when its state is about to
 * change or a window ends. No span normalisation is done here.
 *
 * sample_weights: num_samples rows of state_dim values.
 * windows: num_windows + 1 breakpoints.
 * options: unused.
 * result: output laid out as [window][node][result_dim]; zeroed here.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or any non-zero value returned
 * by f.
 */
static int
tsk_treeseq_node_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,
void *f_params, tsk_size_t num_windows, const double *windows,
tsk_flags_t TSK_UNUSED(options), double *result)
{
int ret = 0;
tsk_id_t u, v;
tsk_size_t j, window_index;
tsk_size_t num_nodes = self->tables->nodes.num_rows;
const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
const double *restrict edge_left = self->tables->edges.left;
const double *restrict edge_right = self->tables->edges.right;
const tsk_id_t *restrict edge_parent = self->tables->edges.parent;
const tsk_id_t *restrict edge_child = self->tables->edges.child;
const double sequence_length = self->tables->sequence_length;
/* parent[u]: parent of u in the current tree (TSK_NULL if none). */
tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));
tsk_id_t tj, tk, h;
const double *weight_u;
double *state_u;
double *state = tsk_calloc(num_nodes * state_dim, sizeof(*state));
double *node_summary = tsk_calloc(num_nodes * result_dim, sizeof(*node_summary));
double *last_update = tsk_calloc(num_nodes, sizeof(*last_update));
double t_left, t_right, w_right;
if (parent == NULL || state == NULL || node_summary == NULL || last_update == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Filling with 0xff bytes sets every parent to -1 (TSK_NULL). */
tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));
tsk_memset(result, 0, num_windows * num_nodes * result_dim * sizeof(*result));
/* Set the initial conditions */
for (j = 0; j < self->num_samples; j++) {
u = self->samples[j];
state_u = GET_2D_ROW(state, state_dim, u);
weight_u = GET_2D_ROW(sample_weights, state_dim, j);
tsk_memcpy(state_u, weight_u, state_dim * sizeof(*state_u));
}
/* Every node gets an initial summary, including non-samples whose state is
 * all zeros. */
for (u = 0; u < (tsk_id_t) num_nodes; u++) {
ret = update_node_summary(
u, result_dim, node_summary, state, state_dim, f, f_params);
if (ret != 0) {
goto out;
}
}
/* Iterate over the trees */
tj = 0;
tk = 0;
t_left = 0;
window_index = 0;
while (tj < num_edges || t_left < sequence_length) {
tsk_bug_assert(window_index < num_windows);
/* Remove edges ending at t_left. Each ancestor is flushed into the
 * current window with its old summary before its state changes. */
while (tk < num_edges && edge_right[O[tk]] == t_left) {
h = O[tk];
tk++;
u = edge_child[h];
v = edge_parent[h];
while (v != TSK_NULL) {
increment_row(result_dim, t_left - last_update[v],
GET_2D_ROW(node_summary, result_dim, v),
GET_3D_ROW(result, num_nodes, result_dim, window_index, v));
last_update[v] = t_left;
update_state(state, state_dim, v, u, -1);
ret = update_node_summary(
v, result_dim, node_summary, state, state_dim, f, f_params);
if (ret != 0) {
goto out;
}
v = parent[v];
}
parent[u] = TSK_NULL;
}
/* Insert edges starting at t_left, with the same flush-then-update
 * sequence for each ancestor. */
while (tj < num_edges && edge_left[I[tj]] == t_left) {
h = I[tj];
tj++;
u = edge_child[h];
v = edge_parent[h];
parent[u] = v;
while (v != TSK_NULL) {
increment_row(result_dim, t_left - last_update[v],
GET_2D_ROW(node_summary, result_dim, v),
GET_3D_ROW(result, num_nodes, result_dim, window_index, v));
last_update[v] = t_left;
update_state(state, state_dim, v, u, +1);
ret = update_node_summary(
v, result_dim, node_summary, state, state_dim, f, f_params);
if (ret != 0) {
goto out;
}
v = parent[v];
}
}
/* The current tree extends to the next edge start or end, or to the end
 * of the sequence. */
t_right = sequence_length;
if (tj < num_edges) {
t_right = TSK_MIN(t_right, edge_left[I[tj]]);
}
if (tk < num_edges) {
t_right = TSK_MIN(t_right, edge_right[O[tk]]);
}
/* Close every window that ends within (or at the end of) this tree. */
while (window_index < num_windows && windows[window_index + 1] <= t_right) {
w_right = windows[window_index + 1];
/* Flush the contributions of all nodes to the current window */
for (u = 0; u < (tsk_id_t) num_nodes; u++) {
tsk_bug_assert(last_update[u] < w_right);
increment_row(result_dim, w_right - last_update[u],
GET_2D_ROW(node_summary, result_dim, u),
GET_3D_ROW(result, num_nodes, result_dim, window_index, u));
last_update[u] = w_right;
}
window_index++;
}
t_left = t_right;
}
out:
/* parent is restrict-qualified, so it is released with plain free() rather
 * than the safe-free macro (the original note calls it msp_safe_free). */
if (parent != NULL) {
free(parent);
}
tsk_safe_free(state);
tsk_safe_free(node_summary);
tsk_safe_free(last_update);
return ret;
}
/* Divides each row of `array` by the width of the corresponding window.
 * `array` has num_windows rows of row_size values and `windows` holds
 * num_windows + 1 breakpoints. */
static void
span_normalise(
    tsk_size_t num_windows, const double *windows, tsk_size_t row_size, double *array)
{
    tsk_size_t w, col;
    double width;
    double *window_row;

    for (w = 0; w < num_windows; w++) {
        width = windows[w + 1] - windows[w];
        window_row = GET_2D_ROW(array, row_size, w);
        col = 0;
        while (col < row_size) {
            window_row[col] /= width;
            col++;
        }
    }
}
/* Parameter block passed to unpolarised_summary_func through its void
 * pointer argument. */
typedef struct {
/* The wrapped summary function and the parameters to forward to it. */
general_stat_func_t *f;
void *f_params;
/* Sum of the sample weights per state dimension (state_dim values). */
double *total_weight;
/* Scratch buffer (state_dim values) for total_weight minus a state. */
double *total_minus_state;
/* Scratch buffer (result_dim values) for the second call to f. */
double *result_tmp;
} unpolarised_summary_func_args;
/* Summary function wrapper used for unpolarised statistics.
 *
 * `params` points to an unpolarised_summary_func_args. The wrapped function
 * is evaluated on `state` and on (total_weight - state), and `result`
 * receives the sum of the two outputs. Returns 0 on success or the first
 * non-zero value returned by the wrapped function. */
static int
unpolarised_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    unpolarised_summary_func_args *args = (unpolarised_summary_func_args *) params;
    const double *totals = args->total_weight;
    double *complement = args->total_minus_state;
    double *scratch = args->result_tmp;
    tsk_size_t i;
    int ret;

    /* First evaluation: the state itself, written straight into result. */
    ret = args->f(state_dim, state, result_dim, result, args->f_params);
    if (ret == 0) {
        /* Second evaluation: the complementary state. */
        for (i = 0; i < state_dim; i++) {
            complement[i] = totals[i] - state[i];
        }
        ret = args->f(state_dim, complement, result_dim, scratch, args->f_params);
    }
    if (ret == 0) {
        for (i = 0; i < result_dim; i++) {
            result[i] += scratch[i];
        }
    }
    return ret;
}
/* Runs a node or branch general statistic, handling polarisation.
 *
 * For these two modes an unpolarised statistic evaluates the summary function
 * twice per node. Rather than teaching the node and branch implementations
 * about that, the caller's summary function is replaced here by
 * unpolarised_summary_func, which performs both evaluations; this keeps the
 * implementations and their memory management simple. When
 * TSK_STAT_POLARISED is set the caller's function is used directly.
 *
 * TSK_STAT_BRANCH in `options` selects the branch implementation; otherwise
 * the node implementation is used. Returns 0 on success, TSK_ERR_NO_MEMORY,
 * or the error from the selected implementation.
 */
static int
tsk_polarisable_func_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
    const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,
    void *f_params, tsk_size_t num_windows, const double *windows, tsk_flags_t options,
    double *result)
{
    int ret = 0;
    const bool use_branch_mode = !!(options & TSK_STAT_BRANCH);
    const bool is_polarised = !!(options & TSK_STAT_POLARISED);
    general_stat_func_t *summary_func = f;
    void *summary_params = f_params;
    unpolarised_summary_func_args wrapper;
    const double *sample_row;
    tsk_size_t sample, dim;

    /* Zero the wrapper so the frees at `out` are safe on every path. */
    tsk_memset(&wrapper, 0, sizeof(wrapper));

    if (!is_polarised) {
        wrapper.f = f;
        wrapper.f_params = f_params;
        wrapper.total_weight = tsk_calloc(state_dim, sizeof(double));
        wrapper.total_minus_state = tsk_calloc(state_dim, sizeof(double));
        wrapper.result_tmp = tsk_calloc(result_dim, sizeof(double));
        if (wrapper.total_weight == NULL || wrapper.total_minus_state == NULL
            || wrapper.result_tmp == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }

        /* Sum the sample weights column by column. */
        for (sample = 0; sample < self->num_samples; sample++) {
            sample_row = GET_2D_ROW(sample_weights, state_dim, sample);
            for (dim = 0; dim < state_dim; dim++) {
                wrapper.total_weight[dim] += sample_row[dim];
            }
        }

        summary_func = unpolarised_summary_func;
        summary_params = &wrapper;
    }

    ret = use_branch_mode
              ? tsk_treeseq_branch_general_stat(self, state_dim, sample_weights,
                    result_dim, summary_func, summary_params, num_windows, windows,
                    options, result)
              : tsk_treeseq_node_general_stat(self, state_dim, sample_weights,
                    result_dim, summary_func, summary_params, num_windows, windows,
                    options, result);
out:
    tsk_safe_free(wrapper.total_weight);
    tsk_safe_free(wrapper.total_minus_state);
    tsk_safe_free(wrapper.result_tmp);
    return ret;
}
/* Computes a general summary statistic over the tree sequence.
 *
 * Exactly one of TSK_STAT_SITE, TSK_STAT_BRANCH or TSK_STAT_NODE may be set in
 * `options`; when none is set, site mode is used. `state_dim` and `result_dim`
 * must both be at least 1. If `windows` is NULL a single window spanning
 * [0, sequence_length] is used and `num_windows` is ignored; otherwise the
 * windows are validated with TSK_REQUIRE_FULL_SPAN.
 *
 * When TSK_STAT_SPAN_NORMALISE is set and the computation succeeded, each
 * window's row of `result` is divided by the window's span. In node mode a row
 * holds result_dim values per node. Normalisation is skipped when the
 * computation failed, so `result` is not modified further on error.
 *
 * Returns 0 on success or a negative error code.
 */
int
tsk_treeseq_general_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
    const double *sample_weights, tsk_size_t result_dim, general_stat_func_t *f,
    void *f_params, tsk_size_t num_windows, const double *windows, tsk_flags_t options,
    double *result)
{
    int ret = 0;
    bool stat_site = !!(options & TSK_STAT_SITE);
    bool stat_branch = !!(options & TSK_STAT_BRANCH);
    bool stat_node = !!(options & TSK_STAT_NODE);
    double default_windows[] = { 0, self->tables->sequence_length };
    tsk_size_t row_size;

    /* If no mode is specified, we default to site mode */
    if (!(stat_site || stat_branch || stat_node)) {
        stat_site = true;
    }
    /* It's an error to specify more than one mode */
    if (stat_site + stat_branch + stat_node > 1) {
        ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);
        goto out;
    }

    if (state_dim < 1) {
        ret = tsk_trace_error(TSK_ERR_BAD_STATE_DIMS);
        goto out;
    }
    if (result_dim < 1) {
        ret = tsk_trace_error(TSK_ERR_BAD_RESULT_DIMS);
        goto out;
    }
    if (windows == NULL) {
        num_windows = 1;
        windows = default_windows;
    } else {
        ret = tsk_treeseq_check_windows(
            self, num_windows, windows, TSK_REQUIRE_FULL_SPAN);
        if (ret != 0) {
            goto out;
        }
    }

    if (stat_site) {
        ret = tsk_treeseq_site_general_stat(self, state_dim, sample_weights, result_dim,
            f, f_params, num_windows, windows, options, result);
    } else {
        ret = tsk_polarisable_func_general_stat(self, state_dim, sample_weights,
            result_dim, f, f_params, num_windows, windows, options, result);
    }
    if (ret != 0) {
        /* Do not post-process the output of a failed computation. */
        goto out;
    }

    if (options & TSK_STAT_SPAN_NORMALISE) {
        row_size = result_dim;
        if (stat_node) {
            /* Node stats store result_dim values for every node per window. */
            row_size = result_dim * tsk_treeseq_get_num_nodes(self);
        }
        span_normalise(num_windows, windows, row_size, result);
    }
out:
    return ret;
}
/* Verifies that every entry of `set_indexes` is a valid index into a list of
 * `num_sets` sample sets, i.e. lies in [0, num_sets).
 * Returns 0 if all entries are valid, TSK_ERR_BAD_SAMPLE_SET_INDEX otherwise. */
static int
check_set_indexes(
    tsk_size_t num_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes)
{
    int ret = 0;
    tsk_size_t pos = 0;
    tsk_id_t value;

    while (pos < num_set_indexes) {
        value = set_indexes[pos];
        if (value < 0 || value >= (tsk_id_t) num_sets) {
            ret = tsk_trace_error(TSK_ERR_BAD_SAMPLE_SET_INDEX);
            break;
        }
        pos++;
    }
    return ret;
}
/* Validates a list of sample sets stored back-to-back in `sample_sets`, where
 * set k holds sample_set_sizes[k] node IDs.
 *
 * Errors, in the order they are detected:
 *   TSK_ERR_INSUFFICIENT_SAMPLE_SETS  there are no sample sets at all
 *   TSK_ERR_EMPTY_SAMPLE_SET          a set has size zero
 *   TSK_ERR_NODE_OUT_OF_BOUNDS        an ID is not a row of the node table
 *   TSK_ERR_BAD_SAMPLES               a node has no entry in sample_index_map
 * Returns 0 when every set passes. */
static int
tsk_treeseq_check_sample_sets(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets)
{
    int ret = 0;
    const tsk_id_t node_limit = (tsk_id_t) self->tables->nodes.num_rows;
    tsk_size_t set, member;
    tsk_size_t pos = 0; /* running offset into the flattened sample_sets */
    tsk_id_t node;

    if (num_sample_sets == 0) {
        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
        goto out;
    }
    for (set = 0; set < num_sample_sets; set++) {
        if (sample_set_sizes[set] == 0) {
            ret = tsk_trace_error(TSK_ERR_EMPTY_SAMPLE_SET);
            goto out;
        }
        for (member = 0; member < sample_set_sizes[set]; member++, pos++) {
            node = sample_sets[pos];
            if (node < 0 || node >= node_limit) {
                ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
                goto out;
            }
            if (self->sample_index_map[node] == TSK_NULL) {
                ret = tsk_trace_error(TSK_ERR_BAD_SAMPLES);
                goto out;
            }
        }
    }
out:
    return ret;
}
/* Parameter struct carrying only a sample count.
 * NOTE(review): its consumers are outside this part of the file; presumably it
 * is passed as the opaque params of a weight-based summary function. */
typedef struct {
tsk_size_t num_samples;
} weight_stat_params_t;
/* Parameter struct carrying a sample count, a covariate count and a pointer V.
 * NOTE(review): its consumers are outside this part of the file; the layout
 * and ownership of V should be confirmed at the point of use. */
typedef struct {
tsk_size_t num_samples;
tsk_size_t num_covariates;
double *V;
} covariates_stat_params_t;
/* Parameters passed (as the opaque params pointer) to summary and
 * normalisation functions of sample-count based statistics. All pointers are
 * borrowed from the caller. */
typedef struct {
/* Node IDs of all sample sets, stored back-to-back. */
const tsk_id_t *sample_sets;
/* Number of sample sets. */
tsk_size_t num_sample_sets;
/* Size of each sample set; num_sample_sets entries. */
const tsk_size_t *sample_set_sizes;
/* Sample set indexes selecting the sets each result entry is computed from
 * (e.g. read in pairs by norm_hap_weighted_ij). */
const tsk_id_t *set_indexes;
} sample_count_stat_params_t;
/* Parameter struct carrying a sample count, a total_weights array and an array
 * of index tuples.
 * NOTE(review): its consumers are outside this part of the file; the length of
 * total_weights and the arity of the tuples should be confirmed there. */
typedef struct {
tsk_size_t num_samples;
double *total_weights;
const tsk_id_t *index_tuples;
} indexed_weight_stat_params_t;
/* Runs a general stat whose state is the number of samples from each sample
 * set.
 *
 * The sample sets are validated and converted to a
 * (num_samples x num_sample_sets) 0/1 weight matrix in which entry (s, k) is 1
 * when the sample with index s belongs to set k. That matrix is then handed to
 * tsk_treeseq_general_stat with state_dim == num_sample_sets, and `f` receives
 * a sample_count_stat_params_t describing the sets as its params.
 *
 * Returns 0 on success, TSK_ERR_DUPLICATE_SAMPLE if a sample appears twice in
 * the same set, or any error from validation, allocation or the stat itself.
 */
static int
tsk_treeseq_sample_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t result_dim, const tsk_id_t *set_indexes, general_stat_func_t *f,
    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)
{
    int ret = 0;
    const tsk_size_t num_samples = self->num_samples;
    tsk_size_t set, member;
    tsk_size_t pos = 0; /* running offset into the flattened sample_sets */
    tsk_id_t sample_index;
    double *membership = NULL;
    double *cell;
    sample_count_stat_params_t args = { .sample_sets = sample_sets,
        .num_sample_sets = num_sample_sets,
        .sample_set_sizes = sample_set_sizes,
        .set_indexes = set_indexes };

    ret = tsk_treeseq_check_sample_sets(
        self, num_sample_sets, sample_set_sizes, sample_sets);
    if (ret != 0) {
        goto out;
    }
    membership = tsk_calloc(num_samples * num_sample_sets, sizeof(*membership));
    if (membership == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    for (set = 0; set < num_sample_sets; set++) {
        for (member = 0; member < sample_set_sizes[set]; member++, pos++) {
            sample_index = self->sample_index_map[sample_sets[pos]];
            cell = GET_2D_ROW(membership, num_sample_sets, sample_index) + set;
            /* A cell that is already set means the sample was listed twice. */
            if (*cell != 0) {
                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
                goto out;
            }
            *cell = 1;
        }
    }
    ret = tsk_treeseq_general_stat(self, num_sample_sets, membership, result_dim, f,
        &args, num_windows, windows, options, result);
out:
    tsk_safe_free(membership);
    return ret;
}
/***********************************
* Two Locus Statistics
***********************************/
/* Computes, for a single site, which samples carry each distinct allele.
 *
 * `state` holds one row per mutation of the site (row m = samples found below
 * mutation m). Alleles are numbered in order of first appearance: allele 0 is
 * the ancestral state and each previously unseen derived state gets the next
 * index. The samples of allele a are accumulated in row (site_offset + a) of
 * `out_allele_samples`. For each mutation, its samples are added to the row of
 * its derived allele and then removed from the row of the allele it replaces
 * (the parent mutation's derived state, or the ancestral state when the
 * mutation has no parent).
 *
 * On success stores the number of distinct alleles in `out_num_alleles` and
 * returns 0; returns TSK_ERR_NO_MEMORY if the temporary allele table cannot be
 * allocated.
 */
static int
get_allele_samples(const tsk_site_t *site, tsk_size_t site_offset,
    const tsk_bitset_t *state, tsk_bitset_t *out_allele_samples,
    tsk_size_t *out_num_alleles)
{
    int ret = 0;
    tsk_mutation_t mutation, parent_mut;
    tsk_size_t mutation_index, allele, alt_allele, alt_allele_length;
    /* The allele table */
    tsk_size_t max_alleles = site->mutations_length + 1;
    const char **alleles = tsk_malloc(max_alleles * sizeof(*alleles));
    tsk_size_t *allele_lengths = tsk_calloc(max_alleles, sizeof(*allele_lengths));
    const char *alt_allele_state;
    tsk_size_t num_alleles = 1;

    if (alleles == NULL || allele_lengths == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    tsk_bug_assert(state != NULL);
    alleles[0] = site->ancestral_state;
    allele_lengths[0] = site->ancestral_state_length;

    for (mutation_index = 0; mutation_index < site->mutations_length; mutation_index++) {
        mutation = site->mutations[mutation_index];
        /* Compute the allele index for this derived state value. */
        for (allele = 0; allele < num_alleles; allele++) {
            if (mutation.derived_state_length == allele_lengths[allele]
                && tsk_memcmp(
                       mutation.derived_state, alleles[allele], allele_lengths[allele])
                       == 0) {
                break;
            }
        }
        if (allele == num_alleles) {
            /* First time we see this derived state: register a new allele. */
            tsk_bug_assert(allele < max_alleles);
            alleles[allele] = mutation.derived_state;
            allele_lengths[allele] = mutation.derived_state_length;
            num_alleles++;
        }

        /* Add the mutation's samples to this allele */
        tsk_bitset_union(
            out_allele_samples, allele + site_offset, state, mutation_index);

        /* Get the index for the alternate allele that we must subtract from */
        alt_allele_state = site->ancestral_state;
        alt_allele_length = site->ancestral_state_length;
        if (mutation.parent != TSK_NULL) {
            /* Mutation IDs within a site are contiguous, so the parent's
             * position in site->mutations is its ID minus the first ID. */
            parent_mut = site->mutations[mutation.parent - site->mutations[0].id];
            alt_allele_state = parent_mut.derived_state;
            alt_allele_length = parent_mut.derived_state_length;
        }
        for (alt_allele = 0; alt_allele < num_alleles; alt_allele++) {
            if (alt_allele_length == allele_lengths[alt_allele]
                && tsk_memcmp(
                       alt_allele_state, alleles[alt_allele], allele_lengths[alt_allele])
                       == 0) {
                break;
            }
        }
        /* The replaced allele must already be in the table; otherwise the
         * subtraction below would address a row that is not part of this
         * site. */
        tsk_bug_assert(alt_allele < num_alleles);

        tsk_bitset_subtract(out_allele_samples, alt_allele + site_offset,
            out_allele_samples, allele + site_offset);
    }
    *out_num_alleles = num_alleles;
out:
    tsk_safe_free(alleles);
    tsk_safe_free(allele_lengths);
    return ret;
}
/* Normalisation function weighting each result entry by haplotype frequency:
 * result[k] is the first column of row k of `hap_weights` (rows are three
 * values wide) divided by the size of sample set k. `params` must point to a
 * sample_count_stat_params_t. Always returns 0. */
static int
norm_hap_weighted(tsk_size_t result_dim, const double *hap_weights,
    tsk_size_t TSK_UNUSED(n_a), tsk_size_t TSK_UNUSED(n_b), double *result, void *params)
{
    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;
    const double *row;
    double set_size;
    tsk_size_t idx = 0;

    while (idx < result_dim) {
        row = GET_2D_ROW(hap_weights, 3, idx);
        set_size = (double) args.sample_set_sizes[idx];
        result[idx] = row[0] / set_size;
        idx++;
    }
    return 0;
}
/* Two-sample-set variant of norm_hap_weighted. Result entry k is computed
 * from the pair of sample sets (set_indexes[2k], set_indexes[2k + 1]): the
 * first-column weights of the two sets are summed and divided by the sum of
 * the two set sizes. `params` must point to a sample_count_stat_params_t.
 * Always returns 0. */
static int
norm_hap_weighted_ij(tsk_size_t result_dim, const double *hap_weights,
    tsk_size_t TSK_UNUSED(n_a), tsk_size_t TSK_UNUSED(n_b), double *result, void *params)
{
    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;
    const tsk_id_t *pair = args.set_indexes;
    const double *first_row, *second_row;
    double size_sum, weight_sum;
    tsk_id_t first, second;
    tsk_size_t idx;

    for (idx = 0; idx < result_dim; idx++) {
        first = pair[2 * idx];
        second = pair[2 * idx + 1];
        first_row = GET_2D_ROW(hap_weights, 3, first);
        second_row = GET_2D_ROW(hap_weights, 3, second);
        weight_sum = first_row[0] + second_row[0];
        size_sum = (double) args.sample_set_sizes[first]
                   + (double) args.sample_set_sizes[second];
        result[idx] = weight_sum / size_sum;
    }
    return 0;
}
/* Normalisation function giving every allele pair the same weight: each of
 * the result_dim entries is set to 1 / (n_a * n_b). The haplotype weights and
 * params are not used. Always returns 0. */
static int
norm_total_weighted(tsk_size_t result_dim, const double *TSK_UNUSED(hap_weights),
    tsk_size_t n_a, tsk_size_t n_b, double *result, void *TSK_UNUSED(params))
{
    const double uniform = 1 / (double) (n_a * n_b);
    tsk_size_t idx = 0;

    while (idx < result_dim) {
        result[idx] = uniform;
        idx++;
    }
    return 0;
}
/* Fills the first row of `all_samples` so that the low `n` bits are set.
 * Every word except the last is set to all ones; the last word has only its
 * low (n % TSK_BITSET_BITS) bits set, or all bits when n is an exact multiple
 * of the word size. */
static void
get_all_samples_bits(tsk_bitset_t *all_samples, tsk_size_t n)
{
    const tsk_bitset_val_t ones = ~((tsk_bitset_val_t) 0);
    const tsk_bitset_val_t tail_bits = n % TSK_BITSET_BITS;
    tsk_bitset_val_t tail_word = ones;
    tsk_size_t word;

    if (tail_bits) {
        /* Keep only the bits below tail_bits in the final word. */
        tail_word = ~(ones << tail_bits);
    }
    all_samples->data[all_samples->row_len - 1] = tail_word;

    word = 0;
    while (word < all_samples->row_len - 1) {
        all_samples->data[word] = ones;
        word++;
    }
}
// Stores the intermediate values for computing two-locus statistics.
// Allocated by two_locus_work_init and released by two_locus_work_free.
typedef struct {
// Haplotype weights handed to the summary function: 3 values per state
// dimension (3 * state_dim doubles).
double *weights;
// Output of the normalisation function (result_dim doubles).
double *norm;
// Unnormalised summary function output
// (result_dim * max_alleles * max_alleles doubles).
double *result_tmp;
// Scratch bitset receiving the intersection of two sample rows.
tsk_bitset_t AB_samples;
} two_locus_work_t;
/* Allocates the scratch buffers of `out`:
 *   weights     3 * state_dim doubles
 *   norm        result_dim doubles
 *   result_tmp  result_dim * max_alleles * max_alleles doubles
 *   AB_samples  bitset initialised with (num_samples, 1)
 * Returns 0 on success, TSK_ERR_NO_MEMORY or the bitset error otherwise. On
 * failure the caller is still expected to call two_locus_work_free, which
 * releases whatever was allocated. */
static int
two_locus_work_init(tsk_size_t max_alleles, tsk_size_t num_samples,
    tsk_size_t result_dim, tsk_size_t state_dim, two_locus_work_t *out)
{
    /* Zero the bitset first so it can always be freed safely. */
    tsk_memset(&out->AB_samples, 0, sizeof(out->AB_samples));

    out->weights = tsk_malloc(3 * state_dim * sizeof(*out->weights));
    out->norm = tsk_malloc(result_dim * sizeof(*out->norm));
    out->result_tmp
        = tsk_malloc(result_dim * max_alleles * max_alleles * sizeof(*out->result_tmp));

    if (out->weights == NULL || out->norm == NULL || out->result_tmp == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    return tsk_bitset_init(&out->AB_samples, num_samples, 1);
}
/* Releases everything owned by `work` (the three scratch arrays and the
 * AB_samples bitset). The struct itself is not freed. */
static void
two_locus_work_free(two_locus_work_t *work)
{
    tsk_bitset_free(&work->AB_samples);
    tsk_safe_free(work->result_tmp);
    tsk_safe_free(work->norm);
    tsk_safe_free(work->weights);
}
/* Computes a two-site statistic for one pair of sites by summing a normalised
 * contribution from every pair of alleles (one allele from site A, one from
 * site B).
 *
 * `state` holds one bitset row per (allele, sample set); the rows of site A
 * start at a_off and those of site B at b_off, with state_dim rows per allele.
 * `allele_counts` is indexed the same way and holds the number of bits set in
 * each row. When `polarised` is true, allele index 0 of each site is skipped.
 *
 * For each allele pair the summary function `f` is evaluated on a
 * state_dim x 3 weight matrix, `norm_f` produces one normalisation factor per
 * result entry, and result[k] is incremented by value * factor. `result` is
 * accumulated into, not overwritten.
 *
 * Returns 0 on success or the first non-zero value returned by f or norm_f.
 */
static int
compute_general_normed_two_site_stat_result(const tsk_bitset_t *state,
const tsk_size_t *allele_counts, tsk_size_t a_off, tsk_size_t b_off,
tsk_size_t num_a_alleles, tsk_size_t num_b_alleles, tsk_size_t state_dim,
tsk_size_t result_dim, general_stat_func_t *f, sample_count_stat_params_t *f_params,
norm_func_t *norm_f, bool polarised, two_locus_work_t *restrict work, double *result)
{
int ret = 0;
// Sample sets and b sites are rows, a sites are columns
// b1 b2 b3
// a1 [s1, s2, s3] [s1, s2, s3] [s1, s2, s3]
// a2 [s1, s2, s3] [s1, s2, s3] [s1, s2, s3]
// a3 [s1, s2, s3] [s1, s2, s3] [s1, s2, s3]
tsk_size_t k, mut_a, mut_b, result_row_len = num_b_alleles * result_dim;
// Doubles as the first allele index to visit: 1 skips allele 0 when polarised.
uint8_t is_polarised = polarised ? 1 : 0;
double *restrict hap_row, *restrict result_tmp_row;
double *restrict norm = work->norm;
double *restrict weights = work->weights;
double *restrict result_tmp = work->result_tmp;
// Struct copy: shares the scratch storage owned by `work`.
tsk_bitset_t AB_samples = work->AB_samples;
for (mut_a = is_polarised; mut_a < num_a_alleles; mut_a++) {
result_tmp_row = GET_2D_ROW(result_tmp, result_row_len, mut_a);
for (mut_b = is_polarised; mut_b < num_b_alleles; mut_b++) {
// Build the haplotype weights for this allele pair, one row per sample set:
// [0] samples carrying both alleles, [1] samples carrying only the A allele,
// [2] samples carrying only the B allele.
for (k = 0; k < state_dim; k++) {
tsk_bitset_intersect(state, a_off + (mut_a * state_dim) + k, state,
b_off + (mut_b * state_dim) + k, &AB_samples);
hap_row = GET_2D_ROW(weights, 3, k);
hap_row[0] = (double) tsk_bitset_count(&AB_samples, 0);
hap_row[1] = (double) allele_counts[a_off + (mut_a * state_dim) + k]
- hap_row[0];
hap_row[2] = (double) allele_counts[b_off + (mut_b * state_dim) + k]
- hap_row[0];
}
ret = f(state_dim, weights, result_dim, result_tmp_row, f_params);
if (ret != 0) {
goto out;
}
// The allele counts passed to norm_f exclude the skipped allele 0 when polarised.
ret = norm_f(result_dim, weights, num_a_alleles - is_polarised,
num_b_alleles - is_polarised, norm, f_params);
if (ret != 0) {
goto out;
}
for (k = 0; k < result_dim; k++) {
result[k] += result_tmp_row[k] * norm[k];
}
result_tmp_row += result_dim; // Advance to the next column
}
}
out:
return ret;
}
/* Computes a two-site statistic for a pair of biallelic sites. Only the allele
 * at index 1 of each site is considered, so no normalisation is required and
 * the summary function writes straight into `result`.
 *
 * `state` holds one bitset row per (allele, sample set); the rows of site A
 * start at a_off and those of site B at b_off, with state_dim rows per allele.
 * `allele_counts` is indexed the same way. For each sample set the weights row
 * is [both alleles, A allele only, B allele only].
 *
 * Returns the value returned by `f`.
 */
static int
compute_general_two_site_stat_result(const tsk_bitset_t *state,
    const tsk_size_t *allele_counts, tsk_size_t a_off, tsk_size_t b_off,
    tsk_size_t state_dim, tsk_size_t result_dim, general_stat_func_t *f,
    sample_count_stat_params_t *f_params, two_locus_work_t *restrict work,
    double *result)
{
    const tsk_size_t allele_a = 1, allele_b = 1;
    /* Struct copy: shares the scratch storage owned by `work`. */
    tsk_bitset_t AB_samples = work->AB_samples;
    double *restrict weights = work->weights;
    double *restrict hap_row;
    tsk_size_t set, a_row, b_row;

    for (set = 0; set < state_dim; set++) {
        a_row = a_off + (allele_a * state_dim) + set;
        b_row = b_off + (allele_b * state_dim) + set;
        tsk_bitset_intersect(state, a_row, state, b_row, &AB_samples);
        hap_row = GET_2D_ROW(weights, 3, set);
        hap_row[0] = (double) tsk_bitset_count(&AB_samples, 0);
        hap_row[1] = (double) allele_counts[a_row] - hap_row[0];
        hap_row[2] = (double) allele_counts[b_row] - hap_row[0];
    }
    return f(state_dim, weights, result_dim, result, f_params);
}
/* Merges the sorted site lists `row_sites` and `col_sites` into the sorted,
 * duplicate-free list `sites`, and records where each input entry ended up:
 * row_idx[r] is the position of row_sites[r] in `sites`, and col_idx[c] is the
 * position of col_sites[c]. A site present in both lists is stored once and
 * referenced by both index arrays. The merged length is written to *n_sites.
 */
static void
get_site_row_col_indices(tsk_size_t n_rows, const tsk_id_t *row_sites, tsk_size_t n_cols,
    const tsk_id_t *col_sites, tsk_id_t *sites, tsk_size_t *n_sites, tsk_size_t *row_idx,
    tsk_size_t *col_idx)
{
    tsk_size_t row = 0, col = 0, merged = 0;
    bool take_row, take_col;

    for (; row < n_rows || col < n_cols; merged++) {
        /* An exhausted list never contributes; otherwise the smaller head is
         * taken, and both heads are taken when they are equal. */
        take_row = (col == n_cols)
                   || (row < n_rows && row_sites[row] <= col_sites[col]);
        take_col = (row == n_rows)
                   || (col < n_cols && col_sites[col] <= row_sites[row]);

        sites[merged] = take_row ? row_sites[row] : col_sites[col];
        if (take_row) {
            row_idx[row] = merged;
            row++;
        }
        if (take_col) {
            col_idx[col] = merged;
            col++;
        }
    }
    *n_sites = merged;
}
/* For each of the `n_sites` sites listed in `sites`, computes which samples
 * carry each allele and stores the result in `allele_samples`.
 *
 * The alleles of consecutive sites occupy consecutive row ranges of
 * `allele_samples`; each site reserves (number of mutations + 1) rows. The
 * first row of a site (the ancestral allele) starts out containing all
 * samples, and get_allele_samples then redistributes samples according to the
 * mutations. The number of distinct alleles actually found at site i is
 * written to num_alleles[i].
 *
 * Returns 0 on success or a negative error code.
 */
static int
get_mutation_samples(const tsk_treeseq_t *ts, const tsk_id_t *sites, tsk_size_t n_sites,
    tsk_size_t *num_alleles, tsk_bitset_t *allele_samples)
{
    int ret = 0;
    const tsk_flags_t *restrict flags = ts->tables->nodes.flags;
    const tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    const tsk_size_t *restrict site_muts_len = ts->site_mutations_length;
    tsk_site_t site;
    tsk_tree_t tree;
    tsk_bitset_t all_samples_bits, mut_samples;
    tsk_size_t max_muts_len, site_offset, num_nodes, site_idx, s, m, n;
    tsk_id_t node, *nodes = NULL;
    void *tmp_nodes;

    /* Zero everything that is released at `out` so that an early failure
     * never frees indeterminate memory. */
    tsk_memset(&mut_samples, 0, sizeof(mut_samples));
    tsk_memset(&all_samples_bits, 0, sizeof(all_samples_bits));
    tsk_memset(&tree, 0, sizeof(tree));

    max_muts_len = 0;
    for (s = 0; s < n_sites; s++) {
        max_muts_len = TSK_MAX(site_muts_len[sites[s]], max_muts_len);
    }
    // Allocate a bit array of size max alleles for all sites
    ret = tsk_bitset_init(&mut_samples, num_samples, max_muts_len);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_bitset_init(&all_samples_bits, num_samples, 1);
    if (ret != 0) {
        goto out;
    }
    get_all_samples_bits(&all_samples_bits, num_samples);
    ret = tsk_tree_init(&tree, ts, TSK_NO_SAMPLE_COUNTS);
    if (ret != 0) {
        goto out;
    }

    // For each mutation within each site, perform one preorder traversal to gather
    // the samples under each mutation's node.
    site_offset = 0;
    for (site_idx = 0; site_idx < n_sites; site_idx++) {
        ret = tsk_treeseq_get_site(ts, sites[site_idx], &site);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_tree_seek(&tree, site.position, 0);
        if (ret != 0) {
            goto out;
        }
        tmp_nodes = tsk_realloc(nodes, tsk_tree_get_size_bound(&tree) * sizeof(*nodes));
        if (tmp_nodes == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        nodes = tmp_nodes;

        // The ancestral allele starts out with every sample
        tsk_bitset_union(allele_samples, site_offset, &all_samples_bits, 0);
        // Zero out results before the start of each iteration
        tsk_memset(mut_samples.data, 0,
            mut_samples.row_len * max_muts_len * sizeof(tsk_bitset_val_t));
        for (m = 0; m < site.mutations_length; m++) {
            node = site.mutations[m].node;
            ret = tsk_tree_preorder_from(&tree, node, nodes, &num_nodes);
            if (ret != 0) {
                goto out;
            }
            for (n = 0; n < num_nodes; n++) {
                node = nodes[n];
                if (flags[node] & TSK_NODE_IS_SAMPLE) {
                    tsk_bitset_set_bit(
                        &mut_samples, m, (tsk_bitset_val_t) ts->sample_index_map[node]);
                }
            }
        }
        // Propagate allocation failures instead of continuing with an unset
        // num_alleles entry
        ret = get_allele_samples(
            &site, site_offset, &mut_samples, allele_samples, &(num_alleles[site_idx]));
        if (ret != 0) {
            goto out;
        }
        site_offset += site.mutations_length + 1;
    }
    // if adding code below, check ret before continuing
out:
    tsk_safe_free(nodes);
    tsk_tree_free(&tree);
    tsk_bitset_free(&mut_samples);
    tsk_bitset_free(&all_samples_bits);
    return ret == TSK_TREE_OK ? 0 : ret;
}
// Given the samples under each allele's node and the sample sets, get the samples under
// each allele's node for each sample set. We pack this data into a bitset
// (`allele_sample_sets`) that is size m x n, where m is (n_alleles * num_sample_sets)
// and n is the size of the largest sample set. In addition, we compute the number of
// samples contained in the intersection of each allele's samples and each sample set in
// an array (`allele_sample_set_counts`) of length (n_alleles * num_sample_sets).
//
// Row (j + i * num_sample_sets) of `allele_sample_sets` describes allele row i and
// sample set j; bit k of that row refers to the k-th member of sample set j (not to a
// sample index). Counts are incremented in place, so `allele_sample_set_counts` is
// expected to be zeroed by the caller.
static void
get_mutation_sample_sets(const tsk_bitset_t *allele_samples, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
const tsk_id_t *sample_index_map, tsk_bitset_t *allele_sample_sets,
tsk_size_t *allele_sample_set_counts)
{
tsk_bitset_val_t k, sample;
tsk_size_t i, j, ss_off;
for (i = 0; i < allele_samples->len; i++) {
// ss_off is the offset of sample set j within the flattened sample_sets
ss_off = 0;
for (j = 0; j < num_sample_sets; j++) {
for (k = 0; k < sample_set_sizes[j]; k++) {
sample = (tsk_bitset_val_t) sample_index_map[sample_sets[k + ss_off]];
if (tsk_bitset_contains(allele_samples, i, sample)) {
tsk_bitset_set_bit(allele_sample_sets, j + i * num_sample_sets, k);
allele_sample_set_counts[j + i * num_sample_sets]++;
}
}
ss_off += sample_set_sizes[j];
}
}
}
/* Computes a two-site (site mode) two-locus statistic for every pair of a row
 * site and a column site.
 *
 * `row_sites` and `col_sites` are merged into one list of distinct sites (see
 * get_site_row_col_indices); the samples carrying each allele of each of those
 * sites are computed once and then restricted to each sample set. `result` is
 * an n_rows x n_cols matrix with result_dim values per cell. Pairs where both
 * sites have exactly two alleles are computed directly with `f`; every other
 * pair sums normalised contributions over allele pairs using `norm_f`.
 *
 * Returns 0 on success or a negative error code. All temporary storage is
 * released before returning.
 */
static int
tsk_treeseq_two_site_count_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t result_dim, general_stat_func_t *f,
sample_count_stat_params_t *f_params, norm_func_t *norm_f, tsk_size_t n_rows,
const tsk_id_t *row_sites, tsk_size_t n_cols, const tsk_id_t *col_sites,
tsk_flags_t options, double *result)
{
int ret = 0;
tsk_bitset_t allele_samples, allele_sample_sets;
bool polarised = options & TSK_STAT_POLARISED;
tsk_id_t *sites;
tsk_size_t i, j, n_sites, *row_idx, *col_idx;
double *result_row;
const tsk_size_t num_samples = self->num_samples;
tsk_size_t *num_alleles = NULL, *site_offsets = NULL, *allele_counts = NULL;
tsk_size_t result_row_len = n_cols * result_dim;
tsk_size_t max_ss_size = 0, max_alleles = 0, n_alleles = 0;
two_locus_work_t work;
// Zero the structs released at `out` so cleanup is safe after an early failure
tsk_memset(&work, 0, sizeof(work));
tsk_memset(&allele_samples, 0, sizeof(allele_samples));
tsk_memset(&allele_sample_sets, 0, sizeof(allele_sample_sets));
// Sized by the total number of sites, an upper bound on the merged list
sites = tsk_malloc(self->tables->sites.num_rows * sizeof(*sites));
row_idx = tsk_malloc(self->tables->sites.num_rows * sizeof(*row_idx));
col_idx = tsk_malloc(self->tables->sites.num_rows * sizeof(*col_idx));
if (sites == NULL || row_idx == NULL || col_idx == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
get_site_row_col_indices(
n_rows, row_sites, n_cols, col_sites, sites, &n_sites, row_idx, col_idx);
// depends on n_sites
num_alleles = tsk_malloc(n_sites * sizeof(*num_alleles));
site_offsets = tsk_malloc(n_sites * sizeof(*site_offsets));
if (num_alleles == NULL || site_offsets == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
// Each site reserves (mutations + 1) allele slots; site_offsets[i] is the first
// row of site i in the per-(allele, sample set) arrays
for (i = 0; i < n_sites; i++) {
site_offsets[i] = n_alleles * num_sample_sets;
n_alleles += self->site_mutations_length[sites[i]] + 1;
max_alleles = TSK_MAX(self->site_mutations_length[sites[i]], max_alleles);
}
max_alleles++; // add 1 for the ancestral allele
// depends on n_alleles
ret = tsk_bitset_init(&allele_samples, num_samples, n_alleles);
if (ret != 0) {
goto out;
}
for (i = 0; i < num_sample_sets; i++) {
max_ss_size = TSK_MAX(sample_set_sizes[i], max_ss_size);
}
// depend on n_alleles and max_ss_size
ret = tsk_bitset_init(&allele_sample_sets, max_ss_size, n_alleles * num_sample_sets);
if (ret != 0) {
goto out;
}
// Must be zero-initialised: get_mutation_sample_sets increments in place
allele_counts = tsk_calloc(n_alleles * num_sample_sets, sizeof(*allele_counts));
if (allele_counts == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
// depends on max_ss_size and max_alleles
ret = two_locus_work_init(max_alleles, max_ss_size, result_dim, state_dim, &work);
if (ret != 0) {
goto out;
}
// we track the number of alleles to account for backmutations
ret = get_mutation_samples(self, sites, n_sites, num_alleles, &allele_samples);
if (ret != 0) {
goto out;
}
get_mutation_sample_sets(&allele_samples, num_sample_sets, sample_set_sizes,
sample_sets, self->sample_index_map, &allele_sample_sets, allele_counts);
// For each row/column pair, fill in the sample set in the result matrix.
for (i = 0; i < n_rows; i++) {
result_row = GET_2D_ROW(result, result_row_len, i);
for (j = 0; j < n_cols; j++) {
if (num_alleles[row_idx[i]] == 2 && num_alleles[col_idx[j]] == 2) {
// both sites are biallelic
ret = compute_general_two_site_stat_result(&allele_sample_sets,
allele_counts, site_offsets[row_idx[i]], site_offsets[col_idx[j]],
state_dim, result_dim, f, f_params, &work,
&(result_row[j * result_dim]));
} else {
// at least one site is multiallelic
ret = compute_general_normed_two_site_stat_result(&allele_sample_sets,
allele_counts, site_offsets[row_idx[i]], site_offsets[col_idx[j]],
num_alleles[row_idx[i]], num_alleles[col_idx[j]], state_dim,
result_dim, f, f_params, norm_f, polarised, &work,
&(result_row[j * result_dim]));
}
if (ret != 0) {
goto out;
}
}
}
out:
tsk_safe_free(sites);
tsk_safe_free(row_idx);
tsk_safe_free(col_idx);
tsk_safe_free(num_alleles);
tsk_safe_free(site_offsets);
tsk_safe_free(allele_counts);
two_locus_work_free(&work);
tsk_bitset_free(&allele_samples);
tsk_bitset_free(&allele_sample_sets);
return ret;
}
/* Initialises `sample_sets_bits` with one row per sample set and sets, in row
 * k, the bit of every sample index belonging to set k. The sets are stored
 * back-to-back in `sample_sets`, set k having sample_set_sizes[k] entries.
 *
 * Returns 0 on success, the error from tsk_bitset_init, or
 * TSK_ERR_DUPLICATE_SAMPLE if a sample occurs twice in the same set. The
 * bitset is not freed here on failure. */
static int
sample_sets_to_bitset(const tsk_treeseq_t *self, const tsk_size_t *sample_set_sizes,
    const tsk_id_t *sample_sets, tsk_size_t num_sample_sets,
    tsk_bitset_t *sample_sets_bits)
{
    int ret;
    tsk_size_t set, member;
    tsk_size_t pos = 0; /* running offset into the flattened sample_sets */
    tsk_id_t sample_index;
    tsk_bitset_val_t bit;

    ret = tsk_bitset_init(sample_sets_bits, self->num_samples, num_sample_sets);
    if (ret != 0) {
        return ret;
    }

    for (set = 0; set < num_sample_sets; set++) {
        for (member = 0; member < sample_set_sizes[set]; member++, pos++) {
            sample_index = self->sample_index_map[sample_sets[pos]];
            bit = (tsk_bitset_val_t) sample_index;
            if (tsk_bitset_contains(sample_sets_bits, set, bit)) {
                return tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
            }
            tsk_bitset_set_bit(sample_sets_bits, set, bit);
        }
    }
    return ret;
}
/* Validates a list of site IDs: every ID must lie in [0, num_site_rows) and
 * the list must be strictly increasing. An empty list is valid.
 *
 * Entries are examined left to right; for each entry the bounds check comes
 * first, then the comparison with its successor. Returns 0 or one of
 * TSK_ERR_SITE_OUT_OF_BOUNDS, TSK_ERR_STAT_UNSORTED_SITES,
 * TSK_ERR_STAT_DUPLICATE_SITES. */
static int
check_sites(const tsk_id_t *sites, tsk_size_t num_sites, tsk_size_t num_site_rows)
{
    const tsk_id_t limit = (tsk_id_t) num_site_rows;
    tsk_size_t pos;

    for (pos = 0; pos < num_sites; pos++) {
        if (sites[pos] < 0 || sites[pos] >= limit) {
            return tsk_trace_error(TSK_ERR_SITE_OUT_OF_BOUNDS);
        }
        if (pos + 1 == num_sites) {
            /* The last entry has no successor to compare against. */
            break;
        }
        if (sites[pos] > sites[pos + 1]) {
            return tsk_trace_error(TSK_ERR_STAT_UNSORTED_SITES);
        }
        if (sites[pos] == sites[pos + 1]) {
            return tsk_trace_error(TSK_ERR_STAT_DUPLICATE_SITES);
        }
    }
    return 0;
}
/* Validates a list of genomic positions: every position must lie in
 * [0, sequence_length) and the list must be strictly increasing. An empty list
 * is valid.
 *
 * Entries are examined left to right; for each entry the bounds check comes
 * first, then the comparison with its successor. Returns 0 or one of
 * TSK_ERR_POSITION_OUT_OF_BOUNDS, TSK_ERR_STAT_UNSORTED_POSITIONS,
 * TSK_ERR_STAT_DUPLICATE_POSITIONS. */
static int
check_positions(
    const double *positions, tsk_size_t num_positions, double sequence_length)
{
    tsk_size_t pos;

    for (pos = 0; pos < num_positions; pos++) {
        if (positions[pos] < 0 || positions[pos] >= sequence_length) {
            return tsk_trace_error(TSK_ERR_POSITION_OUT_OF_BOUNDS);
        }
        if (pos + 1 == num_positions) {
            /* The last entry has no successor to compare against. */
            break;
        }
        if (positions[pos] > positions[pos + 1]) {
            return tsk_trace_error(TSK_ERR_STAT_UNSORTED_POSITIONS);
        }
        if (positions[pos] == positions[pos + 1]) {
            return tsk_trace_error(TSK_ERR_STAT_DUPLICATE_POSITIONS);
        }
    }
    return 0;
}
/* Maps each genomic position to the index of the tree covering it.
 *
 * `positions` must be sorted in increasing order and lie within the sequence
 * (see check_positions); the trees are walked once using ts->breakpoints. On
 * success *tree_indexes points to a newly allocated array of num_positions
 * tree indexes, which the caller must free. On allocation failure
 * *tree_indexes is NULL and TSK_ERR_NO_MEMORY is returned.
 */
static int
positions_to_tree_indexes(const tsk_treeseq_t *ts, const double *positions,
    tsk_size_t num_positions, tsk_id_t **tree_indexes)
{
    int ret = 0;
    tsk_id_t tree_index = 0;
    tsk_size_t i, num_trees = ts->num_trees;

    // This is tricky. If there are 0 positions, we calloc a size of 1
    // we must calloc, because memset will have no effect when called with size 0
    *tree_indexes = tsk_calloc(num_positions, sizeof(**tree_indexes));
    // Check the allocation itself, not the address of the output parameter
    if (*tree_indexes == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    tsk_memset(*tree_indexes, TSK_NULL, num_positions * sizeof(**tree_indexes));
    for (i = 0; i < num_positions; i++) {
        // Advance until the right breakpoint of the current tree is past the position
        while (ts->breakpoints[tree_index + 1] <= positions[i]) {
            tree_index++;
        }
        (*tree_indexes)[i] = tree_index;
    }
    tsk_bug_assert(tree_index <= (tsk_id_t) (num_trees - 1));
out:
    return ret;
}
/* Counts how many times each value occurs in `indexes` by measuring runs of
 * equal consecutive entries.
 *
 * The counts array has (last value - first value + 1) entries and is indexed
 * by (value - indexes[0]); values that never occur keep a count of zero.
 * NOTE(review): this assumes `indexes` is sorted in non-decreasing order --
 * confirm against the callers.
 *
 * indexes[0] is read unconditionally, so the caller must pass a buffer with at
 * least one readable element even when num_indexes is 0; in that case a
 * one-element array with counts[0] == 1 is produced.
 *
 * On success stores the newly allocated array in *out_counts and returns 0;
 * the array is not freed here, so presumably the caller owns it. Returns
 * TSK_ERR_NO_MEMORY (leaving *out_counts untouched) if the allocation fails.
 */
static int
get_index_counts(
const tsk_id_t *indexes, tsk_size_t num_indexes, tsk_size_t **out_counts)
{
int ret = 0;
/* Value of the run currently being counted. */
tsk_id_t index = indexes[0];
tsk_size_t count, i;
tsk_size_t *counts = tsk_calloc(
(tsk_size_t) (indexes[num_indexes ? num_indexes - 1 : 0] - indexes[0] + 1),
sizeof(*counts));
if (counts == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
count = 1;
for (i = 1; i < num_indexes; i++) {
if (indexes[i] == indexes[i - 1]) {
count++;
} else {
/* The run ended: store its length and start counting the next value. */
counts[index - indexes[0]] = count;
count = 1;
index = indexes[i];
}
}
/* Store the final run. */
counts[index - indexes[0]] = count;
*out_counts = counts;
out:
return ret;
}
/* Per-tree iteration state used by the two-locus branch statistics. Set up by
 * iter_state_init, reset by iter_state_clear and released by iter_state_free. */
typedef struct {
/* Tree whose position is advanced by advance_collect_edges. */
tsk_tree_t tree;
/* Heap-allocated bitset with one row per (node, state dimension) pair. */
tsk_bitset_t *node_samples;
/* Per-node array (num_nodes entries), reset to TSK_NULL by iter_state_clear. */
tsk_id_t *parent;
/* Edges removed / inserted by the most recent tree transition, and how many
 * entries of each array are valid. */
tsk_id_t *edges_out;
tsk_id_t *edges_in;
/* Per-node branch length (num_nodes entries). */
double *branch_len;
tsk_size_t n_edges_out;
tsk_size_t n_edges_in;
} iter_state;
/* Initialises an iteration state for `ts`: the tree (without sample counts),
 * a node_samples bitset with state_dim * num_nodes rows, and the per-node
 * parent, edges_out, edges_in and branch_len arrays (branch_len is zeroed).
 *
 * Returns 0 on success or a negative error code. Nothing is released here on
 * failure; the caller is expected to call iter_state_free. */
static int
iter_state_init(iter_state *self, const tsk_treeseq_t *ts, tsk_size_t state_dim)
{
    const tsk_size_t num_nodes = ts->tables->nodes.num_rows;
    int ret;

    ret = tsk_tree_init(&self->tree, ts, TSK_NO_SAMPLE_COUNTS);
    if (ret != 0) {
        return ret;
    }

    /* The bitset lives on the heap; allocate the struct, then its storage. */
    self->node_samples = tsk_calloc(1, sizeof(*self->node_samples));
    if (self->node_samples == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    ret = tsk_bitset_init(self->node_samples, ts->num_samples, state_dim * num_nodes);
    if (ret != 0) {
        return ret;
    }

    self->parent = tsk_malloc(num_nodes * sizeof(*self->parent));
    self->edges_out = tsk_malloc(num_nodes * sizeof(*self->edges_out));
    self->edges_in = tsk_malloc(num_nodes * sizeof(*self->edges_in));
    self->branch_len = tsk_calloc(num_nodes, sizeof(*self->branch_len));
    if (self->parent == NULL || self->edges_out == NULL || self->edges_in == NULL
        || self->branch_len == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    return 0;
}
/* Initialises `node_samples` with one row per (node, state dimension) pair
 * and seeds it with the sample nodes themselves: for a sample node n with
 * sample index s, bit s is set in row (state_dim * n + k) whenever row k of
 * `sample_sets` contains s. Rows of non-sample nodes are left empty.
 *
 * Returns 0 on success or the error from tsk_bitset_init. */
static int
get_node_samples(const tsk_treeseq_t *ts, tsk_size_t state_dim,
    const tsk_bitset_t *sample_sets, tsk_bitset_t *node_samples)
{
    int ret = 0;
    const tsk_size_t num_nodes = ts->tables->nodes.num_rows;
    const tsk_id_t *restrict sample_index_map = ts->sample_index_map;
    const tsk_flags_t *restrict flags = ts->tables->nodes.flags;
    tsk_size_t node, set;
    tsk_bitset_val_t sample_bit;

    ret = tsk_bitset_init(node_samples, ts->num_samples, num_nodes * state_dim);
    if (ret != 0) {
        return ret;
    }

    for (node = 0; node < num_nodes; node++) {
        if (!(flags[node] & TSK_NODE_IS_SAMPLE)) {
            continue;
        }
        sample_bit = (tsk_bitset_val_t) sample_index_map[node];
        for (set = 0; set < state_dim; set++) {
            if (tsk_bitset_contains(sample_sets, set, sample_bit)) {
                tsk_bitset_set_bit(node_samples, (state_dim * node) + set, sample_bit);
            }
        }
    }
    return ret;
}
/* Resets an iteration state so that iteration can start afresh: the tree is
 * cleared, the edge counters are zeroed, parent / edges_out / edges_in are
 * filled with TSK_NULL, branch lengths are zeroed, and the node sample sets
 * are restored from the template `node_samples`. */
static void
iter_state_clear(iter_state *self, tsk_size_t state_dim, tsk_size_t num_nodes,
    const tsk_bitset_t *node_samples)
{
    tsk_tree_clear(&self->tree);

    self->n_edges_in = 0;
    self->n_edges_out = 0;
    tsk_memset(self->edges_in, TSK_NULL, num_nodes * sizeof(*self->edges_in));
    tsk_memset(self->edges_out, TSK_NULL, num_nodes * sizeof(*self->edges_out));
    tsk_memset(self->parent, TSK_NULL, num_nodes * sizeof(*self->parent));
    tsk_memset(self->branch_len, 0, num_nodes * sizeof(*self->branch_len));

    /* Copy all state_dim * num_nodes rows of the template bitset. */
    tsk_memcpy(self->node_samples->data, node_samples->data,
        node_samples->row_len * state_dim * num_nodes * sizeof(*node_samples->data));
}
/* Releases everything owned by an iteration state. Safe to call on a state
 * that was zero-initialised and only partially set up by iter_state_init:
 * when initialisation failed before node_samples was allocated, the pointer
 * is NULL and the bitset must not be dereferenced. */
static void
iter_state_free(iter_state *self)
{
    tsk_tree_free(&self->tree);
    if (self->node_samples != NULL) {
        tsk_bitset_free(self->node_samples);
    }
    tsk_safe_free(self->node_samples);
    tsk_safe_free(self->parent);
    tsk_safe_free(self->edges_out);
    tsk_safe_free(self->edges_in);
    tsk_safe_free(self->branch_len);
}
/* Moves the tree of `s` to tree `index` and records which edges were removed
 * and inserted by the move in s->edges_out / s->edges_in (with their counts in
 * n_edges_out / n_edges_in).
 *
 * Two cases are handled:
 *  - the tree is already positioned, or the target is the first tree: advance
 *    one step with tsk_tree_next and copy every edge in the out and in ranges
 *    of the tree position;
 *  - the tree is unpositioned and the target is not the first tree: seek
 *    directly to `index`. No edges are reported as removed, and only the edges
 *    of the in range that overlap the boundary of the new tree's interval are
 *    reported as inserted.
 *
 * Returns 0 on success or the negative error code from the tree operation.
 */
static int
advance_collect_edges(iter_state *s, tsk_id_t index)
{
int ret = 0;
tsk_id_t j, e;
tsk_size_t i;
double left, right;
tsk_tree_position_t pos;
tsk_tree_t *tree = &s->tree;
const double *restrict edge_left = tree->tree_sequence->tables->edges.left;
const double *restrict edge_right = tree->tree_sequence->tables->edges.right;
// Either we're seeking forward one step from some nonzero position in the tree, or
// from the beginning of the tree sequence.
if (tree->index != TSK_NULL || index == 0) {
ret = tsk_tree_next(tree);
if (ret < 0) {
goto out;
}
pos = tree->tree_pos;
// Copy the edges leaving the tree
i = 0;
for (j = pos.out.start; j != pos.out.stop; j++) {
s->edges_out[i] = pos.out.order[j];
i++;
}
s->n_edges_out = i;
// Copy the edges entering the tree
i = 0;
for (j = pos.in.start; j != pos.in.stop; j++) {
s->edges_in[i] = pos.in.order[j];
i++;
}
s->n_edges_in = i;
} else {
// Seek from an arbitrary nonzero position from an uninitialized tree.
tsk_bug_assert(tree->index == -1);
ret = tsk_tree_seek_index(tree, index, 0);
if (ret < 0) {
goto out;
}
pos = tree->tree_pos;
i = 0;
if (pos.direction == TSK_DIR_FORWARD) {
// Keep the edges whose interval contains the left end of the tree
left = pos.interval.left;
for (j = pos.in.start; j != pos.in.stop; j++) {
e = pos.in.order[j];
if (edge_left[e] <= left && left < edge_right[e]) {
s->edges_in[i] = pos.in.order[j];
i++;
}
}
} else {
// Reverse direction: the range is walked downwards, keeping the edges that
// reach the right end of the tree
right = pos.interval.right;
for (j = pos.in.start; j != pos.in.stop; j--) {
e = pos.in.order[j];
if (edge_right[e] >= right && right > edge_left[e]) {
s->edges_in[i] = pos.in.order[j];
i++;
}
}
}
s->n_edges_out = 0;
s->n_edges_in = i;
}
// The tree functions return a non-negative value on success; normalise it to 0
ret = 0;
out:
return ret;
}
/* Accumulates into `result` the contribution of branch `c` of tree B paired
 * with every branch of tree A.
 *
 * For each node n of A with a non-zero branch length, a state_dim x 3 weight
 * matrix is built from the sample bitsets of (A, n) and (B, c), the summary
 * function `f` is evaluated on it, and
 *     result[k] += f_value[k] * A_branch_len[n] * (B_branch_len[c] * sign)
 * is applied. `sign` lets the caller add or remove the contribution. Nothing
 * is done when the signed length of branch c is zero.
 *
 * Returns 0 on success or the first non-zero value returned by `f`.
 */
static int
compute_two_tree_branch_state_update(const tsk_treeseq_t *ts, tsk_id_t c,
const iter_state *A_state, const iter_state *B_state, tsk_size_t state_dim,
tsk_size_t result_dim, int sign, general_stat_func_t *f,
sample_count_stat_params_t *f_params, two_locus_work_t *restrict work,
double *result)
{
int ret = 0;
double a_len, b_len;
double *restrict B_branch_len = B_state->branch_len;
double *weights_row;
tsk_size_t n, k, a_row, b_row;
const double *restrict A_branch_len = A_state->branch_len;
const tsk_bitset_t *restrict A_state_samples = A_state->node_samples;
const tsk_bitset_t *restrict B_state_samples = B_state->node_samples;
tsk_size_t num_nodes = ts->tables->nodes.num_rows;
double *weights = work->weights;
double *result_tmp = work->result_tmp;
// Struct copy: shares the scratch storage owned by `work`.
tsk_bitset_t AB_samples = work->AB_samples;
b_len = B_branch_len[c] * sign;
if (b_len == 0) {
return ret;
}
for (n = 0; n < num_nodes; n++) {
a_len = A_branch_len[n];
if (a_len == 0) {
continue;
}
// One weights row per sample set: [0] samples below both branches,
// [1] samples below the A branch only, [2] samples below the B branch only.
for (k = 0; k < state_dim; k++) {
a_row = (state_dim * n) + k;
b_row = (state_dim * (tsk_size_t) c) + k;
weights_row = GET_2D_ROW(weights, 3, k);
tsk_bitset_intersect(
A_state_samples, a_row, B_state_samples, b_row, &AB_samples);
weights_row[0] = (double) tsk_bitset_count(&AB_samples, 0);
weights_row[1]
= (double) tsk_bitset_count(A_state_samples, a_row) - weights_row[0];
weights_row[2]
= (double) tsk_bitset_count(B_state_samples, b_row) - weights_row[0];
}
ret = f(state_dim, weights, result_dim, result_tmp, f_params);
if (ret != 0) {
goto out;
}
for (k = 0; k < result_dim; k++) {
result[k] += result_tmp[k] * a_len * b_len;
}
}
out:
return ret;
}
/*
 * Applies the pending edge changes recorded in `r_state` (edges_out /
 * edges_in) to `r_state` itself and updates `result` accordingly, pairing
 * every affected node of `r_state` with the (unchanged) tree in `l_state`.
 *
 * The update proceeds in four phases:
 *   1. mark every node on the path above the child of each changed edge;
 *   2. subtract the full current contribution of each marked node;
 *   3. apply the edge removals and insertions to the sample bitsets,
 *      parent array and branch lengths of `r_state`, marking any further
 *      nodes touched by the insertions;
 *   4. add back the contribution of every marked node using the new state.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY on allocation failure, or the
 * first nonzero value reported while evaluating the summary function.
 */
static int
compute_two_tree_branch_stat(const tsk_treeseq_t *ts, const iter_state *l_state,
    iter_state *r_state, general_stat_func_t *f, sample_count_stat_params_t *f_params,
    tsk_size_t result_dim, tsk_size_t state_dim, double *result)
{
    int ret = 0;
    tsk_id_t e, c, ec, p, *updated_nodes = NULL;
    tsk_size_t j, k, n_updates;
    const double *restrict time = ts->tables->nodes.time;
    const tsk_id_t *restrict edges_child = ts->tables->edges.child;
    const tsk_id_t *restrict edges_parent = ts->tables->edges.parent;
    const tsk_size_t num_nodes = ts->tables->nodes.num_rows;
    tsk_bitset_t updates, *r_samples = r_state->node_samples;
    two_locus_work_t work;

    /* Zero the structs first so the cleanup at `out` is safe on early exit. */
    tsk_memset(&work, 0, sizeof(work));
    tsk_memset(&updates, 0, sizeof(updates));
    // only two alleles are possible for branch stats
    ret = two_locus_work_init(2, ts->num_samples, result_dim, state_dim, &work);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_bitset_init(&updates, num_nodes, 1);
    if (ret != 0) {
        goto out;
    }
    updated_nodes = tsk_calloc(num_nodes, sizeof(*updated_nodes));
    if (updated_nodes == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    // Identify modified nodes both added and removed
    for (j = 0; j < r_state->n_edges_out + r_state->n_edges_in; j++) {
        e = j < r_state->n_edges_out ? r_state->edges_out[j]
                                     : r_state->edges_in[j - r_state->n_edges_out];
        p = edges_parent[e];
        c = edges_child[e];
        // Identify affected nodes above child
        while (p != TSK_NULL) {
            tsk_bitset_set_bit(&updates, 0, (tsk_bitset_val_t) c);
            c = p;
            p = r_state->parent[p];
        }
    }
    // Subtract the whole contribution from the child node
    n_updates = 0;
    tsk_bitset_get_items(&updates, 0, updated_nodes, &n_updates);
    while (n_updates != 0) {
        n_updates--;
        c = updated_nodes[n_updates];
        ret = compute_two_tree_branch_state_update(ts, c, l_state, r_state, state_dim,
            result_dim, -1, f, f_params, &work, result);
        if (ret != 0) {
            goto out;
        }
    }
    // Remove samples under nodes from removed edges to parent nodes
    for (j = 0; j < r_state->n_edges_out; j++) {
        e = r_state->edges_out[j];
        p = edges_parent[e];
        ec = edges_child[e]; // edge child
        while (p != TSK_NULL) {
            for (k = 0; k < state_dim; k++) {
                tsk_bitset_subtract(r_samples, (state_dim * (tsk_size_t) p) + k,
                    r_samples, (state_dim * (tsk_size_t) ec) + k);
            }
            p = r_state->parent[p];
        }
        r_state->branch_len[ec] = 0;
        r_state->parent[ec] = TSK_NULL;
    }
    // Add samples under nodes from added edges
    for (j = 0; j < r_state->n_edges_in; j++) {
        e = r_state->edges_in[j];
        p = edges_parent[e];
        ec = c = edges_child[e];
        r_state->branch_len[c] = time[p] - time[c];
        r_state->parent[c] = p;
        while (p != TSK_NULL) {
            /* `updates` still holds the nodes marked in the first pass; nodes
             * on the new path are added to that set. */
            tsk_bitset_set_bit(&updates, 0, (tsk_bitset_val_t) c);
            for (k = 0; k < state_dim; k++) {
                tsk_bitset_union(r_samples, (state_dim * (tsk_size_t) p) + k, r_samples,
                    (state_dim * (tsk_size_t) ec) + k);
            }
            c = p;
            p = r_state->parent[p];
        }
    }
    // Update all affected child nodes (fully subtracted, deferred from addition)
    n_updates = 0;
    tsk_bitset_get_items(&updates, 0, updated_nodes, &n_updates);
    while (n_updates != 0) {
        n_updates--;
        c = updated_nodes[n_updates];
        ret = compute_two_tree_branch_state_update(ts, c, l_state, r_state, state_dim,
            result_dim, +1, f, f_params, &work, result);
        if (ret != 0) {
            goto out;
        }
    }
out:
    tsk_safe_free(updated_nodes);
    two_locus_work_free(&work);
    tsk_bitset_free(&updates);
    return ret;
}
/*
 * Computes a two-locus branch statistic for every (row position, column
 * position) pair and stores it in `result`, which is addressed as rows of
 * `result_dim * n_cols` doubles.
 *
 * Two iteration states are maintained: `l_state` walks the trees covering the
 * row positions and `r_state` walks the trees covering the column positions.
 * `result_tmp` carries the running statistic, which is updated incrementally
 * by compute_two_tree_branch_stat as either state advances.
 *
 * The `norm_f` and `options` parameters are unused here.
 *
 * Returns 0 on success or a negative error code from any helper; all
 * temporaries are released at `out` on every path.
 */
static int
tsk_treeseq_two_branch_count_stat(const tsk_treeseq_t *self, tsk_size_t state_dim,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t result_dim, general_stat_func_t *f,
sample_count_stat_params_t *f_params, norm_func_t *TSK_UNUSED(norm_f),
tsk_size_t n_rows, const double *row_positions, tsk_size_t n_cols,
const double *col_positions, tsk_flags_t TSK_UNUSED(options), double *result)
{
int ret = 0;
int r, c;
tsk_id_t *row_indexes = NULL, *col_indexes = NULL;
tsk_size_t i, j, k, row, col, *row_repeats = NULL, *col_repeats = NULL;
tsk_bitset_t node_samples, sample_sets_bits;
iter_state l_state, r_state;
double *result_tmp = NULL, *result_row;
const tsk_size_t num_nodes = self->tables->nodes.num_rows;
/* Zero everything that is freed at `out` so early exits are safe. */
tsk_memset(&sample_sets_bits, 0, sizeof(sample_sets_bits));
tsk_memset(&node_samples, 0, sizeof(node_samples));
tsk_memset(&l_state, 0, sizeof(l_state));
tsk_memset(&r_state, 0, sizeof(r_state));
result_tmp = tsk_malloc(result_dim * sizeof(*result_tmp));
if (result_tmp == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
ret = iter_state_init(&l_state, self, state_dim);
if (ret != 0) {
goto out;
}
ret = iter_state_init(&r_state, self, state_dim);
if (ret != 0) {
goto out;
}
ret = sample_sets_to_bitset(
self, sample_set_sizes, sample_sets, num_sample_sets, &sample_sets_bits);
if (ret != 0) {
goto out;
}
/* Presumably maps each position to the index of the tree covering it, and
 * counts how many positions fall in each tree -- confirm against the
 * helper definitions. */
ret = positions_to_tree_indexes(self, row_positions, n_rows, &row_indexes);
if (ret != 0) {
goto out;
}
ret = positions_to_tree_indexes(self, col_positions, n_cols, &col_indexes);
if (ret != 0) {
goto out;
}
ret = get_index_counts(row_indexes, n_rows, &row_repeats);
if (ret != 0) {
goto out;
}
ret = get_index_counts(col_indexes, n_cols, &col_repeats);
if (ret != 0) {
goto out;
}
ret = get_node_samples(self, state_dim, &sample_sets_bits, &node_samples);
if (ret != 0) {
goto out;
}
iter_state_clear(&l_state, state_dim, num_nodes, &node_samples);
row = 0;
/* Visit every tree index from the first row index to the last one;
 * row_repeats is indexed by the offset `r` from the first index. */
for (r = 0; r < (row_indexes[n_rows ? n_rows - 1U : 0] - row_indexes[0] + 1); r++) {
/* The running statistic and the column state restart for each row tree. */
tsk_memset(result_tmp, 0, result_dim * sizeof(*result_tmp));
iter_state_clear(&r_state, state_dim, num_nodes, &node_samples);
ret = advance_collect_edges(&l_state, (tsk_id_t) r + row_indexes[0]);
if (ret != 0) {
goto out;
}
/* NOTE: this assignment is overwritten inside the column loop before use. */
result_row = GET_2D_ROW(result, result_dim * n_cols, row);
/* Arguments are deliberately swapped relative to the call in the column
 * loop: here l_state is the state being updated, paired against the
 * freshly cleared r_state. */
ret = compute_two_tree_branch_stat(
self, &r_state, &l_state, f, f_params, result_dim, state_dim, result_tmp);
if (ret != 0) {
goto out;
}
col = 0;
for (c = 0; c < (col_indexes[n_cols ? n_cols - 1 : 0] - col_indexes[0] + 1);
c++) {
ret = advance_collect_edges(&r_state, (tsk_id_t) c + col_indexes[0]);
if (ret != 0) {
goto out;
}
ret = compute_two_tree_branch_stat(self, &l_state, &r_state, f, f_params,
result_dim, state_dim, result_tmp);
if (ret != 0) {
goto out;
}
/* Copy the current statistic into every output cell whose row and
 * column positions share this pair of trees. `col` is an offset in
 * doubles within the output row. */
for (i = 0; i < row_repeats[r]; i++) {
for (j = 0; j < col_repeats[c]; j++) {
result_row = GET_2D_ROW(result, result_dim * n_cols, row + i);
for (k = 0; k < result_dim; k++) {
result_row[col + (j * result_dim) + k] = result_tmp[k];
}
}
}
col += (col_repeats[c] * result_dim);
}
row += row_repeats[r];
}
out:
tsk_safe_free(result_tmp);
tsk_safe_free(row_indexes);
tsk_safe_free(col_indexes);
tsk_safe_free(row_repeats);
tsk_safe_free(col_repeats);
iter_state_free(&l_state);
iter_state_free(&r_state);
tsk_bitset_free(&node_samples);
tsk_bitset_free(&sample_sets_bits);
return ret;
}
/*
 * Verifies that no sample appears more than once within any single sample
 * set. `sample_sets` is the concatenation of all sets, with the length of
 * set k given by `sample_set_sizes[k]`; node IDs are translated to sample
 * indexes through `sample_index_map` before being recorded.
 *
 * Returns 0 if every set is duplicate-free, TSK_ERR_DUPLICATE_SAMPLE if a
 * repeat is found, or the error from initialising the scratch bitset.
 */
static int
check_sample_set_dups(tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
    const tsk_id_t *sample_sets, const tsk_id_t *restrict sample_index_map,
    tsk_size_t num_samples)
{
    int ret;
    tsk_size_t set, member;
    const tsk_id_t *cursor = sample_sets;
    tsk_bitset_val_t bit;
    tsk_bitset_t seen;

    tsk_memset(&seen, 0, sizeof(seen));
    ret = tsk_bitset_init(&seen, num_samples, 1);
    if (ret != 0) {
        goto out;
    }
    for (set = 0; set < num_sample_sets; set++) {
        /* Membership is tracked per set, so start from an empty bitset. */
        tsk_memset(seen.data, 0, sizeof(*seen.data) * seen.row_len);
        for (member = 0; member < sample_set_sizes[set]; member++, cursor++) {
            bit = (tsk_bitset_val_t) sample_index_map[*cursor];
            if (tsk_bitset_contains(&seen, 0, bit)) {
                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
                goto out;
            }
            tsk_bitset_set_bit(&seen, 0, bit);
        }
    }
out:
    tsk_bitset_free(&seen);
    return ret;
}
/*
 * Entry point for two-locus sample-count statistics.
 *
 * Validates the requested mode and inputs, then dispatches to the site or
 * branch implementation. Site mode is used when neither TSK_STAT_SITE nor
 * TSK_STAT_BRANCH is set. In site mode `row_sites` / `col_sites` select the
 * loci; in branch mode `row_positions` / `col_positions` are used instead.
 *
 * Returns 0 on success, or:
 *   TSK_ERR_UNSUPPORTED_STAT_MODE if TSK_STAT_NODE is requested,
 *   TSK_ERR_MULTIPLE_STAT_MODES if both site and branch are requested,
 *   TSK_ERR_BAD_RESULT_DIMS if result_dim is zero,
 *   or any error reported by the validation helpers or the implementation.
 */
int
tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t result_dim, const tsk_id_t *set_indexes, general_stat_func_t *f,
    norm_func_t *norm_f, tsk_size_t out_rows, const tsk_id_t *row_sites,
    const double *row_positions, tsk_size_t out_cols, const tsk_id_t *col_sites,
    const double *col_positions, tsk_flags_t options, double *result)
{
    // TODO: generalize this function if we ever decide to do weighted two_locus stats.
    // Only count stats are implemented, so weights are not handled here.
    int ret = 0;
    const bool site_requested = (options & TSK_STAT_SITE) != 0;
    const bool branch_requested = (options & TSK_STAT_BRANCH) != 0;
    // With no explicit mode, fall back to site mode
    const bool use_site_mode = site_requested || !branch_requested;
    const tsk_size_t state_dim = num_sample_sets;
    sample_count_stat_params_t f_params = { .sample_sets = sample_sets,
        .num_sample_sets = num_sample_sets,
        .sample_set_sizes = sample_set_sizes,
        .set_indexes = set_indexes };

    // Two-locus node stats are not supported
    if ((options & TSK_STAT_NODE) != 0) {
        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
        goto out;
    }
    // Requesting both modes at once is an error
    if (site_requested && branch_requested) {
        ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);
        goto out;
    }
    ret = tsk_treeseq_check_sample_sets(
        self, num_sample_sets, sample_set_sizes, sample_sets);
    if (ret != 0) {
        goto out;
    }
    if (result_dim < 1) {
        ret = tsk_trace_error(TSK_ERR_BAD_RESULT_DIMS);
        goto out;
    }

    if (use_site_mode) {
        ret = check_sites(row_sites, out_rows, self->tables->sites.num_rows);
        if (ret != 0) {
            goto out;
        }
        ret = check_sites(col_sites, out_cols, self->tables->sites.num_rows);
        if (ret != 0) {
            goto out;
        }
        ret = check_sample_set_dups(num_sample_sets, sample_set_sizes, sample_sets,
            self->sample_index_map, self->num_samples);
        if (ret != 0) {
            goto out;
        }
        ret = tsk_treeseq_two_site_count_stat(self, state_dim, num_sample_sets,
            sample_set_sizes, sample_sets, result_dim, f, &f_params, norm_f, out_rows,
            row_sites, out_cols, col_sites, options, result);
    } else {
        ret = check_positions(
            row_positions, out_rows, tsk_treeseq_get_sequence_length(self));
        if (ret != 0) {
            goto out;
        }
        ret = check_positions(
            col_positions, out_cols, tsk_treeseq_get_sequence_length(self));
        if (ret != 0) {
            goto out;
        }
        ret = tsk_treeseq_two_branch_count_stat(self, state_dim, num_sample_sets,
            sample_set_sizes, sample_sets, result_dim, f, &f_params, norm_f, out_rows,
            row_positions, out_cols, col_positions, options, result);
    }
out:
    return ret;
}
/***********************************
* Allele frequency spectrum
***********************************/
/*
 * Folds an allele frequency spectrum coordinate in place.
 *
 * The coordinate is compared against the midpoint of the spectrum, i.e. half
 * of the sum of (dims[k] - 1). If the coordinate sum lies exactly on the
 * midpoint, trailing dimensions are dropped one at a time from both the sum
 * and the midpoint until they differ (or no dimensions remain). When the
 * remaining sum exceeds the remaining midpoint every entry is reflected to
 * dims[k] - 1 - coordinate[k]; otherwise the coordinate is left untouched.
 */
static inline void
fold(tsk_size_t *restrict coordinate, const tsk_size_t *restrict dims,
    tsk_size_t num_dims)
{
    tsk_size_t d;
    double midpoint = 0;
    int coord_sum = 0;
    int reflected;

    for (d = 0; d < num_dims; d++) {
        tsk_bug_assert(coordinate[d] < dims[d]);
        midpoint += (double) dims[d] - 1;
        coord_sum += (int) coordinate[d];
    }
    midpoint /= 2;

    /* Break ties by peeling off trailing dimensions. */
    for (d = num_dims; d > 0 && coord_sum == midpoint;) {
        d--;
        midpoint -= ((double) (dims[d] - 1)) / 2;
        coord_sum -= (int) coordinate[d];
    }

    if (!(coord_sum > midpoint)) {
        return;
    }
    for (d = 0; d < num_dims; d++) {
        reflected = (int) (dims[d] - 1 - coordinate[d]);
        tsk_bug_assert(reflected >= 0);
        coordinate[d] = (tsk_size_t) reflected;
    }
}
/*
 * Adds the contribution of a single site to the allele frequency spectrum of
 * window `window_index`.
 *
 * `counts` holds K = num_sample_sets + 1 values per node (one per sample set
 * plus a final column for all samples) and `total_counts` the matching
 * totals. `result_dims[k]` is the size of dimension k of the spectrum and
 * `result_dims[num_sample_sets]` is the total number of entries per window.
 *
 * Each allele that is carried by at least one but not all samples adds 1 to
 * the spectrum when TSK_STAT_POLARISED is set (the first allele is then
 * skipped as the ancestral state), or 0.5 at the folded coordinate otherwise.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY on allocation failure, or the error
 * reported by get_allele_weights.
 */
static int
tsk_treeseq_update_site_afs(const tsk_treeseq_t *self, const tsk_site_t *site,
    const double *total_counts, const double *counts, tsk_size_t num_sample_sets,
    tsk_size_t window_index, tsk_size_t *result_dims, tsk_flags_t options,
    double *result)
{
    int ret = 0;
    tsk_size_t afs_size;
    tsk_size_t k, allele, num_alleles, all_samples;
    double increment, *afs, *allele_count;
    /* Must start as NULL: it is freed at `out` on paths where
     * get_allele_weights was never called or did not hand back a buffer. */
    double *allele_counts = NULL;
    tsk_size_t *coordinate = tsk_malloc(num_sample_sets * sizeof(*coordinate));
    bool polarised = !!(options & TSK_STAT_POLARISED);
    const tsk_size_t K = num_sample_sets + 1;

    if (coordinate == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = get_allele_weights(
        site, counts, K, total_counts, &num_alleles, &allele_counts);
    if (ret != 0) {
        goto out;
    }
    afs_size = result_dims[num_sample_sets];
    afs = result + afs_size * window_index;
    increment = polarised ? 1 : 0.5;
    /* Sum over the allele weights. Skip the ancestral state if polarised. */
    for (allele = polarised ? 1 : 0; allele < num_alleles; allele++) {
        allele_count = GET_2D_ROW(allele_counts, K, allele);
        all_samples = (tsk_size_t) allele_count[num_sample_sets];
        /* Only alleles that are neither absent nor fixed contribute. */
        if (all_samples > 0 && all_samples < self->num_samples) {
            for (k = 0; k < num_sample_sets; k++) {
                coordinate[k] = (tsk_size_t) allele_count[k];
            }
            if (!polarised) {
                fold(coordinate, result_dims, num_sample_sets);
            }
            increment_nd_array_value(
                afs, num_sample_sets, result_dims, coordinate, increment);
        }
    }
out:
    tsk_safe_free(coordinate);
    tsk_safe_free(allele_counts);
    return ret;
}
/*
 * Site-mode allele frequency spectrum.
 *
 * Sweeps the trees from left to right using the edge insertion and removal
 * indexes, keeping `parent` and the per-node `counts` (K = num_sample_sets + 1
 * values per node) in step with the current tree. For every site of every
 * tree, the window containing the site position is located and
 * tsk_treeseq_update_site_afs adds the site's contribution to `result`.
 *
 * `windows` is read at index `window_index + 1`, so it holds one more
 * breakpoint than there are windows.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY on allocation failure, or the error
 * from tsk_treeseq_update_site_afs.
 */
static int
tsk_treeseq_site_allele_frequency_spectrum(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes, double *counts,
tsk_size_t num_windows, const double *windows, tsk_size_t *result_dims,
tsk_flags_t options, double *result)
{
int ret = 0;
tsk_id_t u, v;
tsk_size_t tree_site, tree_index, window_index;
tsk_size_t num_nodes = self->tables->nodes.num_rows;
const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
const double *restrict edge_left = self->tables->edges.left;
const double *restrict edge_right = self->tables->edges.right;
const tsk_id_t *restrict edge_parent = self->tables->edges.parent;
const tsk_id_t *restrict edge_child = self->tables->edges.child;
const double sequence_length = self->tables->sequence_length;
tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));
tsk_site_t *site;
tsk_id_t tj, tk, h;
tsk_size_t j;
const tsk_size_t K = num_sample_sets + 1;
double t_left, t_right;
double *total_counts = tsk_malloc((1 + num_sample_sets) * sizeof(*total_counts));
if (parent == NULL || total_counts == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Fill every byte with 0xff so that each entry reads as -1 (presumably
 * TSK_NULL -- confirm), i.e. no node has a parent yet. */
tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));
/* Totals per sample set, followed by the total over all samples. */
for (j = 0; j < num_sample_sets; j++) {
total_counts[j] = (double) sample_set_sizes[j];
}
total_counts[num_sample_sets] = (double) self->num_samples;
/* Iterate over the trees */
tj = 0;
tk = 0;
t_left = 0;
tree_index = 0;
window_index = 0;
while (tj < num_edges || t_left < sequence_length) {
/* Remove edges ending at t_left, subtracting the child's counts from
 * every ancestor before detaching it. */
while (tk < num_edges && edge_right[O[tk]] == t_left) {
h = O[tk];
tk++;
u = edge_child[h];
v = edge_parent[h];
while (v != TSK_NULL) {
update_state(counts, K, v, u, -1);
v = parent[v];
}
parent[u] = TSK_NULL;
}
/* Insert edges starting at t_left, adding the child's counts to every
 * ancestor after attaching it. */
while (tj < num_edges && edge_left[I[tj]] == t_left) {
h = I[tj];
tj++;
u = edge_child[h];
v = edge_parent[h];
parent[u] = v;
while (v != TSK_NULL) {
update_state(counts, K, v, u, +1);
v = parent[v];
}
}
/* The current tree ends at the next edge start or end, whichever is
 * closer, capped at the sequence length. */
t_right = sequence_length;
if (tj < num_edges) {
t_right = TSK_MIN(t_right, edge_left[I[tj]]);
}
if (tk < num_edges) {
t_right = TSK_MIN(t_right, edge_right[O[tk]]);
}
/* Update the sites */
for (tree_site = 0; tree_site < self->tree_sites_length[tree_index];
tree_site++) {
site = self->tree_sites[tree_index] + tree_site;
/* Advance to the window whose right edge lies beyond this site. */
while (windows[window_index + 1] <= site->position) {
window_index++;
tsk_bug_assert(window_index < num_windows);
}
ret = tsk_treeseq_update_site_afs(self, site, total_counts, counts,
num_sample_sets, window_index, result_dims, options, result);
if (ret != 0) {
goto out;
}
tsk_bug_assert(windows[window_index] <= site->position);
tsk_bug_assert(site->position < windows[window_index + 1]);
}
tree_index++;
t_left = t_right;
}
out:
/* Can't use tsk_safe_free here because of restrict */
if (parent != NULL) {
free(parent);
}
tsk_safe_free(total_counts);
return ret;
}
/*
 * Flushes the branch-mode AFS contribution of the branch above node `u` for
 * the genomic span [last_update[u], right), then records `right` as the new
 * last update position for `u`.
 *
 * Nothing is added when `u` has no parent, or when the number of samples
 * below `u` (the final column of its counts row) is zero or equals the total
 * number of samples. Otherwise, for every time window that starts below the
 * parent's time, the span multiplied by the part of the branch lying inside
 * that time window is added at the (optionally folded) coordinate given by
 * the per-sample-set counts.
 */
static void
tsk_treeseq_update_branch_afs(const tsk_treeseq_t *self, tsk_id_t u, double right,
    double *restrict last_update, const double *restrict time, tsk_id_t *restrict parent,
    tsk_size_t *restrict coordinate, const double *counts, tsk_size_t num_sample_sets,
    tsk_size_t num_time_windows, const double *time_windows, tsk_size_t window_index,
    const tsk_size_t *result_dims, tsk_flags_t options, double *result)
{
    const bool polarised = !!(options & TSK_STAT_POLARISED);
    const double *node_counts = GET_2D_ROW(counts, num_sample_sets + 1, u);
    const tsk_size_t samples_below = (tsk_size_t) node_counts[num_sample_sets];
    tsk_size_t entries_per_window, set, tw;
    double child_time, parent_time, span, overlap;
    double *window_afs;

    if (parent[u] != TSK_NULL && 0 < samples_below
        && samples_below < self->num_samples) {
        child_time = time[u];
        parent_time = time[parent[u]];
        entries_per_window = result_dims[num_sample_sets];
        span = right - last_update[u];
        for (tw = 0; tw < num_time_windows && time_windows[tw] < parent_time; tw++) {
            window_afs = result
                         + entries_per_window * (window_index * num_time_windows + tw);
            /* Rebuilt on every pass because fold() rewrites it in place. */
            for (set = 0; set < num_sample_sets; set++) {
                coordinate[set] = (tsk_size_t) node_counts[set];
            }
            if (!polarised) {
                fold(coordinate, result_dims, num_sample_sets);
            }
            /* Length of the branch that falls inside this time window. */
            overlap = TSK_MAX(0.0, TSK_MIN(time_windows[tw + 1], parent_time)
                                       - TSK_MAX(time_windows[tw], child_time));
            increment_nd_array_value(
                window_afs, num_sample_sets, result_dims, coordinate, span * overlap);
        }
    }
    last_update[u] = right;
}
/*
 * Branch-mode allele frequency spectrum.
 *
 * Sweeps the trees from left to right using the edge insertion and removal
 * indexes. Before the counts of a node change, the contribution accumulated
 * by its branch since its last update is flushed into `result` via
 * tsk_treeseq_update_branch_afs. Whenever a genomic window ends within the
 * current tree, every node is flushed up to the window's right edge before
 * moving on to the next window.
 *
 * `counts` holds K = num_sample_sets + 1 values per node. `windows` is read
 * at index `window_index + 1`, so it holds one more breakpoint than there are
 * windows.
 *
 * Returns 0 on success, TSK_ERR_TIME_UNCALIBRATED if the tree sequence is
 * time-uncalibrated and TSK_STAT_ALLOW_TIME_UNCALIBRATED is not set, or
 * TSK_ERR_NO_MEMORY on allocation failure.
 */
static int
tsk_treeseq_branch_allele_frequency_spectrum(const tsk_treeseq_t *self,
    tsk_size_t num_sample_sets, double *counts, tsk_size_t num_windows,
    const double *windows, tsk_size_t num_time_windows, const double *time_windows,
    const tsk_size_t *result_dims, tsk_flags_t options, double *result)
{
    int ret = 0;
    tsk_id_t u, v;
    tsk_size_t window_index;
    tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows;
    const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order;
    const tsk_id_t *restrict O = self->tables->indexes.edge_removal_order;
    const double *restrict edge_left = self->tables->edges.left;
    const double *restrict edge_right = self->tables->edges.right;
    const tsk_id_t *restrict edge_parent = self->tables->edges.parent;
    const tsk_id_t *restrict edge_child = self->tables->edges.child;
    const double *restrict node_time = self->tables->nodes.time;
    const double sequence_length = self->tables->sequence_length;
    tsk_id_t *restrict parent = tsk_malloc(num_nodes * sizeof(*parent));
    double *restrict last_update = tsk_calloc(num_nodes, sizeof(*last_update));
    double *restrict branch_length = tsk_calloc(num_nodes, sizeof(*branch_length));
    tsk_size_t *restrict coordinate = tsk_malloc(num_sample_sets * sizeof(*coordinate));
    tsk_id_t tj, tk, h;
    double t_left, t_right, w_right;
    const tsk_size_t K = num_sample_sets + 1;

    if (self->time_uncalibrated && !(options & TSK_STAT_ALLOW_TIME_UNCALIBRATED)) {
        ret = tsk_trace_error(TSK_ERR_TIME_UNCALIBRATED);
        goto out;
    }
    /* branch_length is written to in the sweep below, so it has to be checked
     * together with the other work arrays. */
    if (parent == NULL || last_update == NULL || branch_length == NULL
        || coordinate == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* All-ones bytes: every parent entry starts out as -1. */
    tsk_memset(parent, 0xff, num_nodes * sizeof(*parent));
    /* Iterate over the trees */
    tj = 0;
    tk = 0;
    t_left = 0;
    window_index = 0;
    while (tj < num_edges || t_left < sequence_length) {
        tsk_bug_assert(window_index < num_windows);
        /* Edges ending at t_left: flush the child and each ancestor before
         * their counts change, then detach the child. */
        while (tk < num_edges && edge_right[O[tk]] == t_left) {
            h = O[tk];
            tk++;
            u = edge_child[h];
            v = edge_parent[h];
            tsk_treeseq_update_branch_afs(self, u, t_left, last_update, node_time,
                parent, coordinate, counts, num_sample_sets, num_time_windows,
                time_windows, window_index, result_dims, options, result);
            while (v != TSK_NULL) {
                tsk_treeseq_update_branch_afs(self, v, t_left, last_update, node_time,
                    parent, coordinate, counts, num_sample_sets, num_time_windows,
                    time_windows, window_index, result_dims, options, result);
                update_state(counts, K, v, u, -1);
                v = parent[v];
            }
            parent[u] = TSK_NULL;
            branch_length[u] = 0;
        }
        /* Edges starting at t_left: attach the child, then flush each
         * ancestor before adding the child's counts to it. */
        while (tj < num_edges && edge_left[I[tj]] == t_left) {
            h = I[tj];
            tj++;
            u = edge_child[h];
            v = edge_parent[h];
            parent[u] = v;
            branch_length[u] = node_time[v] - node_time[u];
            while (v != TSK_NULL) {
                tsk_treeseq_update_branch_afs(self, v, t_left, last_update, node_time,
                    parent, coordinate, counts, num_sample_sets, num_time_windows,
                    time_windows, window_index, result_dims, options, result);
                update_state(counts, K, v, u, +1);
                v = parent[v];
            }
        }
        /* The current tree ends at the next edge start or end, whichever is
         * closer, capped at the sequence length. */
        t_right = sequence_length;
        if (tj < num_edges) {
            t_right = TSK_MIN(t_right, edge_left[I[tj]]);
        }
        if (tk < num_edges) {
            t_right = TSK_MIN(t_right, edge_right[O[tk]]);
        }
        while (window_index < num_windows && windows[window_index + 1] <= t_right) {
            w_right = windows[window_index + 1];
            /* Flush the contributions of all nodes to the current window */
            for (u = 0; u < (tsk_id_t) num_nodes; u++) {
                tsk_bug_assert(last_update[u] < w_right);
                tsk_treeseq_update_branch_afs(self, u, w_right, last_update, node_time,
                    parent, coordinate, counts, num_sample_sets, num_time_windows,
                    time_windows, window_index, result_dims, options, result);
            }
            window_index++;
        }
        t_left = t_right;
    }
out:
    /* Can't use tsk_safe_free here because of restrict */
    if (parent != NULL) {
        free(parent);
    }
    if (last_update != NULL) {
        free(last_update);
    }
    if (branch_length != NULL) {
        free(branch_length);
    }
    if (coordinate != NULL) {
        free(coordinate);
    }
    return ret;
}
/*
 * Computes the joint allele frequency spectrum of the given sample sets.
 *
 * `sample_sets` is the concatenation of all sets, with the length of set k
 * given by `sample_set_sizes[k]`. The spectrum has one dimension of size
 * `1 + sample_set_sizes[k]` per sample set; `result` must have room for
 * `num_windows * num_time_windows` such spectra and is zeroed before use.
 *
 * If `windows` is NULL a single window spanning the whole sequence is used.
 * If `time_windows` is NULL a single time window [0, INFINITY) is used.
 * Site mode is the default when neither TSK_STAT_SITE nor TSK_STAT_BRANCH is
 * set, and only accepts time windows starting at 0 with an infinite second
 * breakpoint.
 *
 * Returns 0 on success, or:
 *   TSK_ERR_UNSUPPORTED_STAT_MODE for node mode, or for site mode combined
 *     with other time windows,
 *   TSK_ERR_MULTIPLE_STAT_MODES if both site and branch are requested,
 *   TSK_ERR_DUPLICATE_SAMPLE if a sample set lists the same node twice,
 *   TSK_ERR_NO_MEMORY on allocation failure,
 *   or any error from the window / sample-set checks or the mode-specific
 *   implementation.
 */
int
tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,
    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
    const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,
    tsk_size_t num_time_windows, const double *time_windows, tsk_flags_t options,
    double *result)
{
    int ret = 0;
    bool stat_site = !!(options & TSK_STAT_SITE);
    bool stat_branch = !!(options & TSK_STAT_BRANCH);
    bool stat_node = !!(options & TSK_STAT_NODE);
    const double default_windows[] = { 0, self->tables->sequence_length };
    const double default_time_windows[] = { 0, INFINITY };
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const tsk_size_t K = num_sample_sets + 1;
    tsk_size_t j, k, l, afs_size;
    tsk_id_t u;
    tsk_size_t *result_dims = NULL;
    /* These counts should really be ints, but we use doubles so that we can
     * reuse code from the general_stats code paths. */
    double *counts = NULL;
    double *count_row;

    if (stat_node) {
        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
        goto out;
    }
    /* If no mode is specified, we default to site mode */
    if (!(stat_site || stat_branch)) {
        stat_site = true;
    }
    /* It's an error to specify more than one mode */
    if (stat_site + stat_branch > 1) {
        ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);
        goto out;
    }
    if (windows == NULL) {
        num_windows = 1;
        windows = default_windows;
    } else {
        ret = tsk_treeseq_check_windows(
            self, num_windows, windows, TSK_REQUIRE_FULL_SPAN);
        if (ret != 0) {
            goto out;
        }
    }
    if (time_windows == NULL) {
        num_time_windows = 1;
        time_windows = default_time_windows;
    } else {
        ret = tsk_treeseq_check_time_windows(num_time_windows, time_windows);
        if (ret != 0) {
            goto out;
        }
        // Site mode does not support time windows
        if (stat_site && !(time_windows[0] == 0.0 && isinf((float) time_windows[1]))) {
            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
            goto out;
        }
    }
    ret = tsk_treeseq_check_sample_sets(
        self, num_sample_sets, sample_set_sizes, sample_sets);
    if (ret != 0) {
        goto out;
    }
    /* the last element of result_dims stores the total size of the dimensions */
    result_dims = tsk_malloc((num_sample_sets + 1) * sizeof(*result_dims));
    counts = tsk_calloc(num_nodes * K, sizeof(*counts));
    if (counts == NULL || result_dims == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    afs_size = 1;
    j = 0;
    for (k = 0; k < num_sample_sets; k++) {
        result_dims[k] = 1 + sample_set_sizes[k];
        afs_size *= result_dims[k];
        for (l = 0; l < sample_set_sizes[k]; l++) {
            u = sample_sets[j];
            count_row = GET_2D_ROW(counts, K, u);
            /* A nonzero entry means this node was already seen in set k. */
            if (count_row[k] != 0) {
                ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
                goto out;
            }
            count_row[k] = 1;
            j++;
        }
    }
    /* The final column counts membership in the full set of samples. */
    for (j = 0; j < self->num_samples; j++) {
        u = self->samples[j];
        count_row = GET_2D_ROW(counts, K, u);
        count_row[num_sample_sets] = 1;
    }
    result_dims[num_sample_sets] = (tsk_size_t) afs_size;
    tsk_memset(result, 0, num_windows * num_time_windows * afs_size * sizeof(*result));
    if (stat_site) {
        ret = tsk_treeseq_site_allele_frequency_spectrum(self, num_sample_sets,
            sample_set_sizes, counts, num_windows, windows, result_dims, options,
            result);
    } else {
        ret = tsk_treeseq_branch_allele_frequency_spectrum(self, num_sample_sets, counts,
            num_windows, windows, num_time_windows, time_windows, result_dims, options,
            result);
    }
    if (ret != 0) {
        /* The spectrum is incomplete; do not post-process it. */
        goto out;
    }
    if (options & TSK_STAT_SPAN_NORMALISE) {
        span_normalise(num_windows, windows, afs_size * num_time_windows, result);
    }
out:
    tsk_safe_free(counts);
    tsk_safe_free(result_dims);
    return ret;
}
/***********************************
* One way stats
***********************************/
/*
 * Summary function for diversity. For each sample set j, with x the state
 * value and n the size of that set, writes x * (n - x) / (n * (n - 1)) to
 * result[j]. `params` must point to a sample_count_stat_params_t.
 * Always returns 0.
 */
static int
diversity_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *args = (sample_count_stat_params_t *) params;
    tsk_size_t set;
    double set_size, below;

    for (set = 0; set < state_dim; set++) {
        set_size = (double) args->sample_set_sizes[set];
        below = state[set];
        result[set] = below * (set_size - below) / (set_size * (set_size - 1));
    }
    return 0;
}
/*
 * Computes diversity for each sample set by running the sample-count
 * statistic machinery with diversity_summary_func. One value is produced per
 * sample set and no set indexes are supplied.
 */
int
tsk_treeseq_diversity(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)
{
    const tsk_size_t result_dim = num_sample_sets;

    return tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, result_dim, NULL, diversity_summary_func, num_windows, windows,
        options, result);
}
/*
 * Summary function for trait covariance. With n the number of samples taken
 * from `params` (a weight_stat_params_t), writes
 * x[j]^2 / (2 * (n - 1) * (n - 1)) to result[j] for each state entry.
 * Always returns 0.
 */
static int
trait_covariance_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const weight_stat_params_t *args = (weight_stat_params_t *) params;
    const double n = (double) args->num_samples;
    const double scale = 2 * (n - 1) * (n - 1);
    tsk_size_t trait;

    for (trait = 0; trait < state_dim; trait++) {
        result[trait] = (state[trait] * state[trait]) / scale;
    }
    return 0;
}
/*
 * Computes the trait covariance statistic for `num_weights` traits.
 *
 * `weights` is a row-major array with one row per sample and `num_weights`
 * columns. Each column is centred by subtracting its mean over all samples,
 * and the centred weights are passed to tsk_treeseq_general_stat together
 * with trait_covariance_summary_func.
 *
 * Returns the result of tsk_treeseq_general_stat, TSK_ERR_NO_MEMORY on
 * allocation failure, or TSK_ERR_INSUFFICIENT_WEIGHTS if num_weights is 0.
 */
int
tsk_treeseq_trait_covariance(const tsk_treeseq_t *self, tsk_size_t num_weights,
    const double *weights, tsk_size_t num_windows, const double *windows,
    tsk_flags_t options, double *result)
{
    tsk_size_t num_samples = self->num_samples;
    tsk_size_t j, k;
    int ret;
    const double *row;
    double *new_row;
    double *means = tsk_calloc(num_weights, sizeof(double));
    double *new_weights = tsk_malloc((num_weights + 1) * num_samples * sizeof(double));
    /* Designated initialiser: names the member explicitly rather than
     * depending on its position in the struct. */
    weight_stat_params_t args = { .num_samples = self->num_samples };

    if (new_weights == NULL || means == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (num_weights == 0) {
        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_WEIGHTS);
        goto out;
    }
    // center weights
    for (j = 0; j < num_samples; j++) {
        row = GET_2D_ROW(weights, num_weights, j);
        for (k = 0; k < num_weights; k++) {
            means[k] += row[k];
        }
    }
    for (k = 0; k < num_weights; k++) {
        means[k] /= (double) num_samples;
    }
    for (j = 0; j < num_samples; j++) {
        row = GET_2D_ROW(weights, num_weights, j);
        new_row = GET_2D_ROW(new_weights, num_weights, j);
        for (k = 0; k < num_weights; k++) {
            new_row[k] = row[k] - means[k];
        }
    }
    ret = tsk_treeseq_general_stat(self, num_weights, new_weights, num_weights,
        trait_covariance_summary_func, &args, num_windows, windows, options, result);
out:
    tsk_safe_free(means);
    tsk_safe_free(new_weights);
    return ret;
}
/*
 * Summary function for trait correlation. The last state entry is read as a
 * frequency p and the preceding entries as trait values. With n the number
 * of samples taken from `params` (a weight_stat_params_t), each output is
 * x[j]^2 / (2 * p * (1 - p) * n * (n - 1)) when 0 < p < 1, and 0 otherwise.
 * Always returns 0.
 */
static int
trait_correlation_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const weight_stat_params_t *args = (weight_stat_params_t *) params;
    const double n = (double) args->num_samples;
    const tsk_size_t num_traits = state_dim - 1;
    const double p = state[num_traits];
    double scale;
    tsk_size_t trait;

    if ((p > 0.0) && (p < 1.0)) {
        scale = 2 * (p * (1 - p)) * n * (n - 1);
        for (trait = 0; trait < num_traits; trait++) {
            result[trait] = (state[trait] * state[trait]) / scale;
        }
    } else {
        for (trait = 0; trait < num_traits; trait++) {
            result[trait] = 0.0;
        }
    }
    return 0;
}
/*
 * Computes the trait correlation statistic for `num_weights` traits.
 *
 * `weights` is a row-major array with one row per sample and `num_weights`
 * columns. Each column is centred on its mean and divided by its standard
 * deviation (computed with an n - 1 denominator). An extra column holding
 * 1 / num_samples is appended so that the summary function can recover the
 * frequency, and the result is passed to tsk_treeseq_general_stat together
 * with trait_correlation_summary_func.
 *
 * Returns the result of tsk_treeseq_general_stat, TSK_ERR_NO_MEMORY on
 * allocation failure, or TSK_ERR_INSUFFICIENT_WEIGHTS if num_weights is 0.
 */
int
tsk_treeseq_trait_correlation(const tsk_treeseq_t *self, tsk_size_t num_weights,
    const double *weights, tsk_size_t num_windows, const double *windows,
    tsk_flags_t options, double *result)
{
    tsk_size_t num_samples = self->num_samples;
    tsk_size_t j, k;
    int ret;
    double *means = tsk_calloc(num_weights, sizeof(double));
    double *meansqs = tsk_calloc(num_weights, sizeof(double));
    double *sds = tsk_calloc(num_weights, sizeof(double));
    const double *row;
    double *new_row;
    double *new_weights = tsk_malloc((num_weights + 1) * num_samples * sizeof(double));
    /* Designated initialiser: names the member explicitly rather than
     * depending on its position in the struct. */
    weight_stat_params_t args = { .num_samples = self->num_samples };

    if (new_weights == NULL || means == NULL || meansqs == NULL || sds == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (num_weights < 1) {
        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_WEIGHTS);
        goto out;
    }
    // center and scale weights
    for (j = 0; j < num_samples; j++) {
        row = GET_2D_ROW(weights, num_weights, j);
        for (k = 0; k < num_weights; k++) {
            means[k] += row[k];
            meansqs[k] += row[k] * row[k];
        }
    }
    for (k = 0; k < num_weights; k++) {
        means[k] /= (double) num_samples;
        /* Turn the sum of squares into the sample variance. */
        meansqs[k] -= means[k] * means[k] * (double) num_samples;
        meansqs[k] /= (double) (num_samples - 1);
        sds[k] = sqrt(meansqs[k]);
    }
    for (j = 0; j < num_samples; j++) {
        row = GET_2D_ROW(weights, num_weights, j);
        new_row = GET_2D_ROW(new_weights, num_weights + 1, j);
        for (k = 0; k < num_weights; k++) {
            new_row[k] = (row[k] - means[k]) / sds[k];
        }
        // set final column to 1/n to compute frequency
        new_row[num_weights] = 1.0 / (double) num_samples;
    }
    ret = tsk_treeseq_general_stat(self, num_weights + 1, new_weights, num_weights,
        trait_correlation_summary_func, &args, num_windows, windows, options, result);
out:
    tsk_safe_free(means);
    tsk_safe_free(meansqs);
    tsk_safe_free(sds);
    tsk_safe_free(new_weights);
    return ret;
}
/*
 * Summary function for the trait linear model statistic.
 *
 * State layout:
 *   x[0 .. result_dim - 1]           trait values, W
 *   x[result_dim .. state_dim - 2]   covariate values, Z
 *   x[state_dim - 1]                 number of samples below the node, m
 *
 * `params` must point to a covariates_stat_params_t holding the number of
 * samples, the number of covariates k and the matrix V (one row of k values
 * per trait). For each trait i, when 0 < m < num_samples, the value
 * a = x[i] - sum_j z_j * V[i][j] and denom = m - sum_j z_j^2 are formed and
 * a^2 / (2 * denom^2) is written, unless denom < 1e-8 in which case 0 is
 * written. When m is outside that range the output is 0. Always returns 0.
 */
static int
trait_linear_model_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const covariates_stat_params_t *args = (covariates_stat_params_t *) params;
    const double num_samples = (double) args->num_samples;
    const tsk_size_t num_covariates = args->num_covariates;
    const double *V = args->V;
    const double m = state[state_dim - 1];
    const bool informative = (m > 0.0) && (m < num_samples);
    const double *v_row;
    double residual, denom, z;
    tsk_size_t trait, cov;

    for (trait = 0; trait < result_dim; trait++) {
        if (!informative) {
            result[trait] = 0.0;
            continue;
        }
        v_row = GET_2D_ROW(V, num_covariates, trait);
        residual = state[trait];
        denom = m;
        for (cov = 0; cov < num_covariates; cov++) {
            z = state[result_dim + cov];
            residual -= z * v_row[cov];
            denom -= z * z;
        }
        // denom is the length of projection of the trait onto the subspace
        // spanned by the covariates, so a zero value means the system is
        // singular and the solution is nonunique. The tolerance could be
        // tighter without running into floating-point error, but erring on
        // the conservative side when the trait is almost in the span of the
        // covariates is probably good.
        if (denom < 1e-8) {
            result[trait] = 0.0;
        } else {
            result[trait] = (residual * residual) / (2 * denom * denom);
        }
    }
    return 0;
}
/*
 * Computes the trait linear model statistic.
 *
 * `weights` (num_samples x num_weights) and `covariates`
 * (num_samples x num_covariates) are row-major. The matrix
 * V = weights^T * covariates is accumulated, and a combined weight matrix is
 * built whose rows hold the traits, then the covariates, then a constant 1.
 * Both are handed to tsk_treeseq_general_stat with
 * trait_linear_model_summary_func.
 *
 * The covariates are assumed to be *already standardised*, so that
 * (a) 1 is in the span of the columns, and (b) their crossproduct is the
 * identity. This could instead be done here with gsl linalg.
 *
 * Returns the result of tsk_treeseq_general_stat, TSK_ERR_NO_MEMORY on
 * allocation failure, or TSK_ERR_INSUFFICIENT_WEIGHTS if num_weights is 0.
 */
int
tsk_treeseq_trait_linear_model(const tsk_treeseq_t *self, tsk_size_t num_weights,
    const double *weights, tsk_size_t num_covariates, const double *covariates,
    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)
{
    int ret;
    const tsk_size_t num_samples = self->num_samples;
    const tsk_size_t row_len = num_weights + num_covariates + 1;
    tsk_size_t sample, trait, cov;
    const double *trait_row, *cov_row;
    double *v_row, *combined_row;
    double *V = tsk_calloc(num_covariates * num_weights, sizeof(double));
    double *new_weights = tsk_malloc(row_len * num_samples * sizeof(double));
    covariates_stat_params_t args
        = { .num_samples = self->num_samples, .num_covariates = num_covariates, .V = V };

    if (new_weights == NULL || V == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (num_weights < 1) {
        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_WEIGHTS);
        goto out;
    }
    for (sample = 0; sample < num_samples; sample++) {
        trait_row = GET_2D_ROW(weights, num_weights, sample);
        cov_row = GET_2D_ROW(covariates, num_covariates, sample);

        // V = weights^T (matrix mult) covariates
        for (trait = 0; trait < num_weights; trait++) {
            v_row = GET_2D_ROW(V, num_covariates, trait);
            for (cov = 0; cov < num_covariates; cov++) {
                v_row[cov] += trait_row[trait] * cov_row[cov];
            }
        }

        // combined row: traits, then covariates, then a trailing 1
        combined_row = GET_2D_ROW(new_weights, row_len, sample);
        for (trait = 0; trait < num_weights; trait++) {
            combined_row[trait] = trait_row[trait];
        }
        for (cov = 0; cov < num_covariates; cov++) {
            combined_row[num_weights + cov] = cov_row[cov];
        }
        // the final entry is 1 so that alleles are counted
        combined_row[num_weights + num_covariates] = 1.0;
    }
    ret = tsk_treeseq_general_stat(self, row_len, new_weights, num_weights,
        trait_linear_model_summary_func, &args, num_windows, windows, options, result);
out:
    tsk_safe_free(V);
    tsk_safe_free(new_weights);
    return ret;
}
/*
 * Summary function for the segregating-sites statistic.
 *
 * For every index j < state_dim writes
 *     result[j] = (state[j] > 0) * (1 - state[j] / n_j)
 * where n_j is sample_set_sizes[j] taken from `params`, which must point to a
 * sample_count_stat_params_t. result_dim is not consulted. Always returns 0.
 */
static int
segregating_sites_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    double set_size, count;
    tsk_size_t set;

    /* this works because sum_{i=1}^k (1-p_i) = k-1 */
    for (set = 0; set < state_dim; set++) {
        set_size = (double) stat_params->sample_set_sizes[set];
        count = state[set];
        result[set] = (count > 0) * (1 - count / set_size);
    }
    return 0;
}
/*
 * Segregating-sites statistic: forwards to tsk_treeseq_sample_count_stat with
 * segregating_sites_summary_func. num_sample_sets is passed a second time and
 * NULL is passed for the index array (presumably the result dimension and the
 * set indexes - confirm against tsk_treeseq_sample_count_stat). The callee's
 * return value is returned unchanged.
 */
int
tsk_treeseq_segregating_sites(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)
{
    int ret;

    ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, segregating_sites_summary_func, num_windows,
        windows, options, result);
    return ret;
}
/*
 * Summary function for the Y1 statistic.
 *
 * For every index i < result_dim, with x = state[i] and n =
 * sample_set_sizes[i] (from `params`, a sample_count_stat_params_t):
 *     result[i] = x * (n - x) * (n - x - 1) / (n * (n - 1) * (n - 2))
 * No guard is applied to the denominator. state_dim is not consulted.
 * Always returns 0.
 */
static int
Y1_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    double set_size, count, scale, value;
    tsk_size_t set;

    for (set = 0; set < result_dim; set++) {
        set_size = (double) stat_params->sample_set_sizes[set];
        count = state[set];
        scale = set_size * (set_size - 1) * (set_size - 2);
        value = count * (set_size - count) * (set_size - count - 1);
        result[set] = value / scale;
    }
    return 0;
}
/*
 * Y1 statistic: forwards to tsk_treeseq_sample_count_stat with
 * Y1_summary_func, passing num_sample_sets a second time and NULL for the
 * index array. The callee's return value is returned unchanged.
 */
int
tsk_treeseq_Y1(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)
{
    int ret;

    ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, Y1_summary_func, num_windows, windows,
        options, result);
    return ret;
}
/*
 * Summary function for the two-locus D statistic.
 *
 * `state` is read as rows of three values per index j < state_dim. Each value
 * is divided by sample_set_sizes[j] (from `params`, a
 * sample_count_stat_params_t) to give freq_AB, freq_Ab and freq_aB
 * (presumably haplotype frequencies, going by the naming). Then
 *     freq_A = freq_AB + freq_Ab,  freq_B = freq_AB + freq_aB,
 *     result[j] = freq_AB - freq_A * freq_B.
 * result_dim is not consulted. Always returns 0.
 */
static int
D_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double set_size, freq_AB, freq_Ab, freq_aB, freq_A, freq_B;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        set_size = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        freq_AB = counts[0] / set_size;
        freq_Ab = counts[1] / set_size;
        freq_aB = counts[2] / set_size;
        freq_A = freq_AB + freq_Ab;
        freq_B = freq_AB + freq_aB;
        result[set] = freq_AB - (freq_A * freq_B);
    }
    return 0;
}
/*
 * Two-locus D statistic: forwards to tsk_treeseq_two_locus_count_stat with
 * D_summary_func and norm_total_weighted. TSK_STAT_POLARISED is always OR-ed
 * into the caller's options before forwarding. The callee's return value is
 * returned unchanged.
 */
int
tsk_treeseq_D(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    // TODO: allow user to pick?
    const tsk_flags_t polarised_options = options | TSK_STAT_POLARISED;

    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, D_summary_func, norm_total_weighted,
        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions,
        polarised_options, result);
}
/*
 * Summary function for the two-locus D^2 statistic.
 *
 * For every index j < state_dim the three values in row j of `state` are
 * divided by sample_set_sizes[j] (from `params`, a
 * sample_count_stat_params_t) to give freq_AB, freq_Ab and freq_aB. With
 * freq_A = freq_AB + freq_Ab and freq_B = freq_AB + freq_aB,
 *     D = freq_AB - freq_A * freq_B,   result[j] = D * D.
 * result_dim is not consulted. Always returns 0.
 */
static int
D2_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double set_size, freq_AB, freq_Ab, freq_aB, freq_A, freq_B, D;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        set_size = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        freq_AB = counts[0] / set_size;
        freq_Ab = counts[1] / set_size;
        freq_aB = counts[2] / set_size;
        freq_A = freq_AB + freq_Ab;
        freq_B = freq_AB + freq_aB;
        D = freq_AB - (freq_A * freq_B);
        result[set] = D * D;
    }
    return 0;
}
/*
 * Two-locus D^2 statistic: forwards to tsk_treeseq_two_locus_count_stat with
 * D2_summary_func and norm_total_weighted. Options are passed through
 * untouched and the callee's return value is returned unchanged.
 */
int
tsk_treeseq_D2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret;

    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, D2_summary_func, norm_total_weighted,
        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,
        result);
    return ret;
}
/*
 * Summary function for the two-locus r^2 statistic.
 *
 * For every index j < state_dim the three values in row j of `state` are
 * divided by sample_set_sizes[j] (from `params`, a
 * sample_count_stat_params_t) to give freq_AB, freq_Ab and freq_aB. With
 * freq_A = freq_AB + freq_Ab and freq_B = freq_AB + freq_aB,
 *     D = freq_AB - freq_A * freq_B
 *     result[j] = D * D / (freq_A * freq_B * (1 - freq_A) * (1 - freq_B))
 * The denominator is not guarded against zero. result_dim is not consulted.
 * Always returns 0.
 */
static int
r2_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double set_size, freq_AB, freq_Ab, freq_aB, freq_A, freq_B, D, scale;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        set_size = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        freq_AB = counts[0] / set_size;
        freq_Ab = counts[1] / set_size;
        freq_aB = counts[2] / set_size;
        freq_A = freq_AB + freq_Ab;
        freq_B = freq_AB + freq_aB;
        D = freq_AB - (freq_A * freq_B);
        scale = freq_A * freq_B * (1 - freq_A) * (1 - freq_B);
        result[set] = (D * D) / scale;
    }
    return 0;
}
/*
 * Two-locus r^2 statistic: forwards to tsk_treeseq_two_locus_count_stat with
 * r2_summary_func and norm_hap_weighted. Options are passed through untouched
 * and the callee's return value is returned unchanged.
 */
int
tsk_treeseq_r2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret;

    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, r2_summary_func, norm_hap_weighted, num_rows,
        row_sites, row_positions, num_cols, col_sites, col_positions, options, result);
    return ret;
}
/*
 * Summary function for the two-locus D' statistic.
 *
 * For every index j < state_dim the three values in row j of `state` are
 * divided by sample_set_sizes[j] (from `params`, a
 * sample_count_stat_params_t) to give p_AB, p_Ab and p_aB. With
 * p_A = p_AB + p_Ab, p_B = p_AB + p_aB and D = p_AB - p_A * p_B:
 *     D >= 0:    result[j] = D / min(p_A * (1 - p_B), (1 - p_A) * p_B)
 *     otherwise: result[j] = D / min(p_A * p_B, (1 - p_A) * (1 - p_B))
 *
 * The second branch is a plain `else` on purpose: a NaN D (for example from
 * a 0/0 frequency) compares false against both `>= 0` and `< 0`, and an
 * `else if (D < 0)` would leave result[j] unwritten. With `else`, result[j]
 * is assigned on every iteration and NaN propagates to the output.
 *
 * result_dim is not consulted. Always returns 0.
 */
static int
D_prime_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    sample_count_stat_params_t args = *(sample_count_stat_params_t *) params;
    double n;
    const double *state_row;
    tsk_size_t j;

    for (j = 0; j < state_dim; j++) {
        n = (double) args.sample_set_sizes[j];
        state_row = GET_2D_ROW(state, 3, j);
        double p_AB = state_row[0] / n;
        double p_Ab = state_row[1] / n;
        double p_aB = state_row[2] / n;
        double p_A = p_AB + p_Ab;
        double p_B = p_AB + p_aB;
        double D = p_AB - (p_A * p_B);

        if (D >= 0) {
            result[j] = D / TSK_MIN(p_A * (1 - p_B), (1 - p_A) * p_B);
        } else {
            /* Covers D < 0 and NaN, so result[j] is always written. */
            result[j] = D / TSK_MIN(p_A * p_B, (1 - p_A) * (1 - p_B));
        }
    }
    return 0;
}
/*
 * Two-locus D' statistic: forwards to tsk_treeseq_two_locus_count_stat with
 * D_prime_summary_func and norm_total_weighted. TSK_STAT_POLARISED is always
 * OR-ed into the caller's options before forwarding. The callee's return
 * value is returned unchanged.
 */
int
tsk_treeseq_D_prime(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    // TODO: allow user to pick?
    const tsk_flags_t polarised_options = options | TSK_STAT_POLARISED;

    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, D_prime_summary_func, norm_total_weighted,
        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions,
        polarised_options, result);
}
/*
 * Summary function for the two-locus r statistic.
 *
 * For every index j < state_dim the three values in row j of `state` are
 * divided by sample_set_sizes[j] (from `params`, a
 * sample_count_stat_params_t) to give freq_AB, freq_Ab and freq_aB. With
 * freq_A = freq_AB + freq_Ab and freq_B = freq_AB + freq_aB,
 *     D = freq_AB - freq_A * freq_B
 *     result[j] = D / sqrt(freq_A * freq_B * (1 - freq_A) * (1 - freq_B))
 * The denominator is not guarded against zero. result_dim is not consulted.
 * Always returns 0.
 */
static int
r_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double set_size, freq_AB, freq_Ab, freq_aB, freq_A, freq_B, D, scale;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        set_size = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        freq_AB = counts[0] / set_size;
        freq_Ab = counts[1] / set_size;
        freq_aB = counts[2] / set_size;
        freq_A = freq_AB + freq_Ab;
        freq_B = freq_AB + freq_aB;
        D = freq_AB - (freq_A * freq_B);
        scale = freq_A * freq_B * (1 - freq_A) * (1 - freq_B);
        result[set] = D / sqrt(scale);
    }
    return 0;
}
/*
 * Two-locus r statistic: forwards to tsk_treeseq_two_locus_count_stat with
 * r_summary_func and norm_total_weighted. TSK_STAT_POLARISED is always OR-ed
 * into the caller's options before forwarding. The callee's return value is
 * returned unchanged.
 */
int
tsk_treeseq_r(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    // TODO: allow user to pick?
    const tsk_flags_t polarised_options = options | TSK_STAT_POLARISED;

    return tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, r_summary_func, norm_total_weighted,
        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions,
        polarised_options, result);
}
/*
 * Summary function for the two-locus Dz statistic.
 *
 * For every index j < state_dim the three values in row j of `state` are
 * divided by sample_set_sizes[j] (from `params`, a
 * sample_count_stat_params_t) to give freq_AB, freq_Ab and freq_aB. With
 * freq_A = freq_AB + freq_Ab and freq_B = freq_AB + freq_aB,
 *     D = freq_AB - freq_A * freq_B
 *     result[j] = D * (1 - 2 * freq_A) * (1 - 2 * freq_B)
 * result_dim is not consulted. Always returns 0.
 */
static int
Dz_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double set_size, freq_AB, freq_Ab, freq_aB, freq_A, freq_B, D;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        set_size = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        freq_AB = counts[0] / set_size;
        freq_Ab = counts[1] / set_size;
        freq_aB = counts[2] / set_size;
        freq_A = freq_AB + freq_Ab;
        freq_B = freq_AB + freq_aB;
        D = freq_AB - (freq_A * freq_B);
        result[set] = D * (1 - 2 * freq_A) * (1 - 2 * freq_B);
    }
    return 0;
}
/*
 * Two-locus Dz statistic: forwards to tsk_treeseq_two_locus_count_stat with
 * Dz_summary_func and norm_total_weighted. Options are passed through
 * untouched and the callee's return value is returned unchanged.
 */
int
tsk_treeseq_Dz(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret;

    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, Dz_summary_func, norm_total_weighted,
        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,
        result);
    return ret;
}
/*
 * Summary function for the two-locus pi2 statistic.
 *
 * For every index j < state_dim the three values in row j of `state` are
 * divided by sample_set_sizes[j] (from `params`, a
 * sample_count_stat_params_t) to give freq_AB, freq_Ab and freq_aB. With
 * freq_A = freq_AB + freq_Ab and freq_B = freq_AB + freq_aB,
 *     result[j] = freq_A * (1 - freq_A) * freq_B * (1 - freq_B)
 * result_dim is not consulted. Always returns 0.
 */
static int
pi2_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double set_size, freq_AB, freq_Ab, freq_aB, freq_A, freq_B;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        set_size = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        freq_AB = counts[0] / set_size;
        freq_Ab = counts[1] / set_size;
        freq_aB = counts[2] / set_size;
        freq_A = freq_AB + freq_Ab;
        freq_B = freq_AB + freq_aB;
        result[set] = freq_A * (1 - freq_A) * freq_B * (1 - freq_B);
    }
    return 0;
}
/*
 * Two-locus pi2 statistic: forwards to tsk_treeseq_two_locus_count_stat with
 * pi2_summary_func and norm_total_weighted. Options are passed through
 * untouched and the callee's return value is returned unchanged.
 */
int
tsk_treeseq_pi2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret;

    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, pi2_summary_func, norm_total_weighted,
        num_rows, row_sites, row_positions, num_cols, col_sites, col_positions, options,
        result);
    return ret;
}
/*
 * Summary function for the unbiased two-locus D^2 estimator.
 *
 * For every index j < state_dim, row j of `state` supplies three raw values
 * c_AB, c_Ab, c_aB (not divided by the set size); the fourth,
 * c_ab, is n - (c_AB + c_Ab + c_aB) with n = sample_set_sizes[j] from
 * `params` (a sample_count_stat_params_t). The result is
 *     (1 / (n (n-1) (n-2) (n-3)))
 *       * ( c_aB^2 (c_Ab - 1) c_Ab + (c_ab - 1) c_ab (c_AB - 1) c_AB
 *           - c_aB c_Ab (c_Ab + 2 c_ab c_AB - 1) )
 * The leading factor is not guarded, so n in {0, 1, 2, 3} divides by zero.
 * result_dim is not consulted. Always returns 0.
 */
static int
D2_unbiased_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double n, c_AB, c_Ab, c_aB, c_ab;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        n = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        c_AB = counts[0];
        c_Ab = counts[1];
        c_aB = counts[2];
        c_ab = n - (c_AB + c_Ab + c_aB);
        result[set] = (1 / (n * (n - 1) * (n - 2) * (n - 3)))
                      * ((c_aB * c_aB * (c_Ab - 1) * c_Ab)
                          + ((c_ab - 1) * c_ab * (c_AB - 1) * c_AB)
                          - (c_aB * c_Ab * (c_Ab + (2 * c_ab * c_AB) - 1)));
    }
    return 0;
}
/*
 * Unbiased two-locus D^2 statistic: forwards to
 * tsk_treeseq_two_locus_count_stat with D2_unbiased_summary_func and
 * norm_total_weighted. Options are passed through untouched and the callee's
 * return value is returned unchanged.
 */
int
tsk_treeseq_D2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret;

    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, D2_unbiased_summary_func,
        norm_total_weighted, num_rows, row_sites, row_positions, num_cols, col_sites,
        col_positions, options, result);
    return ret;
}
/*
 * Summary function for the unbiased two-locus Dz estimator.
 *
 * For every index j < state_dim, row j of `state` supplies three raw values
 * c_AB, c_Ab, c_aB; c_ab is n - (c_AB + c_Ab + c_aB) with
 * n = sample_set_sizes[j] from `params` (a sample_count_stat_params_t).
 * The result is the bracketed polynomial below scaled by
 * 1 / (n (n-1) (n-2) (n-3)); that factor is not guarded against zero.
 * result_dim is not consulted. Always returns 0.
 */
static int
Dz_unbiased_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double n, c_AB, c_Ab, c_aB, c_ab;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        n = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        c_AB = counts[0];
        c_Ab = counts[1];
        c_aB = counts[2];
        c_ab = n - (c_AB + c_Ab + c_aB);
        result[set] = (1 / (n * (n - 1) * (n - 2) * (n - 3)))
                      * ((((c_AB * c_ab) - (c_Ab * c_aB)) * (c_aB + c_ab - c_AB - c_Ab)
                             * (c_Ab + c_ab - c_AB - c_aB))
                          - ((c_AB * c_ab) * (c_AB + c_ab - c_Ab - c_aB - 2))
                          - ((c_Ab * c_aB) * (c_Ab + c_aB - c_AB - c_ab - 2)));
    }
    return 0;
}
/*
 * Unbiased two-locus Dz statistic: forwards to
 * tsk_treeseq_two_locus_count_stat with Dz_unbiased_summary_func and
 * norm_total_weighted. Options are passed through untouched and the callee's
 * return value is returned unchanged.
 */
int
tsk_treeseq_Dz_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret;

    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, Dz_unbiased_summary_func,
        norm_total_weighted, num_rows, row_sites, row_positions, num_cols, col_sites,
        col_positions, options, result);
    return ret;
}
/*
 * Summary function for the unbiased two-locus pi2 estimator.
 *
 * For every index j < state_dim, row j of `state` supplies three raw values
 * c_AB, c_Ab, c_aB; c_ab is n - (c_AB + c_Ab + c_aB) with
 * n = sample_set_sizes[j] from `params` (a sample_count_stat_params_t).
 * The result is the bracketed polynomial below scaled by
 * 1 / (n (n-1) (n-2) (n-3)); that factor is not guarded against zero.
 * result_dim is not consulted. Always returns 0.
 */
static int
pi2_unbiased_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t TSK_UNUSED(result_dim), double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    double n, c_AB, c_Ab, c_aB, c_ab;
    tsk_size_t set;

    for (set = 0; set < state_dim; set++) {
        n = (double) stat_params->sample_set_sizes[set];
        counts = GET_2D_ROW(state, 3, set);
        c_AB = counts[0];
        c_Ab = counts[1];
        c_aB = counts[2];
        c_ab = n - (c_AB + c_Ab + c_aB);
        result[set] = (1 / (n * (n - 1) * (n - 2) * (n - 3)))
                      * (((c_AB + c_Ab) * (c_aB + c_ab) * (c_AB + c_aB) * (c_Ab + c_ab))
                          - ((c_AB * c_ab) * (c_AB + c_ab + (3 * c_Ab) + (3 * c_aB) - 1))
                          - ((c_Ab * c_aB) * (c_Ab + c_aB + (3 * c_AB) + (3 * c_ab) - 1)));
    }
    return 0;
}
/*
 * Unbiased two-locus pi2 statistic: forwards to
 * tsk_treeseq_two_locus_count_stat with pi2_unbiased_summary_func and
 * norm_total_weighted. Options are passed through untouched and the callee's
 * return value is returned unchanged.
 */
int
tsk_treeseq_pi2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret;

    ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_sample_sets, NULL, pi2_unbiased_summary_func,
        norm_total_weighted, num_rows, row_sites, row_positions, num_cols, col_sites,
        col_positions, options, result);
    return ret;
}
/***********************************
* Two way stats
***********************************/
/*
 * Validates the inputs shared by the indexed (multi-way) sample statistics.
 *
 * Checks, in order:
 *   1. num_sample_sets >= 1, else the traced TSK_ERR_INSUFFICIENT_SAMPLE_SETS;
 *   2. num_index_tuples >= 1, else the traced
 *      TSK_ERR_INSUFFICIENT_INDEX_TUPLES;
 *   3. check_set_indexes over all tuple_size * num_index_tuples entries of
 *      index_tuples, whose return value is passed back as is.
 * Returns 0 when every check passes.
 */
static int
check_sample_stat_inputs(tsk_size_t num_sample_sets, tsk_size_t tuple_size,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples)
{
    if (num_sample_sets < 1) {
        return tsk_trace_error(TSK_ERR_INSUFFICIENT_SAMPLE_SETS);
    }
    if (num_index_tuples < 1) {
        return tsk_trace_error(TSK_ERR_INSUFFICIENT_INDEX_TUPLES);
    }
    return check_set_indexes(
        num_sample_sets, tuple_size * num_index_tuples, index_tuples);
}
/*
 * Summary function for pairwise divergence.
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim, the pair (a, b) is read from set_indexes[2t], set_indexes[2t+1]
 * and, with n_a / n_b the corresponding sample_set_sizes entries,
 *     result[t] = state[a] * (n_b - state[b]) / (n_a * (n_b - (a == b)))
 * i.e. the denominator uses n_b - 1 when both indexes name the same set.
 * state_dim is not consulted. Always returns 0.
 */
static int
divergence_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    tsk_id_t a, b;
    double size_a, size_b, scale;
    tsk_size_t tuple;

    for (tuple = 0; tuple < result_dim; tuple++) {
        a = stat_params->set_indexes[2 * tuple];
        b = stat_params->set_indexes[2 * tuple + 1];
        size_a = (double) stat_params->sample_set_sizes[a];
        size_b = (double) stat_params->sample_set_sizes[b];
        scale = size_a * (size_b - (a == b));
        result[tuple] = state[a] * (size_b - state[b]) / scale;
    }
    return 0;
}
/*
 * Pairwise divergence statistic.
 *
 * Validates the inputs with check_sample_stat_inputs (tuple size 2); if that
 * fails its error code is returned and nothing else runs. Otherwise forwards
 * to tsk_treeseq_sample_count_stat with divergence_summary_func and returns
 * its result.
 */
int
tsk_treeseq_divergence(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
    const double *windows, tsk_flags_t options, double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, divergence_summary_func,
            num_windows, windows, options, result);
    }
    return ret;
}
/*
 * Summary function for (centred) genetic relatedness.
 *
 * `params` points to a sample_count_stat_params_t. First the mean of
 * state[s] / sample_set_sizes[s] over all s < state_dim is computed. Then for
 * every tuple t < result_dim, with (a, b) read from set_indexes[2t] and
 * set_indexes[2t+1]:
 *     result[t] = (state[a] / n_a - mean) * (state[b] / n_b - mean)
 * Always returns 0.
 */
static int
genetic_relatedness_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    tsk_id_t a, b;
    tsk_size_t idx;
    double total = 0;
    double mean, size_a, size_b;

    for (idx = 0; idx < state_dim; idx++) {
        total += state[idx] / (double) stat_params->sample_set_sizes[idx];
    }
    mean = total / (double) state_dim;

    for (idx = 0; idx < result_dim; idx++) {
        a = stat_params->set_indexes[2 * idx];
        b = stat_params->set_indexes[2 * idx + 1];
        size_a = (double) stat_params->sample_set_sizes[a];
        size_b = (double) stat_params->sample_set_sizes[b];
        result[idx] = (state[a] / size_a - mean) * (state[b] / size_b - mean);
    }
    return 0;
}
/*
 * Summary function for non-centred genetic relatedness.
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim, with (a, b) read from set_indexes[2t] and set_indexes[2t+1]:
 *     result[t] = state[a] * state[b] / (n_a * n_b)
 * where n_a, n_b are the corresponding sample_set_sizes entries. No mean is
 * subtracted. state_dim is not consulted. Always returns 0.
 */
static int
genetic_relatedness_noncentred_summary_func(tsk_size_t TSK_UNUSED(state_dim),
    const double *state, tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    tsk_id_t a, b;
    tsk_size_t tuple;
    double size_a, size_b;

    for (tuple = 0; tuple < result_dim; tuple++) {
        a = stat_params->set_indexes[2 * tuple];
        b = stat_params->set_indexes[2 * tuple + 1];
        size_a = (double) stat_params->sample_set_sizes[a];
        size_b = (double) stat_params->sample_set_sizes[b];
        result[tuple] = state[a] * state[b] / (size_a * size_b);
    }
    return 0;
}
/*
 * Genetic relatedness between pairs of sample sets.
 *
 * Validates the inputs with check_sample_stat_inputs (tuple size 2) and
 * returns its error code on failure. Otherwise forwards to
 * tsk_treeseq_sample_count_stat, choosing the summary function from options:
 * genetic_relatedness_noncentred_summary_func when TSK_STAT_NONCENTRED is set,
 * genetic_relatedness_summary_func when it is not. Options are forwarded
 * unchanged in both cases.
 */
int
tsk_treeseq_genetic_relatedness(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
    const double *windows, tsk_flags_t options, double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);

    if (ret != 0) {
        return ret;
    }
    if (options & TSK_STAT_NONCENTRED) {
        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples,
            genetic_relatedness_noncentred_summary_func, num_windows, windows, options,
            result);
    } else {
        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples,
            genetic_relatedness_summary_func, num_windows, windows, options, result);
    }
    return ret;
}
/*
 * Summary function for (centred) weighted genetic relatedness.
 *
 * `params` points to an indexed_weight_stat_params_t. Let `last` be the final
 * entry of `state` (state[state_dim - 1]). For every tuple t < result_dim,
 * with (a, b) read from index_tuples[2t] and index_tuples[2t+1] and
 * T_a, T_b the matching total_weights entries:
 *     result[t] = (state[a] - T_a * last) * (state[b] - T_b * last)
 * Always returns 0.
 */
static int
genetic_relatedness_weighted_summary_func(tsk_size_t state_dim, const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const indexed_weight_stat_params_t *stat_params
        = (const indexed_weight_stat_params_t *) params;
    const double last = state[state_dim - 1];
    tsk_id_t a, b;
    tsk_size_t tuple;
    double total_a, total_b;

    for (tuple = 0; tuple < result_dim; tuple++) {
        a = stat_params->index_tuples[2 * tuple];
        b = stat_params->index_tuples[2 * tuple + 1];
        total_a = stat_params->total_weights[a];
        total_b = stat_params->total_weights[b];
        result[tuple] = (state[a] - total_a * last) * (state[b] - total_b * last);
    }
    return 0;
}
/*
 * Summary function for non-centred weighted genetic relatedness.
 *
 * `params` points to an indexed_weight_stat_params_t. For every tuple
 * t < result_dim, with (a, b) read from index_tuples[2t] and
 * index_tuples[2t+1]:
 *     result[t] = state[a] * state[b]
 * total_weights and state_dim are not consulted. Always returns 0.
 */
static int
genetic_relatedness_weighted_noncentred_summary_func(tsk_size_t TSK_UNUSED(state_dim),
    const double *state, tsk_size_t result_dim, double *result, void *params)
{
    const indexed_weight_stat_params_t *stat_params
        = (const indexed_weight_stat_params_t *) params;
    tsk_id_t a, b;
    tsk_size_t tuple;

    for (tuple = 0; tuple < result_dim; tuple++) {
        a = stat_params->index_tuples[2 * tuple];
        b = stat_params->index_tuples[2 * tuple + 1];
        result[tuple] = state[a] * state[b];
    }
    return 0;
}
/*
 * Weighted genetic relatedness.
 *
 * Builds an augmented weight matrix with num_weights + 1 columns: the first
 * num_weights columns copy `weights` row by row, and the extra column holds
 * 1 / num_samples for every sample. total_weights[k] accumulates the column
 * sums of `weights`; the entry for the extra column is set to 1.0.
 *
 * The augmented matrix is passed to tsk_treeseq_general_stat with
 * genetic_relatedness_weighted_noncentred_summary_func when
 * TSK_STAT_NONCENTRED is set in options, and with
 * genetic_relatedness_weighted_summary_func otherwise. index_tuples is handed
 * to the summary function as is; it is not validated here.
 *
 * Note the argument order: `result` comes before `options`.
 *
 * Returns the traced TSK_ERR_NO_MEMORY if either allocation fails, the traced
 * TSK_ERR_INSUFFICIENT_WEIGHTS if num_weights is 0, and otherwise the return
 * value of tsk_treeseq_general_stat. Both work arrays are released on every
 * path.
 */
int
tsk_treeseq_genetic_relatedness_weighted(const tsk_treeseq_t *self,
    tsk_size_t num_weights, const double *weights, tsk_size_t num_index_tuples,
    const tsk_id_t *index_tuples, tsk_size_t num_windows, const double *windows,
    double *result, tsk_flags_t options)
{
    int ret = 0;
    const tsk_size_t num_samples = self->num_samples;
    size_t sample, col;
    indexed_weight_stat_params_t args;
    const double *src_row;
    double *dst_row;
    double *total_weights = tsk_calloc((num_weights + 1), sizeof(*total_weights));
    double *new_weights
        = tsk_malloc((num_weights + 1) * num_samples * sizeof(*new_weights));

    if (total_weights == NULL || new_weights == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (num_weights == 0) {
        ret = tsk_trace_error(TSK_ERR_INSUFFICIENT_WEIGHTS);
        goto out;
    }

    /* Copy W and append a constant column of 1 / num_samples. */
    for (sample = 0; sample < num_samples; sample++) {
        src_row = GET_2D_ROW(weights, num_weights, sample);
        dst_row = GET_2D_ROW(new_weights, num_weights + 1, sample);
        for (col = 0; col < num_weights; col++) {
            dst_row[col] = src_row[col];
            total_weights[col] += src_row[col];
        }
        dst_row[num_weights] = 1.0 / (double) num_samples;
    }
    total_weights[num_weights] = 1.0;

    args.total_weights = total_weights;
    args.index_tuples = index_tuples;

    if (options & TSK_STAT_NONCENTRED) {
        ret = tsk_treeseq_general_stat(self, num_weights + 1, new_weights,
            num_index_tuples, genetic_relatedness_weighted_noncentred_summary_func,
            &args, num_windows, windows, options, result);
    } else {
        ret = tsk_treeseq_general_stat(self, num_weights + 1, new_weights,
            num_index_tuples, genetic_relatedness_weighted_summary_func, &args,
            num_windows, windows, options, result);
    }
out:
    tsk_safe_free(total_weights);
    tsk_safe_free(new_weights);
    return ret;
}
/*
 * Summary function for the Y2 statistic.
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim, with (a, b) read from set_indexes[2t] and set_indexes[2t+1]
 * and n_a, n_b the corresponding sample_set_sizes entries:
 *     result[t] = state[a] * (n_b - state[b]) * (n_b - state[b] - 1)
 *                 / (n_a * n_b * (n_b - 1))
 * The denominator is not guarded. state_dim is not consulted.
 * Always returns 0.
 */
static int
Y2_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    tsk_id_t a, b;
    double size_a, size_b, scale;
    tsk_size_t tuple;

    for (tuple = 0; tuple < result_dim; tuple++) {
        a = stat_params->set_indexes[2 * tuple];
        b = stat_params->set_indexes[2 * tuple + 1];
        size_a = (double) stat_params->sample_set_sizes[a];
        size_b = (double) stat_params->sample_set_sizes[b];
        scale = size_a * size_b * (size_b - 1);
        result[tuple] = state[a] * (size_b - state[b]) * (size_b - state[b] - 1) / scale;
    }
    return 0;
}
/*
 * Y2 statistic.
 *
 * Validates the inputs with check_sample_stat_inputs (tuple size 2); if that
 * fails its error code is returned and nothing else runs. Otherwise forwards
 * to tsk_treeseq_sample_count_stat with Y2_summary_func and returns its
 * result.
 */
int
tsk_treeseq_Y2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
    const double *windows, tsk_flags_t options, double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, Y2_summary_func, num_windows,
            windows, options, result);
    }
    return ret;
}
/*
 * Summary function for the f2 statistic.
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim, with (a, b) read from set_indexes[2t] and set_indexes[2t+1],
 * x_a = state[a], x_b = state[b] and n_a, n_b the sample_set_sizes entries:
 *     numer = x_a (x_a - 1) (n_b - x_b) (n_b - x_b - 1)
 *             - x_a (n_a - x_a) (n_b - x_b) x_b
 *     result[t] = numer / (n_a (n_a - 1) n_b (n_b - 1))
 * The denominator is not guarded. state_dim is not consulted.
 * Always returns 0.
 */
static int
f2_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    tsk_id_t a, b;
    double size_a, size_b, scale, value;
    tsk_size_t tuple;

    for (tuple = 0; tuple < result_dim; tuple++) {
        a = stat_params->set_indexes[2 * tuple];
        b = stat_params->set_indexes[2 * tuple + 1];
        size_a = (double) stat_params->sample_set_sizes[a];
        size_b = (double) stat_params->sample_set_sizes[b];
        scale = size_a * (size_a - 1) * size_b * (size_b - 1);
        value = state[a] * (state[a] - 1) * (size_b - state[b]) * (size_b - state[b] - 1)
                - state[a] * (size_a - state[a]) * (size_b - state[b]) * state[b];
        result[tuple] = value / scale;
    }
    return 0;
}
/*
 * f2 statistic.
 *
 * Validates the inputs with check_sample_stat_inputs (tuple size 2); if that
 * fails its error code is returned and nothing else runs. Otherwise forwards
 * to tsk_treeseq_sample_count_stat with f2_summary_func and returns its
 * result.
 */
int
tsk_treeseq_f2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
    const double *windows, tsk_flags_t options, double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, f2_summary_func, num_windows,
            windows, options, result);
    }
    return ret;
}
/*
 * Summary function for the two-population D^2 statistic (D_i * D_j).
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim the two set indexes are read from set_indexes[2t] and
 * set_indexes[2t+1]. For each of them, the three values in that set's row of
 * `state` are divided by the set's size to give freq_AB, freq_Ab, freq_aB,
 * and D = freq_AB - (freq_AB + freq_Ab) * (freq_AB + freq_aB) is formed.
 * result[t] is the product of the two D values. state_dim is not consulted.
 * Always returns 0.
 */
static int
D2_ij_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    tsk_size_t tuple, side;
    tsk_id_t set;
    double set_size, freq_AB, freq_Ab, freq_aB, freq_A, freq_B;
    double D[2];

    for (tuple = 0; tuple < result_dim; tuple++) {
        /* side 0 is the first set of the pair, side 1 the second */
        for (side = 0; side < 2; side++) {
            set = stat_params->set_indexes[2 * tuple + side];
            set_size = (double) stat_params->sample_set_sizes[set];
            counts = GET_2D_ROW(state, 3, set);
            freq_AB = counts[0] / set_size;
            freq_Ab = counts[1] / set_size;
            freq_aB = counts[2] / set_size;
            freq_A = freq_AB + freq_Ab;
            freq_B = freq_AB + freq_aB;
            D[side] = freq_AB - (freq_A * freq_B);
        }
        result[tuple] = D[0] * D[1];
    }
    return 0;
}
/*
 * Two-population, two-locus D^2 statistic.
 *
 * Validates the inputs with check_sample_stat_inputs (tuple size 2); if that
 * fails its error code is returned and nothing else runs. Otherwise forwards
 * to tsk_treeseq_two_locus_count_stat with D2_ij_summary_func and
 * norm_total_weighted and returns its result.
 */
int
tsk_treeseq_D2_ij(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, D2_ij_summary_func,
            norm_total_weighted, num_rows, row_sites, row_positions, num_cols,
            col_sites, col_positions, options, result);
    }
    return ret;
}
/*
 * Summary function for the unbiased two-population D^2 estimator.
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim, (a, b) is read from set_indexes[2t] and set_indexes[2t+1].
 * For a set s, row s of `state` gives three raw values c_AB, c_Ab, c_aB and
 * c_ab = n_s - (c_AB + c_Ab + c_aB).
 *
 *   a == b:  result[t] = ( c_AB (c_AB-1) c_ab (c_ab-1)
 *                          + c_Ab (c_Ab-1) c_aB (c_aB-1)
 *                          - 2 c_AB c_Ab c_aB c_ab )
 *                        / n / (n-1) / (n-2) / (n-3)
 *   a != b:  result[t] = (c_Ab c_aB - c_AB c_ab)_a * (c_Ab c_aB - c_AB c_ab)_b
 *                        / n_a / (n_a-1) / n_b / (n_b-1)
 *
 * We require disjoint sample sets because set identity is decided by
 * comparing the two indexes. Divisors are not guarded. state_dim is not
 * consulted. Always returns 0.
 */
static int
D2_ij_unbiased_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    tsk_size_t tuple;
    tsk_id_t a, b;
    double n_a, n_b;
    double AB_a, Ab_a, aB_a, ab_a;
    double AB_b, Ab_b, aB_b, ab_b;

    for (tuple = 0; tuple < result_dim; tuple++) {
        a = stat_params->set_indexes[2 * tuple];
        b = stat_params->set_indexes[2 * tuple + 1];

        /* The first set's counts are needed by both formulas. */
        n_a = (double) stat_params->sample_set_sizes[a];
        counts = GET_2D_ROW(state, 3, a);
        AB_a = counts[0];
        Ab_a = counts[1];
        aB_a = counts[2];
        ab_a = n_a - (AB_a + Ab_a + aB_a);

        if (a == b) {
            result[tuple] = (AB_a * (AB_a - 1) * ab_a * (ab_a - 1)
                                + Ab_a * (Ab_a - 1) * aB_a * (aB_a - 1)
                                - 2 * AB_a * Ab_a * aB_a * ab_a)
                            / n_a / (n_a - 1) / (n_a - 2) / (n_a - 3);
            continue;
        }

        n_b = (double) stat_params->sample_set_sizes[b];
        counts = GET_2D_ROW(state, 3, b);
        AB_b = counts[0];
        Ab_b = counts[1];
        aB_b = counts[2];
        ab_b = n_b - (AB_b + Ab_b + aB_b);
        result[tuple] = (Ab_a * aB_a - AB_a * ab_a) * (Ab_b * aB_b - AB_b * ab_b) / n_a
                        / (n_a - 1) / n_b / (n_b - 1);
    }
    return 0;
}
/*
 * Unbiased two-population, two-locus D^2 statistic.
 *
 * Validates the inputs with check_sample_stat_inputs (tuple size 2); if that
 * fails its error code is returned and nothing else runs. Otherwise forwards
 * to tsk_treeseq_two_locus_count_stat with D2_ij_unbiased_summary_func and
 * norm_total_weighted and returns its result.
 */
int
tsk_treeseq_D2_ij_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, D2_ij_unbiased_summary_func,
            norm_total_weighted, num_rows, row_sites, row_positions, num_cols,
            col_sites, col_positions, options, result);
    }
    return ret;
}
/*
 * Summary function for the two-population r^2 statistic.
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim the two set indexes are read from set_indexes[2t] and
 * set_indexes[2t+1]. For each of them, the three values in that set's row of
 * `state` are divided by the set's size to give freq_AB, freq_Ab, freq_aB;
 * with freq_A = freq_AB + freq_Ab and freq_B = freq_AB + freq_aB,
 *     D     = freq_AB - freq_A * freq_B
 *     scale = sqrt(freq_A (1 - freq_A) freq_B (1 - freq_B))
 * and result[t] = (D_0 * D_1) / (scale_0 * scale_1). The denominator is not
 * guarded against zero. state_dim is not consulted. Always returns 0.
 */
static int
r2_ij_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const double *counts;
    tsk_size_t tuple, side;
    tsk_id_t set;
    double set_size, freq_AB, freq_Ab, freq_aB, freq_A, freq_B;
    double D[2], scale[2];

    for (tuple = 0; tuple < result_dim; tuple++) {
        /* side 0 is the first set of the pair, side 1 the second */
        for (side = 0; side < 2; side++) {
            set = stat_params->set_indexes[2 * tuple + side];
            set_size = (double) stat_params->sample_set_sizes[set];
            counts = GET_2D_ROW(state, 3, set);
            freq_AB = counts[0] / set_size;
            freq_Ab = counts[1] / set_size;
            freq_aB = counts[2] / set_size;
            freq_A = freq_AB + freq_Ab;
            freq_B = freq_AB + freq_aB;
            D[side] = freq_AB - (freq_A * freq_B);
            scale[side] = sqrt(freq_A * (1 - freq_A) * freq_B * (1 - freq_B));
        }
        result[tuple] = (D[0] * D[1]) / (scale[0] * scale[1]);
    }
    return 0;
}
/*
 * Two-population, two-locus r^2 statistic.
 *
 * Validates the inputs with check_sample_stat_inputs (tuple size 2); if that
 * fails its error code is returned and nothing else runs. Otherwise forwards
 * to tsk_treeseq_two_locus_count_stat with r2_ij_summary_func and
 * norm_hap_weighted_ij and returns its result.
 */
int
tsk_treeseq_r2_ij(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,
    const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
    const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
    double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 2, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_two_locus_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, r2_ij_summary_func,
            norm_hap_weighted_ij, num_rows, row_sites, row_positions, num_cols,
            col_sites, col_positions, options, result);
    }
    return ret;
}
/***********************************
* Three way stats
***********************************/
/*
 * Summary function for the Y3 statistic.
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim, (a, b, c) is read from set_indexes[3t], [3t+1], [3t+2] and,
 * with n_a, n_b, n_c the corresponding sample_set_sizes entries:
 *     result[t] = state[a] * (n_b - state[b]) * (n_c - state[c])
 *                 / (n_a * n_b * n_c)
 * state_dim is not consulted. Always returns 0.
 */
static int
Y3_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const tsk_id_t *triple;
    tsk_id_t a, b, c;
    double size_a, size_b, size_c, scale, value;
    tsk_size_t tuple;

    for (tuple = 0; tuple < result_dim; tuple++) {
        triple = stat_params->set_indexes + 3 * tuple;
        a = triple[0];
        b = triple[1];
        c = triple[2];
        size_a = (double) stat_params->sample_set_sizes[a];
        size_b = (double) stat_params->sample_set_sizes[b];
        size_c = (double) stat_params->sample_set_sizes[c];
        scale = size_a * size_b * size_c;
        value = state[a] * (size_b - state[b]) * (size_c - state[c]);
        result[tuple] = value / scale;
    }
    return 0;
}
/*
 * Y3 statistic.
 *
 * Validates the inputs with check_sample_stat_inputs (tuple size 3); if that
 * fails its error code is returned and nothing else runs. Otherwise forwards
 * to tsk_treeseq_sample_count_stat with Y3_summary_func and returns its
 * result.
 */
int
tsk_treeseq_Y3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
    const double *windows, tsk_flags_t options, double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 3, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, Y3_summary_func, num_windows,
            windows, options, result);
    }
    return ret;
}
/*
 * Summary function for the f3 statistic.
 *
 * `params` points to a sample_count_stat_params_t. For every tuple
 * t < result_dim, (a, b, c) is read from set_indexes[3t], [3t+1], [3t+2];
 * with x_s = state[s] and n_s = sample_set_sizes[s]:
 *     numer = x_a (x_a - 1) (n_b - x_b) (n_c - x_c)
 *             - x_a (n_a - x_a) (n_b - x_b) x_c
 *     result[t] = numer / (n_a (n_a - 1) n_b n_c)
 * The denominator is not guarded. state_dim is not consulted.
 * Always returns 0.
 */
static int
f3_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    const tsk_id_t *triple;
    tsk_id_t a, b, c;
    double size_a, size_b, size_c, scale, value;
    tsk_size_t tuple;

    for (tuple = 0; tuple < result_dim; tuple++) {
        triple = stat_params->set_indexes + 3 * tuple;
        a = triple[0];
        b = triple[1];
        c = triple[2];
        size_a = (double) stat_params->sample_set_sizes[a];
        size_b = (double) stat_params->sample_set_sizes[b];
        size_c = (double) stat_params->sample_set_sizes[c];
        scale = size_a * (size_a - 1) * size_b * size_c;
        value = state[a] * (state[a] - 1) * (size_b - state[b]) * (size_c - state[c])
                - state[a] * (size_a - state[a]) * (size_b - state[b]) * state[c];
        result[tuple] = value / scale;
    }
    return 0;
}
/* Compute the f3 statistic over the given triples of sample sets.
 * The index tuples are validated as 3-tuples before the generic
 * sample-count-stat driver is invoked with f3_summary_func.
 * Returns 0 on success or the error code from either step. */
int
tsk_treeseq_f3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
    const double *windows, tsk_flags_t options, double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 3, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, f3_summary_func, num_windows,
            windows, options, result);
    }
    return ret;
}
/***********************************
* Four way stats
***********************************/
/* Summary function for the f4 statistic.
 *
 * For every index quadruple (a, b, c, d) read from the params' set_indexes
 * array the numerator is
 *     x_a x_c (n_b - x_b) (n_d - x_d) - x_a x_d (n_b - x_b) (n_c - x_c)
 * and the denominator is n_a n_b n_c n_d, where x is the state vector and
 * n_* are the sample set sizes. Always returns 0. */
static int
f4_summary_func(tsk_size_t TSK_UNUSED(state_dim), const double *state,
    tsk_size_t result_dim, double *result, void *params)
{
    const sample_count_stat_params_t *stat_params
        = (const sample_count_stat_params_t *) params;
    tsk_size_t t;
    tsk_id_t a, b, c, d;
    double size_a, size_b, size_c, size_d, numer, denom;

    for (t = 0; t < result_dim; t++) {
        a = stat_params->set_indexes[4 * t];
        b = stat_params->set_indexes[4 * t + 1];
        c = stat_params->set_indexes[4 * t + 2];
        d = stat_params->set_indexes[4 * t + 3];
        size_a = (double) stat_params->sample_set_sizes[a];
        size_b = (double) stat_params->sample_set_sizes[b];
        size_c = (double) stat_params->sample_set_sizes[c];
        size_d = (double) stat_params->sample_set_sizes[d];

        denom = size_a * size_b * size_c * size_d;
        numer = state[a] * state[c] * (size_b - state[b]) * (size_d - state[d])
                - state[a] * state[d] * (size_b - state[b]) * (size_c - state[c]);
        result[t] = numer / denom;
    }
    return 0;
}
/* Compute the f4 statistic over the given quadruples of sample sets.
 * The index tuples are validated as 4-tuples before the generic
 * sample-count-stat driver is invoked with f4_summary_func.
 * Returns 0 on success or the error code from either step. */
int
tsk_treeseq_f4(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
    const double *windows, tsk_flags_t options, double *result)
{
    int ret
        = check_sample_stat_inputs(num_sample_sets, 4, num_index_tuples, index_tuples);

    if (ret == 0) {
        ret = tsk_treeseq_sample_count_stat(self, num_sample_sets, sample_set_sizes,
            sample_sets, num_index_tuples, index_tuples, f4_summary_func, num_windows,
            windows, options, result);
    }
    return ret;
}
/* Error-raising getter functions */
/* Fetch row `index` of the node table into `node`; returns the status
 * reported by the table lookup. */
int TSK_WARN_UNUSED
tsk_treeseq_get_node(const tsk_treeseq_t *self, tsk_id_t index, tsk_node_t *node)
{
    int ret = tsk_node_table_get_row(&self->tables->nodes, index, node);

    return ret;
}
/* Fetch row `index` of the edge table into `edge`; returns the status
 * reported by the table lookup. */
int TSK_WARN_UNUSED
tsk_treeseq_get_edge(const tsk_treeseq_t *self, tsk_id_t index, tsk_edge_t *edge)
{
    int ret = tsk_edge_table_get_row(&self->tables->edges, index, edge);

    return ret;
}
/* Fetch row `index` of the migration table into `migration`; returns the
 * status reported by the table lookup. */
int TSK_WARN_UNUSED
tsk_treeseq_get_migration(
    const tsk_treeseq_t *self, tsk_id_t index, tsk_migration_t *migration)
{
    int ret = tsk_migration_table_get_row(&self->tables->migrations, index, migration);

    return ret;
}
/* Fetch row `index` of the mutation table into `mutation`. On a successful
 * lookup the edge and inherited-state fields are then overwritten with the
 * values held in the tree sequence's site_mutations_mem array. On failure
 * the error code is returned and those fields are left as the table lookup
 * left them. */
int TSK_WARN_UNUSED
tsk_treeseq_get_mutation(
    const tsk_treeseq_t *self, tsk_id_t index, tsk_mutation_t *mutation)
{
    int ret = tsk_mutation_table_get_row(&self->tables->mutations, index, mutation);

    if (ret == 0) {
        mutation->edge = self->site_mutations_mem[index].edge;
        mutation->inherited_state = self->site_mutations_mem[index].inherited_state;
        mutation->inherited_state_length
            = self->site_mutations_mem[index].inherited_state_length;
    }
    return ret;
}
/* Fetch row `index` of the site table into `site`. On success the site's
 * mutation list and its length are filled in from the tree sequence's
 * per-site arrays. Returns 0 or the table lookup's error code. */
int TSK_WARN_UNUSED
tsk_treeseq_get_site(const tsk_treeseq_t *self, tsk_id_t index, tsk_site_t *site)
{
    int ret = tsk_site_table_get_row(&self->tables->sites, index, site);

    if (ret == 0) {
        site->mutations = self->site_mutations[index];
        site->mutations_length = self->site_mutations_length[index];
    }
    return ret;
}
/* Fetch row `index` of the individual table into `individual`. On success
 * the individual's node list and its length are filled in from the tree
 * sequence's per-individual arrays. Returns 0 or the lookup's error code. */
int TSK_WARN_UNUSED
tsk_treeseq_get_individual(
    const tsk_treeseq_t *self, tsk_id_t index, tsk_individual_t *individual)
{
    int ret = tsk_individual_table_get_row(&self->tables->individuals, index, individual);

    if (ret == 0) {
        individual->nodes = self->individual_nodes[index];
        individual->nodes_length = self->individual_nodes_length[index];
    }
    return ret;
}
/* Fetch row `index` of the population table into `population`; returns the
 * status reported by the table lookup. */
int TSK_WARN_UNUSED
tsk_treeseq_get_population(
    const tsk_treeseq_t *self, tsk_id_t index, tsk_population_t *population)
{
    int ret = tsk_population_table_get_row(&self->tables->populations, index, population);

    return ret;
}
/* Fetch row `index` of the provenance table into `provenance`; returns the
 * status reported by the table lookup. */
int TSK_WARN_UNUSED
tsk_treeseq_get_provenance(
    const tsk_treeseq_t *self, tsk_id_t index, tsk_provenance_t *provenance)
{
    int ret = tsk_provenance_table_get_row(&self->tables->provenances, index, provenance);

    return ret;
}
/* Simplify this tree sequence with respect to `samples` and build the
 * result into `output`.
 *
 * A heap-allocated copy of the tables is simplified (filling `node_map` as
 * tsk_table_collection_simplify does) and then handed to tsk_treeseq_init
 * with TSK_TAKE_OWNERSHIP. If any step before the init call fails, the copy
 * is freed here. Returns 0 on success or a negative error code. */
int TSK_WARN_UNUSED
tsk_treeseq_simplify(const tsk_treeseq_t *self, const tsk_id_t *samples,
    tsk_size_t num_samples, tsk_flags_t options, tsk_treeseq_t *output,
    tsk_id_t *node_map)
{
    int ret = 0;
    tsk_table_collection_t *copy = tsk_malloc(sizeof(*copy));

    if (copy == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_treeseq_copy_tables(self, copy, 0);
    if (ret == 0) {
        ret = tsk_table_collection_simplify(
            copy, samples, num_samples, options, node_map);
    }
    if (ret != 0) {
        goto out;
    }
    ret = tsk_treeseq_init(output, copy, TSK_TS_INIT_BUILD_INDEXES | TSK_TAKE_OWNERSHIP);
    /* Whatever tsk_treeseq_init returned, the copy is no longer ours to
     * free once the call has been made. */
    copy = NULL;
out:
    if (copy != NULL) {
        tsk_table_collection_free(copy);
        tsk_safe_free(copy);
    }
    return ret;
}
/* Build into `output` a copy of this tree sequence in which every edge that
 * strictly spans `time` (child time < time < parent time) is split in two
 * by a newly added node.
 *
 * Each new node is added with the given `flags`, `time`, `population` and
 * metadata, and no individual. Mutations that sit on a split edge and whose
 * time (or, when unknown, the time of their node) is >= `time` are moved to
 * the new node. The edge table is then re-sorted; sites and mutations are
 * left in their existing order.
 *
 * `output` is zeroed on entry. Returns 0 on success, or a negative error
 * code: TSK_ERR_NO_MEMORY, TSK_ERR_MIGRATIONS_NOT_SUPPORTED (non-empty
 * migration table), TSK_ERR_POPULATION_OUT_OF_BOUNDS, TSK_ERR_TIME_NONFINITE,
 * or any error from the table operations used. */
int TSK_WARN_UNUSED
tsk_treeseq_split_edges(const tsk_treeseq_t *self, double time, tsk_flags_t flags,
    tsk_id_t population, const char *metadata, tsk_size_t metadata_length,
    tsk_flags_t TSK_UNUSED(options), tsk_treeseq_t *output)
{
    int ret = 0;
    tsk_table_collection_t *tables = tsk_malloc(sizeof(*tables));
    const double *restrict node_time = self->tables->nodes.time;
    const tsk_size_t num_edges = self->tables->edges.num_rows;
    const tsk_size_t num_mutations = self->tables->mutations.num_rows;
    tsk_id_t *split_edge = tsk_malloc(num_edges * sizeof(*split_edge));
    tsk_id_t j, u, mapped_node, ret_id;
    double mutation_time;
    tsk_edge_t edge;
    tsk_mutation_t mutation;
    tsk_bookmark_t sort_start;

    memset(output, 0, sizeof(*output));
    if (tables == NULL || split_edge == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        /* tables has not been initialised yet, so it must not be handed to
         * tsk_table_collection_free at the out label; release only the raw
         * allocation (a no-op when it is NULL). */
        tsk_safe_free(tables);
        tables = NULL;
        goto out;
    }
    ret = tsk_treeseq_copy_tables(self, tables, 0);
    if (ret != 0) {
        goto out;
    }
    if (tables->migrations.num_rows > 0) {
        ret = tsk_trace_error(TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
        goto out;
    }
    /* We could catch this below in add_row, but it's simpler to guarantee
     * that we always catch the error in corner cases where the values
     * aren't used. */
    if (population < -1 || population >= (tsk_id_t) self->tables->populations.num_rows) {
        ret = tsk_trace_error(TSK_ERR_POPULATION_OUT_OF_BOUNDS);
        goto out;
    }
    if (!tsk_isfinite(time)) {
        ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);
        goto out;
    }

    /* Rebuild the edge table from scratch, recording for each original edge
     * the node inserted on it (TSK_NULL when the edge was not split). */
    tsk_edge_table_clear(&tables->edges);
    tsk_memset(split_edge, TSK_NULL, num_edges * sizeof(*split_edge));

    for (j = 0; j < (tsk_id_t) num_edges; j++) {
        /* Would prefer to use tsk_edge_table_get_row_unsafe, but it's
         * currently static to tables.c */
        ret = tsk_edge_table_get_row(&self->tables->edges, j, &edge);
        tsk_bug_assert(ret == 0);
        if (node_time[edge.child] < time && time < node_time[edge.parent]) {
            u = tsk_node_table_add_row(&tables->nodes, flags, time, population, TSK_NULL,
                metadata, metadata_length);
            if (u < 0) {
                ret = (int) u;
                goto out;
            }
            /* Lower half of the split: new node -> original child. */
            ret_id = tsk_edge_table_add_row(&tables->edges, edge.left, edge.right, u,
                edge.child, edge.metadata, edge.metadata_length);
            if (ret_id < 0) {
                ret = (int) ret_id;
                goto out;
            }
            /* The edge added below becomes the upper half:
             * original parent -> new node. */
            edge.child = u;
            split_edge[j] = u;
        }
        ret_id = tsk_edge_table_add_row(&tables->edges, edge.left, edge.right,
            edge.parent, edge.child, edge.metadata, edge.metadata_length);
        if (ret_id < 0) {
            ret = (int) ret_id;
            goto out;
        }
    }

    for (j = 0; j < (tsk_id_t) num_mutations; j++) {
        /* Note: we could speed this up a bit by accessing the local
         * memory for mutations directly. */
        ret = tsk_treeseq_get_mutation(self, j, &mutation);
        tsk_bug_assert(ret == 0);
        mapped_node = TSK_NULL;
        if (mutation.edge != TSK_NULL) {
            mapped_node = split_edge[mutation.edge];
        }
        mutation_time = tsk_is_unknown_time(mutation.time) ? node_time[mutation.node]
                                                           : mutation.time;
        if (mapped_node != TSK_NULL && mutation_time >= time) {
            /* Update the column in-place to save a bit of time. */
            tables->mutations.node[j] = mapped_node;
        }
    }

    /* Skip mutations and sites as they haven't been altered */
    /* Note we can probably optimise the edge sort a bit here also by
     * reasoning about when the first edge gets altered in the table.
     */
    memset(&sort_start, 0, sizeof(sort_start));
    sort_start.sites = tables->sites.num_rows;
    sort_start.mutations = tables->mutations.num_rows;
    ret = tsk_table_collection_sort(tables, &sort_start, 0);
    if (ret != 0) {
        goto out;
    }

    ret = tsk_treeseq_init(
        output, tables, TSK_TS_INIT_BUILD_INDEXES | TSK_TAKE_OWNERSHIP);
    /* TSK_TAKE_OWNERSHIP: the tables must not be freed here after this call. */
    tables = NULL;
out:
    if (tables != NULL) {
        tsk_table_collection_free(tables);
        tsk_safe_free(tables);
    }
    tsk_safe_free(split_edge);
    return ret;
}
/* ======================================================== *
* tree_position
* ======================================================== */
/* Put the position into the null state: index -1 and an empty [0, 0)
 * interval. The other fields are left untouched. */
static void
tsk_tree_position_set_null(tsk_tree_position_t *self)
{
    self->interval.right = 0;
    self->interval.left = 0;
    self->index = -1;
}
/* Initialise a tree position over `tree_sequence`: the struct is zeroed,
 * bound to the tree sequence and placed in the null state. The options
 * argument is unused. Always returns 0. */
int
tsk_tree_position_init(tsk_tree_position_t *self, const tsk_treeseq_t *tree_sequence,
    tsk_flags_t TSK_UNUSED(options))
{
    int ret = 0;

    memset(self, 0, sizeof(*self));
    self->tree_sequence = tree_sequence;
    tsk_tree_position_set_null(self);

    return ret;
}
/* Release a tree position. Nothing is done here; always returns 0. */
int
tsk_tree_position_free(tsk_tree_position_t *TSK_UNUSED(self))
{
    int ret = 0;

    return ret;
}
/* Write a human-readable dump of the position (index, interval and the
 * start/stop cursors of the out and in edge ranges) to `out`.
 * Always returns 0. */
int
tsk_tree_position_print_state(const tsk_tree_position_t *self, FILE *out)
{
    const int out_start = (int) self->out.start;
    const int out_stop = (int) self->out.stop;
    const int in_start = (int) self->in.start;
    const int in_stop = (int) self->in.stop;

    fprintf(out, "Tree position state\n");
    fprintf(out, "index = %d\n", (int) self->index);
    fprintf(out, "interval = [%f,\t%f)\n", self->interval.left, self->interval.right);
    fprintf(out, "out = start=%d\tstop=%d\n", out_start, out_stop);
    fprintf(out, "in = start=%d\tstop=%d\n", in_start, in_stop);
    return 0;
}
/* Advance the position one tree to the right.
 *
 * On return, `out` describes the range of edge_removal_order entries whose
 * right coordinate equals the new tree's left coordinate, and `in` the range
 * of edge_insertion_order entries whose left coordinate equals it. Both
 * ranges are half-open [start, stop).
 *
 * Returns true when the position now refers to a tree, and false when it
 * has stepped past the last tree and been reset to the null state. */
bool
tsk_tree_position_next(tsk_tree_position_t *self)
{
const tsk_table_collection_t *tables = self->tree_sequence->tables;
const tsk_id_t M = (tsk_id_t) tables->edges.num_rows;
const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;
const double *restrict left_coords = tables->edges.left;
const tsk_id_t *restrict left_order = tables->indexes.edge_insertion_order;
const double *restrict right_coords = tables->edges.right;
const tsk_id_t *restrict right_order = tables->indexes.edge_removal_order;
const double *restrict breakpoints = self->tree_sequence->breakpoints;
tsk_id_t j, left_current_index, right_current_index;
double left;
/* Starting from the null state: begin at coordinate 0 with both cursors
 * at the start of their index arrays. */
if (self->index == -1) {
self->interval.right = 0;
self->in.stop = 0;
self->out.stop = 0;
self->direction = TSK_DIR_FORWARD;
}
/* Pick up the cursors where the previous move left them. After a reverse
 * move the stop values belong to the opposite index array and sit one
 * slot before the next forward entry, hence the swap and the + 1. */
if (self->direction == TSK_DIR_FORWARD) {
left_current_index = self->in.stop;
right_current_index = self->out.stop;
} else {
left_current_index = self->out.stop + 1;
right_current_index = self->in.stop + 1;
}
/* The new tree starts where the current interval ends. */
left = self->interval.right;
j = right_current_index;
self->out.start = j;
while (j < M && right_coords[right_order[j]] == left) {
j++;
}
self->out.stop = j;
self->out.order = right_order;
j = left_current_index;
self->in.start = j;
while (j < M && left_coords[left_order[j]] == left) {
j++;
}
self->in.stop = j;
self->in.order = left_order;
self->direction = TSK_DIR_FORWARD;
self->index++;
if (self->index == num_trees) {
/* Ran off the right-hand end: back to the null state. */
tsk_tree_position_set_null(self);
} else {
self->interval.left = left;
self->interval.right = breakpoints[self->index + 1];
}
return self->index != -1;
}
/* Move the position one tree to the left.
 *
 * On return, `out` describes the range of edge_insertion_order entries whose
 * left coordinate equals the new tree's right coordinate, and `in` the range
 * of edge_removal_order entries whose right coordinate equals it. Both
 * cursors run downwards, so start >= stop and stop is one slot before the
 * last entry included.
 *
 * Returns true when the position now refers to a tree, and false when it
 * has stepped before the first tree and been reset to the null state. */
bool
tsk_tree_position_prev(tsk_tree_position_t *self)
{
const tsk_table_collection_t *tables = self->tree_sequence->tables;
const tsk_id_t M = (tsk_id_t) tables->edges.num_rows;
const double sequence_length = tables->sequence_length;
const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;
const double *restrict left_coords = tables->edges.left;
const tsk_id_t *restrict left_order = tables->indexes.edge_insertion_order;
const double *restrict right_coords = tables->edges.right;
const tsk_id_t *restrict right_order = tables->indexes.edge_removal_order;
const double *restrict breakpoints = self->tree_sequence->breakpoints;
tsk_id_t j, left_current_index, right_current_index;
double right;
/* Starting from the null state: begin at the end of the sequence with
 * both cursors on the last entry of their index arrays. */
if (self->index == -1) {
self->index = num_trees;
self->interval.left = sequence_length;
self->in.stop = M - 1;
self->out.stop = M - 1;
self->direction = TSK_DIR_REVERSE;
}
/* Pick up the cursors where the previous move left them. After a forward
 * move the stop values belong to the opposite index array and sit one
 * slot past the next reverse entry, hence the swap and the - 1. */
if (self->direction == TSK_DIR_REVERSE) {
left_current_index = self->out.stop;
right_current_index = self->in.stop;
} else {
left_current_index = self->in.stop - 1;
right_current_index = self->out.stop - 1;
}
/* The new tree ends where the current interval starts. */
right = self->interval.left;
j = left_current_index;
self->out.start = j;
while (j >= 0 && left_coords[left_order[j]] == right) {
j--;
}
self->out.stop = j;
self->out.order = left_order;
j = right_current_index;
self->in.start = j;
while (j >= 0 && right_coords[right_order[j]] == right) {
j--;
}
self->in.stop = j;
self->in.order = right_order;
self->index--;
self->direction = TSK_DIR_REVERSE;
if (self->index == -1) {
/* Ran off the left-hand end: back to the null state. */
tsk_tree_position_set_null(self);
} else {
self->interval.left = breakpoints[self->index];
self->interval.right = right;
}
return self->index != -1;
}
/* Jump forward from the current position directly to tree `index`.
 *
 * `index` must be >= the current index and < num_trees; this is enforced
 * with tsk_bug_assert rather than an error return. On return `out` covers
 * the edge_removal_order entries passed over whose right coordinate is <=
 * the target tree's left coordinate (an empty range when seeking from the
 * null state). `in` covers the edge_insertion_order entries that start at or
 * before that coordinate and end after it.
 *
 * Always returns 0. */
int TSK_WARN_UNUSED
tsk_tree_position_seek_forward(tsk_tree_position_t *self, tsk_id_t index)
{
int ret = 0;
const tsk_table_collection_t *tables = self->tree_sequence->tables;
const tsk_id_t M = (tsk_id_t) tables->edges.num_rows;
const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;
const double *restrict left_coords = tables->edges.left;
const tsk_id_t *restrict left_order = tables->indexes.edge_insertion_order;
const double *restrict right_coords = tables->edges.right;
const tsk_id_t *restrict right_order = tables->indexes.edge_removal_order;
const double *restrict breakpoints = self->tree_sequence->breakpoints;
tsk_id_t j, left_current_index, right_current_index;
double left;
tsk_bug_assert(index >= self->index && index < num_trees);
/* Starting from the null state: begin at coordinate 0 with both cursors
 * at the start of their index arrays. */
if (self->index == -1) {
self->interval.right = 0;
self->in.stop = 0;
self->out.stop = 0;
self->direction = TSK_DIR_FORWARD;
}
/* Same cursor re-basing as in tsk_tree_position_next. */
if (self->direction == TSK_DIR_FORWARD) {
left_current_index = self->in.stop;
right_current_index = self->out.stop;
} else {
left_current_index = self->out.stop + 1;
right_current_index = self->in.stop + 1;
}
self->direction = TSK_DIR_FORWARD;
left = breakpoints[index];
j = right_current_index;
self->out.start = j;
while (j < M && right_coords[right_order[j]] <= left) {
j++;
}
self->out.stop = j;
/* Coming from the null state there is nothing to remove, so collapse
 * the out range to empty. */
if (self->index == -1) {
self->out.start = self->out.stop;
}
j = left_current_index;
/* Skip edges (in insertion order) that already end at or before the
 * target coordinate; they never need to be inserted. Note this loop
 * deliberately tests the right coordinate of left-ordered edges. */
while (j < M && right_coords[left_order[j]] <= left) {
j++;
}
self->in.start = j;
while (j < M && left_coords[left_order[j]] <= left) {
j++;
}
self->in.stop = j;
self->interval.left = left;
self->interval.right = breakpoints[index + 1];
self->out.order = right_order;
self->in.order = left_order;
self->index = index;
return ret;
}
/* Jump backward from the current position directly to tree `index`.
 *
 * `index` must be <= the current index (after the null state has been
 * mapped to num_trees); this is enforced with tsk_bug_assert rather than
 * an error return. On return `out` covers the edge_insertion_order entries
 * passed over whose left coordinate is >= the target tree's right coordinate
 * (an empty range when seeking from the null state). `in` covers the
 * edge_removal_order entries that end at or after that coordinate and start
 * before it. Cursors run downwards, as in tsk_tree_position_prev.
 *
 * Always returns 0. */
int TSK_WARN_UNUSED
tsk_tree_position_seek_backward(tsk_tree_position_t *self, tsk_id_t index)
{
int ret = 0;
const tsk_table_collection_t *tables = self->tree_sequence->tables;
const tsk_id_t M = (tsk_id_t) tables->edges.num_rows;
const double sequence_length = tables->sequence_length;
const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;
const double *restrict left_coords = tables->edges.left;
const tsk_id_t *restrict left_order = tables->indexes.edge_insertion_order;
const double *restrict right_coords = tables->edges.right;
const tsk_id_t *restrict right_order = tables->indexes.edge_removal_order;
const double *restrict breakpoints = self->tree_sequence->breakpoints;
tsk_id_t j, left_current_index, right_current_index;
double right;
/* Starting from the null state: begin at the end of the sequence with
 * both cursors on the last entry of their index arrays. */
if (self->index == -1) {
self->index = num_trees;
self->interval.left = sequence_length;
self->in.stop = M - 1;
self->out.stop = M - 1;
self->direction = TSK_DIR_REVERSE;
}
tsk_bug_assert(index <= self->index);
/* Same cursor re-basing as in tsk_tree_position_prev. */
if (self->direction == TSK_DIR_REVERSE) {
left_current_index = self->out.stop;
right_current_index = self->in.stop;
} else {
left_current_index = self->in.stop - 1;
right_current_index = self->out.stop - 1;
}
self->direction = TSK_DIR_REVERSE;
right = breakpoints[index + 1];
j = left_current_index;
self->out.start = j;
while (j >= 0 && left_coords[left_order[j]] >= right) {
j--;
}
self->out.stop = j;
/* Coming from the null state (index was set to num_trees above) there
 * is nothing to remove, so collapse the out range to empty. */
if (self->index == num_trees) {
self->out.start = self->out.stop;
}
j = right_current_index;
/* Skip edges (in removal order) that only start at or after the target
 * coordinate; they never need to be inserted. Note this loop
 * deliberately tests the left coordinate of right-ordered edges. */
while (j >= 0 && left_coords[right_order[j]] >= right) {
j--;
}
self->in.start = j;
while (j >= 0 && right_coords[right_order[j]] >= right) {
j--;
}
self->in.stop = j;
self->interval.right = right;
self->interval.left = breakpoints[index];
self->out.order = left_order;
self->in.order = right_order;
self->index = index;
return ret;
}
/* ======================================================== *
* Tree
* ======================================================== */
/* Walk parent pointers from u until a node with no parent is reached and
 * return that node.
 * NOTE: u is not bounds-checked.
 */
static tsk_id_t
tsk_tree_get_node_root(const tsk_tree_t *self, tsk_id_t u)
{
    const tsk_id_t *restrict parent = self->parent;
    tsk_id_t root;

    for (root = u; parent[root] != TSK_NULL; root = parent[root]) {
    }
    return root;
}
/* Initialise `self` as a tree over `tree_sequence`.
 *
 * The struct is zeroed first, so on any failure the partially allocated
 * arrays can still be released with tsk_tree_free. The per-node arrays have
 * num_nodes + 1 entries to leave room for the virtual root. Sample-count
 * arrays are allocated unless TSK_NO_SAMPLE_COUNTS is set, and sample-list
 * arrays only when TSK_SAMPLE_LISTS is set.
 *
 * Returns 0 on success, TSK_ERR_BAD_PARAM_VALUE for a NULL tree sequence,
 * TSK_ERR_NO_MEMORY on allocation failure, or an error from the position
 * init / tree clear steps. */
int TSK_WARN_UNUSED
tsk_tree_init(tsk_tree_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options)
{
    int ret = 0;
    tsk_size_t num_samples, num_nodes, array_len;

    tsk_memset(self, 0, sizeof(tsk_tree_t));
    if (tree_sequence == NULL) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }
    num_samples = tree_sequence->num_samples;
    num_nodes = tree_sequence->tables->nodes.num_rows;

    self->tree_sequence = tree_sequence;
    self->samples = tree_sequence->samples;
    self->num_nodes = num_nodes;
    self->virtual_root = (tsk_id_t) num_nodes;
    self->options = options;
    self->root_threshold = 1;

    /* One extra slot in each per-node array for the virtual root. */
    array_len = num_nodes + 1;
    self->parent = tsk_malloc(array_len * sizeof(*self->parent));
    self->left_child = tsk_malloc(array_len * sizeof(*self->left_child));
    self->right_child = tsk_malloc(array_len * sizeof(*self->right_child));
    self->left_sib = tsk_malloc(array_len * sizeof(*self->left_sib));
    self->right_sib = tsk_malloc(array_len * sizeof(*self->right_sib));
    self->num_children = tsk_calloc(array_len, sizeof(*self->num_children));
    self->edge = tsk_malloc(array_len * sizeof(*self->edge));
    if (self->parent == NULL || self->left_child == NULL || self->right_child == NULL
        || self->left_sib == NULL || self->right_sib == NULL
        || self->num_children == NULL || self->edge == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    if ((self->options & TSK_NO_SAMPLE_COUNTS) == 0) {
        self->num_samples = tsk_calloc(array_len, sizeof(*self->num_samples));
        self->num_tracked_samples
            = tsk_calloc(array_len, sizeof(*self->num_tracked_samples));
        if (self->num_samples == NULL || self->num_tracked_samples == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
    }

    if ((self->options & TSK_SAMPLE_LISTS) != 0) {
        self->left_sample = tsk_malloc(array_len * sizeof(*self->left_sample));
        self->right_sample = tsk_malloc(array_len * sizeof(*self->right_sample));
        /* next_sample is indexed by sample, not by node. */
        self->next_sample = tsk_malloc(num_samples * sizeof(*self->next_sample));
        if (self->left_sample == NULL || self->right_sample == NULL
            || self->next_sample == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
    }

    ret = tsk_tree_position_init(&self->tree_pos, tree_sequence, 0);
    if (ret == 0) {
        ret = tsk_tree_clear(self);
    }
out:
    return ret;
}
/* Set the root threshold and clear the tree so roots are recomputed.
 *
 * Returns TSK_ERR_BAD_PARAM_VALUE for a threshold of 0, and
 * TSK_ERR_UNSUPPORTED_OPERATION when the tree is not in the null state
 * (index != -1); in both cases the tree is left unchanged. Otherwise
 * returns the result of tsk_tree_clear. */
int
tsk_tree_set_root_threshold(tsk_tree_t *self, tsk_size_t root_threshold)
{
    int ret;

    if (root_threshold == 0) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
    } else if (self->index != -1) {
        /* The value may only change while the tree is in the null state. */
        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
    } else {
        self->root_threshold = root_threshold;
        /* Reset the roots */
        ret = tsk_tree_clear(self);
    }
    return ret;
}
/* Return the tree's current root threshold. */
tsk_size_t
tsk_tree_get_root_threshold(const tsk_tree_t *self)
{
    const tsk_size_t threshold = self->root_threshold;

    return threshold;
}
/* Release every array owned by the tree and its embedded tree position.
 * Always returns 0. */
int
tsk_tree_free(tsk_tree_t *self)
{
    /* Tree topology arrays */
    tsk_safe_free(self->parent);
    tsk_safe_free(self->left_child);
    tsk_safe_free(self->right_child);
    tsk_safe_free(self->left_sib);
    tsk_safe_free(self->right_sib);
    tsk_safe_free(self->num_children);
    tsk_safe_free(self->edge);

    /* Sample counts */
    tsk_safe_free(self->num_samples);
    tsk_safe_free(self->num_tracked_samples);

    /* Sample lists */
    tsk_safe_free(self->left_sample);
    tsk_safe_free(self->right_sample);
    tsk_safe_free(self->next_sample);

    tsk_tree_position_free(&self->tree_pos);
    return 0;
}
/* True when the tree was created with the TSK_SAMPLE_LISTS option. */
bool
tsk_tree_has_sample_lists(const tsk_tree_t *self)
{
    return (self->options & TSK_SAMPLE_LISTS) != 0;
}
/* True unless the tree was created with the TSK_NO_SAMPLE_COUNTS option. */
bool
tsk_tree_has_sample_counts(const tsk_tree_t *self)
{
    return (self->options & TSK_NO_SAMPLE_COUNTS) == 0;
}
/* Zero the tracked-sample count of every node, including the virtual root.
 * Returns TSK_ERR_UNSUPPORTED_OPERATION if the tree does not maintain
 * sample counts, 0 otherwise. */
static int TSK_WARN_UNUSED
tsk_tree_reset_tracked_samples(tsk_tree_t *self)
{
    if (!tsk_tree_has_sample_counts(self)) {
        return tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
    }
    tsk_memset(self->num_tracked_samples, 0,
        (self->num_nodes + 1) * sizeof(*self->num_tracked_samples));
    return 0;
}
/* Replace the tree's tracked samples with the given list.
 *
 * All tracked counts are reset first, then each listed sample adds one to
 * its own count and to that of every ancestor reached through the parent
 * array. The virtual root's count is set directly to the list length.
 *
 * Returns 0 on success, or TSK_ERR_UNSUPPORTED_OPERATION (no sample
 * counts), TSK_ERR_NODE_OUT_OF_BOUNDS, TSK_ERR_BAD_SAMPLES (node is not a
 * sample) or TSK_ERR_DUPLICATE_SAMPLE. When an error is hit part-way
 * through the list, counts already propagated are not rolled back. */
int TSK_WARN_UNUSED
tsk_tree_set_tracked_samples(
    tsk_tree_t *self, tsk_size_t num_tracked_samples, const tsk_id_t *tracked_samples)
{
    int ret = TSK_ERR_GENERIC;
    tsk_size_t *counts = self->num_tracked_samples;
    const tsk_id_t *parent = self->parent;
    tsk_size_t k;
    tsk_id_t node;

    /* TODO This is not needed when the tree is new. We should use the
     * state machine to check and only reset the tracked samples when needed.
     */
    ret = tsk_tree_reset_tracked_samples(self);
    if (ret != 0) {
        goto out;
    }
    self->num_tracked_samples[self->virtual_root] = num_tracked_samples;

    for (k = 0; k < num_tracked_samples; k++) {
        node = tracked_samples[k];
        if (node < 0 || node >= (tsk_id_t) self->num_nodes) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }
        if (!tsk_treeseq_is_sample(self->tree_sequence, node)) {
            ret = tsk_trace_error(TSK_ERR_BAD_SAMPLES);
            goto out;
        }
        if (self->num_tracked_samples[node] != 0) {
            ret = tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
            goto out;
        }
        /* Propagate this upwards */
        for (; node != TSK_NULL; node = parent[node]) {
            counts[node]++;
        }
    }
out:
    return ret;
}
/* Make the tracked samples exactly the samples found in the postorder
 * traversal started at `node`.
 *
 * After resetting all tracked counts, each node visited in postorder gets
 * the sum of its children's counts plus one if it is flagged as a sample.
 * The count of the last node visited is then copied to each of its
 * ancestors and to the virtual root.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or an error from the traversal
 * or from the reset step. */
int TSK_WARN_UNUSED
tsk_tree_track_descendant_samples(tsk_tree_t *self, tsk_id_t node)
{
    int ret = 0;
    tsk_id_t *order = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*order));
    const tsk_id_t *restrict parent = self->parent;
    const tsk_id_t *restrict left_child = self->left_child;
    const tsk_id_t *restrict right_sib = self->right_sib;
    const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags;
    tsk_size_t *tracked = self->num_tracked_samples;
    tsk_size_t total, k, order_len;
    tsk_id_t current, child, ancestor;

    if (order == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_tree_postorder_from(self, node, order, &order_len);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_tree_reset_tracked_samples(self);
    if (ret != 0) {
        goto out;
    }

    current = 0; /* keep the compiler happy */
    for (k = 0; k < order_len; k++) {
        current = order[k];
        child = left_child[current];
        while (child != TSK_NULL) {
            tracked[current] += tracked[child];
            child = right_sib[child];
        }
        if (flags[current] & TSK_NODE_IS_SAMPLE) {
            tracked[current] += 1;
        } else {
            tracked[current] += 0;
        }
    }

    /* Copy the final count up through the ancestors of the last node
     * visited, and onto the virtual root. */
    total = tracked[current];
    for (ancestor = parent[current]; ancestor != TSK_NULL;
         ancestor = parent[ancestor]) {
        tracked[ancestor] = total;
    }
    tracked[self->virtual_root] = total;
out:
    tsk_safe_free(order);
    return ret;
}
/* Copy the state of `self` into `dest`.
 *
 * Unless TSK_NO_INIT is given, `dest` is first initialised over the same
 * tree sequence with `options`. Both trees must refer to the same tree
 * sequence, otherwise TSK_ERR_BAD_PARAM_VALUE is returned. Scalar state
 * and the per-node topology arrays are always copied. Sample counts and
 * sample lists are copied only when `dest` has them, and it is a
 * TSK_ERR_UNSUPPORTED_OPERATION error if `dest` has them and `self` does
 * not. Returns 0 on success. */
int TSK_WARN_UNUSED
tsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options)
{
    int ret = TSK_ERR_GENERIC;
    /* Per-node arrays include a slot for the virtual root. */
    const tsk_size_t num_slots = self->num_nodes + 1;

    if ((options & TSK_NO_INIT) == 0) {
        ret = tsk_tree_init(dest, self->tree_sequence, options);
        if (ret != 0) {
            goto out;
        }
    }
    if (dest->tree_sequence != self->tree_sequence) {
        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
        goto out;
    }

    /* Scalar state */
    dest->index = self->index;
    dest->interval = self->interval;
    dest->direction = self->direction;
    dest->left_index = self->left_index;
    dest->right_index = self->right_index;
    dest->sites = self->sites;
    dest->sites_length = self->sites_length;
    dest->num_edges = self->num_edges;
    dest->root_threshold = self->root_threshold;
    dest->tree_pos = self->tree_pos;

    /* Topology arrays */
    tsk_memcpy(dest->parent, self->parent, num_slots * sizeof(*self->parent));
    tsk_memcpy(
        dest->left_child, self->left_child, num_slots * sizeof(*self->left_child));
    tsk_memcpy(
        dest->right_child, self->right_child, num_slots * sizeof(*self->right_child));
    tsk_memcpy(dest->left_sib, self->left_sib, num_slots * sizeof(*self->left_sib));
    tsk_memcpy(dest->right_sib, self->right_sib, num_slots * sizeof(*self->right_sib));
    tsk_memcpy(
        dest->num_children, self->num_children, num_slots * sizeof(*self->num_children));
    tsk_memcpy(dest->edge, self->edge, num_slots * sizeof(*self->edge));

    if ((dest->options & TSK_NO_SAMPLE_COUNTS) == 0) {
        if ((self->options & TSK_NO_SAMPLE_COUNTS) != 0) {
            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
            goto out;
        }
        tsk_memcpy(
            dest->num_samples, self->num_samples, num_slots * sizeof(*self->num_samples));
        tsk_memcpy(dest->num_tracked_samples, self->num_tracked_samples,
            num_slots * sizeof(*self->num_tracked_samples));
    }

    if ((dest->options & TSK_SAMPLE_LISTS) != 0) {
        if ((self->options & TSK_SAMPLE_LISTS) == 0) {
            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
            goto out;
        }
        tsk_memcpy(
            dest->left_sample, self->left_sample, num_slots * sizeof(*self->left_sample));
        tsk_memcpy(dest->right_sample, self->right_sample,
            num_slots * sizeof(*self->right_sample));
        /* next_sample is indexed by sample, not by node. */
        tsk_memcpy(dest->next_sample, self->next_sample,
            self->tree_sequence->num_samples * sizeof(*self->next_sample));
    }
    ret = 0;
out:
    return ret;
}
/* Two trees compare equal when they belong to the same tree sequence
 * object and sit at the same tree index. */
bool TSK_WARN_UNUSED
tsk_tree_equals(const tsk_tree_t *self, const tsk_tree_t *other)
{
    if (self->tree_sequence != other->tree_sequence) {
        return false;
    }
    return self->index == other->index;
}
/* Validate a node ID for this tree. IDs from 0 up to and including
 * num_nodes (the virtual root) are accepted; anything else yields
 * TSK_ERR_NODE_OUT_OF_BOUNDS. */
static int
tsk_tree_check_node(const tsk_tree_t *self, tsk_id_t u)
{
    if (u >= 0 && u <= (tsk_id_t) self->num_nodes) {
        return 0;
    }
    return tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
}
/* Return true if v is reached by walking parent pointers upwards from u
 * (u itself counts, so a node is its own descendant). Returns false if
 * either ID fails the node bounds check. */
bool
tsk_tree_is_descendant(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v)
{
    const tsk_id_t *restrict parent = self->parent;
    tsk_id_t cursor;

    if (tsk_tree_check_node(self, u) != 0 || tsk_tree_check_node(self, v) != 0) {
        return false;
    }
    for (cursor = u; cursor != v && cursor != TSK_NULL; cursor = parent[cursor]) {
    }
    return cursor == v;
}
/* Find the most recent common ancestor of u and v in this tree.
 *
 * The search repeatedly replaces whichever node has the smaller time with
 * its parent (v on ties) until the two meet or one runs out of ancestors.
 * *mrca is set to the meeting node, to TSK_NULL when there is none, or to
 * the virtual root when either argument is the virtual root.
 *
 * Returns 0 on success, or the bounds-check error for u or v, in which
 * case *mrca is not written. */
int TSK_WARN_UNUSED
tsk_tree_get_mrca(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v, tsk_id_t *mrca)
{
    int ret = 0;
    double time_u, time_v;
    const tsk_id_t *restrict parent = self->parent;
    const double *restrict time = self->tree_sequence->tables->nodes.time;

    ret = tsk_tree_check_node(self, u);
    if (ret == 0) {
        ret = tsk_tree_check_node(self, v);
    }
    if (ret != 0) {
        goto out;
    }

    /* Simplest to make the virtual_root a special case here to avoid
     * doing the time lookup. */
    if (u == self->virtual_root || v == self->virtual_root) {
        *mrca = self->virtual_root;
        goto out;
    }

    time_u = time[u];
    time_v = time[v];
    while (u != v) {
        if (time_u < time_v) {
            u = parent[u];
            if (u == TSK_NULL) {
                break;
            }
            time_u = time[u];
        } else {
            v = parent[v];
            if (v == TSK_NULL) {
                break;
            }
            time_v = time[v];
        }
    }
    if (u == v) {
        *mrca = u;
    } else {
        *mrca = TSK_NULL;
    }
out:
    return ret;
}
/* Count the sample-flagged nodes visited by a preorder traversal started
 * at u and store the count in *num_samples.
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or the traversal's error code;
 * *num_samples is only written on success. */
static int
tsk_tree_get_num_samples_by_traversal(
    const tsk_tree_t *self, tsk_id_t u, tsk_size_t *num_samples)
{
    int ret = 0;
    const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags;
    tsk_id_t *visited = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*visited));
    tsk_size_t num_visited, k;
    tsk_size_t total = 0;

    if (visited == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_tree_preorder_from(self, u, visited, &num_visited);
    if (ret != 0) {
        goto out;
    }
    for (k = 0; k < num_visited; k++) {
        if (flags[visited[k]] & TSK_NODE_IS_SAMPLE) {
            total++;
        }
    }
    *num_samples = total;
out:
    tsk_safe_free(visited);
    return ret;
}
/* Store in *num_samples the number of samples below node u.
 * The cached count is used when the tree maintains sample counts;
 * otherwise the value is computed by traversal.
 * Returns 0 on success or an error code (e.g. u out of bounds). */
int TSK_WARN_UNUSED
tsk_tree_get_num_samples(const tsk_tree_t *self, tsk_id_t u, tsk_size_t *num_samples)
{
    int ret = tsk_tree_check_node(self, u);

    if (ret != 0) {
        return ret;
    }
    if (self->options & TSK_NO_SAMPLE_COUNTS) {
        ret = tsk_tree_get_num_samples_by_traversal(self, u, num_samples);
    } else {
        *num_samples = (tsk_size_t) self->num_samples[u];
    }
    return ret;
}
/* Store in *num_tracked_samples the tracked-sample count of node u.
 * Returns the bounds-check error for u, or
 * TSK_ERR_UNSUPPORTED_OPERATION when the tree has no sample counts;
 * the output is only written on success. */
int TSK_WARN_UNUSED
tsk_tree_get_num_tracked_samples(
    const tsk_tree_t *self, tsk_id_t u, tsk_size_t *num_tracked_samples)
{
    int ret = tsk_tree_check_node(self, u);

    if (ret == 0) {
        if (self->options & TSK_NO_SAMPLE_COUNTS) {
            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
        } else {
            *num_tracked_samples = self->num_tracked_samples[u];
        }
    }
    return ret;
}
/* Report whether u is a sample, as decided by the tree's tree sequence. */
bool
tsk_tree_is_sample(const tsk_tree_t *self, tsk_id_t u)
{
    const tsk_treeseq_t *ts = self->tree_sequence;

    return tsk_treeseq_is_sample(ts, u);
}
/* The leftmost root is stored as the left child of the virtual root. */
tsk_id_t
tsk_tree_get_left_root(const tsk_tree_t *self)
{
    const tsk_id_t virtual_root = self->virtual_root;

    return self->left_child[virtual_root];
}
/* The rightmost root is stored as the right child of the virtual root. */
tsk_id_t
tsk_tree_get_right_root(const tsk_tree_t *self)
{
    const tsk_id_t virtual_root = self->virtual_root;

    return self->right_child[virtual_root];
}
tsk_size_t
tsk_tree_get_num_roots(const tsk_tree_t *self)
{
return (tsk_size_t) self->num_children[self->virtual_root];
}
/* Store the parent of node u in *parent. Returns 0, or the bounds-check
 * error for u, in which case *parent is not written. */
int TSK_WARN_UNUSED
tsk_tree_get_parent(const tsk_tree_t *self, tsk_id_t u, tsk_id_t *parent)
{
    int ret = tsk_tree_check_node(self, u);

    if (ret == 0) {
        *parent = self->parent[u];
    }
    return ret;
}
/* Store the time of node u in *t. The virtual root is reported as
 * INFINITY; every other node is looked up in the node table, and the
 * lookup's error code is returned on failure (leaving *t unwritten). */
int TSK_WARN_UNUSED
tsk_tree_get_time(const tsk_tree_t *self, tsk_id_t u, double *t)
{
    int ret = 0;
    tsk_node_t row;

    if (u != self->virtual_root) {
        ret = tsk_treeseq_get_node(self->tree_sequence, u, &row);
        if (ret == 0) {
            *t = row.time;
        }
    } else {
        *t = INFINITY;
    }
    return ret;
}
/* Length of the branch above u: parent time minus node time, or 0 when u
 * has no parent. No bounds checking is performed on u. */
static inline double
tsk_tree_get_branch_length_unsafe(const tsk_tree_t *self, tsk_id_t u)
{
    const double *node_times = self->tree_sequence->tables->nodes.time;
    const tsk_id_t p = self->parent[u];

    if (p == TSK_NULL) {
        return 0;
    }
    return node_times[p] - node_times[u];
}
/* Bounds-checked wrapper: store the length of the branch above u in
 * *ret_branch_length. Returns 0, or the bounds-check error for u, in which
 * case the output is not written. */
int TSK_WARN_UNUSED
tsk_tree_get_branch_length(const tsk_tree_t *self, tsk_id_t u, double *ret_branch_length)
{
    int ret = tsk_tree_check_node(self, u);

    if (ret == 0) {
        *ret_branch_length = tsk_tree_get_branch_length_unsafe(self, u);
    }
    return ret;
}
/* Sum the branch lengths of the nodes visited by a preorder traversal from
 * `node`, excluding the first node visited, and store the total in
 * *ret_tbl. Nodes without a parent contribute nothing.
 * Returns 0 on success, TSK_ERR_NO_MEMORY, or the traversal's error code;
 * *ret_tbl is only written on success. */
int
tsk_tree_get_total_branch_length(const tsk_tree_t *self, tsk_id_t node, double *ret_tbl)
{
    int ret = 0;
    const tsk_id_t *restrict parent = self->parent;
    const double *restrict time = self->tree_sequence->tables->nodes.time;
    tsk_id_t *visited = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*visited));
    tsk_size_t k, num_visited;
    tsk_id_t child, above;
    double total = 0;

    if (visited == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = tsk_tree_preorder_from(self, node, visited, &num_visited);
    if (ret != 0) {
        goto out;
    }
    /* We always skip the first node because we don't return the branch length
     * over the input node. */
    for (k = 1; k < num_visited; k++) {
        child = visited[k];
        above = parent[child];
        if (above == TSK_NULL) {
            continue;
        }
        total += time[above] - time[child];
    }
    *ret_tbl = total;
out:
    tsk_safe_free(visited);
    return ret;
}
/* Hand back the tree's site array and its length through the two output
 * pointers. Always returns 0. */
int TSK_WARN_UNUSED
tsk_tree_get_sites(
    const tsk_tree_t *self, const tsk_site_t **sites, tsk_size_t *sites_length)
{
    *sites_length = self->sites_length;
    *sites = self->sites;
    return 0;
}
/* Number of ancestors above u in the parent array, or -1 when u is the
 * virtual root. u must be a valid node in the tree. For internal use */
static int
tsk_tree_get_depth_unsafe(const tsk_tree_t *self, tsk_id_t u)
{
    const tsk_id_t *restrict parent = self->parent;
    tsk_id_t ancestor;
    int depth = -1;

    if (u != self->virtual_root) {
        depth = 0;
        ancestor = parent[u];
        while (ancestor != TSK_NULL) {
            depth++;
            ancestor = parent[ancestor];
        }
    }
    return depth;
}
/* Bounds-checked wrapper: store the depth of node u in *depth_ret.
 * Returns 0, or the bounds-check error for u, in which case the output is
 * not written. */
int TSK_WARN_UNUSED
tsk_tree_get_depth(const tsk_tree_t *self, tsk_id_t u, int *depth_ret)
{
    int ret = tsk_tree_check_node(self, u);

    if (ret == 0) {
        *depth_ret = tsk_tree_get_depth_unsafe(self, u);
    }
    return ret;
}
/* Follow parent pointers from u to the topmost node that has no parent and
 * return it. u is not bounds-checked. */
static tsk_id_t
tsk_tree_node_root(tsk_tree_t *self, tsk_id_t u)
{
    tsk_id_t top;

    for (top = u; self->parent[top] != TSK_NULL; top = self->parent[top]) {
    }
    return top;
}
/* Debugging aid: checks the internal consistency of the tree's arrays and
 * trips tsk_bug_assert on the first violation. The checks cover the virtual
 * root, the root list, the agreement between the left-to-right and
 * right-to-left child lists, the site positions versus the tree interval,
 * the per-node sample counts, and the presence/absence of the optional
 * arrays according to the tree's options. */
static void
tsk_tree_check_state(const tsk_tree_t *self)
{
    tsk_id_t u, v;
    tsk_size_t j, num_samples;
    int err, c;
    tsk_site_t site;
    tsk_id_t *children = tsk_malloc(self->num_nodes * sizeof(tsk_id_t));
    bool *is_root = tsk_calloc(self->num_nodes, sizeof(bool));

    /* Both scratch arrays are written to below, so both allocations must
     * have succeeded before we go any further. */
    tsk_bug_assert(children != NULL);
    tsk_bug_assert(is_root != NULL);

    /* Check the virtual root properties */
    tsk_bug_assert(self->parent[self->virtual_root] == TSK_NULL);
    tsk_bug_assert(self->left_sib[self->virtual_root] == TSK_NULL);
    tsk_bug_assert(self->right_sib[self->virtual_root] == TSK_NULL);

    /* Mark the topmost ancestor of every sample as an expected root. */
    for (j = 0; j < self->tree_sequence->num_samples; j++) {
        u = self->samples[j];
        while (self->parent[u] != TSK_NULL) {
            u = self->parent[u];
        }
        is_root[u] = true;
    }
    if (self->tree_sequence->num_samples == 0) {
        tsk_bug_assert(self->left_child[self->virtual_root] == TSK_NULL);
    }

    /* Iterate over the roots and make sure they are set */
    for (u = tsk_tree_get_left_root(self); u != TSK_NULL; u = self->right_sib[u]) {
        tsk_bug_assert(is_root[u]);
        is_root[u] = false;
    }

    for (u = 0; u < (tsk_id_t) self->num_nodes; u++) {
        /* Any flag still set here is an expected root missing from the
         * root list. */
        tsk_bug_assert(!is_root[u]);
        /* Record the children left-to-right ... */
        c = 0;
        for (v = self->left_child[u]; v != TSK_NULL; v = self->right_sib[v]) {
            tsk_bug_assert(self->parent[v] == u);
            children[c] = v;
            c++;
        }
        /* ... and check the right-to-left list visits them in reverse. */
        for (v = self->right_child[u]; v != TSK_NULL; v = self->left_sib[v]) {
            tsk_bug_assert(c > 0);
            c--;
            tsk_bug_assert(v == children[c]);
        }
    }

    /* Every site of the tree must lie within [left, right). */
    for (j = 0; j < self->sites_length; j++) {
        site = self->sites[j];
        tsk_bug_assert(self->interval.left <= site.position);
        tsk_bug_assert(site.position < self->interval.right);
    }

    if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {
        tsk_bug_assert(self->num_samples != NULL);
        tsk_bug_assert(self->num_tracked_samples != NULL);
        /* The incrementally maintained counts must match a fresh traversal. */
        for (u = 0; u < (tsk_id_t) self->num_nodes; u++) {
            err = tsk_tree_get_num_samples_by_traversal(self, u, &num_samples);
            tsk_bug_assert(err == 0);
            tsk_bug_assert(num_samples == (tsk_size_t) self->num_samples[u]);
        }
    } else {
        tsk_bug_assert(self->num_samples == NULL);
        tsk_bug_assert(self->num_tracked_samples == NULL);
    }
    if (self->options & TSK_SAMPLE_LISTS) {
        tsk_bug_assert(self->right_sample != NULL);
        tsk_bug_assert(self->left_sample != NULL);
        tsk_bug_assert(self->next_sample != NULL);
    } else {
        tsk_bug_assert(self->right_sample == NULL);
        tsk_bug_assert(self->left_sample == NULL);
        tsk_bug_assert(self->next_sample == NULL);
    }

    free(children);
    free(is_root);
}
void
tsk_tree_print_state(const tsk_tree_t *self, FILE *out)
{
tsk_size_t j;
tsk_site_t site;
fprintf(out, "Tree state:\n");
fprintf(out, "options = %d\n", self->options);
fprintf(out, "root_threshold = %lld\n", (long long) self->root_threshold);
fprintf(out, "left = %f\n", self->interval.left);
fprintf(out, "right = %f\n", self->interval.right);
fprintf(out, "index = %lld\n", (long long) self->index);
fprintf(out, "num_edges = %d\n", (int) self->num_edges);
fprintf(out, "node\tedge\tparent\tlchild\trchild\tlsib\trsib");
if (self->options & TSK_SAMPLE_LISTS) {
fprintf(out, "\thead\ttail");
}
fprintf(out, "\n");
for (j = 0; j < self->num_nodes + 1; j++) {
fprintf(out, "%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld", (long long) j,
(long long) self->edge[j], (long long) self->parent[j],
(long long) self->left_child[j], (long long) self->right_child[j],
(long long) self->left_sib[j], (long long) self->right_sib[j]);
if (self->options & TSK_SAMPLE_LISTS) {
fprintf(out, "\t%lld\t%lld\t", (long long) self->left_sample[j],
(long long) self->right_sample[j]);
}
if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {
fprintf(out, "\t%lld\t%lld", (long long) self->num_samples[j],
(long long) self->num_tracked_samples[j]);
}
fprintf(out, "\n");
}
fprintf(out, "sites = \n");
for (j = 0; j < self->sites_length; j++) {
site = self->sites[j];
fprintf(out, "\t%lld\t%f\n", (long long) site.id, site.position);
}
tsk_tree_check_state(self);
}
/* Methods for positioning the tree along the sequence */
/* The following methods are performance sensitive and so we use a
 * lot of restrict pointers. Because we are saying that we don't have
 * any aliases to these pointers, we pass around the reference to parent
 * since it's used in all the functions. */

/* Rebuilds the sample-list entries for ``node`` and for each of its
 * ancestors, walking upwards through ``parent`` until TSK_NULL is reached.
 *
 * For every visited node u, left[u]/right[u] are the first and last entries
 * of u's list and next[] chains the entries together. The list for u is
 * reassembled from scratch: it starts with u's own entry if u has a sample
 * index (sample_index_map[u] != TSK_NULL), and the lists of u's children are
 * then appended in left-to-right child order. */
static inline void
tsk_tree_update_sample_lists(
tsk_tree_t *self, tsk_id_t node, const tsk_id_t *restrict parent)
{
tsk_id_t u, v, sample_index;
tsk_id_t *restrict left_child = self->left_child;
tsk_id_t *restrict right_sib = self->right_sib;
tsk_id_t *restrict left = self->left_sample;
tsk_id_t *restrict right = self->right_sample;
tsk_id_t *restrict next = self->next_sample;
const tsk_id_t *restrict sample_index_map = self->tree_sequence->sample_index_map;
for (u = node; u != TSK_NULL; u = parent[u]) {
sample_index = sample_index_map[u];
if (sample_index != TSK_NULL) {
/* u is itself a sample: collapse its list back to the single entry
 * stored in left[u] before re-appending the children. */
right[u] = left[u];
} else {
/* Not a sample: start from an empty list. */
left[u] = TSK_NULL;
right[u] = TSK_NULL;
}
for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {
/* Children with an empty list contribute nothing. */
if (left[v] != TSK_NULL) {
tsk_bug_assert(right[v] != TSK_NULL);
if (left[u] == TSK_NULL) {
/* First non-empty child: u's list is exactly v's list. */
left[u] = left[v];
right[u] = right[v];
} else {
/* Splice v's list onto the current tail of u's list. */
next[right[u]] = left[v];
right[u] = right[v];
}
}
}
}
}
/* Unlinks child ``c`` from the sibling list of parent ``p``: the neighbours of
 * ``c`` (or ``p``'s left/right child pointers when ``c`` sits at an end of the
 * list) are joined together, the parent and sibling links of ``c`` are reset
 * to TSK_NULL and ``p``'s child count is decremented. */
static inline void
tsk_tree_remove_branch(
    tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t *restrict parent)
{
    tsk_id_t *restrict left_child = self->left_child;
    tsk_id_t *restrict right_child = self->right_child;
    tsk_id_t *restrict left_sib = self->left_sib;
    tsk_id_t *restrict right_sib = self->right_sib;
    tsk_id_t *restrict num_children = self->num_children;
    const tsk_id_t before = left_sib[c];
    const tsk_id_t after = right_sib[c];

    /* Patch whatever pointed at c from the left ... */
    if (before != TSK_NULL) {
        right_sib[before] = after;
    } else {
        left_child[p] = after;
    }
    /* ... and whatever pointed at c from the right. */
    if (after != TSK_NULL) {
        left_sib[after] = before;
    } else {
        right_child[p] = before;
    }

    parent[c] = TSK_NULL;
    left_sib[c] = TSK_NULL;
    right_sib[c] = TSK_NULL;
    num_children[p]--;
}
/* Attaches ``c`` as the new rightmost child of ``p``: sets parent[c], appends
 * ``c`` after the previous rightmost child (or makes it the only child when
 * ``p`` had none) and increments ``p``'s child count. */
static inline void
tsk_tree_insert_branch(
    tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t *restrict parent)
{
    tsk_id_t *restrict left_child = self->left_child;
    tsk_id_t *restrict right_child = self->right_child;
    tsk_id_t *restrict left_sib = self->left_sib;
    tsk_id_t *restrict right_sib = self->right_sib;
    tsk_id_t *restrict num_children = self->num_children;
    tsk_id_t old_last;

    parent[c] = p;
    old_last = right_child[p];
    if (old_last != TSK_NULL) {
        right_sib[old_last] = c;
    } else {
        /* p had no children, so c is also its leftmost child. */
        left_child[p] = c;
    }
    /* old_last is TSK_NULL exactly when c has no left sibling. */
    left_sib[c] = old_last;
    right_sib[c] = TSK_NULL;
    right_child[p] = c;
    num_children[p]++;
}
/* Adds ``root`` to the list of roots, which is stored as the child list of
 * the virtual root. The parent link set by the branch insertion is reset
 * afterwards, so a root keeps parent == TSK_NULL. */
static inline void
tsk_tree_insert_root(tsk_tree_t *self, tsk_id_t root, tsk_id_t *restrict parent)
{
    const tsk_id_t holder = self->virtual_root;

    tsk_tree_insert_branch(self, holder, root, parent);
    parent[root] = TSK_NULL;
}
/* Drops ``root`` from the list of roots by unlinking it from the child list
 * of the virtual root. */
static inline void
tsk_tree_remove_root(tsk_tree_t *self, tsk_id_t root, tsk_id_t *restrict parent)
{
    const tsk_id_t holder = self->virtual_root;

    tsk_tree_remove_branch(self, holder, root, parent);
}
/* Removes the edge joining child ``c`` to parent ``p`` from the tree.
 *
 * The branch is unlinked, the edge count is decremented and edge[c] is reset
 * to TSK_NULL. Unless TSK_NO_SAMPLE_COUNTS is set, the sample counts of ``c``
 * are subtracted from every node on the path from ``p`` upwards and the root
 * list is brought up to date. With TSK_SAMPLE_LISTS, the sample lists are
 * rebuilt from ``p`` upwards. The ``edge_id`` argument is unused. */
static void
tsk_tree_remove_edge(
tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t TSK_UNUSED(edge_id))
{
tsk_id_t *restrict parent = self->parent;
tsk_size_t *restrict num_samples = self->num_samples;
tsk_size_t *restrict num_tracked_samples = self->num_tracked_samples;
tsk_id_t *restrict edge = self->edge;
const tsk_size_t root_threshold = self->root_threshold;
tsk_id_t u;
tsk_id_t path_end = TSK_NULL;
bool path_end_was_root = false;
/* A node qualifies as a root when it has at least root_threshold samples
 * beneath it. */
#define POTENTIAL_ROOT(U) (num_samples[U] >= root_threshold)
tsk_tree_remove_branch(self, p, c, parent);
self->num_edges--;
edge[c] = TSK_NULL;
if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {
u = p;
while (u != TSK_NULL) {
/* After the loop these hold the topmost node of the path and whether
 * it qualified as a root *before* its count was reduced. */
path_end = u;
path_end_was_root = POTENTIAL_ROOT(u);
num_samples[u] -= num_samples[c];
num_tracked_samples[u] -= num_tracked_samples[c];
u = parent[u];
}
/* The top of the old path stops being a root if losing c's samples took
 * it below the threshold. */
if (path_end_was_root && !POTENTIAL_ROOT(path_end)) {
tsk_tree_remove_root(self, path_end, parent);
}
/* The detached subtree becomes a root of its own if it is large enough. */
if (POTENTIAL_ROOT(c)) {
tsk_tree_insert_root(self, c, parent);
}
}
if (self->options & TSK_SAMPLE_LISTS) {
tsk_tree_update_sample_lists(self, p, parent);
}
}
/* Inserts the edge ``edge_id`` joining child ``c`` to parent ``p`` into the
 * tree.
 *
 * Unless TSK_NO_SAMPLE_COUNTS is set, the sample counts of ``c`` are first
 * added to every node on the path from ``p`` upwards and the root list is
 * brought up to date. The branch is then linked in, the edge count is
 * incremented and edge[c] records ``edge_id``. With TSK_SAMPLE_LISTS, the
 * sample lists are rebuilt from ``p`` upwards. */
static void
tsk_tree_insert_edge(tsk_tree_t *self, tsk_id_t p, tsk_id_t c, tsk_id_t edge_id)
{
tsk_id_t *restrict parent = self->parent;
tsk_size_t *restrict num_samples = self->num_samples;
tsk_size_t *restrict num_tracked_samples = self->num_tracked_samples;
tsk_id_t *restrict edge = self->edge;
const tsk_size_t root_threshold = self->root_threshold;
tsk_id_t u;
tsk_id_t path_end = TSK_NULL;
bool path_end_was_root = false;
/* A node qualifies as a root when it has at least root_threshold samples
 * beneath it. */
#define POTENTIAL_ROOT(U) (num_samples[U] >= root_threshold)
if (!(self->options & TSK_NO_SAMPLE_COUNTS)) {
u = p;
while (u != TSK_NULL) {
/* After the loop these hold the topmost node of the path and whether
 * it qualified as a root *before* its count was increased. */
path_end = u;
path_end_was_root = POTENTIAL_ROOT(u);
num_samples[u] += num_samples[c];
num_tracked_samples[u] += num_tracked_samples[c];
u = parent[u];
}
/* c is about to get a parent, so it must leave the root list if it was
 * large enough to be on it. This happens before the branch is linked in
 * below, while c is still a child of the virtual root. */
if (POTENTIAL_ROOT(c)) {
tsk_tree_remove_root(self, c, parent);
}
/* The top of the path becomes a root if gaining c's samples lifted it to
 * the threshold. */
if (POTENTIAL_ROOT(path_end) && !path_end_was_root) {
tsk_tree_insert_root(self, path_end, parent);
}
}
tsk_tree_insert_branch(self, p, c, parent);
self->num_edges++;
edge[c] = edge_id;
if (self->options & TSK_SAMPLE_LISTS) {
tsk_tree_update_sample_lists(self, p, parent);
}
}
/* Resets the tree with tsk_tree_clear and then steps forwards once with
 * tsk_tree_next. Returns the value of tsk_tree_next, or the non-zero value
 * returned by tsk_tree_clear if the reset did not return 0. */
int TSK_WARN_UNUSED
tsk_tree_first(tsk_tree_t *self)
{
    int ret = tsk_tree_clear(self);

    if (ret == 0) {
        ret = tsk_tree_next(self);
    }
    return ret;
}
/* Resets the tree with tsk_tree_clear and then steps backwards once with
 * tsk_tree_prev. Returns the value of tsk_tree_prev, or the non-zero value
 * returned by tsk_tree_clear if the reset did not return 0. */
int TSK_WARN_UNUSED
tsk_tree_last(tsk_tree_t *self)
{
    int ret = tsk_tree_clear(self);

    if (ret == 0) {
        ret = tsk_tree_prev(self);
    }
    return ret;
}
/* Copies the index and the genomic interval from the tree's position object
 * into the tree itself and, when the site table is non-empty, points the
 * tree's site array at the sites of the tree with that index. */
static void
tsk_tree_update_index_and_interval(tsk_tree_t *self)
{
    tsk_table_collection_t *tables = self->tree_sequence->tables;

    self->interval.left = self->tree_pos.interval.left;
    self->interval.right = self->tree_pos.interval.right;
    self->index = self->tree_pos.index;

    if (tables->sites.num_rows == 0) {
        /* No per-tree site arrays to look up. */
        return;
    }
    self->sites = self->tree_sequence->tree_sites[self->index];
    self->sites_length = self->tree_sequence->tree_sites_length[self->index];
}
/* Advances the tree one step to the right. When the position object reports
 * a valid next position, the outgoing edges are removed, the incoming edges
 * are inserted, the index/interval are refreshed and TSK_TREE_OK is
 * returned. Otherwise the tree is cleared and the result of tsk_tree_clear
 * is returned. */
int TSK_WARN_UNUSED
tsk_tree_next(tsk_tree_t *self)
{
    tsk_table_collection_t *tables = self->tree_sequence->tables;
    const tsk_id_t *restrict edge_parent = tables->edges.parent;
    const tsk_id_t *restrict edge_child = tables->edges.child;
    tsk_tree_position_t pos;
    tsk_id_t k, e;
    bool valid;

    valid = tsk_tree_position_next(&self->tree_pos);
    if (!valid) {
        return tsk_tree_clear(self);
    }
    /* Work from a snapshot of the freshly advanced position. */
    pos = self->tree_pos;

    k = pos.out.start;
    while (k != pos.out.stop) {
        e = pos.out.order[k];
        tsk_tree_remove_edge(self, edge_parent[e], edge_child[e], e);
        k++;
    }
    k = pos.in.start;
    while (k != pos.in.stop) {
        e = pos.in.order[k];
        tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
        k++;
    }
    tsk_tree_update_index_and_interval(self);
    return TSK_TREE_OK;
}
/* Moves the tree one step to the left. When the position object reports a
 * valid previous position, the outgoing edges are removed, the incoming
 * edges are inserted (both ranges are walked with a decreasing index), the
 * index/interval are refreshed and TSK_TREE_OK is returned. Otherwise the
 * tree is cleared and the result of tsk_tree_clear is returned. */
int TSK_WARN_UNUSED
tsk_tree_prev(tsk_tree_t *self)
{
    tsk_table_collection_t *tables = self->tree_sequence->tables;
    const tsk_id_t *restrict edge_parent = tables->edges.parent;
    const tsk_id_t *restrict edge_child = tables->edges.child;
    tsk_tree_position_t pos;
    tsk_id_t k, e;
    bool valid;

    valid = tsk_tree_position_prev(&self->tree_pos);
    if (!valid) {
        return tsk_tree_clear(self);
    }
    /* Work from a snapshot of the freshly moved position. */
    pos = self->tree_pos;

    k = pos.out.start;
    while (k != pos.out.stop) {
        e = pos.out.order[k];
        tsk_tree_remove_edge(self, edge_parent[e], edge_child[e], e);
        k--;
    }
    k = pos.in.start;
    while (k != pos.in.stop) {
        e = pos.in.order[k];
        tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
        k--;
    }
    tsk_tree_update_index_and_interval(self);
    return TSK_TREE_OK;
}
/* True when ``x`` lies in the tree's half-open interval [left, right). */
static inline bool
tsk_tree_position_in_interval(const tsk_tree_t *self, double x)
{
    if (!(self->interval.left <= x)) {
        return false;
    }
    return x < self->interval.right;
}
/* Positions the tree on the tree whose interval contains ``x``. This is the
 * path tsk_tree_seek takes when the tree's index is -1, so there are no
 * edges to remove. ``options`` is unused.
 *
 * The target tree index is looked up in the breakpoints array. The position
 * object is then moved there from the left end when x <= L / 2 and from the
 * right end otherwise, and only those candidate edges that actually cover
 * the target interval are inserted. Returns 0 on success or the error
 * reported by the position seek. */
static int
tsk_tree_seek_from_null(tsk_tree_t *self, double x, tsk_flags_t TSK_UNUSED(options))
{
int ret = 0;
tsk_table_collection_t *tables = self->tree_sequence->tables;
const tsk_id_t *restrict edge_parent = tables->edges.parent;
const tsk_id_t *restrict edge_child = tables->edges.child;
const double *restrict edge_left = tables->edges.left;
const double *restrict edge_right = tables->edges.right;
double interval_left, interval_right;
const double *restrict breakpoints = self->tree_sequence->breakpoints;
const tsk_size_t num_trees = self->tree_sequence->num_trees;
const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);
tsk_id_t j, e, index;
tsk_tree_position_t tree_pos;
/* NOTE(review): tsk_search_sorted is defined elsewhere; the adjustment
 * below steps back one slot whenever the breakpoint found is strictly
 * greater than x, so that breakpoints[index] <= x. */
index = (tsk_id_t) tsk_search_sorted(breakpoints, num_trees + 1, x);
if (breakpoints[index] > x) {
index--;
}
if (x <= L / 2.0) {
ret = tsk_tree_position_seek_forward(&self->tree_pos, index);
if (ret != 0) {
goto out;
}
// Since we are seeking from null, there are no edges to remove
tree_pos = self->tree_pos;
interval_left = tree_pos.interval.left;
for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {
e = tree_pos.in.order[j];
/* Skip candidate edges that do not cover the left end of the target
 * interval. */
if (edge_left[e] <= interval_left && interval_left < edge_right[e]) {
tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
}
}
} else {
ret = tsk_tree_position_seek_backward(&self->tree_pos, index);
if (ret != 0) {
goto out;
}
tree_pos = self->tree_pos;
interval_right = tree_pos.interval.right;
/* Mirror image of the forward case: walk the range with a decreasing
 * index and test coverage at the right end of the target interval. */
for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {
e = tree_pos.in.order[j];
if (edge_right[e] >= interval_right && interval_right > edge_left[e]) {
tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
}
}
}
tsk_tree_update_index_and_interval(self);
out:
return ret;
}
/* Moves an already positioned tree directly to the tree with the given
 * ``index`` by seeking the position object forwards. Candidate edges are
 * filtered so that only edges belonging to the current tree are removed
 * and only edges covering the target interval are inserted. Returns 0 on
 * success or the error reported by the position seek. */
static int TSK_WARN_UNUSED
tsk_tree_seek_forward(tsk_tree_t *self, tsk_id_t index)
{
int ret = 0;
tsk_table_collection_t *tables = self->tree_sequence->tables;
const tsk_id_t *restrict edge_parent = tables->edges.parent;
const tsk_id_t *restrict edge_child = tables->edges.child;
const double *restrict edge_left = tables->edges.left;
const double *restrict edge_right = tables->edges.right;
double interval_left, e_left;
/* Right end of the tree we are leaving; captured before the position
 * moves. */
const double old_right = self->interval.right;
tsk_id_t j, e;
tsk_tree_position_t tree_pos;
ret = tsk_tree_position_seek_forward(&self->tree_pos, index);
if (ret != 0) {
goto out;
}
tree_pos = self->tree_pos;
interval_left = tree_pos.interval.left;
for (j = tree_pos.out.start; j != tree_pos.out.stop; j++) {
e = tree_pos.out.order[j];
e_left = edge_left[e];
/* Only edges starting before the old right end are treated as being in
 * the current tree; the rest were never inserted and are skipped. */
if (e_left < old_right) {
tsk_bug_assert(edge_parent[e] != TSK_NULL);
tsk_tree_remove_edge(self, edge_parent[e], edge_child[e], e);
}
tsk_bug_assert(e_left < interval_left);
}
for (j = tree_pos.in.start; j != tree_pos.in.stop; j++) {
e = tree_pos.in.order[j];
/* Insert only the candidates that cover the left end of the target
 * interval. */
if (edge_left[e] <= interval_left && interval_left < edge_right[e]) {
tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
}
}
tsk_tree_update_index_and_interval(self);
out:
return ret;
}
/* Moves an already positioned tree directly to the tree with the given
 * ``index`` by seeking the position object backwards. Candidate edges are
 * filtered so that only edges belonging to the current tree are removed
 * and only edges covering the target interval are inserted. Both ranges are
 * walked with a decreasing index. Returns 0 on success or the error
 * reported by the position seek. */
static int TSK_WARN_UNUSED
tsk_tree_seek_backward(tsk_tree_t *self, tsk_id_t index)
{
int ret = 0;
tsk_table_collection_t *tables = self->tree_sequence->tables;
const tsk_id_t *restrict edge_parent = tables->edges.parent;
const tsk_id_t *restrict edge_child = tables->edges.child;
const double *restrict edge_left = tables->edges.left;
const double *restrict edge_right = tables->edges.right;
double interval_right, e_right;
/* Right end of the tree we are leaving; captured before the position
 * moves. */
const double old_right = self->interval.right;
tsk_id_t j, e;
tsk_tree_position_t tree_pos;
ret = tsk_tree_position_seek_backward(&self->tree_pos, index);
if (ret != 0) {
goto out;
}
tree_pos = self->tree_pos;
interval_right = tree_pos.interval.right;
for (j = tree_pos.out.start; j != tree_pos.out.stop; j--) {
e = tree_pos.out.order[j];
e_right = edge_right[e];
/* Only edges reaching at least the old right end are treated as being in
 * the current tree; the rest were never inserted and are skipped. */
if (e_right >= old_right) {
tsk_bug_assert(edge_parent[e] != TSK_NULL);
tsk_tree_remove_edge(self, edge_parent[e], edge_child[e], e);
}
tsk_bug_assert(e_right > interval_right);
}
for (j = tree_pos.in.start; j != tree_pos.in.stop; j--) {
e = tree_pos.in.order[j];
/* Insert only the candidates that cover the right end of the target
 * interval. */
if (edge_right[e] >= interval_right && interval_right > edge_left[e]) {
tsk_tree_insert_edge(self, edge_parent[e], edge_child[e], e);
}
}
tsk_tree_update_index_and_interval(self);
out:
return ret;
}
/* Seeks to the tree with index ``tree`` by seeking to the breakpoint stored
 * at that index. Returns TSK_ERR_SEEK_OUT_OF_BOUNDS (via tsk_trace_error)
 * when ``tree`` is negative or not smaller than the number of trees;
 * otherwise returns the result of tsk_tree_seek. */
int TSK_WARN_UNUSED
tsk_tree_seek_index(tsk_tree_t *self, tsk_id_t tree, tsk_flags_t options)
{
    int ret = 0;
    const tsk_id_t num_trees = (tsk_id_t) self->tree_sequence->num_trees;
    double target;

    if (tree >= 0 && tree < num_trees) {
        target = self->tree_sequence->breakpoints[tree];
        ret = tsk_tree_seek(self, target, options);
    } else {
        ret = tsk_trace_error(TSK_ERR_SEEK_OUT_OF_BOUNDS);
    }
    return ret;
}
/* Seeks to the tree containing ``x`` by stepping one tree at a time with
 * tsk_tree_next or tsk_tree_prev. The direction is chosen by comparing the
 * distances to ``x`` going rightwards and leftwards, where each distance
 * accounts for wrapping around the end of the sequence. Returns 0 on
 * success or the negative value returned by a failed step. */
static int TSK_WARN_UNUSED
tsk_tree_seek_linear(tsk_tree_t *self, double x)
{
    const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);
    const double t_l = self->interval.left;
    const double t_r = self->interval.right;
    double distance_left, distance_right;
    int step_right;
    int ret = 0;

    if (x >= t_l) {
        /* |------|========|------|-------| */
        /* 0     t_l      t_r     x       L */
        distance_right = x - t_r;
        distance_left = t_l + L - x;
    } else {
        /* |-----|-----|========|---------| */
        /* 0     x    t_l      t_r        L */
        distance_left = t_l - x;
        distance_right = L - t_r + x;
    }
    /* Ties go to the rightward direction. */
    step_right = distance_right <= distance_left;

    while (!tsk_tree_position_in_interval(self, x)) {
        if (step_right) {
            ret = tsk_tree_next(self);
        } else {
            ret = tsk_tree_prev(self);
        }
        if (ret < 0) {
            goto out;
        }
    }
    /* Steps return non-negative status values; callers expect plain 0. */
    ret = 0;
out:
    return ret;
}
/* Seeks an already positioned tree directly to the tree containing ``x``,
 * without visiting the trees in between. The target index is looked up in
 * the breakpoints array and the tree is moved there with
 * tsk_tree_seek_backward when ``x`` lies left of the current interval and
 * with tsk_tree_seek_forward otherwise.
 *
 * Returns 0 on success. If the underlying seek reports an error, that error
 * is returned as-is; the post-condition check is only applied on success so
 * that a reported failure is not turned into an assertion failure. */
static int TSK_WARN_UNUSED
tsk_tree_seek_skip(tsk_tree_t *self, double x)
{
    const double t_l = self->interval.left;
    int ret = 0;
    tsk_id_t index;
    const tsk_size_t num_trees = self->tree_sequence->num_trees;
    const double *restrict breakpoints = self->tree_sequence->breakpoints;

    /* Step back one slot when the breakpoint found is strictly greater than
     * x, so that breakpoints[index] <= x. */
    index = (tsk_id_t) tsk_search_sorted(breakpoints, num_trees + 1, x);
    if (breakpoints[index] > x) {
        index--;
    }

    if (x < t_l) {
        ret = tsk_tree_seek_backward(self, index);
    } else {
        ret = tsk_tree_seek_forward(self, index);
    }
    if (ret != 0) {
        goto out;
    }
    tsk_bug_assert(tsk_tree_position_in_interval(self, x));
out:
    return ret;
}
/* Positions the tree on the tree whose interval contains ``x``.
 *
 * ``x`` must satisfy 0 <= x < L, where L is the sequence length; any other
 * value, including NaN, yields TSK_ERR_SEEK_OUT_OF_BOUNDS. A tree in the
 * null state (index == -1) is positioned with tsk_tree_seek_from_null.
 * Otherwise the tree is moved with tsk_tree_seek_skip when TSK_SEEK_SKIP is
 * set in ``options`` and with tsk_tree_seek_linear when it is not.
 *
 * Returns 0 on success or an error code. */
int TSK_WARN_UNUSED
tsk_tree_seek(tsk_tree_t *self, double x, tsk_flags_t options)
{
    int ret = 0;
    const double L = tsk_treeseq_get_sequence_length(self->tree_sequence);

    /* Written as a negated range test so that NaN is rejected as well:
     * every ordered comparison with NaN is false, so NaN would slip through
     * "x < 0 || x >= L" and the linear seek would then never terminate
     * because no interval ever contains it. */
    if (!(x >= 0 && x < L)) {
        ret = tsk_trace_error(TSK_ERR_SEEK_OUT_OF_BOUNDS);
        goto out;
    }

    if (self->index == -1) {
        ret = tsk_tree_seek_from_null(self, x, options);
    } else {
        if (options & TSK_SEEK_SKIP) {
            ret = tsk_tree_seek_skip(self, x);
        } else {
            ret = tsk_tree_seek_linear(self, x);
        }
    }
out:
    return ret;
}
/* Puts the tree into the null state: index -1, an empty interval, no edges,
 * a null position object, and all topology arrays reset. The per-sample
 * attributes are then re-initialised and, when sample counts are kept and
 * root_threshold is 1, every sample is installed as a root. Always
 * returns 0. */
int TSK_WARN_UNUSED
tsk_tree_clear(tsk_tree_t *self)
{
    const tsk_size_t N = self->num_nodes + 1;
    const tsk_size_t num_samples = self->tree_sequence->num_samples;
    const tsk_flags_t *flags = self->tree_sequence->tables->nodes.flags;
    const bool keep_counts = (self->options & TSK_NO_SAMPLE_COUNTS) == 0;
    const bool keep_lists = (self->options & TSK_SAMPLE_LISTS) != 0;
    tsk_size_t k;
    tsk_id_t sample;

    self->index = -1;
    self->num_edges = 0;
    self->interval.left = 0;
    self->interval.right = 0;
    tsk_tree_position_set_null(&self->tree_pos);

    /* TODO we should profile this method to see if just doing a single loop over
     * the nodes would be more efficient than multiple memsets.
     */
    /* Filling with 0xff bytes sets every entry to -1. */
    tsk_memset(self->parent, 0xff, N * sizeof(*self->parent));
    tsk_memset(self->left_child, 0xff, N * sizeof(*self->left_child));
    tsk_memset(self->right_child, 0xff, N * sizeof(*self->right_child));
    tsk_memset(self->left_sib, 0xff, N * sizeof(*self->left_sib));
    tsk_memset(self->right_sib, 0xff, N * sizeof(*self->right_sib));
    tsk_memset(self->edge, 0xff, N * sizeof(*self->edge));
    tsk_memset(self->num_children, 0, N * sizeof(*self->num_children));

    if (keep_counts) {
        tsk_memset(self->num_samples, 0, N * sizeof(*self->num_samples));
        /* The tracked counts cannot simply be zeroed with memset because we
         * don't know which samples are tracked; only the entries of
         * non-sample nodes are reset here.
         */
        for (k = 0; k < self->num_nodes; k++) {
            if ((flags[k] & TSK_NODE_IS_SAMPLE) == 0) {
                self->num_tracked_samples[k] = 0;
            }
        }
        /* The total tracked_samples gets set in set_tracked_samples */
        self->num_samples[self->virtual_root] = num_samples;
    }
    if (keep_lists) {
        tsk_memset(self->left_sample, 0xff, N * sizeof(tsk_id_t));
        tsk_memset(self->right_sample, 0xff, N * sizeof(tsk_id_t));
        tsk_memset(self->next_sample, 0xff, num_samples * sizeof(tsk_id_t));
    }

    /* Set the sample attributes */
    for (k = 0; k < num_samples; k++) {
        sample = self->samples[k];
        if (keep_counts) {
            self->num_samples[sample] = 1;
        }
        if (keep_lists) {
            /* We are mapping to *indexes* into the list of samples here */
            self->left_sample[sample] = (tsk_id_t) k;
            self->right_sample[sample] = (tsk_id_t) k;
        }
    }

    /* Set initial roots */
    if (keep_counts && self->root_threshold == 1) {
        for (k = 0; k < num_samples; k++) {
            tsk_tree_insert_root(self, self->samples[k], self->parent);
        }
    }
    return 0;
}
/* Returns a cheaply computed upper bound on the number of nodes in the
 * tree, suitable for sizing traversal buffers. Returns 0 when the tree has
 * no tree sequence attached. */
tsk_size_t
tsk_tree_get_size_bound(const tsk_tree_t *self)
{
    if (self->tree_sequence == NULL) {
        return 0;
    }
    /* This is a safe upper bound which can be computed cheaply.
     * We have at most n roots and each edge adds at most one new
     * node to the tree. We also allow space for the virtual root,
     * to simplify client code.
     *
     * In the common case of a binary tree with a single root, we have
     * 2n - 1 nodes in total, and 2n - 2 edges. Therefore, we return
     * 3n - 1, which is an over-estimate of 1/2 and we allocate
     * 1.5 times as much memory as we need.
     *
     * Since tracking the exact number of nodes in the tree would require
     * storing the number of nodes beneath every node and complicate
     * the tree transition method, this seems like a good compromise
     * and will result in less memory usage overall in nearly all cases.
     */
    return 1 + self->tree_sequence->num_samples + self->num_edges;
}
/* Traversal orders */

/* Allocates an uninitialised array of node IDs with room for
 * tsk_tree_get_size_bound(self) entries, for use as a traversal stack.
 * Returns whatever tsk_malloc returns; the caller checks for NULL and frees
 * the array. */
static tsk_id_t *
tsk_tree_alloc_node_stack(const tsk_tree_t *self)
{
    tsk_id_t *stack = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(tsk_id_t));

    return stack;
}
/* Preorder traversal of the whole tree: forwards to tsk_tree_preorder_from
 * with the special root value -1, which starts from every root. */
int
tsk_tree_preorder(const tsk_tree_t *self, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)
{
    int ret = tsk_tree_preorder_from(self, -1, nodes, num_nodes_ret);

    return ret;
}
/* Writes the nodes reachable from ``root`` into ``nodes`` in preorder and
 * stores how many were written in ``*num_nodes_ret``.
 *
 * ``root`` == -1 starts the traversal from every root of the tree; any other
 * value is validated with tsk_tree_check_node and used as the single
 * starting node. Traversing from -1 or from the virtual root requires
 * sample counts, otherwise TSK_ERR_UNSUPPORTED_OPERATION is returned.
 * ``nodes`` must be large enough for the traversal (see
 * tsk_tree_get_size_bound). Returns 0 on success or an error code. */
int
tsk_tree_preorder_from(
    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)
{
    int ret = 0;
    const tsk_id_t *restrict right_child = self->right_child;
    const tsk_id_t *restrict left_sib = self->left_sib;
    tsk_id_t *stack = tsk_tree_alloc_node_stack(self);
    tsk_size_t visited = 0;
    tsk_id_t node, child;
    int size = 0; /* number of entries currently on the stack */

    if (stack == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if ((root == -1 || root == self->virtual_root)
        && !tsk_tree_has_sample_counts(self)) {
        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
        goto out;
    }

    if (root != -1) {
        ret = tsk_tree_check_node(self, root);
        if (ret != 0) {
            goto out;
        }
        stack[0] = root;
        size = 1;
    } else {
        /* Push the roots right-to-left so the leftmost is popped first. */
        for (node = right_child[self->virtual_root]; node != TSK_NULL;
             node = left_sib[node]) {
            stack[size] = node;
            size++;
        }
    }

    while (size > 0) {
        size--;
        node = stack[size];
        nodes[visited] = node;
        visited++;
        /* Children go on right-to-left for the same reason as the roots. */
        for (child = right_child[node]; child != TSK_NULL; child = left_sib[child]) {
            stack[size] = child;
            size++;
        }
    }
    *num_nodes_ret = visited;
out:
    tsk_safe_free(stack);
    return ret;
}
/* We could implement this using the preorder function, but since it's
 * going to be performance critical we want to avoid the overhead
 * of mallocing the intermediate node list (which will be bigger than
 * the number of samples). */

/* Writes the sample nodes (those flagged TSK_NODE_IS_SAMPLE) reachable from
 * ``root`` into ``nodes`` in preorder and stores how many were written in
 * ``*num_nodes_ret``.
 *
 * ``root`` == -1 or the virtual root starts from every root of the tree and
 * requires sample counts (TSK_ERR_UNSUPPORTED_OPERATION otherwise); any
 * other value is validated with tsk_tree_check_node. Returns 0 on success
 * or an error code. */
int
tsk_tree_preorder_samples_from(
    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)
{
    int ret = 0;
    const tsk_id_t *restrict right_child = self->right_child;
    const tsk_id_t *restrict left_sib = self->left_sib;
    const tsk_flags_t *restrict flags = self->tree_sequence->tables->nodes.flags;
    tsk_id_t *stack = tsk_tree_alloc_node_stack(self);
    tsk_size_t found = 0;
    tsk_id_t node, child;
    int size = 0; /* number of entries currently on the stack */

    if (stack == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* We could push the virtual_root onto the stack directly to simplify
     * the code a little, but then we'd have to put an extra check in
     * when looking up the flags array (which isn't defined for virtual_root).
     */
    if (root == -1 || root == self->virtual_root) {
        if (!tsk_tree_has_sample_counts(self)) {
            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
            goto out;
        }
        for (node = right_child[self->virtual_root]; node != TSK_NULL;
             node = left_sib[node]) {
            stack[size] = node;
            size++;
        }
    } else {
        ret = tsk_tree_check_node(self, root);
        if (ret != 0) {
            goto out;
        }
        stack[0] = root;
        size = 1;
    }

    while (size > 0) {
        size--;
        node = stack[size];
        if (flags[node] & TSK_NODE_IS_SAMPLE) {
            nodes[found] = node;
            found++;
        }
        for (child = right_child[node]; child != TSK_NULL; child = left_sib[child]) {
            stack[size] = child;
            size++;
        }
    }
    *num_nodes_ret = found;
out:
    tsk_safe_free(stack);
    return ret;
}
/* Postorder traversal of the whole tree: forwards to
 * tsk_tree_postorder_from with the special root value -1, which starts from
 * every root. */
int
tsk_tree_postorder(const tsk_tree_t *self, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)
{
    int ret = tsk_tree_postorder_from(self, -1, nodes, num_nodes_ret);

    return ret;
}
/* Writes the nodes reachable from ``root`` into ``nodes`` in postorder and
 * stores how many were written in ``*num_nodes_ret``.
 *
 * ``root`` == -1 or the virtual root starts from every root of the tree and
 * requires sample counts (TSK_ERR_UNSUPPORTED_OPERATION otherwise); when the
 * virtual root itself was requested it is appended as the final entry. Any
 * other value is validated with tsk_tree_check_node. Returns 0 on success
 * or an error code. */
int
tsk_tree_postorder_from(
    const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes_ret)
{
    int ret = 0;
    const tsk_id_t *restrict right_child = self->right_child;
    const tsk_id_t *restrict left_sib = self->left_sib;
    const tsk_id_t *restrict parent = self->parent;
    tsk_id_t *stack = tsk_tree_alloc_node_stack(self);
    tsk_size_t emitted = 0;
    tsk_id_t node, child, postorder_parent;
    int size = 0; /* number of entries currently on the stack */
    bool is_virtual_root = root == self->virtual_root;

    if (stack == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    if (root == -1 || is_virtual_root) {
        if (!tsk_tree_has_sample_counts(self)) {
            ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_OPERATION);
            goto out;
        }
        for (node = right_child[self->virtual_root]; node != TSK_NULL;
             node = left_sib[node]) {
            stack[size] = node;
            size++;
        }
    } else {
        ret = tsk_tree_check_node(self, root);
        if (ret != 0) {
            goto out;
        }
        stack[0] = root;
        size = 1;
    }

    /* postorder_parent is the parent of the node emitted most recently. When
     * the node on top of the stack equals it, that node's children have all
     * been emitted already and the node itself is due. */
    postorder_parent = TSK_NULL;
    while (size > 0) {
        node = stack[size - 1];
        if (right_child[node] == TSK_NULL || node == postorder_parent) {
            size--;
            postorder_parent = parent[node];
            nodes[emitted] = node;
            emitted++;
        } else {
            /* First visit of an internal node: leave it on the stack and
             * push its children on top. */
            for (child = right_child[node]; child != TSK_NULL;
                 child = left_sib[child]) {
                stack[size] = child;
                size++;
            }
        }
    }
    if (is_virtual_root) {
        nodes[emitted] = root;
        emitted++;
    }
    *num_nodes_ret = emitted;
out:
    tsk_safe_free(stack);
    return ret;
}
/* Balance/imbalance metrics */

/* Computes the sum, over all childless nodes reachable from the roots, of
 * the number of edges between the node and its root, and stores it in
 * ``*result``. Result is a tsk_size_t value here because we could imagine
 * the total depth overflowing a 32bit integer for a large tree. Returns 0
 * on success or TSK_ERR_NO_MEMORY. */
int
tsk_tree_sackin_index(const tsk_tree_t *self, tsk_size_t *result)
{
    /* Keep the size of the stack elements to 8 bytes in total in the
     * standard case. A tsk_id_t depth value is always safe, since
     * depth counts the number of nodes encountered on a path.
     */
    struct stack_elem {
        tsk_id_t node;
        tsk_id_t depth;
    };
    int ret = 0;
    const tsk_id_t *restrict right_child = self->right_child;
    const tsk_id_t *restrict left_sib = self->left_sib;
    struct stack_elem *stack
        = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*stack));
    struct stack_elem cur = { .node = TSK_NULL, .depth = 0 };
    tsk_size_t depth_sum;
    tsk_id_t child;
    int size = 0; /* number of entries currently on the stack */

    if (stack == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Every root starts at depth 0. */
    for (child = right_child[self->virtual_root]; child != TSK_NULL;
         child = left_sib[child]) {
        cur.node = child;
        stack[size] = cur;
        size++;
    }

    depth_sum = 0;
    while (size > 0) {
        size--;
        cur = stack[size];
        child = right_child[cur.node];
        if (child == TSK_NULL) {
            /* Childless node: its depth contributes to the index. */
            depth_sum += (tsk_size_t) cur.depth;
            continue;
        }
        /* All children sit one level deeper than the node just popped. */
        cur.depth++;
        for (; child != TSK_NULL; child = left_sib[child]) {
            cur.node = child;
            stack[size] = cur;
            size++;
        }
    }
    *result = depth_sum;
out:
    tsk_safe_free(stack);
    return ret;
}
/* Computes the sum, over every node with exactly two children, of the
 * absolute difference between the leaf counts of the two child subtrees, and
 * stores it in ``*result``.
 *
 * Returns TSK_ERR_UNDEFINED_MULTIROOT unless the tree has exactly one root
 * and TSK_ERR_UNDEFINED_NONBINARY when a node has a number of children other
 * than 0 or 2. Returns 0 on success. */
int
tsk_tree_colless_index(const tsk_tree_t *self, tsk_size_t *result)
{
    int ret = 0;
    const tsk_id_t *restrict right_child = self->right_child;
    const tsk_id_t *restrict left_sib = self->left_sib;
    tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));
    tsk_id_t *num_leaves = tsk_calloc(self->num_nodes, sizeof(*num_leaves));
    tsk_size_t k, num_nodes, imbalance;
    tsk_id_t num_children, node, child;

    if (nodes == NULL || num_leaves == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (tsk_tree_get_num_roots(self) != 1) {
        ret = tsk_trace_error(TSK_ERR_UNDEFINED_MULTIROOT);
        goto out;
    }
    /* Postorder guarantees children are processed before their parent. */
    ret = tsk_tree_postorder(self, nodes, &num_nodes);
    if (ret != 0) {
        goto out;
    }

    imbalance = 0;
    for (k = 0; k < num_nodes; k++) {
        node = nodes[k];
        /* Cheaper to compute this on the fly than to access the num_children
         * array, since we're already iterating over the children. */
        num_children = 0;
        for (child = right_child[node]; child != TSK_NULL; child = left_sib[child]) {
            num_leaves[node] += num_leaves[child];
            num_children++;
        }
        if (num_children == 2) {
            child = right_child[node];
            imbalance += (tsk_size_t) llabs(
                num_leaves[child] - num_leaves[left_sib[child]]);
        } else if (num_children == 0) {
            num_leaves[node] = 1;
        } else {
            ret = tsk_trace_error(TSK_ERR_UNDEFINED_NONBINARY);
            goto out;
        }
    }
    *result = imbalance;
out:
    tsk_safe_free(nodes);
    tsk_safe_free(num_leaves);
    return ret;
}
/* Computes the sum of 1 / M(u) over every node u that has both a parent and
 * at least one child, where M(u) is one more than the largest M value among
 * u's children (nodes that are never assigned a value count as 0). The sum
 * is stored in ``*result``. Returns 0 on success or an error code. */
int
tsk_tree_b1_index(const tsk_tree_t *self, double *result)
{
    int ret = 0;
    const tsk_id_t *restrict parent = self->parent;
    const tsk_id_t *restrict right_child = self->right_child;
    const tsk_id_t *restrict left_sib = self->left_sib;
    tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));
    tsk_size_t *max_path_length = tsk_calloc(self->num_nodes, sizeof(*max_path_length));
    tsk_size_t k, num_nodes, longest;
    double sum = 0.0;
    tsk_id_t node, child;

    if (nodes == NULL || max_path_length == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    /* Postorder guarantees children are processed before their parent. */
    ret = tsk_tree_postorder(self, nodes, &num_nodes);
    if (ret != 0) {
        goto out;
    }

    for (k = 0; k < num_nodes; k++) {
        node = nodes[k];
        /* Roots and childless nodes do not contribute. */
        if (parent[node] == TSK_NULL || right_child[node] == TSK_NULL) {
            continue;
        }
        longest = 0;
        for (child = right_child[node]; child != TSK_NULL; child = left_sib[child]) {
            longest = TSK_MAX(longest, max_path_length[child]);
        }
        max_path_length[node] = longest + 1;
        sum += 1 / (double) max_path_length[node];
    }
    *result = sum;
out:
    tsk_safe_free(nodes);
    tsk_safe_free(max_path_length);
    return ret;
}
static double
general_log(double x, double base)
{
return log(x) / log(base);
}
/* Computes -sum(p * log_base(p)) over the childless nodes of a single-rooted
 * tree and stores it in ``*result``. Here p is the product of
 * 1 / (number of children) taken over every node on the path from the root
 * down to, but excluding, the childless node.
 *
 * Returns TSK_ERR_UNDEFINED_MULTIROOT unless the tree has exactly one root.
 * Returns 0 on success. */
int
tsk_tree_b2_index(const tsk_tree_t *self, double base, double *result)
{
    struct stack_elem {
        tsk_id_t node;
        double path_product;
    };
    int ret = 0;
    const tsk_id_t *restrict right_child = self->right_child;
    const tsk_id_t *restrict left_sib = self->left_sib;
    struct stack_elem *stack
        = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*stack));
    struct stack_elem cur = { .node = TSK_NULL, .path_product = 1 };
    double entropy = 0;
    double num_children;
    tsk_id_t child;
    int size = 0; /* number of entries currently on the stack */

    if (stack == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (tsk_tree_get_num_roots(self) != 1) {
        ret = tsk_trace_error(TSK_ERR_UNDEFINED_MULTIROOT);
        goto out;
    }

    /* The single root starts with probability 1. */
    cur.node = tsk_tree_get_left_root(self);
    stack[0] = cur;
    size = 1;

    while (size > 0) {
        size--;
        cur = stack[size];
        child = right_child[cur.node];
        if (child == TSK_NULL) {
            entropy -= cur.path_product * general_log(cur.path_product, base);
            continue;
        }
        /* Count the children first: each of them inherits an equal share of
         * the probability of reaching this node. */
        num_children = 0;
        for (; child != TSK_NULL; child = left_sib[child]) {
            num_children++;
        }
        cur.path_product *= 1 / num_children;
        for (child = right_child[cur.node]; child != TSK_NULL;
             child = left_sib[child]) {
            cur.node = child;
            stack[size] = cur;
            size++;
        }
    }
    *result = entropy;
out:
    tsk_safe_free(stack);
    return ret;
}
/* Counts the branches of the tree that are present at time ``t`` and stores
 * the count in ``*result``. A branch from child to parent is counted when
 * time[child] <= t < time[parent]. The traversal starts at the roots and
 * only descends into children that are older than ``t``.
 *
 * Returns TSK_ERR_TIME_NONFINITE when ``t`` is not finite. Returns 0 on
 * success. */
int
tsk_tree_num_lineages(const tsk_tree_t *self, double t, tsk_size_t *result)
{
    int ret = 0;
    const tsk_id_t *restrict right_child = self->right_child;
    const tsk_id_t *restrict left_sib = self->left_sib;
    const double *restrict time = self->tree_sequence->tables->nodes.time;
    tsk_id_t *stack = tsk_tree_alloc_node_stack(self);
    tsk_size_t count = 0;
    tsk_id_t node, child;
    double child_time, parent_time;
    int size = 0; /* number of entries currently on the stack */

    if (stack == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (!tsk_isfinite(t)) {
        ret = tsk_trace_error(TSK_ERR_TIME_NONFINITE);
        goto out;
    }

    /* Push the roots onto the stack */
    for (node = right_child[self->virtual_root]; node != TSK_NULL;
         node = left_sib[node]) {
        stack[size] = node;
        size++;
    }

    while (size > 0) {
        size--;
        node = stack[size];
        parent_time = time[node];
        for (child = right_child[node]; child != TSK_NULL; child = left_sib[child]) {
            child_time = time[child];
            if (child_time > t) {
                /* Only traverse down the tree as far as we need to */
                stack[size] = child;
                size++;
            } else if (t < parent_time) {
                /* The branch above this child spans t. */
                count++;
            }
        }
    }
    *result = count;
out:
    tsk_safe_free(stack);
    return ret;
}
/* Parsimony methods */

/* Returns ``value`` with bit number ``bit`` switched on. ``bit`` must be in
 * the range [0, 63]. */
static inline uint64_t
set_bit(uint64_t value, int32_t bit)
{
    const uint64_t mask = 1ULL << bit;

    return value | mask;
}
/* Tells whether bit number ``bit`` of ``value`` is on. ``bit`` must be in the
 * range [0, 63]. */
static inline bool
bit_is_set(uint64_t value, int32_t bit)
{
    const uint64_t shifted = value >> bit;

    return (shifted & 1ULL) == 1ULL;
}
/* Returns the index (0 = least significant) of the lowest bit that is set in
 * ``v``. ``v`` must be non-zero; this is checked with assert(). */
static inline int8_t
get_smallest_set_bit(uint64_t v)
{
    /* This is an inefficient implementation, there are several better
     * approaches. On GCC we can use
     * return (uint8_t) (__builtin_ffsll((long long) v) - 1);
     */
    int8_t index = 0;

    assert(v != 0);
    /* Drop trailing zero bits one at a time, counting how many were seen. */
    while ((v & 1) == 0) {
        v >>= 1;
        index++;
    }
    return index;
}
#define HARTIGAN_MAX_ALLELES 64
/* This interface is experimental. In the future, we should provide the option to
 * use a general cost matrix, in which case we'll use the Sankoff algorithm. For
 * now the cost_matrix argument is unused.
 *
 * We should also vectorise the function so that several sites can be processed
 * at once.
 *
 * The algorithm used here is Hartigan parsimony, "Minimum Mutation Fits to a
 * Given Tree", Biometrics 1973.
 *
 * Inputs:
 *  - genotypes: one value per sample, indexed like self->tree_sequence->samples.
 *    Each value must lie in [TSK_MISSING_DATA, HARTIGAN_MAX_ALLELES), otherwise
 *    TSK_ERR_BAD_GENOTYPE is returned. If every value is TSK_MISSING_DATA the
 *    call fails with TSK_ERR_GENOTYPES_ALL_MISSING.
 *  - options: with TSK_MM_FIXED_ANCESTRAL_STATE, *r_ancestral_state is read as
 *    the ancestral state and must lie in [0, HARTIGAN_MAX_ALLELES), otherwise
 *    TSK_ERR_BAD_ANCESTRAL_STATE is returned.
 *
 * Outputs (written only on success):
 *  - *r_ancestral_state: the ancestral state used.
 *  - *r_num_transitions / *r_transitions: the state transitions found. The
 *    array is allocated here and is not freed on the success path, so
 *    presumably the caller is responsible for freeing it (confirm against the
 *    public API documentation).
 *
 * Returns 0 on success, or the error code from the failing step.
 */
int TSK_WARN_UNUSED
tsk_tree_map_mutations(tsk_tree_t *self, int32_t *genotypes,
double *TSK_UNUSED(cost_matrix), tsk_flags_t options, int32_t *r_ancestral_state,
tsk_size_t *r_num_transitions, tsk_state_transition_t **r_transitions)
{
int ret = 0;
/* Entry of the preorder stack: the node to visit, the index (into
 * `transitions`) of the closest transition above it (TSK_NULL if none), and
 * the state inherited from above. */
struct stack_elem {
tsk_id_t node;
tsk_id_t transition_parent;
int32_t state;
};
const tsk_size_t num_samples = self->tree_sequence->num_samples;
const tsk_id_t *restrict left_child = self->left_child;
const tsk_id_t *restrict right_sib = self->right_sib;
const tsk_size_t N = tsk_treeseq_get_num_nodes(self->tree_sequence);
const tsk_flags_t *restrict node_flags = self->tree_sequence->tables->nodes.flags;
tsk_id_t *nodes = tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*nodes));
/* Note: to use less memory here and to improve cache performance we should
* probably change to allocating exactly the number of nodes returned by
* a preorder traversal, and then lay the memory out in this order. So, we'd
* need a map from node ID to its index in the preorder traversal, but this
* is trivial to compute. Probably doesn't matter so much at the moment
* when we're doing a single site, but it would make a big difference if
* we were vectorising over lots of sites. */
/* One 64-bit allele set per node, plus one extra slot at index N that is
 * used for the virtual root. Zero-initialised by tsk_calloc. */
uint64_t *restrict optimal_set = tsk_calloc(N + 1, sizeof(*optimal_set));
struct stack_elem *restrict preorder_stack
= tsk_malloc(tsk_tree_get_size_bound(self) * sizeof(*preorder_stack));
tsk_id_t u, v;
/* The largest possible number of transitions is one over every sample */
tsk_state_transition_t *transitions = tsk_malloc(num_samples * sizeof(*transitions));
int32_t allele, ancestral_state;
int stack_top;
struct stack_elem s;
tsk_size_t j, num_transitions, max_allele_count, num_nodes;
tsk_size_t allele_count[HARTIGAN_MAX_ALLELES];
tsk_size_t non_missing = 0;
int32_t num_alleles = 0;
if (optimal_set == NULL || preorder_stack == NULL || transitions == NULL
|| nodes == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* Validate the genotypes and seed the allele set of each sample node. While
 * doing so, num_alleles tracks the largest allele index seen. */
for (j = 0; j < num_samples; j++) {
if (genotypes[j] >= HARTIGAN_MAX_ALLELES || genotypes[j] < TSK_MISSING_DATA) {
ret = tsk_trace_error(TSK_ERR_BAD_GENOTYPE);
goto out;
}
u = self->tree_sequence->samples[j];
if (genotypes[j] == TSK_MISSING_DATA) {
/* All bits set */
optimal_set[u] = UINT64_MAX;
} else {
optimal_set[u] = set_bit(optimal_set[u], genotypes[j]);
num_alleles = TSK_MAX(genotypes[j], num_alleles);
non_missing++;
}
}
if (non_missing == 0) {
ret = tsk_trace_error(TSK_ERR_GENOTYPES_ALL_MISSING);
goto out;
}
/* Convert the largest allele index into an allele count. */
num_alleles++;
ancestral_state = 0; /* keep compiler happy */
if (options & TSK_MM_FIXED_ANCESTRAL_STATE) {
ancestral_state = *r_ancestral_state;
if ((ancestral_state < 0) || (ancestral_state >= HARTIGAN_MAX_ALLELES)) {
ret = tsk_trace_error(TSK_ERR_BAD_ANCESTRAL_STATE);
goto out;
} else if (ancestral_state >= num_alleles) {
/* Make sure the fixed ancestral state is among the alleles considered. */
num_alleles = (int32_t) (ancestral_state + 1);
}
}
/* Bottom-up pass: visit the nodes in postorder from the virtual root. For
 * each node, count how many children have each allele in their set; a
 * non-sample node then receives the alleles attaining the maximum count. */
ret = tsk_tree_postorder_from(self, self->virtual_root, nodes, &num_nodes);
if (ret != 0) {
goto out;
}
for (j = 0; j < num_nodes; j++) {
u = nodes[j];
tsk_memset(allele_count, 0, ((size_t) num_alleles) * sizeof(*allele_count));
for (v = left_child[u]; v != TSK_NULL; v = right_sib[v]) {
for (allele = 0; allele < num_alleles; allele++) {
allele_count[allele] += bit_is_set(optimal_set[v], allele);
}
}
/* the virtual root has no flags defined */
if (u == (tsk_id_t) N || !(node_flags[u] & TSK_NODE_IS_SAMPLE)) {
max_allele_count = 0;
for (allele = 0; allele < num_alleles; allele++) {
max_allele_count = TSK_MAX(max_allele_count, allele_count[allele]);
}
for (allele = 0; allele < num_alleles; allele++) {
if (allele_count[allele] == max_allele_count) {
optimal_set[u] = set_bit(optimal_set[u], allele);
}
}
}
}
if (!(options & TSK_MM_FIXED_ANCESTRAL_STATE)) {
/* Free choice: take the smallest allele in the virtual root's set. */
ancestral_state = get_smallest_set_bit(optimal_set[self->virtual_root]);
} else {
/* With every bit set, the virtual root accepts the fixed state and never
 * records a transition itself. */
optimal_set[self->virtual_root] = UINT64_MAX;
}
num_transitions = 0;
/* Do a preorder traversal */
/* Top-down pass: a node keeps the inherited state when that state is in its
 * set; otherwise it switches to the smallest allele in its set and a
 * transition is recorded. */
preorder_stack[0].node = self->virtual_root;
preorder_stack[0].state = ancestral_state;
preorder_stack[0].transition_parent = TSK_NULL;
stack_top = 0;
while (stack_top >= 0) {
s = preorder_stack[stack_top];
stack_top--;
if (!bit_is_set(optimal_set[s.node], s.state)) {
s.state = get_smallest_set_bit(optimal_set[s.node]);
transitions[num_transitions].node = s.node;
transitions[num_transitions].parent = s.transition_parent;
transitions[num_transitions].state = s.state;
/* Descendants see this transition as their closest one above. */
s.transition_parent = (tsk_id_t) num_transitions;
num_transitions++;
}
/* Children inherit the (possibly updated) state and transition parent. */
for (v = left_child[s.node]; v != TSK_NULL; v = right_sib[v]) {
stack_top++;
s.node = v;
preorder_stack[stack_top] = s;
}
}
*r_transitions = transitions;
*r_num_transitions = num_transitions;
*r_ancestral_state = ancestral_state;
/* The array has been handed to the caller; don't free it below. */
transitions = NULL;
out:
tsk_safe_free(transitions);
/* Cannot safe_free because of 'restrict' */
if (optimal_set != NULL) {
free(optimal_set);
}
if (preorder_stack != NULL) {
free(preorder_stack);
}
if (nodes != NULL) {
free(nodes);
}
return ret;
}
/* ======================================================== *
* KC Distance
* ======================================================== */
/* Working vectors for the KC distance over n samples. Both arrays have
 * N + n entries (see kc_vectors_alloc): indexes [0, N) hold one entry per
 * unordered sample pair and indexes [N, N + n) hold one entry per sample. */
typedef struct {
/* Integer component: a depth for pair entries, 1 for per-sample entries. */
tsk_size_t *m;
/* Time component: a time value for pair entries and for per-sample entries. */
double *M;
/* Number of samples. */
tsk_id_t n;
/* Number of unordered sample pairs, n * (n - 1) / 2. */
tsk_id_t N;
} kc_vectors;
/* Size `self` for n samples and allocate zero-filled m and M arrays with one
 * entry per sample pair followed by one entry per sample. Returns 0 on
 * success or TSK_ERR_NO_MEMORY; on failure any array that was allocated is
 * left in place for kc_vectors_free to release. */
static int
kc_vectors_alloc(kc_vectors *self, tsk_id_t n)
{
    size_t num_entries;

    self->n = n;
    self->N = (n * (n - 1)) / 2;
    num_entries = (size_t) (self->N + self->n);

    self->m = tsk_calloc(num_entries, sizeof(*self->m));
    self->M = tsk_calloc(num_entries, sizeof(*self->M));
    if (self->m == NULL || self->M == NULL) {
        return tsk_trace_error(TSK_ERR_NO_MEMORY);
    }
    return 0;
}
/* Release both arrays owned by `self`. */
static void
kc_vectors_free(kc_vectors *self)
{
    tsk_safe_free(self->M);
    tsk_safe_free(self->m);
}
/* Record the per-sample entry for sample node u: m is set to 1 and M to
 * `time`. Per-sample entries live after the N pair entries, at the position
 * given by the tree sequence's sample_index_map. */
static inline void
update_kc_vectors_single_sample(
    const tsk_treeseq_t *ts, kc_vectors *kc_vecs, tsk_id_t u, double time)
{
    const tsk_id_t slot = kc_vecs->N + ts->sample_index_map[u];

    kc_vecs->m[slot] = 1;
    kc_vecs->M[slot] = time;
}
/* For every pair made of one sample in the sample list of u and one sample
 * in the sample list of v, store `depth` and `time` in the corresponding
 * pair entry of the KC vectors. The lists are walked with the tree's
 * left_sample / right_sample / next_sample arrays. */
static inline void
update_kc_vectors_all_pairs(const tsk_tree_t *tree, kc_vectors *kc_vecs, tsk_id_t u,
    tsk_id_t v, tsk_size_t depth, double time)
{
    const tsk_id_t *restrict left_sample = tree->left_sample;
    const tsk_id_t *restrict right_sample = tree->right_sample;
    const tsk_id_t *restrict next_sample = tree->next_sample;
    tsk_size_t *restrict kc_m = kc_vecs->m;
    double *restrict kc_M = kc_vecs->M;
    tsk_id_t a, b, lo, hi, pair_index;

    for (a = left_sample[u]; a != TSK_NULL; a = next_sample[a]) {
        for (b = left_sample[v]; b != TSK_NULL; b = next_sample[b]) {
            /* Order the two sample indexes so that lo <= hi. */
            lo = (a > b) ? b : a;
            hi = (a > b) ? a : b;

            /* We spend ~40% of our time here because these accesses
             * are not in order and get very poor cache behaviour */
            pair_index = hi - lo - 1 + (-1 * lo * (lo - 2 * kc_vecs->n + 1)) / 2;
            kc_m[pair_index] = depth;
            kc_M[pair_index] = time;

            /* Stop once the last sample in v's list has been handled. */
            if (b == right_sample[v]) {
                break;
            }
        }
        /* Stop once the last sample in u's list has been handled. */
        if (a == right_sample[u]) {
            break;
        }
    }
}
/* Stack entry for the traversal in fill_kc_vectors: a node together with its
 * depth (number of edges below the root the traversal started from). */
struct kc_stack_elmt {
tsk_id_t node;
tsk_size_t depth;
};
/* Fill the KC vectors for tree t from scratch. Every root is traversed
 * depth-first: each sample node gets its per-sample entry (branch length),
 * and for each pair of sibling subtrees all cross pairs of samples are
 * assigned the depth of their common parent and that parent's time distance
 * from the root. Returns 0 on success or TSK_ERR_NO_MEMORY. */
static int
fill_kc_vectors(const tsk_tree_t *t, kc_vectors *kc_vecs)
{
    int ret = 0;
    int top;
    const tsk_treeseq_t *ts = t->tree_sequence;
    const double *times = ts->tables->nodes.time;
    struct kc_stack_elmt *stack
        = tsk_malloc(tsk_tree_get_size_bound(t) * sizeof(*stack));
    struct kc_stack_elmt current;
    tsk_id_t root, c1, c2;
    double root_to_node;

    if (stack == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    for (root = tsk_tree_get_left_root(t); root != TSK_NULL; root = t->right_sib[root]) {
        top = 0;
        stack[0].node = root;
        stack[0].depth = 0;
        while (top >= 0) {
            current = stack[top--];

            if (tsk_tree_is_sample(t, current.node)) {
                update_kc_vectors_single_sample(ts, kc_vecs, current.node,
                    tsk_tree_get_branch_length_unsafe(t, current.node));
            }
            /* Don't bother going deeper if there are no samples under this node */
            if (t->left_sample[current.node] == TSK_NULL) {
                continue;
            }
            root_to_node = times[root] - times[current.node];
            for (c1 = t->left_child[current.node]; c1 != TSK_NULL;
                 c1 = t->right_sib[c1]) {
                top++;
                stack[top].node = c1;
                stack[top].depth = current.depth + 1;
                /* Pair c1 with every sibling to its right. */
                for (c2 = t->right_sib[c1]; c2 != TSK_NULL; c2 = t->right_sib[c2]) {
                    update_kc_vectors_all_pairs(
                        t, kc_vecs, c1, c2, current.depth, root_to_node);
                }
            }
        }
    }
out:
    tsk_safe_free(stack);
    return ret;
}
/* Euclidean distance between the lambda-weighted combinations of the two
 * KC vector sets: each entry is (1 - lambda) * m + lambda * M. The length
 * of `self` (n + N) is used for both operands. */
static double
norm_kc_vectors(kc_vectors *self, kc_vectors *other, double lambda)
{
    const tsk_id_t num_entries = self->n + self->N;
    double sum_of_squares = 0;
    double a, b;
    tsk_id_t k;

    for (k = 0; k < num_entries; k++) {
        a = ((double) self->m[k] * (1 - lambda)) + (lambda * self->M[k]);
        b = ((double) other->m[k] * (1 - lambda)) + (lambda * other->M[k]);
        sum_of_squares += (a - b) * (a - b);
    }
    return sqrt(sum_of_squares);
}
/* Check that a tree is usable for the KC distance: it must have exactly one
 * root, must have been built with sample lists, and must not contain any
 * node with exactly one child. Returns 0 if all checks pass, otherwise the
 * error for the first failing check. */
static int
check_kc_distance_tree_inputs(const tsk_tree_t *self)
{
    tsk_id_t node, num_nodes, first_child;

    if (tsk_tree_get_num_roots(self) != 1) {
        return tsk_trace_error(TSK_ERR_MULTIPLE_ROOTS);
    }
    if (!tsk_tree_has_sample_lists(self)) {
        return tsk_trace_error(TSK_ERR_NO_SAMPLE_LISTS);
    }

    num_nodes = (tsk_id_t) tsk_treeseq_get_num_nodes(self->tree_sequence);
    for (node = 0; node < num_nodes; node++) {
        first_child = self->left_child[node];
        /* A node whose leftmost and rightmost child coincide is unary. */
        if (first_child != TSK_NULL && first_child == self->right_child[node]) {
            return tsk_trace_error(TSK_ERR_UNARY_NODES);
        }
    }
    return 0;
}
/* Check that two tree sequences have identical sample lists: the same
 * number of samples and the same node ID at every position. Returns 0 if
 * they agree, TSK_ERR_SAMPLE_SIZE_MISMATCH or TSK_ERR_SAMPLES_NOT_EQUAL
 * otherwise. */
static int
check_kc_distance_samples_inputs(const tsk_treeseq_t *self, const tsk_treeseq_t *other)
{
    const tsk_id_t *ours, *theirs;
    tsk_id_t k, num_samples;

    if (self->num_samples != other->num_samples) {
        return tsk_trace_error(TSK_ERR_SAMPLE_SIZE_MISMATCH);
    }

    ours = self->samples;
    theirs = other->samples;
    num_samples = (tsk_id_t) self->num_samples;
    for (k = 0; k < num_samples; k++) {
        if (ours[k] != theirs[k]) {
            return tsk_trace_error(TSK_ERR_SAMPLES_NOT_EQUAL);
        }
    }
    return 0;
}
/* Compute the KC distance between two trees for the given lambda and store
 * it in *result. The trees' tree sequences must have identical sample
 * lists, and each tree must pass check_kc_distance_tree_inputs. Returns 0
 * on success or the error code of the first failing step; *result is only
 * written on success. */
int
tsk_tree_kc_distance(
    const tsk_tree_t *self, const tsk_tree_t *other, double lambda, double *result)
{
    int ret = 0;
    int k;
    tsk_id_t n;
    const tsk_tree_t *trees[2] = { self, other };
    kc_vectors vecs[2];

    /* Zero both vector sets first so the cleanup is safe on every path. */
    tsk_memset(vecs, 0, sizeof(vecs));

    ret = check_kc_distance_samples_inputs(self->tree_sequence, other->tree_sequence);
    if (ret != 0) {
        goto out;
    }
    for (k = 0; k < 2; k++) {
        ret = check_kc_distance_tree_inputs(trees[k]);
        if (ret != 0) {
            goto out;
        }
    }

    n = (tsk_id_t) self->tree_sequence->num_samples;
    for (k = 0; k < 2; k++) {
        ret = kc_vectors_alloc(&vecs[k], n);
        if (ret != 0) {
            goto out;
        }
        ret = fill_kc_vectors(trees[k], &vecs[k]);
        if (ret != 0) {
            goto out;
        }
    }

    *result = norm_kc_vectors(&vecs[0], &vecs[1], lambda);
out:
    kc_vectors_free(&vecs[0]);
    kc_vectors_free(&vecs[1]);
    return ret;
}
/* Check that two tree sequences can be compared with the KC distance: they
 * must have the same sequence length and identical sample lists. Returns 0
 * if so, otherwise the error for the first failing check. */
static int
check_kc_distance_tree_sequence_inputs(
    const tsk_treeseq_t *self, const tsk_treeseq_t *other)
{
    if (self->tables->sequence_length != other->tables->sequence_length) {
        return tsk_trace_error(TSK_ERR_SEQUENCE_LENGTH_MISMATCH);
    }
    return check_kc_distance_samples_inputs(self, other);
}
/* Refresh the pair entries that involve `sample`. Walking from the sample
 * up to its root, each ancestor is paired, through every child subtree
 * other than the one the walk came from, with `sample`; those pairs get the
 * ancestor's depth and its time distance from root_time. */
static void
update_kc_pair_with_sample(const tsk_tree_t *self, kc_vectors *kc, tsk_id_t sample,
    tsk_size_t *depths, double root_time)
{
    const double *times = self->tree_sequence->tables->nodes.time;
    tsk_id_t came_from = sample;
    tsk_id_t ancestor = self->parent[sample];
    tsk_id_t sibling;
    tsk_size_t ancestor_depth;
    double ancestor_time;

    while (ancestor != TSK_NULL) {
        ancestor_time = root_time - times[ancestor];
        ancestor_depth = depths[ancestor];
        for (sibling = self->left_child[ancestor]; sibling != TSK_NULL;
             sibling = self->right_sib[sibling]) {
            if (sibling == came_from) {
                continue;
            }
            update_kc_vectors_all_pairs(
                self, kc, sample, sibling, ancestor_depth, ancestor_time);
        }
        came_from = ancestor;
        ancestor = self->parent[ancestor];
    }
}
/* Traverse the subtree below u, refreshing the pair entries of every sample
 * met on the way and assigning depths[child] = depths[parent] + 1 to the
 * children that are descended into. Children whose recorded depth is 0 are
 * not visited. Returns 0 on success or TSK_ERR_NO_MEMORY. */
static int
update_kc_subtree_state(
    tsk_tree_t *t, kc_vectors *kc, tsk_id_t u, tsk_size_t *depths, double root_time)
{
    int ret = 0;
    int top = 0;
    tsk_id_t node, child;
    tsk_id_t *stack = tsk_malloc(tsk_tree_get_size_bound(t) * sizeof(*stack));

    if (stack == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    stack[0] = u;
    while (top >= 0) {
        node = stack[top--];
        if (tsk_tree_is_sample(t, node)) {
            update_kc_pair_with_sample(t, kc, node, depths, root_time);
        }
        for (child = t->left_child[node]; child != TSK_NULL;
             child = t->right_sib[child]) {
            if (depths[child] == 0) {
                continue;
            }
            depths[child] = depths[node] + 1;
            stack[++top] = child;
        }
    }
out:
    tsk_safe_free(stack);
    return ret;
}
/* Bring `kc` and `depths` up to date with the tree that `tree` currently
 * represents, using the edge ranges recorded in tree->tree_pos (`out` and
 * `in`; presumably the edges removed and inserted by the most recent tree
 * transition -- confirm against the tree position code).
 *
 * `depths` has one entry per node and persists between calls. Returns 0 on
 * success or the error from update_kc_subtree_state.
 */
static int
update_kc_incremental(tsk_tree_t *tree, kc_vectors *kc, tsk_size_t *depths)
{
int ret = 0;
tsk_id_t u, v, e, j;
double root_time, time;
const double *restrict times = tree->tree_sequence->tables->nodes.time;
const tsk_id_t *restrict edges_child = tree->tree_sequence->tables->edges.child;
const tsk_id_t *restrict edges_parent = tree->tree_sequence->tables->edges.parent;
/* Work on a by-value copy of the position state. */
tsk_tree_position_t tree_pos = tree->tree_pos;
/* Update state of detached subtrees */
/* The `out` range is walked from its last entry back to its first. */
for (j = tree_pos.out.stop - 1; j >= tree_pos.out.start; j--) {
e = tree_pos.out.order[j];
u = edges_child[e];
/* Depth 0 marks the child as detached until an `in` edge reattaches it. */
depths[u] = 0;
/* If the child has no parent in the current tree it is now a root, so
 * its subtree is refreshed relative to its own root time. */
if (tree->parent[u] == TSK_NULL) {
root_time = times[tsk_tree_node_root(tree, u)];
ret = update_kc_subtree_state(tree, kc, u, depths, root_time);
if (ret != 0) {
goto out;
}
}
}
/* Propagate state change down into reattached subtrees. */
for (j = tree_pos.in.stop - 1; j >= tree_pos.in.start; j--) {
e = tree_pos.in.order[j];
u = edges_child[e];
v = edges_parent[e];
tsk_bug_assert(depths[u] == 0);
/* The reattached child sits one level below its new parent. */
depths[u] = depths[v] + 1;
root_time = times[tsk_tree_node_root(tree, u)];
ret = update_kc_subtree_state(tree, kc, u, depths, root_time);
if (ret != 0) {
goto out;
}
/* A reattached sample also needs its per-sample entry (its branch
 * length) refreshed. */
if (tsk_tree_is_sample(tree, u)) {
time = tsk_tree_get_branch_length_unsafe(tree, u);
update_kc_vectors_single_sample(tree->tree_sequence, kc, u, time);
}
}
out:
return ret;
}
/* Compute the span-weighted average KC distance between two tree sequences
 * and store it in *result.
 *
 * The two tree sequences must have the same sequence length and identical
 * sample lists, and every tree visited must pass
 * check_kc_distance_tree_inputs. Both sequences are iterated together; for
 * each maximal interval over which neither tree changes, the norm of the
 * current KC vectors is multiplied by the interval's span and accumulated.
 * The total is divided by the sequence length.
 *
 * Returns 0 on success, or the error code of the first failing step; *result
 * is only written on success.
 */
int
tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,
double lambda_, double *result)
{
int i;
tsk_id_t n;
tsk_size_t num_nodes;
double left, span, total;
const tsk_treeseq_t *treeseqs[2] = { self, other };
tsk_tree_t trees[2];
kc_vectors kcs[2];
tsk_size_t *depths[2];
int ret = 0;
/* Zero everything that the cleanup code at `out` touches, so that jumping
 * there before initialisation completes is safe. */
for (i = 0; i < 2; i++) {
tsk_memset(&trees[i], 0, sizeof(trees[i]));
tsk_memset(&kcs[i], 0, sizeof(kcs[i]));
depths[i] = NULL;
}
ret = check_kc_distance_tree_sequence_inputs(self, other);
if (ret != 0) {
goto out;
}
n = (tsk_id_t) self->num_samples;
/* Per tree sequence: a tree with sample lists, the KC vectors and a
 * zero-initialised depth per node. */
for (i = 0; i < 2; i++) {
ret = tsk_tree_init(&trees[i], treeseqs[i], TSK_SAMPLE_LISTS);
if (ret != 0) {
goto out;
}
ret = kc_vectors_alloc(&kcs[i], n);
if (ret != 0) {
goto out;
}
num_nodes = tsk_treeseq_get_num_nodes(treeseqs[i]);
depths[i] = tsk_calloc(num_nodes, sizeof(*depths[i]));
if (depths[i] == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
}
total = 0;
/* `left` is the left end of the interval not yet accounted for. */
left = 0;
/* Position trees[0] on its first tree and build its KC state. */
ret = tsk_tree_first(&trees[0]);
if (ret != TSK_TREE_OK) {
goto out;
}
ret = check_kc_distance_tree_inputs(&trees[0]);
if (ret != 0) {
goto out;
}
ret = update_kc_incremental(&trees[0], &kcs[0], depths[0]);
if (ret != 0) {
goto out;
}
/* trees[1] is driven with tsk_tree_next only; presumably the first call on a
 * freshly initialised tree moves to the first tree -- confirm. */
while ((ret = tsk_tree_next(&trees[1])) == TSK_TREE_OK) {
ret = check_kc_distance_tree_inputs(&trees[1]);
if (ret != 0) {
goto out;
}
ret = update_kc_incremental(&trees[1], &kcs[1], depths[1]);
if (ret != 0) {
goto out;
}
/* Advance trees[0] for as long as it ends before the current trees[1],
 * accounting for each sub-interval on which both trees are fixed. */
while (trees[0].interval.right < trees[1].interval.right) {
span = trees[0].interval.right - left;
total += norm_kc_vectors(&kcs[0], &kcs[1], lambda_) * span;
left = trees[0].interval.right;
ret = tsk_tree_next(&trees[0]);
tsk_bug_assert(ret == TSK_TREE_OK);
ret = check_kc_distance_tree_inputs(&trees[0]);
if (ret != 0) {
goto out;
}
ret = update_kc_incremental(&trees[0], &kcs[0], depths[0]);
if (ret != 0) {
goto out;
}
}
/* Account for the remainder of the current trees[1] interval. */
span = trees[1].interval.right - left;
left = trees[1].interval.right;
total += norm_kc_vectors(&kcs[0], &kcs[1], lambda_) * span;
}
/* A non-zero value here is whatever tsk_tree_next returned when the loop
 * stopped for a reason other than running out of trees. */
if (ret != 0) {
goto out;
}
*result = total / self->tables->sequence_length;
out:
for (i = 0; i < 2; i++) {
tsk_tree_free(&trees[i]);
kc_vectors_free(&kcs[i]);
tsk_safe_free(depths[i]);
}
return ret;
}
/*
* Divergence matrix
*/
/* Lookup tables used to answer most-recent-common-ancestor queries on a
 * single tree (see sv_tables_build_index and sv_tables_mrca_one_based).
 * All arrays use 1-based node indexes; index 0 acts as the null/virtual
 * parent. */
typedef struct {
/* Note it's a waste storing the triply linked tree here, but the code
* is written on the assumption of 1-based trees and the algorithm is
* frighteningly subtle, so it doesn't seem worth messing with it
* unless we really need to save some memory */
/* 1-based copy of the tree: parent, first child and next sibling of each
 * node, with 0 meaning "none". */
tsk_id_t *parent;
tsk_id_t *child;
tsk_id_t *sib;
/* lambda[k] is filled so that lambda[k] = 1 + lambda[k >> 1], starting from
 * lambda[0] = -1, i.e. floor(log2(k)) for k >= 1. */
tsk_id_t *lambda;
/* pi[p] is the rank at which node p is first visited by the traversal in
 * sv_tables_build_index. */
tsk_id_t *pi;
/* tau, beta and alpha are auxiliary index tables computed by
 * sv_tables_build_index and consumed by sv_tables_mrca_one_based. */
tsk_id_t *tau;
tsk_id_t *beta;
tsk_id_t *alpha;
} sv_tables_t;
/* Allocate the eight index arrays of `self`, each with room for n entries.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if any allocation fails. On
 * failure the arrays that were allocated are left in place; the caller is
 * expected to release them with sv_tables_free.
 */
static int
sv_tables_init(sv_tables_t *self, tsk_size_t n)
{
    int ret = 0;

    self->parent = tsk_malloc(n * sizeof(*self->parent));
    self->child = tsk_malloc(n * sizeof(*self->child));
    self->sib = tsk_malloc(n * sizeof(*self->sib));
    self->pi = tsk_malloc(n * sizeof(*self->pi));
    self->lambda = tsk_malloc(n * sizeof(*self->lambda));
    self->tau = tsk_malloc(n * sizeof(*self->tau));
    self->beta = tsk_malloc(n * sizeof(*self->beta));
    self->alpha = tsk_malloc(n * sizeof(*self->alpha));
    /* Every array must be checked, including pi: a NULL pi would otherwise be
     * written through by sv_tables_reset and sv_tables_build_index. */
    if (self->parent == NULL || self->child == NULL || self->sib == NULL
        || self->pi == NULL || self->lambda == NULL || self->tau == NULL
        || self->beta == NULL || self->alpha == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
out:
    return ret;
}
/* Release every array owned by `self`. Always returns 0. */
static int
sv_tables_free(sv_tables_t *self)
{
    /* The triply linked tree. */
    tsk_safe_free(self->parent);
    tsk_safe_free(self->child);
    tsk_safe_free(self->sib);
    /* The index tables. */
    tsk_safe_free(self->pi);
    tsk_safe_free(self->lambda);
    tsk_safe_free(self->tau);
    tsk_safe_free(self->beta);
    tsk_safe_free(self->alpha);
    return 0;
}
/* Zero the first tree->num_nodes + 1 entries of every array in `self`, ready
 * for the tables to be rebuilt for `tree`. */
static void
sv_tables_reset(sv_tables_t *self, tsk_tree_t *tree)
{
    const tsk_size_t num_entries = tree->num_nodes + 1;
    tsk_id_t *const columns[] = { self->parent, self->child, self->sib, self->pi,
        self->lambda, self->tau, self->beta, self->alpha };
    size_t k;

    for (k = 0; k < sizeof(columns) / sizeof(*columns); k++) {
        tsk_memset(columns[k], 0, num_entries * sizeof(*columns[k]));
    }
}
/* Copy the parent array of `tree` into the 1-based parent/child/sib
 * representation held by `self`. Node j becomes index j + 1, and a missing
 * parent (TSK_NULL) maps to index 0. Each node is pushed onto the front of
 * its parent's child list. The arrays are expected to have been zeroed
 * beforehand (see sv_tables_reset). */
static void
sv_tables_convert_tree(sv_tables_t *self, tsk_tree_t *tree)
{
    const tsk_size_t num_nodes = tree->num_nodes;
    const tsk_id_t *restrict tsk_parent = tree->parent;
    tsk_id_t *restrict child = self->child;
    tsk_id_t *restrict parent = self->parent;
    tsk_id_t *restrict sib = self->sib;
    tsk_size_t j;
    tsk_id_t node, node_parent;

    for (j = 0; j < num_nodes; j++) {
        node = (tsk_id_t) j + 1;
        node_parent = tsk_parent[j] + 1;
        /* The previous head of the child list becomes this node's sibling. */
        sib[node] = child[node_parent];
        child[node_parent] = node;
        parent[node] = node_parent;
    }
}
#define LAMBDA 0
/* Compute the pi, lambda, tau, beta and alpha tables from the 1-based
 * parent/child/sib tree already stored in `self`. Index LAMBDA (0) is the
 * null node and acts as the parent of all roots.
 *
 * Two depth-first traversals are made, each starting at child[LAMBDA]: the
 * first fills pi, lambda, tau and beta, the second fills alpha from beta and
 * the parent's alpha.
 *
 * NOTE(review): judging by the sv_ prefix and the table names this looks
 * like the Schieber-Vishkin nearest-common-ancestor preprocessing -- confirm
 * against the reference the original author used.
 */
static void
sv_tables_build_index(sv_tables_t *self)
{
const tsk_id_t *restrict child = self->child;
const tsk_id_t *restrict parent = self->parent;
const tsk_id_t *restrict sib = self->sib;
tsk_id_t *restrict lambda = self->lambda;
tsk_id_t *restrict pi = self->pi;
tsk_id_t *restrict tau = self->tau;
tsk_id_t *restrict beta = self->beta;
tsk_id_t *restrict alpha = self->alpha;
tsk_id_t a, n, p, h;
p = child[LAMBDA];
/* n counts the nodes visited so far (the running preorder rank). */
n = 0;
/* Seed so that lambda[1] = 1 + lambda[0] = 0 below. */
lambda[0] = -1;
while (p != LAMBDA) {
/* Descend along first children, numbering each node on the way down. */
while (true) {
n++;
pi[p] = n;
tau[n] = LAMBDA;
/* lambda[n] = floor(log2(n)). */
lambda[n] = 1 + lambda[n >> 1];
if (child[p] != LAMBDA) {
p = child[p];
} else {
break;
}
}
/* p is a leaf here; its beta is its own rank. */
beta[p] = n;
/* Move to the next sibling, climbing towards the root until one exists. */
while (true) {
tau[beta[p]] = parent[p];
if (sib[p] != LAMBDA) {
p = sib[p];
break;
} else {
p = parent[p];
if (p != LAMBDA) {
/* All of p's descendants have now been numbered, with n the last
 * rank handed out; beta[p] is derived from n and pi[p]. */
h = lambda[n & -pi[p]];
beta[p] = ((n >> h) | 1) << h;
} else {
break;
}
}
}
}
/* Begin the second traversal */
/* lambda[0] now stores floor(log2(n)) for the final node count. */
lambda[0] = lambda[n];
pi[LAMBDA] = 0;
beta[LAMBDA] = 0;
alpha[LAMBDA] = 0;
p = child[LAMBDA];
while (p != LAMBDA) {
/* Descend along first children; each node's alpha is its parent's alpha
 * with the lowest set bit of its own beta added. */
while (true) {
a = alpha[parent[p]] | (beta[p] & -beta[p]);
alpha[p] = a;
if (child[p] != LAMBDA) {
p = child[p];
} else {
break;
}
}
/* Move to the next sibling, climbing towards the root until one exists. */
while (true) {
if (sib[p] != LAMBDA) {
p = sib[p];
break;
} else {
p = parent[p];
if (p == LAMBDA) {
break;
}
}
}
}
}
/* Rebuild all lookup tables in `self` for the current state of `tree`. */
static void
sv_tables_build(sv_tables_t *self, tsk_tree_t *tree)
{
    /* Start from zeroed tables: the conversion step relies on empty child
     * lists. */
    sv_tables_reset(self, tree);
    /* Load the tree in 1-based form ... */
    sv_tables_convert_tree(self, tree);
    /* ... and derive the query index from it. */
    sv_tables_build_index(self);
}
/* Answer a most-recent-common-ancestor query for the 1-based node indexes x
 * and y using the tables built by sv_tables_build_index. The result is also
 * a 1-based index.
 *
 * NOTE(review): for nodes in different subtrees of a multi-root tree the
 * result is presumably 0 (the null node); the caller's slow path for
 * "w == TSK_NULL" after subtracting 1 suggests so -- confirm.
 */
static tsk_id_t
sv_tables_mrca_one_based(const sv_tables_t *self, tsk_id_t x, tsk_id_t y)
{
const tsk_id_t *restrict lambda = self->lambda;
const tsk_id_t *restrict pi = self->pi;
const tsk_id_t *restrict tau = self->tau;
const tsk_id_t *restrict beta = self->beta;
const tsk_id_t *restrict alpha = self->alpha;
tsk_id_t h, k, xhat, yhat, ell, j, z;
/* h: a bit position derived from the two beta values, with the smaller
 * beta always used as the negated operand. */
if (beta[x] <= beta[y]) {
h = lambda[beta[y] & -beta[x]];
} else {
h = lambda[beta[x] & -beta[y]];
}
/* Keep only the bits at position h and above that both alpha values share,
 * then take the lowest of them as the new h. */
k = alpha[x] & alpha[y] & -(1 << h);
h = lambda[k & -k];
/* j is beta[x] with bit h forced on and all lower bits cleared. */
j = ((beta[x] >> h) | 1) << h;
/* xhat: x itself if its beta already equals j, otherwise a node looked up
 * through tau using the bits of alpha[x] below position h. */
if (j == beta[x]) {
xhat = x;
} else {
ell = lambda[alpha[x] & ((1 << h) - 1)];
xhat = tau[((beta[x] >> ell) | 1) << ell];
}
/* yhat: the same construction for y. */
if (j == beta[y]) {
yhat = y;
} else {
ell = lambda[alpha[y] & ((1 << h) - 1)];
yhat = tau[((beta[y] >> ell) | 1) << ell];
}
/* The answer is whichever candidate was visited first (smaller pi). */
if (pi[xhat] <= pi[yhat]) {
z = xhat;
} else {
z = yhat;
}
return z;
}
/* MRCA query using ordinary (0-based) node IDs: shift the arguments into the
 * 1-based index space of the tables and shift the answer back. */
static tsk_id_t
sv_tables_mrca(const sv_tables_t *self, tsk_id_t x, tsk_id_t y)
{
    const tsk_id_t one_based_mrca = sv_tables_mrca_one_based(self, x + 1, y + 1);

    return one_based_mrca - 1;
}
/* Branch-mode divergence matrix.
 *
 * For each window and each tree overlapping it, every pair of distinct nodes
 * (u, v), with u from sample set sj and v from sample set sk >= sj, adds
 * (time from u up to their MRCA + time from v up to their MRCA) * span to
 * D[sj, sk], where span is the length of the overlap between tree and
 * window. Only the upper triangle (including the diagonal) of each N x N
 * window matrix is written; `result` is accumulated into, not cleared here.
 *
 * sample_sets is the concatenation of the sample sets, whose lengths are
 * given by sample_set_sizes. `windows` holds num_windows + 1 breakpoints.
 *
 * Returns 0 on success or an error code, including
 * TSK_ERR_TIME_UNCALIBRATED when the tree sequence is time-uncalibrated and
 * TSK_STAT_ALLOW_TIME_UNCALIBRATED is not set in `options`.
 */
static int
tsk_treeseq_divergence_matrix_branch(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *restrict sample_set_sizes,
const tsk_id_t *restrict sample_sets, tsk_size_t num_windows,
const double *restrict windows, tsk_flags_t options, double *restrict result)
{
int ret = 0;
tsk_tree_t tree;
const double *restrict nodes_time = self->tables->nodes.time;
const tsk_size_t N = num_sample_sets;
tsk_size_t i, j, k, offset, sj, sk;
tsk_id_t u, v, w, u_root, v_root;
double tu, tv, d, span, left, right, span_left, span_right;
double *restrict D;
sv_tables_t sv;
tsk_size_t *ss_offsets = tsk_malloc((num_sample_sets + 1) * sizeof(*ss_offsets));
/* Zero the table pointers so that sv_tables_free at `out` is safe even if
 * sv_tables_init is never reached. */
memset(&sv, 0, sizeof(sv));
ret = tsk_tree_init(&tree, self, 0);
if (ret != 0) {
goto out;
}
/* One extra entry for the 1-based indexing used by the tables. */
ret = sv_tables_init(&sv, self->tables->nodes.num_rows + 1);
if (ret != 0) {
goto out;
}
if (ss_offsets == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
if (self->time_uncalibrated && !(options & TSK_STAT_ALLOW_TIME_UNCALIBRATED)) {
ret = tsk_trace_error(TSK_ERR_TIME_UNCALIBRATED);
goto out;
}
/* ss_offsets[s] .. ss_offsets[s + 1] delimits sample set s within
 * sample_sets. */
ss_offsets[0] = 0;
offset = 0;
for (j = 0; j < N; j++) {
offset += sample_set_sizes[j];
ss_offsets[j + 1] = offset;
}
for (i = 0; i < num_windows; i++) {
left = windows[i];
right = windows[i + 1];
D = result + i * N * N;
ret = tsk_tree_seek(&tree, left, 0);
if (ret != 0) {
goto out;
}
/* Visit every tree that starts before the window's right edge; an index
 * of -1 means the tree iterator has run off the end. */
while (tree.interval.left < right && tree.index != -1) {
/* Clip the tree's interval to the window. */
span_left = TSK_MAX(tree.interval.left, left);
span_right = TSK_MIN(tree.interval.right, right);
span = span_right - span_left;
/* The MRCA index must be rebuilt for every tree. */
sv_tables_build(&sv, &tree);
for (sj = 0; sj < N; sj++) {
for (j = ss_offsets[sj]; j < ss_offsets[sj + 1]; j++) {
u = sample_sets[j];
for (sk = sj; sk < N; sk++) {
for (k = ss_offsets[sk]; k < ss_offsets[sk + 1]; k++) {
v = sample_sets[k];
if (u == v) {
/* This case contributes zero to divergence, so
* short-circuit to save time.
* TODO is there a better way to do this? */
continue;
}
w = sv_tables_mrca(&sv, u, v);
if (w != TSK_NULL) {
u_root = w;
v_root = w;
} else {
/* Slow path - only happens for nodes in disconnected
* subtrees in a tree with multiple roots */
u_root = tsk_tree_get_node_root(&tree, u);
v_root = tsk_tree_get_node_root(&tree, v);
}
/* Time from each node up to the MRCA (or to its own root when
 * the two nodes are disconnected). */
tu = nodes_time[u_root] - nodes_time[u];
tv = nodes_time[v_root] - nodes_time[v];
d = (tu + tv) * span;
D[sj * N + sk] += d;
}
}
}
}
ret = tsk_tree_next(&tree);
if (ret < 0) {
goto out;
}
}
}
/* tsk_tree_next may have left a non-negative, non-error value in ret. */
ret = 0;
out:
tsk_tree_free(&tree);
sv_tables_free(&sv);
tsk_safe_free(ss_offsets);
return ret;
}
// FIXME see #2817
// Just including this here for now as it's the simplest option. Everything
// will probably move to stats.[c,h] in the near future though, and it
// can pull in ``genotypes.h`` without issues.
#include
/* Add the contribution of one decoded variant to the divergence matrix D.
 *
 * A holds, grouped by allele, the sample set index of each sample;
 * offsets[a] .. offsets[a + 1] delimits the group for allele a. Every pair
 * of entries taken from two different allele groups adds 1 to the
 * upper-triangle cell for their two sample sets, or 2 to the diagonal cell
 * when both belong to the same sample set. */
static void
update_site_divergence(const tsk_variant_t *var, const tsk_id_t *restrict A,
    const tsk_size_t *restrict offsets, const tsk_size_t num_sample_sets, double *D)
{
    const tsk_size_t num_alleles = var->num_alleles;
    tsk_size_t a, b, j, k;
    tsk_id_t row, col;

    for (a = 0; a < num_alleles; a++) {
        for (b = a + 1; b < num_alleles; b++) {
            for (j = offsets[a]; j < offsets[a + 1]; j++) {
                for (k = offsets[b]; k < offsets[b + 1]; k++) {
                    /* Only increment the upper triangle to (hopefully) improve
                     * memory access patterns */
                    if (A[j] > A[k]) {
                        row = A[k];
                        col = A[j];
                    } else {
                        row = A[j];
                        col = A[k];
                    }
                    D[row * (tsk_id_t) num_sample_sets + col]
                        += (row == col) ? 2.0 : 1.0;
                }
            }
        }
    }
}
/* Group the sample indexes of a decoded variant by allele. On return, A
 * lists, allele by allele, the indexes j with genotypes[j] == allele, and
 * offsets[a] .. offsets[a + 1] delimits the group for allele a. offsets
 * must have room for num_alleles + 1 entries. Genotypes that match no
 * allele index are left out of A. */
static void
group_alleles(const tsk_variant_t *var, tsk_id_t *restrict A, tsk_size_t *offsets)
{
    const tsk_size_t num_samples = var->num_samples;
    const tsk_id_t num_alleles = (tsk_id_t) var->num_alleles;
    const int32_t *restrict genotypes = var->genotypes;
    tsk_size_t filled = 0;
    tsk_size_t j;
    tsk_id_t allele;

    offsets[0] = 0;
    for (allele = 0; allele < num_alleles; allele++) {
        for (j = 0; j < num_samples; j++) {
            if (genotypes[j] == allele) {
                A[filled] = (tsk_id_t) j;
                filled++;
            }
        }
        /* Everything written so far belongs to alleles 0..allele. */
        offsets[allele + 1] = filled;
    }
}
/* Rewrite the first num_samples entries of A in place: each entry is taken
 * as an index into `samples`, and is replaced by the sample set index of
 * that node as given by sample_set_index_map. Both the node ID and its
 * sample set index are asserted to be non-negative. */
static void
remap_to_sample_sets(const tsk_size_t num_samples, const tsk_id_t *restrict samples,
    const tsk_id_t *restrict sample_set_index_map, tsk_id_t *restrict A)
{
    tsk_size_t k;
    tsk_id_t node, set_index;

    for (k = 0; k < num_samples; k++) {
        node = samples[A[k]];
        tsk_bug_assert(node >= 0);
        set_index = sample_set_index_map[node];
        tsk_bug_assert(set_index >= 0);
        A[k] = set_index;
    }
}
/* Site-mode divergence matrix.
 *
 * Sites are visited in table order. Each site falling inside a window is
 * decoded for the given samples, the samples are grouped by allele and
 * mapped to their sample set, and the site's contribution is added to that
 * window's num_sample_sets x num_sample_sets matrix in `result`. `windows`
 * holds num_windows + 1 breakpoints; sites to the left of windows[0] are
 * skipped. `result` is accumulated into, not cleared here.
 *
 * Returns 0 on success or the error code of the first failing step. */
static int
tsk_treeseq_divergence_matrix_site(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_id_t *restrict sample_set_index_map, const tsk_size_t num_samples,
    const tsk_id_t *restrict samples, tsk_size_t num_windows,
    const double *restrict windows, tsk_flags_t TSK_UNUSED(options),
    double *restrict result)
{
    int ret = 0;
    const tsk_id_t num_sites = (tsk_id_t) self->tables->sites.num_rows;
    const double *restrict sites_position = self->tables->sites.position;
    tsk_id_t *A = tsk_malloc(num_samples * sizeof(*A));
    /* The allele offsets buffer is created at the first variant and regrown
     * whenever a variant has more alleles than it can hold. */
    tsk_size_t *allele_offsets = NULL;
    tsk_size_t offsets_capacity = 0;
    tsk_size_t window;
    tsk_id_t site = 0;
    double *restrict D;
    tsk_variant_t variant;

    /* FIXME it's not clear that using TSK_ISOLATED_NOT_MISSING is
     * correct here */
    ret = tsk_variant_init(
        &variant, self, samples, num_samples, NULL, TSK_ISOLATED_NOT_MISSING);
    if (ret != 0) {
        goto out;
    }
    if (A == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Skip the sites that lie before the first window. */
    while (site < num_sites && sites_position[site] < windows[0]) {
        site++;
    }

    for (window = 0; window < num_windows; window++) {
        D = result + window * num_sample_sets * num_sample_sets;
        if (site < num_sites) {
            tsk_bug_assert(sites_position[site] >= windows[window]);
        }
        for (; site < num_sites && sites_position[site] < windows[window + 1];
             site++) {
            ret = tsk_variant_decode(&variant, site, 0);
            if (ret != 0) {
                goto out;
            }
            if (variant.num_alleles > offsets_capacity) {
                /* could do some kind of doubling here, but there's no
                 * point - just keep it simple for testing. */
                offsets_capacity = variant.num_alleles;
                tsk_safe_free(allele_offsets);
                allele_offsets
                    = tsk_malloc((offsets_capacity + 1) * sizeof(*allele_offsets));
                if (allele_offsets == NULL) {
                    ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
                    goto out;
                }
            }
            group_alleles(&variant, A, allele_offsets);
            remap_to_sample_sets(num_samples, samples, sample_set_index_map, A);
            update_site_divergence(&variant, A, allele_offsets, num_sample_sets, D);
        }
    }
    ret = 0;
out:
    tsk_variant_free(&variant);
    tsk_safe_free(A);
    tsk_safe_free(allele_offsets);
    return ret;
}
/* Return the mapping from node IDs to the index of the sample set
 * they belong to, or TSK_NULL if none. Error if a node is in more than one
 * set.
 *
 * node_index_map must have room for one entry per node. sample_sets is the
 * concatenation of the sets, whose lengths are given by sample_set_sizes.
 * On success the total number of listed samples is stored in
 * *ret_total_samples and 0 is returned. Fails with
 * TSK_ERR_NODE_OUT_OF_BOUNDS, TSK_ERR_BAD_SAMPLES (node is not flagged as a
 * sample) or TSK_ERR_DUPLICATE_SAMPLE.
 */
static int
get_sample_set_index_map(const tsk_treeseq_t *self, const tsk_size_t num_sample_sets,
    const tsk_size_t *restrict sample_set_sizes, const tsk_id_t *restrict sample_sets,
    tsk_size_t *ret_total_samples, tsk_id_t *restrict node_index_map)
{
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const tsk_flags_t *restrict node_flags = self->tables->nodes.flags;
    tsk_size_t total_samples = 0;
    tsk_size_t cursor = 0;
    tsk_size_t set, k, node_index;
    tsk_id_t node;

    /* Start with every node unassigned. */
    for (node_index = 0; node_index < num_nodes; node_index++) {
        node_index_map[node_index] = TSK_NULL;
    }

    for (set = 0; set < num_sample_sets; set++) {
        total_samples += sample_set_sizes[set];
        for (k = 0; k < sample_set_sizes[set]; k++) {
            node = sample_sets[cursor];
            cursor++;
            if (node < 0 || node >= (tsk_id_t) num_nodes) {
                return tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            }
            /* Note: we require nodes to be samples because we have to think
             * about how to normalise by the length of genome that the node
             * is 'in' the tree for each window otherwise. */
            if (!(node_flags[node] & TSK_NODE_IS_SAMPLE)) {
                return tsk_trace_error(TSK_ERR_BAD_SAMPLES);
            }
            if (node_index_map[node] != TSK_NULL) {
                return tsk_trace_error(TSK_ERR_DUPLICATE_SAMPLE);
            }
            node_index_map[node] = (tsk_id_t) set;
        }
    }
    *ret_total_samples = total_samples;
    return 0;
}
/* Finish each n x n window matrix in `result`, of which only the upper
 * triangle has been accumulated: divide every cell by the number of sample
 * pairs it covers and mirror the upper triangle into the lower one.
 *
 * Diagonal cells are divided by size * (size - 1), and left untouched when
 * that product is zero. Off-diagonal cells are divided by
 * size_j * size_k. */
static void
fill_lower_triangle_count_normalise(const tsk_size_t num_windows, const tsk_size_t n,
    const tsk_size_t *set_sizes, double *restrict result)
{
    tsk_size_t window, row, col, upper, lower;
    double pair_count;
    double *restrict D;

    /* TODO there's probably a better striding pattern that could be used here */
    for (window = 0; window < num_windows; window++) {
        D = result + window * n * n;
        for (row = 0; row < n; row++) {
            pair_count = (double) set_sizes[row] * (double) (set_sizes[row] - 1);
            if (pair_count != 0) {
                D[row * n + row] /= pair_count;
            }
            for (col = row + 1; col < n; col++) {
                upper = row * n + col;
                lower = col * n + row;
                pair_count = (double) set_sizes[row] * (double) set_sizes[col];
                D[upper] /= pair_count;
                D[lower] = D[upper];
            }
        }
    }
}
/* Compute the divergence matrix between sample sets, one N x N matrix per
 * window, writing num_windows * N * N doubles to `result`.
 *
 * - options selects the mode: TSK_STAT_SITE (the default when no mode is
 *   given) or TSK_STAT_BRANCH. TSK_STAT_NODE, more than one mode, and
 *   TSK_STAT_POLARISED are rejected. TSK_STAT_SPAN_NORMALISE additionally
 *   divides each window by its span.
 * - windows: num_windows + 1 breakpoints, or NULL for a single window
 *   covering the whole sequence (num_windows is then ignored).
 * - sample_sets_in: concatenated sample sets, or NULL to use the tree
 *   sequence's samples.
 * - sample_set_sizes_in: length of each set, or NULL to treat every listed
 *   node as its own set of size 1. When both sample arguments are NULL, N is
 *   the number of samples and num_sample_sets_in is ignored.
 *
 * Returns 0 on success or an error code.
 */
int
tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_sample_sets_in,
    const tsk_size_t *sample_set_sizes_in, const tsk_id_t *sample_sets_in,
    tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result)
{
    int ret = 0;
    tsk_size_t N, total_samples;
    const tsk_size_t *sample_set_sizes;
    const tsk_id_t *sample_sets;
    tsk_size_t *tmp_sample_set_sizes = NULL;
    const double default_windows[] = { 0, self->tables->sequence_length };
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;
    bool stat_site = !!(options & TSK_STAT_SITE);
    bool stat_branch = !!(options & TSK_STAT_BRANCH);
    bool stat_node = !!(options & TSK_STAT_NODE);
    tsk_id_t *sample_set_index_map
        = tsk_malloc(num_nodes * sizeof(*sample_set_index_map));
    tsk_size_t j;

    /* get_sample_set_index_map writes one entry per node into this buffer, so
     * a failed allocation must be caught before it is used. */
    if (sample_set_index_map == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (stat_node) {
        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
        goto out;
    }
    /* If no mode is specified, we default to site mode */
    if (!(stat_site || stat_branch)) {
        stat_site = true;
    }
    /* It's an error to specify more than one mode */
    if (stat_site + stat_branch > 1) {
        ret = tsk_trace_error(TSK_ERR_MULTIPLE_STAT_MODES);
        goto out;
    }
    if (options & TSK_STAT_POLARISED) {
        ret = tsk_trace_error(TSK_ERR_STAT_POLARISED_UNSUPPORTED);
        goto out;
    }

    if (windows == NULL) {
        num_windows = 1;
        windows = default_windows;
    } else {
        ret = tsk_treeseq_check_windows(self, num_windows, windows, 0);
        if (ret != 0) {
            goto out;
        }
    }

    /* If sample_sets is NULL, use self->samples and ignore input
     * num_sample_sets */
    sample_sets = sample_sets_in;
    N = num_sample_sets_in;
    if (sample_sets_in == NULL) {
        sample_sets = self->samples;
        if (sample_set_sizes_in == NULL) {
            N = self->num_samples;
        }
    }
    sample_set_sizes = sample_set_sizes_in;
    /* If sample_set_sizes is NULL, assume N sets of size 1 */
    if (sample_set_sizes_in == NULL) {
        tmp_sample_set_sizes = tsk_malloc(N * sizeof(*tmp_sample_set_sizes));
        if (tmp_sample_set_sizes == NULL) {
            ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
            goto out;
        }
        for (j = 0; j < N; j++) {
            tmp_sample_set_sizes[j] = 1;
        }
        sample_set_sizes = tmp_sample_set_sizes;
    }

    /* Validate the sample sets and build the node -> sample set index map. */
    ret = get_sample_set_index_map(
        self, N, sample_set_sizes, sample_sets, &total_samples, sample_set_index_map);
    if (ret != 0) {
        goto out;
    }

    /* Both mode implementations accumulate into result, so clear it first. */
    tsk_memset(result, 0, num_windows * N * N * sizeof(*result));

    if (stat_branch) {
        ret = tsk_treeseq_divergence_matrix_branch(self, N, sample_set_sizes,
            sample_sets, num_windows, windows, options, result);
    } else {
        tsk_bug_assert(stat_site);
        ret = tsk_treeseq_divergence_matrix_site(self, N, sample_set_index_map,
            total_samples, sample_sets, num_windows, windows, options, result);
    }
    if (ret != 0) {
        goto out;
    }

    /* Only the upper triangle has been filled in; normalise by the number of
     * pairs and mirror it. */
    fill_lower_triangle_count_normalise(num_windows, N, sample_set_sizes, result);

    if (options & TSK_STAT_SPAN_NORMALISE) {
        span_normalise(num_windows, windows, N * N, result);
    }
out:
    tsk_safe_free(sample_set_index_map);
    tsk_safe_free(tmp_sample_set_sizes);
    return ret;
}
/* ======================================================== *
* Extend haplotypes
* ======================================================== */
/* Node of a singly linked list of edge IDs. */
typedef struct _edge_list_t {
/* ID of the edge this entry refers to. */
tsk_id_t edge;
// the `extended` flag records whether we have decided to extend
// this entry to the current tree
int extended;
/* Next entry in the list, or NULL at the tail. */
struct _edge_list_t *next;
} edge_list_t;
/* Debugging aid: write one line per entry of the list starting at *head to
 * `out`, followed by the number of entries. When `edges` is non-NULL and the
 * entry holds a non-negative edge ID, the edge's child, parent and interval
 * are printed as well; otherwise "(null)" is printed in their place. */
static void
edge_list_print(edge_list_t **head, tsk_edge_table_t *edges, FILE *out)
{
    edge_list_t *entry = *head;
    int count = 0;

    fprintf(out, "Edge list:\n");
    while (entry != NULL) {
        fprintf(out, " %d: %d (%d); ", count, (int) entry->edge, entry->extended);
        if (edges == NULL || entry->edge < 0) {
            fprintf(out, "(null)");
        } else {
            fprintf(out, "%d->%d on [%.1f, %.1f)", (int) edges->child[entry->edge],
                (int) edges->parent[entry->edge], edges->left[entry->edge],
                edges->right[entry->edge]);
        }
        fprintf(out, "\n");
        count++;
        entry = entry->next;
    }
    fprintf(out, "length = %d\n", count);
}
/* Fill in the caller-provided entry `x` with the given edge ID and extension
 * marker and link it at the end of the list described by *head and *tail.
 * An empty list is signalled by *tail being NULL, in which case `x` also
 * becomes the head. */
static void
edge_list_append_entry(
    edge_list_t **head, edge_list_t **tail, edge_list_t *x, tsk_id_t edge, int extended)
{
    edge_list_t *old_tail = *tail;

    x->next = NULL;
    x->extended = extended;
    x->edge = edge;

    if (old_tail != NULL) {
        old_tail->next = x;
    } else {
        *head = x;
    }
    *tail = x;
}
/* Unlink every entry whose extension marker is not set from the list
 * described by *head and *tail, and reset the marker of each surviving entry
 * to 0. Relative order of the survivors is preserved. If nothing survives,
 * both *head and *tail become NULL. Entries are only unlinked, not freed. */
static void
remove_unextended(edge_list_t **head, edge_list_t **tail)
{
    edge_list_t *last_kept, *cursor;

    /* Locate the first entry that survives; it becomes the new head. */
    for (last_kept = *head; last_kept != NULL; last_kept = last_kept->next) {
        if (last_kept->extended != 0) {
            break;
        }
    }
    *head = last_kept;

    if (last_kept == NULL) {
        *tail = NULL;
        return;
    }

    last_kept->extended = 0;
    /* Chain each further survivor directly behind the previous one. */
    for (cursor = last_kept->next; cursor != NULL; cursor = cursor->next) {
        if (cursor->extended > 0) {
            cursor->extended = 0;
            last_kept->next = cursor;
            last_kept = cursor;
        }
    }
    last_kept->next = NULL;
    *tail = last_kept;
}
/* Set the extension marker to 1 on the first entry of the list whose edge
 * ID equals `edge_id`. The edge must be present in the list: reaching the
 * end of the list (or an empty list) trips tsk_bug_assert. */
static void
edge_list_set_extended(edge_list_t **head, tsk_id_t edge_id)
{
    edge_list_t *entry = *head;

    tsk_bug_assert(entry != NULL);
    for (; entry->edge != edge_id; ) {
        entry = entry->next;
        tsk_bug_assert(entry != NULL);
    }
    tsk_bug_assert(entry->edge == edge_id);
    entry->extended = 1;
}
/* Reassign the node of every row in `mutations` by walking up the tree that
 * covers the mutation's site: starting from the mutation's current node, move
 * to the parent for as long as the parent's time is not greater than the
 * mutation's time. The mutation table is modified in place.
 *
 * Mutations are consumed in table order while iterating over the trees of
 * `self` from left to right. Returns 0 on success,
 * TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME if a mutation has an unknown
 * time, or any error reported by the tree iteration. */
static int
tsk_treeseq_slide_mutation_nodes_up(
    const tsk_treeseq_t *self, tsk_mutation_table_t *mutations)
{
    int ret = 0;
    tsk_tree_t tree;
    tsk_id_t mut = 0;
    tsk_id_t node, parent;
    double mut_time;
    const double *nodes_time = self->tables->nodes.time;
    const double *sites_position = self->tables->sites.position;
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;

    ret = tsk_tree_init(&tree, self, TSK_NO_SAMPLE_COUNTS);
    if (ret != 0) {
        goto out;
    }

    ret = tsk_tree_first(&tree);
    while (ret == TSK_TREE_OK) {
        /* Handle all remaining mutations whose site lies left of this
         * tree's right boundary. */
        for (; mut < (tsk_id_t) mutations->num_rows
               && sites_position[mutations->site[mut]] < tree.interval.right;
             mut++) {
            mut_time = mutations->time[mut];
            if (tsk_is_unknown_time(mut_time)) {
                ret = tsk_trace_error(TSK_ERR_DISALLOWED_UNKNOWN_MUTATION_TIME);
                goto out;
            }
            node = mutations->node[mut];
            tsk_bug_assert(node < (tsk_id_t) num_nodes);
            for (parent = tree.parent[node];
                 parent != TSK_NULL && nodes_time[parent] <= mut_time;
                 parent = tree.parent[node]) {
                node = parent;
            }
            tsk_bug_assert(nodes_time[node] <= mut_time);
            mutations->node[mut] = node;
        }
        ret = tsk_tree_next(&tree);
    }
    /* Any value other than TSK_TREE_OK ends the loop; 0 means the trees were
     * exhausted, anything else is passed on to the caller as is. */
out:
    tsk_tree_free(&tree);
    return ret;
}
/* Working state for one pass of haplotype extension over a tree sequence. */
typedef struct {
/* Tree sequence being processed (not modified). */
const tsk_treeseq_t *ts;
/* Edge table that is updated in place: it receives a copy of the tree
 * sequence's edges on initialisation, and edges are then lengthened,
 * shortened or appended as paths are extended. */
tsk_edge_table_t *edges;
/* Direction of travel along the genome (TSK_DIR_FORWARD or the reverse
 * direction); also used as the step when walking the edge orderings. */
int direction;
/* Per-node counters, adjusted by one for both the child and the parent of
 * an edge whenever that edge is added to or removed from the respective
 * state (presumably the node's degree in the previous and the upcoming
 * tree -- TODO confirm). */
tsk_id_t *last_degree, *next_degree;
/* Per-node ID of the edge that has the node as its child in the respective
 * state, or TSK_NULL if there is none. */
tsk_id_t *last_nodes_edge, *next_nodes_edge;
/* Per-node parent along the edges currently held in the edges_out and
 * edges_in lists respectively; TSK_NULL for all other nodes. */
tsk_id_t *parent_out, *parent_in;
/* Per-node flag: true if the node does not have TSK_NODE_IS_SAMPLE set. */
bool *not_sample;
/* Aliases for the left/right columns of `edges`, chosen according to
 * `direction`. They must be refreshed whenever rows are added to `edges`,
 * because the columns may move. */
double *near_side, *far_side;
/* List of edges leaving at the current tree transition. */
edge_list_t *edges_out_head, *edges_out_tail;
/* List of edges entering at the current tree transition. */
edge_list_t *edges_in_head, *edges_in_tail;
/* Block allocator providing the memory for all edge list entries. */
tsk_blkalloc_t edge_list_heap;
} haplotype_extender_t;
/* Prepare `self` for a pass over `ts` in the given direction.
 *
 * `edges` must be an already initialised edge table; it is overwritten with
 * a copy of the edges of `ts` and is subsequently modified in place by the
 * extender. All per-node arrays are allocated here with one entry per node
 * of `ts`. Returns 0 on success or a negative error code; in either case
 * the caller is expected to call haplotype_extender_free(). */
static int
haplotype_extender_init(haplotype_extender_t *self, const tsk_treeseq_t *ts,
    int direction, tsk_edge_table_t *edges)
{
    int ret = 0;
    tsk_id_t node;
    const tsk_size_t num_nodes = tsk_treeseq_get_num_nodes(ts);
    const bool forward = (direction == TSK_DIR_FORWARD);

    tsk_memset(self, 0, sizeof(haplotype_extender_t));
    self->ts = ts;
    self->edges = edges;

    ret = tsk_edge_table_copy(&ts->tables->edges, self->edges, TSK_NO_INIT);
    if (ret != 0) {
        goto out;
    }

    self->direction = direction;
    /* "near" is the side we are travelling from, "far" the side we are
     * travelling towards. */
    self->near_side = forward ? self->edges->left : self->edges->right;
    self->far_side = forward ? self->edges->right : self->edges->left;

    self->edges_out_head = NULL;
    self->edges_out_tail = NULL;
    self->edges_in_head = NULL;
    self->edges_in_tail = NULL;

    ret = tsk_blkalloc_init(&self->edge_list_heap, 8192);
    if (ret != 0) {
        goto out;
    }

    /* Degree counters start at zero; everything else is filled in below. */
    self->last_degree = tsk_calloc(num_nodes, sizeof(*self->last_degree));
    self->next_degree = tsk_calloc(num_nodes, sizeof(*self->next_degree));
    self->last_nodes_edge = tsk_malloc(num_nodes * sizeof(*self->last_nodes_edge));
    self->next_nodes_edge = tsk_malloc(num_nodes * sizeof(*self->next_nodes_edge));
    self->parent_out = tsk_malloc(num_nodes * sizeof(*self->parent_out));
    self->parent_in = tsk_malloc(num_nodes * sizeof(*self->parent_in));
    self->not_sample = tsk_malloc(num_nodes * sizeof(*self->not_sample));
    if (self->last_degree == NULL || self->next_degree == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (self->last_nodes_edge == NULL || self->next_nodes_edge == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    if (self->parent_out == NULL || self->parent_in == NULL
        || self->not_sample == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Filling with 0xff bytes sets every ID to -1. */
    tsk_memset(self->parent_in, 0xff, num_nodes * sizeof(*self->parent_in));
    tsk_memset(self->parent_out, 0xff, num_nodes * sizeof(*self->parent_out));
    tsk_memset(self->next_nodes_edge, 0xff, num_nodes * sizeof(*self->next_nodes_edge));
    tsk_memset(self->last_nodes_edge, 0xff, num_nodes * sizeof(*self->last_nodes_edge));

    for (node = 0; node < (tsk_id_t) num_nodes; node++) {
        self->not_sample[node]
            = ((ts->tables->nodes.flags[node] & TSK_NODE_IS_SAMPLE) == 0);
    }
out:
    return ret;
}
/* Debugging aid: dump the extender's per-node parent arrays, per-node edge
 * arrays and both edge lists to `out` in a human-readable form. */
static void
haplotype_extender_print_state(haplotype_extender_t *self, FILE *out)
{
    const int num_nodes = (int) self->ts->tables->nodes.num_rows;
    const char *parent_titles[2] = { "parent in:\n", "parent out:\n" };
    const char *edge_titles[2] = { "last nodes edge:\n", "next nodes edge:\n" };
    const tsk_id_t *parent_maps[2];
    const tsk_id_t *edge_maps[2];
    tsk_id_t edge;
    int which, node;

    parent_maps[0] = self->parent_in;
    parent_maps[1] = self->parent_out;
    edge_maps[0] = self->last_nodes_edge;
    edge_maps[1] = self->next_nodes_edge;

    fprintf(out, "\n======= haplotype extender ===========\n");

    for (which = 0; which < 2; which++) {
        fputs(parent_titles[which], out);
        for (node = 0; node < num_nodes; node++) {
            fprintf(out, " %d: %d\n", node, (int) parent_maps[which][node]);
        }
    }

    for (which = 0; which < 2; which++) {
        fputs(edge_titles[which], out);
        for (node = 0; node < num_nodes; node++) {
            edge = edge_maps[which][node];
            fprintf(out, " %d: %d, ", node, (int) edge);
            if (edge == TSK_NULL) {
                fprintf(out, "(null);");
            } else {
                fprintf(out, "(%d->%d, %.1f-%.1f)", (int) self->edges->child[edge],
                    (int) self->edges->parent[edge], self->edges->left[edge],
                    self->edges->right[edge]);
            }
            fprintf(out, "\n");
        }
    }

    fprintf(out, "edges out:\n");
    edge_list_print(&self->edges_out_head, self->edges, out);
    fprintf(out, "edges in:\n");
    edge_list_print(&self->edges_in_head, self->edges, out);
}
/* Release everything owned by the extender: the per-node arrays and the
 * block allocator backing the edge lists. The edge table and the tree
 * sequence are not touched. Always returns 0. */
static int
haplotype_extender_free(haplotype_extender_t *self)
{
    /* Frees all edge list entries at once. */
    tsk_blkalloc_free(&self->edge_list_heap);

    tsk_safe_free(self->last_degree);
    tsk_safe_free(self->next_degree);

    tsk_safe_free(self->last_nodes_edge);
    tsk_safe_free(self->next_nodes_edge);

    tsk_safe_free(self->parent_out);
    tsk_safe_free(self->parent_in);

    tsk_safe_free(self->not_sample);

    return 0;
}
/* Move the extender's bookkeeping on to the tree transition described by
 * `tree_pos`.
 *
 * First the effects of the previous transition are folded into the "last"
 * state and entries that were not extended are dropped from both edge
 * lists. Then the edges leaving and entering at the new transition are
 * appended to edges_out and edges_in, and parent_out/parent_in and the
 * "next" state are set up from the resulting lists.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY if a list entry cannot be
 * allocated. */
static int
haplotype_extender_next_tree(haplotype_extender_t *self, tsk_tree_position_t *tree_pos)
{
int ret = 0;
tsk_id_t tj, e;
edge_list_t *ex_out, *ex_in;
edge_list_t *new_ex;
const tsk_id_t *edges_child = self->edges->child;
const tsk_id_t *edges_parent = self->edges->parent;
/* Clean up edges_out from the previous transition. */
for (ex_out = self->edges_out_head; ex_out != NULL; ex_out = ex_out->next) {
e = ex_out->edge;
self->parent_out[edges_child[e]] = TSK_NULL;
// note we only adjust near_side of edges_in, not edges_out,
// so no need to check for zero-length edges
if (ex_out->extended > 1) {
// this is needed to catch newly-created edges
self->last_nodes_edge[edges_child[e]] = e;
self->last_degree[edges_child[e]] += 1;
self->last_degree[edges_parent[e]] += 1;
} else if (ex_out->extended == 0) {
/* The edge really did end: remove it from the "last" state. */
self->last_nodes_edge[edges_child[e]] = TSK_NULL;
self->last_degree[edges_child[e]] -= 1;
self->last_degree[edges_parent[e]] -= 1;
}
/* Entries with extended == 1 stay in the "last" state unchanged. */
}
remove_unextended(&self->edges_out_head, &self->edges_out_tail);
/* Clean up edges_in from the previous transition. */
for (ex_in = self->edges_in_head; ex_in != NULL; ex_in = ex_in->next) {
e = ex_in->edge;
self->parent_in[edges_child[e]] = TSK_NULL;
/* An edge that was not postponed and still has nonzero length has now
 * entered, so it becomes part of the "last" state. */
if (ex_in->extended == 0 && self->near_side[e] != self->far_side[e]) {
self->last_nodes_edge[edges_child[e]] = e;
self->last_degree[edges_child[e]] += 1;
self->last_degree[edges_parent[e]] += 1;
}
}
remove_unextended(&self->edges_in_head, &self->edges_in_tail);
// done cleanup from last tree transition;
// now we set the state up for this tree transition
for (tj = tree_pos->out.start; tj != tree_pos->out.stop; tj += self->direction) {
e = tree_pos->out.order[tj];
/* Edges that have been shrunk to zero length are ignored. */
if (self->near_side[e] != self->far_side[e]) {
new_ex = tsk_blkalloc_get(&self->edge_list_heap, sizeof(*new_ex));
if (new_ex == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
edge_list_append_entry(
&self->edges_out_head, &self->edges_out_tail, new_ex, e, 0);
}
}
/* Every edge in edges_out (carried over or new) is removed from the
 * "next" state. */
for (ex_out = self->edges_out_head; ex_out != NULL; ex_out = ex_out->next) {
e = ex_out->edge;
self->parent_out[edges_child[e]] = edges_parent[e];
self->next_nodes_edge[edges_child[e]] = TSK_NULL;
self->next_degree[edges_child[e]] -= 1;
self->next_degree[edges_parent[e]] -= 1;
}
for (tj = tree_pos->in.start; tj != tree_pos->in.stop; tj += self->direction) {
e = tree_pos->in.order[tj];
// add edge to pending_in
new_ex = tsk_blkalloc_get(&self->edge_list_heap, sizeof(*new_ex));
if (new_ex == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
edge_list_append_entry(&self->edges_in_head, &self->edges_in_tail, new_ex, e, 0);
}
/* Every edge in edges_in (carried over or new) is added to the "next"
 * state. */
for (ex_in = self->edges_in_head; ex_in != NULL; ex_in = ex_in->next) {
e = ex_in->edge;
self->parent_in[edges_child[e]] = edges_parent[e];
self->next_nodes_edge[edges_child[e]] = e;
self->next_degree[edges_child[e]] += 1;
self->next_degree[edges_parent[e]] += 1;
}
out:
return ret;
}
/* Make `new_parent` the parent of `child` in the "next" state over the
 * interval [left, right).
 *
 * Nothing happens if the edge recorded for `child` in next_nodes_edge
 * already has `new_parent` as its parent. Otherwise, if the matching edge is
 * among the edges leaving (parent_out), that edge is lengthened and marked
 * as extended; if not, a new row is appended to the edge table and to
 * edges_out. Any edge that previously attached `child` in the "next" state
 * and is in edges_in is shortened accordingly.
 *
 * Returns 0 on success, or a negative error code from adding the edge row
 * or from a failed list entry allocation. */
static int
haplotype_extender_add_or_extend_edge(haplotype_extender_t *self, tsk_id_t new_parent,
tsk_id_t child, double left, double right)
{
int ret = 0;
double there;
tsk_id_t old_edge, e_out, old_parent;
edge_list_t *ex_in;
edge_list_t *new_ex = NULL;
tsk_id_t e_in;
/* `there` is the end of the interval in the direction of travel. */
there = (self->direction == TSK_DIR_FORWARD) ? right : left;
old_edge = self->next_nodes_edge[child];
if (old_edge != TSK_NULL) {
old_parent = self->edges->parent[old_edge];
} else {
old_parent = TSK_NULL;
}
if (new_parent != old_parent) {
if (self->parent_out[child] == new_parent) {
// if our new edge is in edges_out, it should be extended
e_out = self->last_nodes_edge[child];
self->far_side[e_out] = there;
edge_list_set_extended(&self->edges_out_head, e_out);
} else {
e_out = tsk_edge_table_add_row(
self->edges, left, right, new_parent, child, NULL, 0);
if (e_out < 0) {
ret = (int) e_out;
goto out;
}
/* pointers to left/right might have changed! */
if (self->direction == TSK_DIR_FORWARD) {
self->near_side = self->edges->left;
self->far_side = self->edges->right;
} else {
self->near_side = self->edges->right;
self->far_side = self->edges->left;
}
new_ex = tsk_blkalloc_get(&self->edge_list_heap, sizeof(*new_ex));
if (new_ex == NULL) {
ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
goto out;
}
/* The marker value 2 identifies a newly created edge. */
edge_list_append_entry(
&self->edges_out_head, &self->edges_out_tail, new_ex, e_out, 2);
}
/* Record the edge in the "next" state and take `child` out of the
 * set of nodes with a pending outgoing edge. */
self->next_nodes_edge[child] = e_out;
self->next_degree[child] += 1;
self->next_degree[new_parent] += 1;
self->parent_out[child] = TSK_NULL;
if (old_edge != TSK_NULL) {
for (ex_in = self->edges_in_head; ex_in != NULL; ex_in = ex_in->next) {
e_in = ex_in->edge;
if (e_in == old_edge) {
/* Postpone the start of the replaced edge to `there`; if that
 * leaves it with nonzero length, keep it in edges_in. */
self->near_side[e_in] = there;
if (self->far_side[e_in] != there) {
ex_in->extended = 1;
}
self->next_degree[child] -= 1;
self->next_degree[self->parent_in[child]] -= 1;
self->parent_in[child] = TSK_NULL;
}
}
}
}
out:
return ret;
}
/* Decide whether the paths above node `c` given by parent_in and parent_out
 * can be merged, and at what cost.
 *
 * Starting at `c`, the function repeatedly steps to whichever of the two
 * candidate parents is younger, provided that the candidate is not a sample
 * and is absent from the other tree (degree zero there). The result is the
 * number of steps that do not coincide with an existing parent_in or
 * parent_out link, i.e. the number of new edges needed. INFINITY is
 * returned if no step was taken along the parent_out path, or if the two
 * paths do not end at the same non-null node. The extender is not
 * modified. */
static float
haplotype_extender_mergeable(haplotype_extender_t *self, tsk_id_t c)
{
    const double *node_times = self->ts->tables->nodes.time;
    tsk_id_t up_out = self->parent_out[c];
    tsk_id_t up_in = self->parent_in[c];
    double time_out = (up_out == TSK_NULL) ? INFINITY : node_times[up_out];
    double time_in = (up_in == TSK_NULL) ? INFINITY : node_times[up_in];
    tsk_id_t below = c;
    float cost = 0; /* float so that INFINITY can be returned */
    int out_steps = 0;
    bool finished = false;

    while (!finished) {
        if (up_in != TSK_NULL && self->last_degree[up_in] == 0
            && self->not_sample[up_in] && time_in < time_out) {
            /* Step along the incoming path. */
            if (self->parent_in[below] != up_in) {
                cost += 1;
            }
            below = up_in;
            up_in = self->parent_in[up_in];
            time_in = (up_in == TSK_NULL) ? INFINITY : node_times[up_in];
        } else if (up_out != TSK_NULL && self->next_degree[up_out] == 0
                   && self->not_sample[up_out] && time_out < time_in) {
            /* Step along the outgoing path. */
            if (self->parent_out[below] != up_out) {
                cost += 1;
            }
            below = up_out;
            up_out = self->parent_out[up_out];
            time_out = (up_out == TSK_NULL) ? INFINITY : node_times[up_out];
            out_steps += 1;
        } else {
            finished = true;
        }
    }

    if (out_steps != 0 && up_in == up_out && up_in != TSK_NULL) {
        return cost;
    }
    return INFINITY;
}
/* Merge the parent_in and parent_out paths above node `c` over the interval
 * [left, right), following the same climbing rule as
 * haplotype_extender_mergeable(): at each step the younger eligible
 * candidate parent is attached above the current node via
 * haplotype_extender_add_or_extend_edge(). Once neither path can be climbed
 * any further both must have arrived at the same node, which is attached as
 * the final parent.
 *
 * Both parent_in[c] and parent_out[c] are used to index node times without
 * a null check, so the caller must only invoke this for nodes that
 * haplotype_extender_mergeable() reported as mergeable. Returns 0 on success
 * or the first error from adding/extending an edge. */
static int
haplotype_extender_merge_paths(
    haplotype_extender_t *self, tsk_id_t c, double left, double right)
{
    int ret = 0;
    const double *node_times = self->ts->tables->nodes.time;
    tsk_id_t up_out = self->parent_out[c];
    tsk_id_t up_in = self->parent_in[c];
    double time_out = node_times[up_out];
    double time_in = node_times[up_in];
    tsk_id_t below = c;
    tsk_id_t step;
    bool take_in, take_out;

    for (;;) {
        take_in = (up_in != TSK_NULL && self->last_degree[up_in] == 0
                   && self->not_sample[up_in] && time_in < time_out);
        take_out = (up_out != TSK_NULL && self->next_degree[up_out] == 0
                    && self->not_sample[up_out] && time_out < time_in);
        if (take_in) {
            step = up_in;
        } else if (take_out) {
            step = up_out;
        } else {
            break;
        }

        ret = haplotype_extender_add_or_extend_edge(self, step, below, left, right);
        if (ret != 0) {
            goto out;
        }
        below = step;

        /* Advance along whichever path supplied this step. */
        if (take_in) {
            up_in = self->parent_in[step];
            time_in = (up_in == TSK_NULL) ? INFINITY : node_times[up_in];
        } else {
            up_out = self->parent_out[step];
            time_out = (up_out == TSK_NULL) ? INFINITY : node_times[up_out];
        }
    }

    tsk_bug_assert(up_out == up_in);
    ret = haplotype_extender_add_or_extend_edge(self, up_out, below, left, right);
out:
    return ret;
}
/* Run one full pass of path extension over the tree sequence in the
 * extender's direction, then tidy up the edge table.
 *
 * At every tree transition, each node below an entering edge that is also
 * present in the previous tree is tested for mergeability. Merges are
 * carried out in rounds of increasing cost: a round performs all merges
 * needing at most the current number of new edges, and the next round uses
 * the smallest cost that was rejected, until nothing mergeable is left.
 *
 * Afterwards, consecutive edge rows with identical parent and child whose
 * intervals abut are joined, and rows with an empty interval are removed.
 * Returns 0 on success or a negative error code. */
static int
haplotype_extender_extend_paths(haplotype_extender_t *self)
{
    int ret = 0;
    const bool forward = (self->direction == TSK_DIR_FORWARD);
    tsk_edge_table_t *edges = self->edges;
    bool more_trees;
    double left, right;
    float cost, budget, next_budget;
    tsk_tree_position_t tree_pos;
    edge_list_t *entry;
    tsk_id_t child, e, following;
    tsk_size_t num_edges;
    tsk_bool_t *keep = NULL;

    tsk_memset(&tree_pos, 0, sizeof(tree_pos));
    ret = tsk_tree_position_init(&tree_pos, self->ts, 0);
    if (ret != 0) {
        goto out;
    }

    more_trees = forward ? tsk_tree_position_next(&tree_pos)
                         : tsk_tree_position_prev(&tree_pos);
    while (more_trees) {
        left = tree_pos.interval.left;
        right = tree_pos.interval.right;
        ret = haplotype_extender_next_tree(self, &tree_pos);
        if (ret != 0) {
            goto out;
        }

        for (budget = 0; budget < INFINITY; budget = next_budget) {
            next_budget = INFINITY;
            for (entry = self->edges_in_head; entry != NULL; entry = entry->next) {
                child = edges->child[entry->edge];
                if (self->last_degree[child] > 0) {
                    cost = haplotype_extender_mergeable(self, child);
                    if (cost <= budget) {
                        ret = haplotype_extender_merge_paths(self, child, left, right);
                        if (ret != 0) {
                            goto out;
                        }
                    } else {
                        next_budget = TSK_MIN(cost, next_budget);
                    }
                }
            }
        }

        more_trees = forward ? tsk_tree_position_next(&tree_pos)
                             : tsk_tree_position_prev(&tree_pos);
    }

    /* The keep array can only be sized now, since edges may have been added
     * during the pass. */
    num_edges = edges->num_rows;
    keep = tsk_calloc(num_edges, sizeof(*keep));
    if (keep == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Join adjacent identical edges: the earlier row absorbs the interval of
     * the later one, which is left empty. */
    for (e = 0; e < (tsk_id_t) num_edges - 1; e++) {
        following = e + 1;
        if (edges->parent[e] == edges->parent[following]
            && edges->child[e] == edges->child[following]
            && edges->right[e] == edges->left[following]) {
            edges->right[e] = edges->right[following];
            edges->left[following] = edges->right[following];
        }
    }
    /* Only rows with a non-empty interval survive. */
    for (e = 0; e < (tsk_id_t) num_edges; e++) {
        keep[e] = edges->left[e] < edges->right[e];
    }
    ret = tsk_edge_table_keep_rows(edges, keep, 0, NULL);
out:
    tsk_tree_position_free(&tree_pos);
    tsk_safe_free(keep);
    return ret;
}
/* Perform a single extension pass over `self` in the given direction,
 * leaving the resulting edges in `edges` (which must be an initialised edge
 * table). If TSK_DEBUG is set in `options`, the final extender state is
 * written to the debug stream. Returns 0 on success or a negative error
 * code. */
static int
extend_haplotypes_iter(const tsk_treeseq_t *self, int direction, tsk_edge_table_t *edges,
    tsk_flags_t options)
{
    int ret;
    haplotype_extender_t extender;

    tsk_memset(&extender, 0, sizeof(extender));

    ret = haplotype_extender_init(&extender, self, direction, edges);
    if (ret == 0) {
        ret = haplotype_extender_extend_paths(&extender);
    }
    if (ret == 0 && (options & TSK_DEBUG) != 0) {
        haplotype_extender_print_state(&extender, tsk_get_debug_stream());
    }

    haplotype_extender_free(&extender);
    return ret;
}
/* Build a new tree sequence from `self` in which haplotypes have been
 * extended across adjacent trees.
 *
 * A forward and a reverse extension pass are run alternately, re-sorting the
 * tables and rebuilding the working tree sequence after each pass. This is
 * repeated at most `max_iter` times, stopping early once an iteration leaves
 * the number of edges unchanged. Finally the original mutations are restored
 * and each mutation's node is moved up to the oldest ancestor in the new
 * trees that is not older than the mutation.
 *
 * `output` is zeroed on entry and, on success, receives ownership of the
 * resulting tree sequence (to be released by the caller with
 * tsk_treeseq_free). `options` is passed to the extension passes, where
 * TSK_DEBUG enables debug output.
 *
 * Returns 0 on success, TSK_ERR_EXTEND_EDGES_BAD_MAXITER if `max_iter` is
 * not positive, TSK_ERR_MIGRATIONS_NOT_SUPPORTED if `self` contains
 * migrations, or another negative error code on failure. */
int TSK_WARN_UNUSED
tsk_treeseq_extend_haplotypes(
    const tsk_treeseq_t *self, int max_iter, tsk_flags_t options, tsk_treeseq_t *output)
{
    int ret = 0;
    tsk_table_collection_t tables;
    tsk_treeseq_t ts;
    int iter, j;
    tsk_size_t last_num_edges;
    tsk_bookmark_t sort_start;
    const int direction[] = { TSK_DIR_FORWARD, TSK_DIR_REVERSE };

    tsk_memset(&tables, 0, sizeof(tables));
    tsk_memset(&ts, 0, sizeof(ts));
    tsk_memset(output, 0, sizeof(*output));

    if (max_iter <= 0) {
        ret = tsk_trace_error(TSK_ERR_EXTEND_EDGES_BAD_MAXITER);
        goto out;
    }
    if (tsk_treeseq_get_num_migrations(self) != 0) {
        ret = tsk_trace_error(TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
        goto out;
    }

    /* Note: there is a fair bit of copying of table data in this implementation
     * currently, as we create a new tree sequence for each iteration, which
     * takes a full copy of the input tables. We could streamline this by
     * adding a flag to treeseq_init which says "steal a reference to these
     * tables and *don't* free them at the end". Then, we would only need
     * one copy of the full tables, and could pass in a standalone edge
     * table to use for in-place updating.
     */
    ret = tsk_table_collection_copy(self->tables, &tables, 0);
    if (ret != 0) {
        goto out;
    }
    /* Mutations are left out while edges are being rewritten; they are
     * restored from the input once the iteration has finished. */
    ret = tsk_mutation_table_clear(&tables.mutations);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_treeseq_init(&ts, &tables, 0);
    if (ret != 0) {
        goto out;
    }

    last_num_edges = tsk_treeseq_get_num_edges(&ts);
    for (iter = 0; iter < max_iter; iter++) {
        for (j = 0; j < 2; j++) {
            ret = extend_haplotypes_iter(&ts, direction[j], &tables.edges, options);
            if (ret != 0) {
                goto out;
            }
            /* We're done with the current ts now. Zero it after freeing so
             * that the cleanup at `out` cannot release the same pointers a
             * second time if the sort below fails. */
            tsk_treeseq_free(&ts);
            tsk_memset(&ts, 0, sizeof(ts));
            /* no need to sort sites and mutations */
            tsk_memset(&sort_start, 0, sizeof(sort_start));
            sort_start.sites = tables.sites.num_rows;
            sort_start.mutations = tables.mutations.num_rows;
            ret = tsk_table_collection_sort(&tables, &sort_start, 0);
            if (ret != 0) {
                goto out;
            }
            ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
            if (ret != 0) {
                goto out;
            }
        }
        /* Converged: a full forward/reverse round changed nothing. */
        if (last_num_edges == tsk_treeseq_get_num_edges(&ts)) {
            break;
        }
        last_num_edges = tsk_treeseq_get_num_edges(&ts);
    }

    /* Remap mutation nodes */
    ret = tsk_mutation_table_copy(
        &self->tables->mutations, &tables.mutations, TSK_NO_INIT);
    if (ret != 0) {
        goto out;
    }
    /* Note: to allow migrations we'd also have to do this same operation
     * on the migration nodes; however it's a can of worms because the interval
     * covering the migration might no longer make sense. */
    ret = tsk_treeseq_slide_mutation_nodes_up(&ts, &tables.mutations);
    if (ret != 0) {
        goto out;
    }
    tsk_treeseq_free(&ts);
    tsk_memset(&ts, 0, sizeof(ts));
    ret = tsk_treeseq_init(&ts, &tables, TSK_TS_INIT_BUILD_INDEXES);
    if (ret != 0) {
        goto out;
    }

    /* Hand ownership of the tree sequence to the calling code */
    tsk_memcpy(output, &ts, sizeof(ts));
    tsk_memset(&ts, 0, sizeof(ts));
out:
    tsk_treeseq_free(&ts);
    tsk_table_collection_free(&tables);
    return ret;
}
/* ======================================================== *
* Pair coalescence
* ======================================================== */
/* Validate a node-to-bin mapping of length `num_nodes`.
 *
 * Each entry must be TSK_NULL or larger, otherwise TSK_ERR_BAD_NODE_BIN_MAP
 * is returned. `num_bins` must be at least one and large enough to hold the
 * largest bin index present, otherwise TSK_ERR_BAD_NODE_BIN_MAP_DIM is
 * returned. Returns 0 if the mapping is valid. */
static int
check_node_bin_map(
    const tsk_size_t num_nodes, const tsk_size_t num_bins, const tsk_id_t *node_bin_map)
{
    tsk_id_t largest = TSK_NULL;
    tsk_id_t bin;
    tsk_size_t node;

    for (node = 0; node < num_nodes; node++) {
        bin = node_bin_map[node];
        if (bin < TSK_NULL) {
            return tsk_trace_error(TSK_ERR_BAD_NODE_BIN_MAP);
        }
        largest = (bin > largest) ? bin : largest;
    }

    if (num_bins < 1 || (tsk_id_t) num_bins < largest + 1) {
        return tsk_trace_error(TSK_ERR_BAD_NODE_BIN_MAP_DIM);
    }
    return 0;
}
/* Write the transpose of the row-major `rows` x `cols` matrix `source` into
 * `dest`, which is filled as a row-major `cols` x `rows` matrix. The two
 * buffers must not overlap. */
static inline void
TRANSPOSE_2D(tsk_size_t rows, tsk_size_t cols, const double *source, double *dest)
{
    tsk_size_t r, c;
    const double *src_row = source;

    for (r = 0; r < rows; r++, src_row += cols) {
        for (c = 0; c < cols; c++) {
            dest[c * rows + r] = src_row[c];
        }
    }
}
/* Compute, for each pair of sample sets listed in `set_indexes`, a pair
 * count from per-sample-set tallies.
 *
 * `outside` is scratch space of length `num_sample_sets`; it is overwritten
 * with parent_count - child_count - parent_state. For the i-th index pair
 * (j, k), result[i] is outside[j] * inside[k], plus outside[k] * inside[j]
 * when j and k differ. `set_indexes` holds `num_set_indexes` consecutive
 * (j, k) pairs. */
static inline void
pair_coalescence_count(tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,
    tsk_size_t num_sample_sets, const double *parent_count, const double *child_count,
    const double *parent_state, const double *inside, double *outside, double *result)
{
    tsk_size_t set, pair;
    tsk_id_t first, second;

    for (set = 0; set < num_sample_sets; set++) {
        outside[set] = parent_count[set] - child_count[set] - parent_state[set];
    }

    for (pair = 0; pair < num_set_indexes; pair++) {
        first = set_indexes[2 * pair];
        second = set_indexes[2 * pair + 1];
        result[pair] = outside[first] * inside[second];
        if (first != second) {
            result[pair] += outside[second] * inside[first];
        }
    }
}
/* Compute a windowed summary of pair coalescence events, binned by node.
 *
 * For every window and every pair of sample sets in `set_indexes`, the
 * function accumulates per bin (as assigned by `node_bin_map`, with TSK_NULL
 * meaning "node is ignored") a weight -- the span-weighted number of pairs
 * coalescing in nodes of that bin -- and a value -- the weighted mean time
 * of those nodes. These two arrays of length `num_bins` are then handed to
 * `summary_func`, which writes `summary_func_dim` outputs.
 *
 * Parameters:
 *   num_sample_sets, sample_set_sizes, sample_sets: the sample sets, stored
 *       back to back in `sample_sets`.
 *   num_set_indexes, set_indexes: `num_set_indexes` pairs of sample set
 *       indexes (2 * num_set_indexes entries).
 *   num_windows, windows: window breakpoints (num_windows + 1 entries);
 *       checked with TSK_REQUIRE_FULL_SPAN.
 *   num_bins, node_bin_map: bin index per node.
 *   summary_func, summary_func_dim, summary_func_args: reduction applied to
 *       each (window, index pair); a nonzero return aborts the computation
 *       and is returned to the caller.
 *   options: TSK_STAT_SPAN_NORMALISE divides weights by the non-missing span
 *       of the window, TSK_STAT_PAIR_NORMALISE by the total number of pairs.
 *   result: output array laid out as
 *       num_windows x num_set_indexes x summary_func_dim.
 *
 * Returns 0 on success or a negative error code (invalid inputs,
 * TSK_ERR_NO_MEMORY, or an error from `summary_func`). */
int
tsk_treeseq_pair_coalescence_stat(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_set_indexes, const tsk_id_t *set_indexes, tsk_size_t num_windows,
    const double *windows, tsk_size_t num_bins, const tsk_id_t *node_bin_map,
    pair_coalescence_stat_func_t *summary_func, tsk_size_t summary_func_dim,
    void *summary_func_args, tsk_flags_t options, double *result)
{
    int ret = 0;
    double left, right, remaining_span, missing_span, window_span, denominator, x, t;
    tsk_id_t e, p, c, u, v, w, i, j;
    tsk_size_t num_samples, num_edges;
    tsk_tree_position_t tree_pos;
    const tsk_table_collection_t *tables = self->tables;
    const tsk_size_t num_nodes = tables->nodes.num_rows;
    const double *restrict nodes_time = self->tables->nodes.time;
    const double sequence_length = tables->sequence_length;
    const tsk_size_t num_outputs = summary_func_dim;

    /* buffers */
    bool *visited = NULL;
    tsk_id_t *nodes_sample_set = NULL;
    tsk_id_t *nodes_parent = NULL;
    double *coalescing_pairs = NULL;
    double *coalescence_time = NULL;
    double *nodes_sample = NULL;
    double *sample_count = NULL;
    double *bin_weight = NULL;
    double *bin_values = NULL;
    double *pair_count = NULL;
    double *total_pair = NULL;
    double *outside = NULL;

    /* row pointers */
    double *inside = NULL;
    double *weight = NULL;
    double *values = NULL;
    double *output = NULL;
    double *above = NULL;
    double *below = NULL;
    double *state = NULL;
    double *pairs = NULL;
    double *times = NULL;

    tsk_memset(&tree_pos, 0, sizeof(tree_pos));

    /* check inputs */
    ret = tsk_treeseq_check_windows(self, num_windows, windows, TSK_REQUIRE_FULL_SPAN);
    if (ret != 0) {
        goto out;
    }
    ret = check_set_indexes(num_sample_sets, 2 * num_set_indexes, set_indexes);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_treeseq_check_sample_sets(
        self, num_sample_sets, sample_set_sizes, sample_sets);
    if (ret != 0) {
        goto out;
    }
    ret = check_node_bin_map(num_nodes, num_bins, node_bin_map);
    if (ret != 0) {
        goto out;
    }

    /* map nodes to sample sets */
    nodes_sample_set = tsk_malloc(num_nodes * sizeof(*nodes_sample_set));
    if (nodes_sample_set == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }
    ret = get_sample_set_index_map(self, num_sample_sets, sample_set_sizes, sample_sets,
        &num_samples, nodes_sample_set);
    if (ret != 0) {
        goto out;
    }

    visited = tsk_malloc(num_nodes * sizeof(*visited));
    outside = tsk_malloc(num_sample_sets * sizeof(*outside));
    nodes_parent = tsk_malloc(num_nodes * sizeof(*nodes_parent));
    nodes_sample = tsk_calloc(num_nodes * num_sample_sets, sizeof(*nodes_sample));
    sample_count = tsk_malloc(num_nodes * num_sample_sets * sizeof(*sample_count));
    coalescing_pairs = tsk_calloc(num_bins * num_set_indexes, sizeof(*coalescing_pairs));
    coalescence_time = tsk_calloc(num_bins * num_set_indexes, sizeof(*coalescence_time));
    bin_weight = tsk_malloc(num_bins * num_set_indexes * sizeof(*bin_weight));
    bin_values = tsk_malloc(num_bins * num_set_indexes * sizeof(*bin_values));
    pair_count = tsk_malloc(num_set_indexes * sizeof(*pair_count));
    total_pair = tsk_malloc(num_set_indexes * sizeof(*total_pair));
    /* Every buffer allocated above must be covered by this check,
     * including coalescence_time. */
    if (nodes_parent == NULL || nodes_sample == NULL || sample_count == NULL
        || coalescing_pairs == NULL || coalescence_time == NULL || bin_weight == NULL
        || bin_values == NULL || outside == NULL || pair_count == NULL
        || visited == NULL || total_pair == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* total number of distinct pairs for each pair of sample sets */
    for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {
        u = set_indexes[2 * i];
        v = set_indexes[2 * i + 1];
        total_pair[i] = (double) sample_set_sizes[u] * (double) sample_set_sizes[v];
        if (u == v) {
            total_pair[i] -= (double) sample_set_sizes[v];
            total_pair[i] /= 2;
        }
    }

    /* initialize internal state */
    for (c = 0; c < (tsk_id_t) num_nodes; c++) {
        i = nodes_sample_set[c];
        if (i != TSK_NULL) {
            state = GET_2D_ROW(nodes_sample, num_sample_sets, c);
            state[i] = 1.0;
        }
        nodes_parent[c] = TSK_NULL;
        visited[c] = false;
    }
    tsk_memcpy(
        sample_count, nodes_sample, num_nodes * num_sample_sets * sizeof(*sample_count));

    ret = tsk_tree_position_init(&tree_pos, self, 0);
    if (ret != 0) {
        goto out;
    }

    num_edges = 0;
    missing_span = 0.0;
    w = 0;
    while (true) {
        tsk_tree_position_next(&tree_pos);
        if (tree_pos.index == TSK_NULL) {
            break;
        }

        left = tree_pos.interval.left;
        right = tree_pos.interval.right;
        /* Contributions are credited for the whole remaining sequence when
         * an edge enters, and withdrawn again when it leaves. */
        remaining_span = sequence_length - left;

        for (u = tree_pos.out.start; u != tree_pos.out.stop; u++) {
            e = tree_pos.out.order[u];
            p = tables->edges.parent[e];
            c = tables->edges.child[e];
            nodes_parent[c] = TSK_NULL;
            inside = GET_2D_ROW(sample_count, num_sample_sets, c);
            while (p != TSK_NULL) { /* downdate statistic */
                v = node_bin_map[p];
                t = nodes_time[p];
                if (v != TSK_NULL) {
                    above = GET_2D_ROW(sample_count, num_sample_sets, p);
                    below = GET_2D_ROW(sample_count, num_sample_sets, c);
                    state = GET_2D_ROW(nodes_sample, num_sample_sets, p);
                    pairs = GET_2D_ROW(coalescing_pairs, num_set_indexes, v);
                    times = GET_2D_ROW(coalescence_time, num_set_indexes, v);
                    pair_coalescence_count(num_set_indexes, set_indexes, num_sample_sets,
                        above, below, state, inside, outside, pair_count);
                    for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {
                        x = pair_count[i] * remaining_span;
                        pairs[i] -= x;
                        times[i] -= t * x;
                    }
                }
                c = p;
                p = nodes_parent[c];
            }
            p = tables->edges.parent[e];
            while (p != TSK_NULL) { /* downdate state */
                above = GET_2D_ROW(sample_count, num_sample_sets, p);
                for (i = 0; i < (tsk_id_t) num_sample_sets; i++) {
                    above[i] -= inside[i];
                }
                p = nodes_parent[p];
            }
            num_edges -= 1;
        }

        for (u = tree_pos.in.start; u != tree_pos.in.stop; u++) {
            e = tree_pos.in.order[u];
            p = tables->edges.parent[e];
            c = tables->edges.child[e];
            nodes_parent[c] = p;
            inside = GET_2D_ROW(sample_count, num_sample_sets, c);
            while (p != TSK_NULL) { /* update state */
                above = GET_2D_ROW(sample_count, num_sample_sets, p);
                for (i = 0; i < (tsk_id_t) num_sample_sets; i++) {
                    above[i] += inside[i];
                }
                p = nodes_parent[p];
            }
            p = tables->edges.parent[e];
            while (p != TSK_NULL) { /* update statistic */
                v = node_bin_map[p];
                t = nodes_time[p];
                if (v != TSK_NULL) {
                    above = GET_2D_ROW(sample_count, num_sample_sets, p);
                    below = GET_2D_ROW(sample_count, num_sample_sets, c);
                    state = GET_2D_ROW(nodes_sample, num_sample_sets, p);
                    pairs = GET_2D_ROW(coalescing_pairs, num_set_indexes, v);
                    times = GET_2D_ROW(coalescence_time, num_set_indexes, v);
                    pair_coalescence_count(num_set_indexes, set_indexes, num_sample_sets,
                        above, below, state, inside, outside, pair_count);
                    for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {
                        x = pair_count[i] * remaining_span;
                        pairs[i] += x;
                        times[i] += t * x;
                    }
                }
                c = p;
                p = nodes_parent[c];
            }
            num_edges += 1;
        }

        /* A tree without edges counts as missing data. */
        if (num_edges == 0) {
            missing_span += right - left;
        }

        /* flush windows */
        while (w < (tsk_id_t) num_windows && windows[w + 1] <= right) {
            TRANSPOSE_2D(num_bins, num_set_indexes, coalescing_pairs, bin_weight);
            TRANSPOSE_2D(num_bins, num_set_indexes, coalescence_time, bin_values);
            tsk_memset(coalescing_pairs, 0,
                num_bins * num_set_indexes * sizeof(*coalescing_pairs));
            tsk_memset(coalescence_time, 0,
                num_bins * num_set_indexes * sizeof(*coalescence_time));
            remaining_span = sequence_length - windows[w + 1];
            for (j = 0; j < (tsk_id_t) num_samples; j++) { /* truncate at tree */
                c = sample_sets[j];
                p = nodes_parent[c];
                while (!visited[c] && p != TSK_NULL) {
                    v = node_bin_map[p];
                    t = nodes_time[p];
                    if (v != TSK_NULL) {
                        above = GET_2D_ROW(sample_count, num_sample_sets, p);
                        below = GET_2D_ROW(sample_count, num_sample_sets, c);
                        state = GET_2D_ROW(nodes_sample, num_sample_sets, p);
                        pairs = GET_2D_ROW(coalescing_pairs, num_set_indexes, v);
                        times = GET_2D_ROW(coalescence_time, num_set_indexes, v);
                        pair_coalescence_count(num_set_indexes, set_indexes,
                            num_sample_sets, above, below, state, below, outside,
                            pair_count);
                        /* Move the part of the contribution that lies beyond
                         * the window end from the flushed window into the
                         * running totals for the next window. */
                        for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {
                            weight = GET_2D_ROW(bin_weight, num_bins, i);
                            values = GET_2D_ROW(bin_values, num_bins, i);
                            x = pair_count[i] * remaining_span / 2;
                            pairs[i] += x;
                            times[i] += t * x;
                            weight[v] -= x;
                            values[v] -= t * x;
                        }
                    }
                    visited[c] = true;
                    c = p;
                    p = nodes_parent[c];
                }
            }
            for (j = 0; j < (tsk_id_t) num_samples; j++) { /* reset tree */
                c = sample_sets[j];
                p = nodes_parent[c];
                while (visited[c] && p != TSK_NULL) {
                    visited[c] = false;
                    c = p;
                    p = nodes_parent[c];
                }
            }
            for (i = 0; i < (tsk_id_t) num_set_indexes; i++) { /* normalise values */
                weight = GET_2D_ROW(bin_weight, num_bins, i);
                values = GET_2D_ROW(bin_values, num_bins, i);
                for (v = 0; v < (tsk_id_t) num_bins; v++) {
                    values[v] /= weight[v];
                }
            }
            /* normalise weights */
            if (options & (TSK_STAT_SPAN_NORMALISE | TSK_STAT_PAIR_NORMALISE)) {
                window_span = windows[w + 1] - windows[w] - missing_span;
                missing_span = 0.0;
                if (num_edges == 0) {
                    /* missing interval, so remove overcounted missing span */
                    remaining_span = right - windows[w + 1];
                    window_span += remaining_span;
                    missing_span += remaining_span;
                }
                for (i = 0; i < (tsk_id_t) num_set_indexes; i++) {
                    denominator = 1.0;
                    if (options & TSK_STAT_SPAN_NORMALISE) {
                        denominator *= window_span;
                    }
                    if (options & TSK_STAT_PAIR_NORMALISE) {
                        denominator *= total_pair[i];
                    }
                    weight = GET_2D_ROW(bin_weight, num_bins, i);
                    for (v = 0; v < (tsk_id_t) num_bins; v++) {
                        weight[v] *= denominator == 0.0 ? 0.0 : 1 / denominator;
                    }
                }
            }
            for (i = 0; i < (tsk_id_t) num_set_indexes; i++) { /* summarise bins */
                weight = GET_2D_ROW(bin_weight, num_bins, i);
                values = GET_2D_ROW(bin_values, num_bins, i);
                output = GET_3D_ROW(
                    result, num_set_indexes, num_outputs, (tsk_size_t) w, i);
                ret = summary_func(
                    num_bins, weight, values, num_outputs, output, summary_func_args);
                if (ret != 0) {
                    goto out;
                }
            }
            w += 1;
        }
    }
out:
    tsk_tree_position_free(&tree_pos);
    tsk_safe_free(nodes_sample_set);
    tsk_safe_free(coalescing_pairs);
    tsk_safe_free(coalescence_time);
    tsk_safe_free(nodes_parent);
    tsk_safe_free(nodes_sample);
    tsk_safe_free(sample_count);
    tsk_safe_free(bin_weight);
    tsk_safe_free(bin_values);
    tsk_safe_free(pair_count);
    tsk_safe_free(total_pair);
    tsk_safe_free(visited);
    tsk_safe_free(outside);
    return ret;
}
/* Summary function that reports the per-bin weights unchanged: the first
 * `output_dim` entries of `weight` are copied into `output`. The input
 * dimension, the values and the parameter pointer are ignored. Always
 * returns 0. */
static int
pair_coalescence_weights(tsk_size_t TSK_UNUSED(input_dim), const double *weight,
    const double *TSK_UNUSED(values), tsk_size_t output_dim, double *output,
    void *TSK_UNUSED(params))
{
    const tsk_size_t num_bytes = output_dim * sizeof(*output);

    tsk_memcpy(output, weight, num_bytes);
    return 0;
}
/* Compute the per-bin pair coalescence weights for each window and each pair
 * of sample sets. This is tsk_treeseq_pair_coalescence_stat() with a summary
 * function that copies the bin weights, so `result` holds `num_bins` values
 * per (window, index pair). All other arguments are forwarded unchanged;
 * returns 0 on success or a negative error code. */
int
tsk_treeseq_pair_coalescence_counts(const tsk_treeseq_t *self,
    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
    const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,
    tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,
    const tsk_id_t *node_bin_map, tsk_flags_t options, double *result)
{
    int ret;

    /* One output per bin, no extra arguments for the summary function. */
    ret = tsk_treeseq_pair_coalescence_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_set_indexes, set_indexes, num_windows, windows, num_bins,
        node_bin_map, pair_coalescence_weights, num_bins, NULL, options, result);
    return ret;
}
/*
 * Summary function computing quantiles of the pair coalescence time
 * distribution from per-bin weights.
 *
 * input_dim:  number of bins.
 * weight:     per-bin weights; bins with non-positive weight are skipped. The
 *             public entry point forces span and pair normalisation before
 *             this is called.
 * values:     per-bin time value reported when a quantile falls in that bin.
 * output_dim: number of quantiles requested (may be zero).
 * output:     receives one time per quantile; entries whose quantile is never
 *             reached by the running weight total are left as NAN.
 * params:     array of output_dim quantiles, expected in increasing order.
 *
 * Always returns 0.
 */
static int
pair_coalescence_quantiles(tsk_size_t input_dim, const double *weight,
    const double *values, tsk_size_t output_dim, double *output, void *params)
{
    int ret = 0;
    double coalesced, timepoint;
    const double *quantiles = (const double *) params;
    tsk_size_t i, j;

    j = 0;
    coalesced = 0.0;
    timepoint = TSK_UNKNOWN_TIME;
    for (i = 0; i < output_dim; i++) {
        output[i] = NAN;
    }
    for (i = 0; i < input_dim; i++) {
        if (weight[i] > 0) {
            coalesced += weight[i];
            timepoint = values[i];
            /* Emit every quantile that the running total has now reached. */
            while (j < output_dim && quantiles[j] <= coalesced) {
                output[j] = timepoint;
                j += 1;
            }
        }
    }
    /* A final quantile of exactly 1.0 always maps to the last bin with
     * weight, even if the accumulated total did not reach 1.0 exactly.
     * The output_dim guard avoids indexing element (tsk_size_t) -1 when no
     * quantiles were requested. */
    if (output_dim > 0 && quantiles[output_dim - 1] == 1.0) {
        output[output_dim - 1] = timepoint;
    }
    return ret;
}
/*
 * Validate a list of quantiles: every value must lie in [0, 1] and the list
 * must be strictly increasing. An empty list is accepted.
 *
 * Returns 0 if valid, otherwise TSK_ERR_BAD_QUANTILES.
 */
static int
check_quantiles(const tsk_size_t num_quantiles, const double *quantiles)
{
    int ret = 0;
    tsk_size_t i;
    double last = -INFINITY;

    for (i = 0; i < num_quantiles; i++) {
        /* The tests are written as negated "is valid" comparisons so that a
         * NaN quantile (for which every ordered comparison is false) is
         * rejected rather than silently accepted. */
        if (!(quantiles[i] > last) || !(quantiles[i] >= 0.0)
            || !(quantiles[i] <= 1.0)) {
            ret = tsk_trace_error(TSK_ERR_BAD_QUANTILES);
            goto out;
        }
        last = quantiles[i];
    }
out:
    return ret;
}
/*
 * Check that the bins named by node_bin_map are ordered in time: scanning the
 * non-empty bins by increasing index, the youngest node of a bin must not be
 * younger than the oldest node of the previous non-empty bin.
 *
 * Nodes mapped to an index outside [0, num_bins) are ignored, as are bins
 * that no node maps to.
 *
 * Returns 0 on success, TSK_ERR_UNSORTED_TIMES if the ordering is violated,
 * or TSK_ERR_NO_MEMORY if the scratch arrays cannot be allocated.
 */
static int
check_sorted_node_bin_map(
    const tsk_treeseq_t *self, tsk_size_t num_bins, const tsk_id_t *node_bin_map)
{
    int ret = 0;
    const tsk_size_t num_nodes = self->tables->nodes.num_rows;
    const double *nodes_time = self->tables->nodes.time;
    const tsk_id_t bin_limit = (tsk_id_t) num_bins;
    const tsk_id_t node_limit = (tsk_id_t) num_nodes;
    double previous_max, t;
    tsk_id_t node, bin;
    double *min_time = tsk_malloc(num_bins * sizeof(*min_time));
    double *max_time = tsk_malloc(num_bins * sizeof(*max_time));

    if (min_time == NULL || max_time == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Unknown time marks a bin that has not been assigned any node yet. */
    for (bin = 0; bin < bin_limit; bin++) {
        min_time[bin] = TSK_UNKNOWN_TIME;
        max_time[bin] = TSK_UNKNOWN_TIME;
    }

    /* Record the time range spanned by the nodes of each bin. */
    for (node = 0; node < node_limit; node++) {
        bin = node_bin_map[node];
        if (bin >= 0 && bin < bin_limit) {
            t = nodes_time[node];
            if (tsk_is_unknown_time(max_time[bin]) || t > max_time[bin]) {
                max_time[bin] = t;
            }
            if (tsk_is_unknown_time(min_time[bin]) || t < min_time[bin]) {
                min_time[bin] = t;
            }
        }
    }

    /* Successive non-empty bins must not reach back before the previous one. */
    previous_max = -INFINITY;
    for (bin = 0; bin < bin_limit; bin++) {
        if (!tsk_is_unknown_time(min_time[bin])) {
            if (min_time[bin] < previous_max) {
                ret = tsk_trace_error(TSK_ERR_UNSORTED_TIMES);
                goto out;
            }
            previous_max = max_time[bin];
        }
    }
out:
    tsk_safe_free(min_time);
    tsk_safe_free(max_time);
    return ret;
}
/*
 * Quantiles of the pair coalescence time distribution per (window, set index).
 *
 * The quantiles and the time ordering of the node bins are validated first;
 * the generic pair coalescence statistic is then run with span and pair
 * normalisation forced on, using pair_coalescence_quantiles as the summary
 * function and the quantiles array as its parameter.
 *
 * Returns 0 on success or the first error reported by a validation step or
 * by tsk_treeseq_pair_coalescence_stat.
 */
int
tsk_treeseq_pair_coalescence_quantiles(const tsk_treeseq_t *self,
    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
    const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,
    tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,
    const tsk_id_t *node_bin_map, tsk_size_t num_quantiles, double *quantiles,
    tsk_flags_t options, double *result)
{
    int ret;

    ret = check_quantiles(num_quantiles, quantiles);
    if (ret != 0) {
        return ret;
    }
    ret = check_sorted_node_bin_map(self, num_bins, node_bin_map);
    if (ret != 0) {
        return ret;
    }

    /* The summary function works on normalised weights. */
    options |= TSK_STAT_SPAN_NORMALISE | TSK_STAT_PAIR_NORMALISE;
    return tsk_treeseq_pair_coalescence_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_set_indexes, set_indexes, num_windows, windows, num_bins,
        node_bin_map, pair_coalescence_quantiles, num_quantiles, (void *) quantiles,
        options, result);
}
/*
 * Summary function converting per-time-window coalescence weights into
 * coalescence rates.
 *
 * weight:  per-window weights (normalisation is forced by the public entry
 *          point before this is called).
 * values:  per-window time value; only read for the last window with weight.
 * params:  time window breakpoints; entries [i] and [i + 1] bound window i.
 * output:  one rate per window. Trailing windows with zero weight are set to
 *          NAN; negative rates are clamped to zero.
 *
 * input_dim and output_dim must be equal. Always returns 0.
 */
static int
pair_coalescence_rates(tsk_size_t input_dim, const double *weight, const double *values,
    tsk_size_t output_dim, double *output, void *params)
{
    int ret = 0;
    double coalesced, rate, waiting_time, lower, upper;
    double *time_windows = (double *) params;
    tsk_id_t k, last;

    tsk_bug_assert(input_dim == output_dim);

    /* find last window with data */
    last = (tsk_id_t) output_dim;
    while (last > 0 && weight[last - 1] == 0) {
        output[last - 1] = NAN; /* TODO: should fill value be zero instead? */
        last--;
    }

    coalesced = 0.0;
    for (k = 0; k < last; k++) {
        lower = time_windows[k];
        upper = time_windows[k + 1];
        if (k + 1 < last) {
            /* Interior window: rate from the fraction of the remaining
             * (not yet coalesced) weight that coalesces in this window. */
            rate = log(1 - weight[k] / (1 - coalesced)) / (lower - upper);
        } else {
            /* Final window with data: reciprocal of the waiting time
             * measured from the start of the window. */
            if (values[k] < lower) {
                waiting_time = 0.0;
            } else {
                waiting_time = values[k] - lower;
            }
            rate = 1 / waiting_time;
        }
        /* avoid tiny negative values from fp error */
        if (rate > 0) {
            output[k] = rate;
        } else {
            output[k] = 0;
        }
        coalesced += weight[k];
    }
    return ret;
}
/*
 * Validate the inputs specific to the pair coalescence rates statistic.
 *
 * time_windows holds num_time_windows + 1 breakpoints. The checks are:
 *   - there is at least one time window (TSK_ERR_BAD_TIME_WINDOWS_DIM);
 *   - breakpoints are strictly increasing and not NaN
 *     (TSK_ERR_BAD_TIME_WINDOWS);
 *   - the final breakpoint is INFINITY (TSK_ERR_BAD_TIME_WINDOWS_END);
 *   - every node listed in sample_sets is a valid node ID
 *     (TSK_ERR_NODE_OUT_OF_BOUNDS) whose time equals the first breakpoint
 *     (TSK_ERR_BAD_SAMPLE_PAIR_TIMES);
 *   - every node with a non-negative entry in node_time_window is mapped to
 *     an existing window (TSK_ERR_BAD_NODE_BIN_MAP_DIM) that contains the
 *     node's time (TSK_ERR_BAD_NODE_TIME_WINDOW).
 *
 * Returns 0 if all checks pass, otherwise the first error encountered.
 */
static int
check_coalescence_rate_time_windows(const tsk_treeseq_t *self,
    tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
    const tsk_id_t *sample_sets, tsk_size_t num_time_windows,
    const tsk_id_t *node_time_window, const double *time_windows)
{
    int ret = 0;
    double timepoint;
    const double *nodes_time = self->tables->nodes.time;
    tsk_size_t num_nodes = self->tables->nodes.num_rows;
    tsk_id_t i, j, k;
    tsk_id_t n;

    if (num_time_windows == 0) {
        ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS_DIM);
        goto out;
    }

    /* time windows are sorted; the negated comparison also rejects NaN
     * breakpoints, which would pass a plain "<=" test */
    timepoint = time_windows[0];
    for (i = 0; i < (tsk_id_t) num_time_windows; i++) {
        if (!(time_windows[i + 1] > timepoint)) {
            ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS);
            goto out;
        }
        timepoint = time_windows[i + 1];
    }
    if (timepoint != INFINITY) {
        ret = tsk_trace_error(TSK_ERR_BAD_TIME_WINDOWS_END);
        goto out;
    }

    /* all sample times align with start of first time window */
    k = 0;
    for (i = 0; i < (tsk_id_t) num_sample_sets; i++) {
        for (j = 0; j < (tsk_id_t) sample_set_sizes[i]; j++) {
            n = sample_sets[k++];
            /* This check runs before the sample sets are handed to the
             * statistic itself, so the node ID must be range checked here
             * before it is used to index the node time column. */
            if (n < 0 || n >= (tsk_id_t) num_nodes) {
                ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
                goto out;
            }
            if (nodes_time[n] != time_windows[0]) {
                ret = tsk_trace_error(TSK_ERR_BAD_SAMPLE_PAIR_TIMES);
                goto out;
            }
        }
    }

    /* nodes are correctly assigned to time windows */
    for (i = 0; i < (tsk_id_t) num_nodes; i++) {
        j = node_time_window[i];
        if (j < 0) {
            /* negative entries mark nodes that are not assigned a window */
            continue;
        }
        if (j >= (tsk_id_t) num_time_windows) {
            ret = tsk_trace_error(TSK_ERR_BAD_NODE_BIN_MAP_DIM);
            goto out;
        }
        if (nodes_time[i] < time_windows[j] || nodes_time[i] >= time_windows[j + 1]) {
            ret = tsk_trace_error(TSK_ERR_BAD_NODE_TIME_WINDOW);
            goto out;
        }
    }
out:
    return ret;
}
/*
 * Pair coalescence rates per (window, set index, time window).
 *
 * After validating the time windows, sample times and node-to-window map,
 * the generic pair coalescence statistic is run with the time windows acting
 * as bins, span and pair normalisation forced on, and
 * pair_coalescence_rates as the summary function (one output per time
 * window, with the breakpoints passed as its parameter).
 *
 * Returns 0 on success or the first error reported by validation or by
 * tsk_treeseq_pair_coalescence_stat.
 */
int
tsk_treeseq_pair_coalescence_rates(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
    const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
    tsk_size_t num_set_indexes, const tsk_id_t *set_indexes, tsk_size_t num_windows,
    const double *windows, tsk_size_t num_time_windows, const tsk_id_t *node_time_window,
    double *time_windows, tsk_flags_t options, double *result)
{
    int ret;

    ret = check_coalescence_rate_time_windows(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_time_windows, node_time_window, time_windows);
    if (ret != 0) {
        return ret;
    }

    /* The summary function works on normalised weights. */
    options |= TSK_STAT_SPAN_NORMALISE | TSK_STAT_PAIR_NORMALISE;
    return tsk_treeseq_pair_coalescence_stat(self, num_sample_sets, sample_set_sizes,
        sample_sets, num_set_indexes, set_indexes, num_windows, windows,
        num_time_windows, node_time_window, pair_coalescence_rates, num_time_windows,
        (void *) time_windows, options, result);
}
/* ======================================================== *
* Relatedness matrix-vector product
* ======================================================== */
/* Working state for the genetic relatedness matrix-vector product. The input
 * and output pointers are borrowed from the caller; parent, x, w and v are
 * allocated by tsk_matvec_calculator_init and released by
 * tsk_matvec_calculator_free. */
typedef struct {
/* Tree sequence being traversed (borrowed). */
const tsk_treeseq_t *ts;
/* Number of weight columns. */
tsk_size_t num_weights;
/* Input weights, read as one row of num_weights values per sample (borrowed). */
const double *weights;
/* Number of genomic windows; windows[m] and windows[m + 1] bound window m. */
tsk_size_t num_windows;
const double *windows;
/* Nodes for which output rows are produced (borrowed). */
tsk_size_t num_focal_nodes;
const tsk_id_t *focal_nodes;
tsk_flags_t options;
/* Output, zeroed in init as num_windows * num_focal_nodes * num_weights
 * doubles (borrowed). */
double *result;
/* Cursor over the edge insertions/removals between adjacent trees. */
tsk_tree_position_t tree_pos;
/* Current genomic position of the sweep. */
double position;
/* Number of rows in the node table; length of parent and x. */
tsk_size_t num_nodes;
/* parent[u] is the parent of node u in the current tree, or TSK_NULL. */
tsk_id_t *parent;
/* x[u] is the position at which node u's contribution was last flushed to v. */
double *x;
/* num_nodes rows of num_weights values: per-node weights, with a child's row
 * added to (or subtracted from) each ancestor as edges are inserted (removed). */
double *w;
/* num_nodes rows of num_weights values: accumulated
 * branch length * span * weight contributions; zeroed after each window. */
double *v;
} tsk_matvec_calculator_t;
static void
tsk_matvec_calculator_print_state(const tsk_matvec_calculator_t *self, FILE *out)
{
tsk_id_t j;
tsk_size_t num_samples = tsk_treeseq_get_num_samples(self->ts);
fprintf(out, "Matvec state:\n");
fprintf(out, "options = %d\n", self->options);
fprintf(out, "position = %f\n", self->position);
fprintf(out, "focal nodes = %lld: [", (long long) self->num_focal_nodes);
fprintf(out, "tree_pos:\n");
tsk_tree_position_print_state(&self->tree_pos, out);
fprintf(out, "samples = %lld: [", (long long) num_samples);
fprintf(out, "]\n");
fprintf(out, "node\tparent\tx\tv\tw");
fprintf(out, "\n");
for (j = 0; j < (tsk_id_t) self->num_nodes; j++) {
fprintf(out, "%lld\t", (long long) j);
fprintf(out, "%lld\t%g\t%g\t%g\n", (long long) self->parent[j], self->x[j],
self->v[j], self->w[j]);
}
}
/*
 * Set up a matvec calculator.
 *
 * Stores the (borrowed) inputs, allocates the per-node working arrays, zeroes
 * the result buffer, validates the focal nodes, positions the tree cursor on
 * the tree containing windows[0], and seeds the per-node weights w from the
 * sample weights (mean-centred per column unless TSK_STAT_NONCENTRED is set).
 *
 * The calculator must be released with tsk_matvec_calculator_free whether or
 * not this succeeds.
 *
 * Returns 0 on success, TSK_ERR_NO_MEMORY on allocation failure,
 * TSK_ERR_NODE_OUT_OF_BOUNDS for an invalid focal node, or an error from the
 * tree position functions.
 */
static int
tsk_matvec_calculator_init(tsk_matvec_calculator_t *self, const tsk_treeseq_t *ts,
    tsk_size_t num_weights, const double *weights, tsk_size_t num_windows,
    const double *windows, tsk_size_t num_focal_nodes, const tsk_id_t *focal_nodes,
    tsk_flags_t options, double *result)
{
    int ret = 0;
    const tsk_size_t num_samples = tsk_treeseq_get_num_samples(ts);
    const tsk_size_t num_nodes = ts->tables->nodes.num_rows;
    const tsk_size_t num_trees = ts->num_trees;
    const double *restrict breakpoints = ts->breakpoints;
    double *weight_means = tsk_malloc(num_weights * sizeof(*weight_means));
    const double *sample_row;
    double *node_row;
    tsk_size_t col;
    tsk_id_t tree_index, node, s;

    /* Inputs are borrowed, not copied. */
    self->ts = ts;
    self->num_weights = num_weights;
    self->weights = weights;
    self->num_windows = num_windows;
    self->windows = windows;
    self->num_focal_nodes = num_focal_nodes;
    self->focal_nodes = focal_nodes;
    self->options = options;
    self->result = result;
    self->num_nodes = num_nodes;
    self->position = windows[0];

    /* Per-node working storage. */
    self->parent = tsk_malloc(num_nodes * sizeof(*self->parent));
    self->x = tsk_calloc(num_nodes, sizeof(*self->x));
    self->v = tsk_calloc(num_nodes * num_weights, sizeof(*self->v));
    self->w = tsk_calloc(num_nodes * num_weights, sizeof(*self->w));
    if (weight_means == NULL || self->parent == NULL || self->x == NULL
        || self->v == NULL || self->w == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    tsk_memset(result, 0, num_windows * num_focal_nodes * num_weights * sizeof(*result));
    /* Every node starts without a parent. */
    tsk_memset(self->parent, TSK_NULL, num_nodes * sizeof(*self->parent));

    for (s = 0; s < (tsk_id_t) num_focal_nodes; s++) {
        node = focal_nodes[s];
        if (node < 0 || (tsk_size_t) node >= num_nodes) {
            ret = tsk_trace_error(TSK_ERR_NODE_OUT_OF_BOUNDS);
            goto out;
        }
    }

    ret = tsk_tree_position_init(&self->tree_pos, ts, 0);
    if (ret != 0) {
        goto out;
    }

    /* seek to the first window: step back one breakpoint if the search
     * landed on a breakpoint beyond windows[0] */
    tree_index = (tsk_id_t) tsk_search_sorted(breakpoints, num_trees + 1, windows[0]);
    if (breakpoints[tree_index] > windows[0]) {
        tree_index--;
    }
    ret = tsk_tree_position_seek_forward(&self->tree_pos, tree_index);
    if (ret != 0) {
        goto out;
    }

    /* centre the input: column means stay at zero when centring is disabled */
    for (col = 0; col < num_weights; col++) {
        weight_means[col] = 0.0;
    }
    if (!(options & TSK_STAT_NONCENTRED)) {
        for (s = 0; s < (tsk_id_t) num_samples; s++) {
            sample_row = GET_2D_ROW(weights, num_weights, s);
            for (col = 0; col < num_weights; col++) {
                weight_means[col] += sample_row[col];
            }
        }
        for (col = 0; col < num_weights; col++) {
            weight_means[col] /= (double) num_samples;
        }
    }

    /* set the initial state: each sample node carries its (centred) weights */
    for (s = 0; s < (tsk_id_t) num_samples; s++) {
        node = ts->samples[s];
        sample_row = GET_2D_ROW(weights, num_weights, s);
        node_row = GET_2D_ROW(self->w, num_weights, node);
        for (col = 0; col < num_weights; col++) {
            node_row[col] = sample_row[col] - weight_means[col];
        }
    }
out:
    tsk_safe_free(weight_means);
    return ret;
}
/*
 * Release everything owned by the calculator: the per-node arrays and the
 * tree position. The struct is then zeroed, so calling this again on the same
 * object is harmless.
 *
 * Always returns 0.
 */
static int
tsk_matvec_calculator_free(tsk_matvec_calculator_t *self)
{
    tsk_tree_position_free(&self->tree_pos);
    tsk_safe_free(self->v);
    tsk_safe_free(self->w);
    tsk_safe_free(self->x);
    tsk_safe_free(self->parent);

    /* Make this safe for multiple free calls */
    memset(self, 0, sizeof(*self));
    return 0;
}
/*
 * Flush the pending contribution of the branch above node u into v[u] and
 * mark u as up to date at `position`.
 *
 * If u has a parent p, every weight column k receives
 *     v[u][k] += (time[p] - time[u]) * (position - x[u]) * w[u][k].
 * In all cases x[u] is then set to `position`.
 */
static inline void
tsk_matvec_calculator_add_z(tsk_id_t u, tsk_id_t p, const double position,
    double *restrict x, const tsk_size_t num_weights, double *restrict w,
    double *restrict v, const double *restrict nodes_time)
{
    double branch_length, elapsed;
    double *restrict dest, *restrict src;
    tsk_size_t col;

    if (p != TSK_NULL) {
        branch_length = nodes_time[p] - nodes_time[u];
        elapsed = position - x[u];
        src = GET_2D_ROW(w, num_weights, u);
        dest = GET_2D_ROW(v, num_weights, u);
        for (col = 0; col < num_weights; col++) {
            dest[col] += branch_length * elapsed * src[col];
        }
    }
    x[u] = position;
}
/*
 * Walk from p to the root, updating state for the insertion (sign = +1) or
 * removal (sign = -1) of the edge (p, c).
 *
 * For every ancestor a on the path (starting at p itself):
 *   1. a's pending branch contribution is flushed into v[a];
 *   2. v[c] -= sign * v[a];
 *   3. w[a] += sign * w[c].
 */
static void
tsk_matvec_calculator_adjust_path_up(
    tsk_matvec_calculator_t *self, tsk_id_t p, tsk_id_t c, double sign)
{
    const tsk_size_t num_weights = self->num_weights;
    const double position = self->position;
    const tsk_id_t *restrict parent = self->parent;
    const double *restrict nodes_time = self->ts->tables->nodes.time;
    double *restrict x = self->x;
    double *restrict w = self->w;
    double *restrict v = self->v;
    double *v_anc, *v_child, *w_anc, *w_child;
    tsk_size_t col;
    tsk_id_t anc;

    for (anc = p; anc != TSK_NULL; anc = parent[anc]) {
        tsk_matvec_calculator_add_z(
            anc, parent[anc], position, x, num_weights, w, v, nodes_time);

        v_anc = GET_2D_ROW(v, num_weights, anc);
        v_child = GET_2D_ROW(v, num_weights, c);
        w_anc = GET_2D_ROW(w, num_weights, anc);
        w_child = GET_2D_ROW(w, num_weights, c);
        /* The v and w updates touch disjoint arrays, so they can share a loop. */
        for (col = 0; col < num_weights; col++) {
            v_child[col] -= sign * v_anc[col];
            w_anc[col] += sign * w_child[col];
        }
    }
}
/*
 * Remove the edge (p, c) from the current tree: flush c's pending branch
 * contribution while it still has a parent, detach c, then undo c's
 * influence on p and all of p's ancestors.
 */
static void
tsk_matvec_calculator_remove_edge(tsk_matvec_calculator_t *self, tsk_id_t p, tsk_id_t c)
{
    tsk_id_t *parent = self->parent;

    tsk_matvec_calculator_add_z(c, parent[c], self->position, self->x,
        self->num_weights, self->w, self->v, self->ts->tables->nodes.time);
    parent[c] = TSK_NULL;
    tsk_matvec_calculator_adjust_path_up(self, p, c, -1);
}
/*
 * Insert the edge (p, c) into the current tree: propagate c's state to p and
 * all of p's ancestors, restart c's branch accounting at the current
 * position, then attach c below p.
 */
static void
tsk_matvec_calculator_insert_edge(tsk_matvec_calculator_t *self, tsk_id_t p, tsk_id_t c)
{
    tsk_matvec_calculator_adjust_path_up(self, p, c, +1);
    self->x[c] = self->position;
    self->parent[c] = p;
}
/*
 * Add the values for the window that has just ended to y, which holds one
 * row of num_weights values per focal node.
 *
 * For each focal node, the v rows of the node and all its ancestors in the
 * current tree are summed into the node's output row, flushing any branch
 * contribution that is not yet up to date at the current position. Unless
 * TSK_STAT_NONCENTRED is set, the per-column mean over focal nodes is then
 * subtracted. Finally v is reset to zero ready for the next window.
 *
 * Returns 0 on success or TSK_ERR_NO_MEMORY.
 */
static int
tsk_matvec_calculator_write_output(tsk_matvec_calculator_t *self, double *restrict y)
{
    int ret = 0;
    const tsk_size_t num_focal = self->num_focal_nodes;
    const tsk_size_t num_weights = self->num_weights;
    const double position = self->position;
    double *column_means = tsk_malloc(num_weights * sizeof(*column_means));
    const tsk_id_t *restrict parent = self->parent;
    const tsk_id_t *restrict focal_nodes = self->focal_nodes;
    const double *restrict nodes_time = self->ts->tables->nodes.time;
    double *restrict x = self->x;
    double *restrict w = self->w;
    double *restrict v = self->v;
    double *node_row, *dest;
    tsk_size_t f, col;
    tsk_id_t node;

    if (column_means == NULL) {
        ret = tsk_trace_error(TSK_ERR_NO_MEMORY);
        goto out;
    }

    /* Accumulate v along the path from each focal node to its root. */
    for (f = 0; f < num_focal; f++) {
        dest = GET_2D_ROW(y, num_weights, f);
        for (node = focal_nodes[f]; node != TSK_NULL; node = parent[node]) {
            if (x[node] != position) {
                tsk_matvec_calculator_add_z(
                    node, parent[node], position, x, num_weights, w, v, nodes_time);
            }
            node_row = GET_2D_ROW(v, num_weights, node);
            for (col = 0; col < num_weights; col++) {
                dest[col] += node_row[col];
            }
        }
    }

    /* Centre the output columns over the focal nodes. */
    if (!(self->options & TSK_STAT_NONCENTRED)) {
        for (col = 0; col < num_weights; col++) {
            column_means[col] = 0.0;
        }
        for (f = 0; f < num_focal; f++) {
            dest = GET_2D_ROW(y, num_weights, f);
            for (col = 0; col < num_weights; col++) {
                column_means[col] += dest[col];
            }
        }
        for (col = 0; col < num_weights; col++) {
            column_means[col] /= (double) num_focal;
        }
        for (f = 0; f < num_focal; f++) {
            dest = GET_2D_ROW(y, num_weights, f);
            for (col = 0; col < num_weights; col++) {
                dest[col] -= column_means[col];
            }
        }
    }

    /* zero out v */
    tsk_memset(self->v, 0, self->num_nodes * num_weights * sizeof(*self->v));
out:
    tsk_safe_free(column_means);
    return ret;
}
/*
 * Run the sweep over all windows and fill self->result.
 *
 * The calculator must have been initialised, so that self->tree_pos sits on
 * the tree containing windows[0]. The sweep works on a by-value copy of the
 * tree position, so self->tree_pos itself is not advanced.
 *
 * The edges of the first tree that cover windows[0] are inserted, then the
 * position is repeatedly advanced to whichever comes first: the end of the
 * current window, the left end of the next edge to insert, or the right end
 * of the next edge to remove. Edge changes are applied whenever the position
 * reaches the left end of the next tree, and a window's output is written
 * whenever the position reaches the window's right end. If
 * TSK_STAT_SPAN_NORMALISE is set, the result is span normalised at the end.
 *
 * Returns 0 on success, or the error from writing a window's output (in
 * which case the result buffer is incomplete and is not normalised).
 */
static int
tsk_matvec_calculator_run(tsk_matvec_calculator_t *self)
{
    int ret = 0;
    tsk_size_t j, k, m;
    tsk_id_t e, p, c;
    const tsk_size_t out_size = self->num_weights * self->num_focal_nodes;
    const tsk_size_t num_edges = self->ts->tables->edges.num_rows;
    const double *restrict edge_right = self->ts->tables->edges.right;
    const double *restrict edge_left = self->ts->tables->edges.left;
    const tsk_id_t *restrict edge_child = self->ts->tables->edges.child;
    const tsk_id_t *restrict edge_parent = self->ts->tables->edges.parent;
    const double *restrict windows = self->windows;
    double *restrict out;
    tsk_tree_position_t tree_pos = self->tree_pos;
    const tsk_id_t *restrict in_order = tree_pos.in.order;
    const tsk_id_t *restrict out_order = tree_pos.out.order;
    bool valid;
    double next_position;

    m = 0;
    self->position = windows[0];

    /* Build the first tree: only edges still covering the start position
     * are inserted. */
    for (j = (tsk_size_t) tree_pos.in.start; j != (tsk_size_t) tree_pos.in.stop; j++) {
        e = in_order[j];
        tsk_bug_assert(edge_left[e] <= self->position);
        if (self->position < edge_right[e]) {
            p = edge_parent[e];
            c = edge_child[e];
            tsk_matvec_calculator_insert_edge(self, p, c);
        }
    }

    valid = tsk_tree_position_next(&tree_pos);
    j = (tsk_size_t) tree_pos.in.start;
    k = (tsk_size_t) tree_pos.out.start;
    while (m < self->num_windows) {
        if (valid && self->position == tree_pos.interval.left) {
            /* Reached the next tree: apply removals, then insertions. */
            for (k = (tsk_size_t) tree_pos.out.start;
                 k != (tsk_size_t) tree_pos.out.stop; k++) {
                e = out_order[k];
                p = edge_parent[e];
                c = edge_child[e];
                tsk_matvec_calculator_remove_edge(self, p, c);
            }
            for (j = (tsk_size_t) tree_pos.in.start; j != (tsk_size_t) tree_pos.in.stop;
                 j++) {
                e = in_order[j];
                p = edge_parent[e];
                c = edge_child[e];
                tsk_matvec_calculator_insert_edge(self, p, c);
            }
            valid = tsk_tree_position_next(&tree_pos);
        }

        /* Next event: window end, next edge insertion or next edge removal. */
        next_position = windows[m + 1];
        if (j < num_edges) {
            next_position = TSK_MIN(next_position, edge_left[in_order[j]]);
        }
        if (k < num_edges) {
            next_position = TSK_MIN(next_position, edge_right[out_order[k]]);
        }
        tsk_bug_assert(self->position < next_position);
        self->position = next_position;

        if (self->position == windows[m + 1]) {
            out = GET_2D_ROW(self->result, out_size, m);
            /* Writing the output allocates scratch memory and can fail;
             * propagate the error instead of discarding it. */
            ret = tsk_matvec_calculator_write_output(self, out);
            if (ret != 0) {
                goto out;
            }
            m += 1;
        }
        if (self->options & TSK_DEBUG) {
            tsk_matvec_calculator_print_state(self, tsk_get_debug_stream());
        }
    }
    if (!!(self->options & TSK_STAT_SPAN_NORMALISE)) {
        span_normalise(self->num_windows, windows, out_size, self->result);
    }
out:
    return ret;
}
/*
 * Genetic relatedness matrix-vector product.
 *
 * Site and node modes are not supported and yield
 * TSK_ERR_UNSUPPORTED_STAT_MODE. The windows are validated, a matvec
 * calculator is initialised and run, and the calculator is always freed
 * before returning. With TSK_DEBUG set, the calculator state is printed to
 * the debug stream after initialisation.
 *
 * Returns 0 on success or a negative error code.
 */
int
tsk_treeseq_genetic_relatedness_vector(const tsk_treeseq_t *self, tsk_size_t num_weights,
    const double *weights, tsk_size_t num_windows, const double *windows,
    tsk_size_t num_focal_nodes, const tsk_id_t *focal_nodes, double *result,
    tsk_flags_t options)
{
    int ret = 0;
    const bool unsupported_mode = (options & (TSK_STAT_SITE | TSK_STAT_NODE)) != 0;
    tsk_matvec_calculator_t calc;

    /* Zero first so that the free at `out` is safe on every path. */
    memset(&calc, 0, sizeof(calc));

    if (unsupported_mode) {
        ret = tsk_trace_error(TSK_ERR_UNSUPPORTED_STAT_MODE);
        goto out;
    }
    ret = tsk_treeseq_check_windows(self, num_windows, windows, 0);
    if (ret != 0) {
        goto out;
    }
    ret = tsk_matvec_calculator_init(&calc, self, num_weights, weights, num_windows,
        windows, num_focal_nodes, focal_nodes, options, result);
    if (ret != 0) {
        goto out;
    }
    if (options & TSK_DEBUG) {
        tsk_matvec_calculator_print_state(&calc, tsk_get_debug_stream());
    }
    ret = tsk_matvec_calculator_run(&calc);
out:
    tsk_matvec_calculator_free(&calc);
    return ret;
}
================================================
FILE: c/tskit/trees.h
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
* Copyright (c) 2015-2018 University of Oxford
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* @file trees.h
* @brief Tskit core tree sequence operations.
*/
#ifndef TSK_TREES_H
#define TSK_TREES_H
#ifdef __cplusplus
extern "C" {
#endif
#include
// clang-format off
/*
* These are both undocumented options for tsk_tree_init
*/
#define TSK_SAMPLE_LISTS (1 << 1)
#define TSK_NO_SAMPLE_COUNTS (1 << 2)
#define TSK_STAT_SITE (1 << 0)
#define TSK_STAT_BRANCH (1 << 1)
#define TSK_STAT_NODE (1 << 2)
/* Leave room for other stat types */
#define TSK_STAT_POLARISED (1 << 10)
#define TSK_STAT_SPAN_NORMALISE (1 << 11)
#define TSK_STAT_ALLOW_TIME_UNCALIBRATED (1 << 12)
#define TSK_STAT_PAIR_NORMALISE (1 << 13)
#define TSK_STAT_NONCENTRED (1 << 14)
/* Options for map_mutations */
#define TSK_MM_FIXED_ANCESTRAL_STATE (1 << 0)
#define TSK_DIR_FORWARD 1
#define TSK_DIR_REVERSE -1
/**
@defgroup API_FLAGS_TS_INIT_GROUP :c:func:`tsk_treeseq_init` specific flags.
@{
*/
/**
If specified edge indexes will be built and stored in the table collection
when the tree sequence is initialised. Indexes are required for a valid
tree sequence, and are not built by default for performance reasons.
*/
#define TSK_TS_INIT_BUILD_INDEXES (1 << 0)
/**
If specified, mutation parents in the table collection will be overwritten
with those computed from the topology when the tree sequence is initialised.
*/
#define TSK_TS_INIT_COMPUTE_MUTATION_PARENTS (1 << 1)
/** @} */
// clang-format on
/**
@brief The tree sequence object.
*/
typedef struct {
/* Number of trees; the breakpoints array holds num_trees + 1 values. */
tsk_size_t num_trees;
/* Number of sample nodes; length of the samples array. */
tsk_size_t num_samples;
/* Node IDs of the samples: samples[j] is the node for sample index j. */
tsk_id_t *samples;
/* Does this tree sequence have time_units == "uncalibrated" */
bool time_uncalibrated;
/* Are all genome coordinates discrete? */
bool discrete_genome;
/* Are all time values discrete? */
bool discrete_time;
/* Min and max time in node table and mutation table */
double min_time;
double max_time;
/* Breakpoints along the sequence, including 0 and L. */
double *breakpoints;
/* If a node is a sample, map to its index in the samples list */
tsk_id_t *sample_index_map;
/* Map individuals to the list of nodes that reference them */
tsk_id_t *individual_nodes_mem;
tsk_id_t **individual_nodes;
tsk_size_t *individual_nodes_length;
/* For each tree, a list of sites on that tree */
tsk_site_t *tree_sites_mem;
tsk_site_t **tree_sites;
tsk_size_t *tree_sites_length;
/* For each site, a list of mutations at that site */
tsk_mutation_t *site_mutations_mem;
tsk_mutation_t **site_mutations;
tsk_size_t *site_mutations_length;
/** @brief The table collection underlying this tree sequence. This table
* collection must be treated as read-only, and any changes to it will
* lead to undefined behaviour. */
tsk_table_collection_t *tables;
} tsk_treeseq_t;
/* Cursor describing where a traversal is within the sequence of trees and
 * which edges must be removed and inserted to reach the current tree. */
typedef struct {
/* NOTE(review): presumably the index of the current tree — confirm. */
tsk_id_t index;
/* Genomic interval of the tree this position refers to; interval.left is the
 * coordinate at which the `out`/`in` edge changes below take effect. */
struct {
double left;
double right;
} interval;
/* Edges to insert: order[j] for j from start up to (but excluding) stop are
 * the edge IDs entering the tree. */
struct {
tsk_id_t start;
tsk_id_t stop;
const tsk_id_t *order;
} in;
/* Edges to remove: order[k] for k from start up to (but excluding) stop are
 * the edge IDs leaving the tree. */
struct {
tsk_id_t start;
tsk_id_t stop;
const tsk_id_t *order;
} out;
/* NOTE(review): bookkeeping indexes for the traversal; exact meaning is not
 * visible from this header — confirm against trees.c. */
tsk_id_t left_current_index;
tsk_id_t right_current_index;
/* NOTE(review): presumably TSK_DIR_FORWARD or TSK_DIR_REVERSE — confirm. */
int direction;
/* The tree sequence being traversed. */
const tsk_treeseq_t *tree_sequence;
} tsk_tree_position_t;
/**
@brief A single tree in a tree sequence.
@rst
A ``tsk_tree_t`` object has two basic functions:
1. Represent the state of a single tree in a tree sequence;
2. Provide methods to transform this state into different trees in the sequence.
The state of a single tree in the tree sequence is represented using the
quintuply linked encoding: please see the
:ref:`data model ` section for details on
how this works. The left-to-right ordering of nodes in this encoding
is arbitrary, and may change depending on the order in which trees are
accessed within the sequence. Please see the
:ref:`sec_c_api_examples_tree_traversals` examples for recommended
usage.
On initialisation, a tree is in the :ref:`null state` and
we must call one of the :ref:`seeking` methods to make
the state of the tree object correspond to a particular tree in the sequence.
Please see the :ref:`sec_c_api_examples_tree_iteration` examples for
recommended usage.
@endrst
*/
typedef struct {
/**
* @brief The parent tree sequence.
*/
const tsk_treeseq_t *tree_sequence;
/**
@brief The ID of the "virtual root" whose children are the roots of the
tree.
*/
tsk_id_t virtual_root;
/**
@brief The parent of node u is parent[u]. Equal to ``TSK_NULL`` if node u is
a root or is not a node in the current tree.
*/
tsk_id_t *parent;
/**
@brief The leftmost child of node u is left_child[u]. Equal to ``TSK_NULL``
if node u is a leaf or is not a node in the current tree.
*/
tsk_id_t *left_child;
/**
@brief The rightmost child of node u is right_child[u]. Equal to ``TSK_NULL``
if node u is a leaf or is not a node in the current tree.
*/
tsk_id_t *right_child;
/**
@brief The sibling to the left of node u is left_sib[u]. Equal to
``TSK_NULL`` if node u has no siblings to its left.
*/
tsk_id_t *left_sib;
/**
@brief The sibling to the right of node u is right_sib[u]. Equal to
``TSK_NULL`` if node u has no siblings to its right.
*/
tsk_id_t *right_sib;
/**
@brief The number of children of node u is num_children[u].
*/
tsk_id_t *num_children;
/**
@brief Array of edge ids where ``edge[u]`` is the edge that encodes the
relationship between the child node ``u`` and its parent. Equal to
``TSK_NULL`` if node ``u`` is a root, virtual root or is not a node in the
current tree.
*/
tsk_id_t *edge;
/**
@brief The total number of edges defining the topology of this tree.
This is equal to the number of tree sequence edges that intersect with
the tree's genomic interval.
*/
tsk_size_t num_edges;
/**
@brief Left and right coordinates of the genomic interval that this
tree covers. The left coordinate is inclusive and the right coordinate
exclusive.
@rst
Example:
.. code-block:: c
tsk_tree_t tree;
int ret;
// initialise etc
ret = tsk_tree_first(&tree);
// Check for error
assert(ret == TSK_TREE_OK);
printf("Coordinates covered by first tree are left=%f, right=%f\n",
tree.interval.left, tree.interval.right);
@endrst
*/
struct {
double left;
double right;
} interval;
/**
@brief The index of this tree in the tree sequence.
@rst
This attribute provides the zero-based index of the tree represented by the
current state of the struct within the parent tree sequence. For example,
immediately after we call ``tsk_tree_first(&tree)``, ``tree.index`` will
be zero, and after we call ``tsk_tree_last(&tree)``, ``tree.index`` will
be the number of trees - 1 (see :c:func:`tsk_treeseq_get_num_trees`)
When the tree is in the null state (immediately after initialisation,
or after, e.g., calling :c:func:`tsk_tree_prev` on the first tree)
the value of the ``index`` is -1.
@endrst
*/
tsk_id_t index;
/* Attributes below are private and should not be used in client code. */
tsk_size_t num_nodes;
tsk_flags_t options;
tsk_size_t root_threshold;
const tsk_id_t *samples;
/*
These are involved in the optional sample tracking; num_samples counts
all samples below a given node, and num_tracked_samples counts those
from a specific subset. By default sample counts are tracked and roots
maintained. If ``TSK_NO_SAMPLE_COUNTS`` is specified, then neither sample
counts nor roots are available.
*/
tsk_size_t *num_samples;
tsk_size_t *num_tracked_samples;
/* These are for the optional sample list tracking. */
tsk_id_t *left_sample;
tsk_id_t *right_sample;
tsk_id_t *next_sample;
/* The sites on this tree */
const tsk_site_t *sites;
tsk_size_t sites_length;
/* Counters needed for next() and prev() transformations. */
int direction;
tsk_id_t left_index;
tsk_id_t right_index;
/* NOTE(review): presumably the edge-difference cursor backing the tree
transformations — confirm against trees.c. */
tsk_tree_position_t tree_pos;
} tsk_tree_t;
/****************************************************************************/
/* Tree sequence.*/
/****************************************************************************/
/**
@defgroup TREESEQ_API_GROUP Tree sequence API
@{
*/
/**
@brief Initialises the tree sequence based on the specified table collection.
@rst
This method will copy the supplied table collection unless :c:macro:`TSK_TAKE_OWNERSHIP`
is specified. The table collection will be checked for integrity and index maps built.
This must be called before any operations are performed on the tree sequence.
See the :ref:`sec_c_api_overview_structure` for details on how objects
are initialised and freed.
If specified, TSK_TAKE_OWNERSHIP takes immediate ownership of the tables, regardless
of error conditions.
**Options**
- :c:macro:`TSK_TS_INIT_BUILD_INDEXES`
- :c:macro:`TSK_TS_INIT_COMPUTE_MUTATION_PARENTS`
- :c:macro:`TSK_TAKE_OWNERSHIP` (applies to the table collection).
@endrst
@param self A pointer to an uninitialised tsk_treeseq_t object.
@param tables A pointer to a tsk_table_collection_t object.
@param options Allocation time options. See above for details.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_init(
tsk_treeseq_t *self, tsk_table_collection_t *tables, tsk_flags_t options);
/**
@brief Load a tree sequence from a file path.
@rst
Loads the data from the specified file into this tree sequence.
The tree sequence is also initialised.
The resources allocated must be freed using
:c:func:`tsk_treeseq_free` even in error conditions.
Works similarly to :c:func:`tsk_table_collection_load`; please see
that function's documentation for details and options.
**Examples**
.. code-block:: c
int ret;
tsk_treeseq_t ts;
ret = tsk_treeseq_load(&ts, "data.trees", 0);
if (ret != 0) {
fprintf(stderr, "Load error:%s\n", tsk_strerror(ret));
exit(EXIT_FAILURE);
}
@endrst
@param self A pointer to an uninitialised tsk_treeseq_t object
@param filename A NULL terminated string containing the filename.
@param options Bitwise options. See above for details.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_load(tsk_treeseq_t *self, const char *filename, tsk_flags_t options);
/**
@brief Load a tree sequence from a stream.
@rst
Loads a tree sequence from the specified file stream. The tree sequence
is also initialised. The resources allocated must be freed using
:c:func:`tsk_treeseq_free` even in error conditions.
Works similarly to :c:func:`tsk_table_collection_loadf`; please
see that function's documentation for details and options.
@endrst
@param self A pointer to an uninitialised tsk_treeseq_t object.
@param file A FILE stream opened in an appropriate mode for reading (e.g.
"r", "r+" or "w+") positioned at the beginning of a tree sequence
definition.
@param options Bitwise options. See above for details.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_loadf(tsk_treeseq_t *self, FILE *file, tsk_flags_t options);
/**
@brief Write a tree sequence to file.
@rst
Writes the data from this tree sequence to the specified file.
If an error occurs the file path is deleted, ensuring that only complete
and well formed files will be written.
@endrst
@param self A pointer to an initialised tsk_treeseq_t object.
@param filename A NULL terminated string containing the filename.
@param options Bitwise options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_dump(
const tsk_treeseq_t *self, const char *filename, tsk_flags_t options);
/**
@brief Write a tree sequence to a stream.
@rst
Writes the data from this tree sequence to the specified FILE stream.
Semantics are identical to :c:func:`tsk_treeseq_dump`.
Please see the :ref:`sec_c_api_examples_file_streaming` section for an example
of how to sequentially dump and load tree sequences from a stream.
@endrst
@param self A pointer to an initialised tsk_treeseq_t object.
@param file A FILE stream opened in an appropriate mode for writing (e.g.
"w", "a", "r+" or "w+").
@param options Bitwise options. Currently unused; should be
set to zero to ensure compatibility with later versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_dumpf(const tsk_treeseq_t *self, FILE *file, tsk_flags_t options);
/**
@brief Copies the state of the table collection underlying this tree sequence
into the specified destination table collection.
@rst
By default the method initialises the specified destination table collection. If the
destination is already initialised, the :c:macro:`TSK_NO_INIT` option should
be supplied to avoid leaking memory.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param tables A pointer to a tsk_table_collection_t object. If the TSK_NO_INIT
option is specified, this must be an initialised table collection. If not, it must be an
uninitialised table collection.
@param options Bitwise option flags; see above for :c:macro:`TSK_NO_INIT`.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_copy_tables(
const tsk_treeseq_t *self, tsk_table_collection_t *tables, tsk_flags_t options);
/**
@brief Free the internal memory for the specified tree sequence.
@param self A pointer to an initialised tsk_treeseq_t object.
@return Always returns 0.
*/
int tsk_treeseq_free(tsk_treeseq_t *self);
/**
@brief Print out the state of this tree sequence to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output should **not** be depended
on and may change arbitrarily between versions.
@param self A pointer to a tsk_treeseq_t object.
@param out The stream to write the summary to.
*/
void tsk_treeseq_print_state(const tsk_treeseq_t *self, FILE *out);
/**
@brief Get the number of nodes.
@rst
Returns the number of nodes in this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of nodes.
*/
tsk_size_t tsk_treeseq_get_num_nodes(const tsk_treeseq_t *self);
/**
@brief Get the number of edges.
@rst
Returns the number of edges in this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of edges.
*/
tsk_size_t tsk_treeseq_get_num_edges(const tsk_treeseq_t *self);
/**
@brief Get the number of migrations.
@rst
Returns the number of migrations in this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of migrations.
*/
tsk_size_t tsk_treeseq_get_num_migrations(const tsk_treeseq_t *self);
/**
@brief Get the number of sites.
@rst
Returns the number of sites in this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of sites.
*/
tsk_size_t tsk_treeseq_get_num_sites(const tsk_treeseq_t *self);
/**
@brief Get the number of mutations.
@rst
Returns the number of mutations in this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of mutations.
*/
tsk_size_t tsk_treeseq_get_num_mutations(const tsk_treeseq_t *self);
/**
@brief Get the number of provenances.
@rst
Returns the number of provenances in this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of provenances.
*/
tsk_size_t tsk_treeseq_get_num_provenances(const tsk_treeseq_t *self);
/**
@brief Get the number of populations.
@rst
Returns the number of populations in this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of populations.
*/
tsk_size_t tsk_treeseq_get_num_populations(const tsk_treeseq_t *self);
/**
@brief Get the number of individuals.
@rst
Returns the number of individuals in this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of individuals.
*/
tsk_size_t tsk_treeseq_get_num_individuals(const tsk_treeseq_t *self);
/**
@brief Get the number of trees.
@rst
Returns the number of trees in this tree sequence.
This is a constant time operation.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of trees.
*/
tsk_size_t tsk_treeseq_get_num_trees(const tsk_treeseq_t *self);
/**
@brief Get the number of samples.
@rst
Returns the number of nodes marked as samples in this tree sequence.
The IDs of these nodes are available via :c:func:`tsk_treeseq_get_samples`.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the number of samples.
*/
tsk_size_t tsk_treeseq_get_num_samples(const tsk_treeseq_t *self);
/**
@brief Get the top-level tree sequence metadata.
@rst
Returns a pointer to the metadata string, which is owned by the tree sequence and
not null-terminated. Use :c:func:`tsk_treeseq_get_metadata_length` to obtain
its length.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns a pointer to the metadata.
*/
const char *tsk_treeseq_get_metadata(const tsk_treeseq_t *self);
/**
@brief Get the length of the top-level tree sequence metadata.
@rst
Returns the length of the metadata string.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the length of the metadata.
*/
tsk_size_t tsk_treeseq_get_metadata_length(const tsk_treeseq_t *self);
/**
@brief Get the top-level tree sequence metadata schema.
@rst
Returns a pointer to the metadata schema string, which is owned by the tree sequence and
not null-terminated. Use :c:func:`tsk_treeseq_get_metadata_schema_length` to obtain
its length.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns a pointer to the metadata schema.
*/
const char *tsk_treeseq_get_metadata_schema(const tsk_treeseq_t *self);
/**
@brief Get the length of the top-level tree sequence metadata schema.
@rst
Returns the length of the metadata schema string.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the length of the metadata schema.
*/
tsk_size_t tsk_treeseq_get_metadata_schema_length(const tsk_treeseq_t *self);
/**
@brief Get the time units string.
@rst
Returns a pointer to the time units string, which is owned by the tree sequence and
not null-terminated. Use :c:func:`tsk_treeseq_get_time_units_length` to obtain
its length.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns a pointer to the time units.
*/
const char *tsk_treeseq_get_time_units(const tsk_treeseq_t *self);
/**
@brief Get the length of the time units string.
@rst
Returns the length of the time units string.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the length of the time units.
*/
tsk_size_t tsk_treeseq_get_time_units_length(const tsk_treeseq_t *self);
/**
@brief Get the file uuid.
@rst
Returns a pointer to the null-terminated file uuid string, which is owned by the tree
sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns a pointer to the null-terminated file uuid.
*/
const char *tsk_treeseq_get_file_uuid(const tsk_treeseq_t *self);
/**
@brief Get the sequence length.
@rst
Returns the sequence length of this tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the sequence length.
*/
double tsk_treeseq_get_sequence_length(const tsk_treeseq_t *self);
/**
@brief Get the breakpoints.
@rst
Returns an array of breakpoint locations. The array is owned by the tree sequence.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the pointer to the breakpoint array.
*/
const double *tsk_treeseq_get_breakpoints(const tsk_treeseq_t *self);
/**
@brief Get the samples.
@rst
Returns an array of ids of sample nodes in this tree sequence,
i.e. nodes that have the :c:macro:`TSK_NODE_IS_SAMPLE` flag set.
The array is owned by the tree sequence and should not be modified or freed.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the pointer to the sample node id array.
*/
const tsk_id_t *tsk_treeseq_get_samples(const tsk_treeseq_t *self);
/**
@brief Get the map of node id to sample index.
@rst
Returns the location of each node in the list of samples or
:c:macro:`TSK_NULL` for nodes that are not samples.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the pointer to the array of sample indexes.
*/
const tsk_id_t *tsk_treeseq_get_sample_index_map(const tsk_treeseq_t *self);
/**
@brief Check if a node is a sample.
@rst
Returns the sample status of a given node id.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param u The id of the node to be checked.
@return Returns true if the node is a sample.
*/
bool tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u);
/**
@brief Get the discrete genome status.
@rst
If all the genomic locations in the tree sequence are discrete integer values
then this flag will be true.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns true if all genomic locations are discrete.
*/
bool tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self);
/**
@brief Get the discrete time status.
@rst
If all times in the tree sequence are discrete integer values
then this flag will be true.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns true if all times are discrete.
*/
bool tsk_treeseq_get_discrete_time(const tsk_treeseq_t *self);
/**
@brief Get the minimum time in the node and mutation tables.
@rst
The times stored in both the node and mutation tables are considered.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the min time of all nodes and mutations.
*/
double tsk_treeseq_get_min_time(const tsk_treeseq_t *self);
/**
@brief Get the maximum time in the node and mutation tables.
@rst
The times stored in both the node and mutation tables are considered.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@return Returns the max time of all nodes and mutations.
*/
double tsk_treeseq_get_max_time(const tsk_treeseq_t *self);
/**
@brief Get a node by its index.
@rst
Copies a node from this tree sequence to the specified destination.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param index The node index to copy.
@param node A pointer to a tsk_node_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_get_node(const tsk_treeseq_t *self, tsk_id_t index, tsk_node_t *node);
/**
@brief Get an edge by its index.
@rst
Copies an edge from this tree sequence to the specified destination.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param index The edge index to copy.
@param edge A pointer to a tsk_edge_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_get_edge(const tsk_treeseq_t *self, tsk_id_t index, tsk_edge_t *edge);
/**
@brief Get a migration by its index.
@rst
Copies a migration from this tree sequence to the specified destination.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param index The migration index to copy.
@param migration A pointer to a tsk_migration_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_get_migration(
const tsk_treeseq_t *self, tsk_id_t index, tsk_migration_t *migration);
/**
@brief Get a site by its index.
@rst
Copies a site from this tree sequence to the specified destination.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param index The site index to copy.
@param site A pointer to a tsk_site_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_get_site(const tsk_treeseq_t *self, tsk_id_t index, tsk_site_t *site);
/**
@brief Get a mutation by its index.
@rst
Copies a mutation from this tree sequence to the specified destination.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param index The mutation index to copy.
@param mutation A pointer to a tsk_mutation_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_get_mutation(
const tsk_treeseq_t *self, tsk_id_t index, tsk_mutation_t *mutation);
/**
@brief Get a provenance by its index.
@rst
Copies a provenance from this tree sequence to the specified destination.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param index The provenance index to copy.
@param provenance A pointer to a tsk_provenance_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_get_provenance(
const tsk_treeseq_t *self, tsk_id_t index, tsk_provenance_t *provenance);
/**
@brief Get a population by its index.
@rst
Copies a population from this tree sequence to the specified destination.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param index The population index to copy.
@param population A pointer to a tsk_population_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_get_population(
const tsk_treeseq_t *self, tsk_id_t index, tsk_population_t *population);
/**
@brief Get an individual by its index.
@rst
Copies an individual from this tree sequence to the specified destination.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param index The individual index to copy.
@param individual A pointer to a tsk_individual_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_get_individual(
const tsk_treeseq_t *self, tsk_id_t index, tsk_individual_t *individual);
/**
@brief Create a simplified instance of this tree sequence.
@rst
Copies this tree sequence to the specified destination and performs simplification.
The destination tree sequence should be uninitialised.
Simplification transforms the tables to remove redundancy and canonicalise
tree sequence data. See the :ref:`simplification <sec_simplification>` tutorial for
more details.
For full details and flags see :c:func:`tsk_table_collection_simplify` which performs
the same operation in place.
@endrst
@param self A pointer to a tsk_treeseq_t object. This is the input tree sequence;
it is not modified.
@param samples Either NULL or an array of num_samples distinct and valid node IDs.
If non-null the nodes in this array will be marked as samples in the output.
If NULL, the num_samples parameter is ignored and the samples in the output
will be the same as the samples in the input. This is equivalent to populating
the samples array with all of the sample nodes in the input in increasing
order of ID.
@param num_samples The number of node IDs in the input samples array. Ignored
if the samples array is NULL.
@param options Simplify options; see :c:func:`tsk_table_collection_simplify` for
the available bitwise flags. For the default behaviour, a value of 0 should be
provided.
@param output A pointer to an uninitialised tsk_treeseq_t object.
@param node_map If not NULL, this array will be filled to define the mapping
between node IDs before and after simplification.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_simplify(const tsk_treeseq_t *self, const tsk_id_t *samples,
tsk_size_t num_samples, tsk_flags_t options, tsk_treeseq_t *output,
tsk_id_t *node_map);
/**
@brief Extends haplotypes
Returns a new tree sequence in which the span covered by ancestral nodes
is "extended" to regions of the genome according to the following rule:
If an ancestral segment corresponding to node `n` has ancestor `p` and
descendant `c` on some portion of the genome, and on an adjacent segment of
genome `p` is still an ancestor of `c`, then `n` is inserted into the
path from `p` to `c`. For instance, if `p` is the parent of `n` and `n`
is the parent of `c`, then the span of the edges from `p` to `n` and
`n` to `c` are extended, and the span of the edge from `p` to `c` is
reduced. However, any edges whose child node is a sample are not
modified. See Fritze et al. (2025):
https://doi.org/10.1093/genetics/iyaf198 for more details.
The method works by iterating over the genome to look for edges that can
be extended in this way; the maximum number of such iterations is
controlled by ``max_iter``.
The `node` of certain mutations may also be remapped; to do this
unambiguously we need to know mutation times. If mutation times are unknown,
use `tsk_table_collection_compute_mutation_times` first.
The method will not affect any tables except the edge table, or the node
column in the mutation table.
@rst
**Options**: None currently defined.
@endrst
@param self A pointer to a tsk_treeseq_t object.
@param max_iter The maximum number of iterations over the tree sequence.
@param options Bitwise option flags. Currently unused.
@param output A pointer to an uninitialised tsk_treeseq_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_extend_haplotypes(
const tsk_treeseq_t *self, int max_iter, tsk_flags_t options, tsk_treeseq_t *output);
/** @} */
/* Not documented in this header. `self` is const, and a separate `output`
 * tree sequence is supplied alongside both a `flags` and an `options` word.
 * NOTE(review): presumably `flags`, `population` and `metadata` (of
 * `metadata_length` bytes) describe nodes created at `time`, while `options`
 * controls the operation itself -- confirm against the implementation. */
int tsk_treeseq_split_edges(const tsk_treeseq_t *self, double time, tsk_flags_t flags,
tsk_id_t population, const char *metadata, tsk_size_t metadata_length,
tsk_flags_t options, tsk_treeseq_t *output);
/* NOTE(review): presumably returns true when this tree sequence carries a
 * reference sequence -- inferred from the name only; confirm. */
bool tsk_treeseq_has_reference_sequence(const tsk_treeseq_t *self);
/**
@brief Decode full-length alignments for specified nodes over an interval.
@rst
Fills a caller-provided buffer with per-node sequence alignments for the interval
``[left, right)``. Each row is exactly ``L = right - left`` bytes with no trailing
terminator, and rows are tightly packed in row-major order in the output buffer.
The output at non-site positions comes from the provided ``ref_seq`` slice
(``ref_seq[left:right]``); per-site alleles are overlaid onto this for each node.
If the :c:macro:`TSK_ISOLATED_NOT_MISSING` option is
not set, nodes that are isolated (no parent and no children) within a tree
interval in ``[left, right)`` are rendered as the ``missing_data_character`` for
that interval. At site positions, decoded genotypes override any previous value;
if a genotype is missing (``TSK_MISSING_DATA``), the ``missing_data_character`` is
overlaid onto the reference base.
Requirements and validation:
- The tree sequence must have a discrete genome.
- ``left`` and ``right`` must be integers with ``0 <= left < right <= sequence_length``.
- ``ref_seq`` must be non-NULL and ``ref_seq_length == sequence_length``.
- Each allele at a site must be exactly one byte; alleles equal to
``missing_data_character`` are not permitted.
@endrst
@param self A pointer to a :c:type:`tsk_treeseq_t` object.
@param ref_seq Pointer to a reference sequence buffer of length ``ref_seq_length``.
@param ref_seq_length The total length of ``ref_seq``; must equal the tree sequence
length.
@param nodes Array of node IDs to decode (may include non-samples).
@param num_nodes The number of nodes in ``nodes`` and rows in the output.
@param left The inclusive-left genomic coordinate of the output interval.
@param right The exclusive-right genomic coordinate of the output interval.
@param missing_data_character The byte to use for missing data.
@param alignments_out Output buffer of at least ``num_nodes * (right - left)`` bytes.
Because rows are tightly packed, the row for ``nodes[j]`` starts at byte offset
``j * (right - left)``.
@param options Bitwise option flags; supports :c:macro:`TSK_ISOLATED_NOT_MISSING`.
@return Return 0 on success or a negative value on failure.
*/
int tsk_treeseq_decode_alignments(const tsk_treeseq_t *self, const char *ref_seq,
tsk_size_t ref_seq_length, const tsk_id_t *nodes, tsk_size_t num_nodes, double left,
double right, char missing_data_character, char *alignments_out,
tsk_flags_t options);
/* Not documented in this header. Both functions take a writable `output`
 * array (tsk_id_t for populations, double for times).
 * NOTE(review): presumably `output` must hold one entry per individual (see
 * tsk_treeseq_get_num_individuals) and the int return is 0 on success or a
 * negative error code -- confirm against the implementation. */
int tsk_treeseq_get_individuals_population(const tsk_treeseq_t *self, tsk_id_t *output);
int tsk_treeseq_get_individuals_time(const tsk_treeseq_t *self, double *output);
/* NOTE(review): presumably computes the Kendall-Colijn distance between `self`
 * and `other` using weighting `lambda_` and stores it in `*result` -- inferred
 * from the name and signature only; confirm. */
int tsk_treeseq_kc_distance(const tsk_treeseq_t *self, const tsk_treeseq_t *other,
double lambda_, double *result);
/* Not documented in this header. `reference_sets` is a pointer to pointers to
 * node IDs, with a parallel `reference_set_size` array.
 * NOTE(review): presumably `reference_sets[j]` holds `reference_set_size[j]`
 * IDs for j < num_reference_sets, `focal` holds `num_focal` node IDs, and
 * `ret_array` is caller-allocated -- confirm the required output size. */
int tsk_treeseq_genealogical_nearest_neighbours(const tsk_treeseq_t *self,
const tsk_id_t *focal, tsk_size_t num_focal, const tsk_id_t *const *reference_sets,
const tsk_size_t *reference_set_size, tsk_size_t num_reference_sets,
tsk_flags_t options, double *ret_array);
/* Not documented in this header. Takes its reference sets in the same
 * pointer-to-pointers form as tsk_treeseq_genealogical_nearest_neighbours.
 * NOTE(review): `ret_array` is presumably caller-allocated -- confirm the
 * required output size. */
int tsk_treeseq_mean_descendants(const tsk_treeseq_t *self,
const tsk_id_t *const *reference_sets, const tsk_size_t *reference_set_size,
tsk_size_t num_reference_sets, tsk_flags_t options, double *ret_array);
/* Callback type taken by tsk_treeseq_general_stat (parameter `f`) and by
 * tsk_treeseq_two_locus_count_stat.
 * NOTE(review): presumably maps `state` (state_dim values) to `result`
 * (result_dim values), receives `f_params` as `params`, and returns 0 on
 * success -- confirm. */
typedef int general_stat_func_t(tsk_size_t state_dim, const double *state,
tsk_size_t result_dim, double *result, void *params);
/* NOTE(review): `K`, `W` and `M` are not described here; presumably `W` is a
 * weight matrix with `K` columns and `M` is the summary function's output
 * dimension -- confirm. */
int tsk_treeseq_general_stat(const tsk_treeseq_t *self, tsk_size_t K, const double *W,
tsk_size_t M, general_stat_func_t *f, void *f_params, tsk_size_t num_windows,
const double *windows, tsk_flags_t options, double *result);
/* Callback type taken by tsk_treeseq_two_locus_count_stat (parameter `norm_f`).
 * NOTE(review): presumably computes a normalisation for a pair of alleles from
 * the haplotype weights -- confirm. */
typedef int norm_func_t(tsk_size_t result_dim, const double *hap_weights, tsk_size_t n_a,
tsk_size_t n_b, double *result, void *params);
/* General two-locus entry point taking a summary callback `f` and a
 * normalisation callback `norm_f`. Rows and columns are each described by a
 * site-ID array and a position array.
 * NOTE(review): presumably only one of `*_sites` / `*_positions` is used per
 * axis, depending on `options` -- confirm. */
int tsk_treeseq_two_locus_count_stat(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t result_dim, const tsk_id_t *set_indexes,
general_stat_func_t *f, norm_func_t *norm_f, tsk_size_t out_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t out_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
/* One way weighted stats */
/* Function type matching the signature of tsk_treeseq_trait_covariance and
 * tsk_treeseq_trait_correlation below. */
typedef int one_way_weighted_method(const tsk_treeseq_t *self, tsk_size_t num_weights,
const double *weights, tsk_size_t num_windows, const double *windows,
tsk_flags_t options, double *result);
int tsk_treeseq_trait_covariance(const tsk_treeseq_t *self, tsk_size_t num_weights,
const double *weights, tsk_size_t num_windows, const double *windows,
tsk_flags_t options, double *result);
int tsk_treeseq_trait_correlation(const tsk_treeseq_t *self, tsk_size_t num_weights,
const double *weights, tsk_size_t num_windows, const double *windows,
tsk_flags_t options, double *result);
/* One way weighted stats with covariates */
/* Function type matching the signature of tsk_treeseq_trait_linear_model
 * below: the one-way weighted signature plus a `covariates` array. */
typedef int one_way_covariates_method(const tsk_treeseq_t *self, tsk_size_t num_weights,
const double *weights, tsk_size_t num_covariates, const double *covariates,
tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);
int tsk_treeseq_trait_linear_model(const tsk_treeseq_t *self, tsk_size_t num_weights,
const double *weights, tsk_size_t num_covariates, const double *covariates,
tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);
/* Two way weighted stats */
/* Function type matching the signature of
 * tsk_treeseq_genetic_relatedness_weighted below. It takes index tuples, not
 * covariates. Note the trailing parameter order: `result` comes BEFORE
 * `options` here, unlike the one-way and sample-set signatures in this
 * header. */
typedef int two_way_weighted_method(const tsk_treeseq_t *self, tsk_size_t num_weights,
const double *weights, tsk_size_t num_index_tuples, const tsk_id_t *index_tuples,
tsk_size_t num_windows, const double *windows, double *result, tsk_flags_t options);
int tsk_treeseq_genetic_relatedness_weighted(const tsk_treeseq_t *self,
tsk_size_t num_weights, const double *weights, tsk_size_t num_index_tuples,
const tsk_id_t *index_tuples, tsk_size_t num_windows, const double *windows,
double *result, tsk_flags_t options);
/* One way weighted stats with vector output */
/* Function type matching the signature of
 * tsk_treeseq_genetic_relatedness_vector below. As with
 * two_way_weighted_method, `result` comes BEFORE `options`. */
typedef int weighted_vector_method(const tsk_treeseq_t *self, tsk_size_t num_weights,
const double *weights, tsk_size_t num_windows, const double *windows,
tsk_size_t num_focal_nodes, const tsk_id_t *focal_nodes, double *result,
tsk_flags_t options);
int tsk_treeseq_genetic_relatedness_vector(const tsk_treeseq_t *self,
tsk_size_t num_weights, const double *weights, tsk_size_t num_windows,
const double *windows, tsk_size_t num_focal_nodes, const tsk_id_t *focal_nodes,
double *result, tsk_flags_t options);
/* One way sample set stats */
/* Function type matching the signature of tsk_treeseq_diversity,
 * tsk_treeseq_segregating_sites and tsk_treeseq_Y1 below. */
typedef int one_way_sample_stat_method(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,
tsk_flags_t options, double *result);
int tsk_treeseq_diversity(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);
int tsk_treeseq_segregating_sites(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);
int tsk_treeseq_Y1(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);
/* Does NOT match one_way_sample_stat_method: it additionally takes
 * `num_time_windows` and `time_windows`. */
int tsk_treeseq_allele_frequency_spectrum(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_windows, const double *windows,
tsk_size_t num_time_windows, const double *time_windows, tsk_flags_t options,
double *result);
/* Function type for windowed sample-set statistics that also take an array of
 * set indexes. The two-, three- and four-way statistics declared further down
 * (e.g. tsk_treeseq_divergence, tsk_treeseq_f3, tsk_treeseq_f4) have this
 * parameter list, naming the index arguments `num_index_tuples` and
 * `index_tuples`. */
typedef int general_sample_stat_method(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_indexes, const tsk_id_t *indexes,
tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);
/* Function type for the two-locus statistics declared below
 * (tsk_treeseq_D through tsk_treeseq_pi2_unbiased). Rows and columns are each
 * described by a site-ID array and a position array. */
typedef int two_locus_count_stat_method(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_rows, const tsk_id_t *row_sites,
const double *row_positions, tsk_size_t num_cols, const tsk_id_t *col_sites,
const double *col_positions, tsk_flags_t options, double *result);
/* Two-locus statistics. Every function in this run has exactly the
 * two_locus_count_stat_method signature declared above.
 * NOTE(review): the statistic each one computes is not described in this
 * header; presumably the names follow the usual linkage-disequilibrium
 * notation (D, D^2, r^2, D', r, Dz, pi2 and unbiased variants) -- confirm. */
int tsk_treeseq_D(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_D2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_r2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_D_prime(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_r(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_Dz(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_pi2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_D2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_Dz_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_pi2_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
/* Function type for two-locus statistics computed between sample sets: the
 * two_locus_count_stat_method parameter list with `num_index_tuples` and
 * `index_tuples` inserted after the sample sets. tsk_treeseq_D2_ij,
 * tsk_treeseq_D2_ij_unbiased and tsk_treeseq_r2_ij (declared further down)
 * have this signature. */
typedef int k_way_two_locus_count_stat_method(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_index_tuples,
const tsk_id_t *index_tuples, tsk_size_t num_rows, const tsk_id_t *row_sites,
const double *row_positions, tsk_size_t num_cols, const tsk_id_t *col_sites,
const double *col_positions, tsk_flags_t options, double *result);
/* Two way sample set stats */
/* The four windowed statistics below share the general_sample_stat_method
 * parameter list (sample sets, index tuples, windows, options, result).
 * NOTE(review): presumably each index tuple holds two sample-set indexes --
 * confirm. */
int tsk_treeseq_divergence(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
const double *windows, tsk_flags_t options, double *result);
int tsk_treeseq_Y2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
const double *windows, tsk_flags_t options, double *result);
int tsk_treeseq_f2(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
const double *windows, tsk_flags_t options, double *result);
int tsk_treeseq_genetic_relatedness(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_index_tuples,
const tsk_id_t *index_tuples, tsk_size_t num_windows, const double *windows,
tsk_flags_t options, double *result);
/* The three two-locus statistics below take row/column site arguments instead
 * of windows and have the k_way_two_locus_count_stat_method signature. */
int tsk_treeseq_D2_ij(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_D2_ij_unbiased(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
int tsk_treeseq_r2_ij(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_rows,
const tsk_id_t *row_sites, const double *row_positions, tsk_size_t num_cols,
const tsk_id_t *col_sites, const double *col_positions, tsk_flags_t options,
double *result);
/* Three way sample set stats */
/* Both functions share the general_sample_stat_method parameter list.
 * NOTE(review): presumably each index tuple holds three sample-set indexes --
 * confirm. */
int tsk_treeseq_Y3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
const double *windows, tsk_flags_t options, double *result);
int tsk_treeseq_f3(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
const double *windows, tsk_flags_t options, double *result);
/* Four way sample set stats */
/* Shares the general_sample_stat_method parameter list.
 * NOTE(review): presumably each index tuple holds four sample-set indexes --
 * confirm. */
int tsk_treeseq_f4(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_index_tuples, const tsk_id_t *index_tuples, tsk_size_t num_windows,
const double *windows, tsk_flags_t options, double *result);
/* Not a four-way statistic despite following the heading above: it takes no
 * index tuples and has the one_way_sample_stat_method signature.
 * NOTE(review): the shape of `result` is not described here -- confirm
 * against the implementation before sizing the buffer. */
int tsk_treeseq_divergence_matrix(const tsk_treeseq_t *self, tsk_size_t num_sample_sets,
const tsk_size_t *sample_set_sizes, const tsk_id_t *sample_sets,
tsk_size_t num_windows, const double *windows, tsk_flags_t options, double *result);
/* Coalescence rates */
typedef int pair_coalescence_stat_func_t(tsk_size_t input_dim, const double *atoms,
const double *weights, tsk_size_t result_dim, double *result, void *params);
/* Pair coalescence statistic with a caller-supplied summary function
 * (``summary_func``, called with ``summary_func_args``).
 * NOTE(review): presumably ``node_bin_map`` assigns each node to one of
 * ``num_bins`` bins and ``summary_func_dim`` is the per-window output size of
 * the summary function; confirm against the implementation. */
int tsk_treeseq_pair_coalescence_stat(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,
tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,
const tsk_id_t *node_bin_map, pair_coalescence_stat_func_t *summary_func,
tsk_size_t summary_func_dim, void *summary_func_args, tsk_flags_t options,
double *result);
/* Pair coalescence counts. Takes the same sample set, set index, window and
 * node-bin arguments as tsk_treeseq_pair_coalescence_stat, but no summary
 * function.
 * NOTE(review): output layout of ``result`` is not visible here; confirm. */
int tsk_treeseq_pair_coalescence_counts(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,
tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,
const tsk_id_t *node_bin_map, tsk_flags_t options, double *result);
/* Pair coalescence quantiles for the ``num_quantiles`` values in ``quantiles``.
 * NOTE(review): ``quantiles`` is the only array argument besides ``result``
 * that is not const-qualified; check whether the implementation writes to it
 * or whether it could be declared const. */
int tsk_treeseq_pair_coalescence_quantiles(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,
tsk_size_t num_windows, const double *windows, tsk_size_t num_bins,
const tsk_id_t *node_bin_map, tsk_size_t num_quantiles, double *quantiles,
tsk_flags_t options, double *result);
/* Pair coalescence rates within ``num_time_windows`` time windows.
 * NOTE(review): presumably ``node_time_window`` assigns each node to a time
 * window and ``time_windows`` holds the window breakpoints; confirm.
 * NOTE(review): ``time_windows`` is not const-qualified, unlike ``windows``;
 * check whether the implementation writes to it. */
int tsk_treeseq_pair_coalescence_rates(const tsk_treeseq_t *self,
tsk_size_t num_sample_sets, const tsk_size_t *sample_set_sizes,
const tsk_id_t *sample_sets, tsk_size_t num_set_indexes, const tsk_id_t *set_indexes,
tsk_size_t num_windows, const double *windows, tsk_size_t num_time_windows,
const tsk_id_t *node_time_window, double *time_windows, tsk_flags_t options,
double *result);
/****************************************************************************/
/* Tree */
/****************************************************************************/
/**
@defgroup TREE_API_LIFECYCLE_GROUP Tree lifecycle
@{
*/
/**
@brief Initialises the tree by allocating internal memory and associating
it with the specified tree sequence.
@rst
This must be called before any operations are performed on the tree.
The specified tree sequence object must be initialised, and must be
valid for the full lifetime of this tree.
See the :ref:`sec_c_api_overview_structure` section for details on how
objects are initialised and freed.
The ``options`` parameter is provided to support future expansions
of the API. A number of undocumented internal features are controlled
via this parameter, and it **must** be set to 0 to ensure that operations
work as expected and for compatibility with future versions of tskit.
@endrst
@param self A pointer to an uninitialised tsk_tree_t object.
@param tree_sequence A pointer to an initialised tsk_treeseq_t object.
@param options Allocation time options. Must be 0, or behaviour is undefined.
@return Return 0 on success or a negative value on failure.
*/
int tsk_tree_init(
tsk_tree_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options);
/**
@brief Free the internal memory for the specified tree.
@param self A pointer to an initialised tsk_tree_t object.
@return Always returns 0, so the result does not need to be checked for errors.
*/
int tsk_tree_free(tsk_tree_t *self);
/**
@brief Copies the state of this tree into the specified destination.
@rst
By default (``options`` = 0) the method initialises the specified destination
tree by calling :c:func:`tsk_tree_init`. If the destination is already
initialised, the :c:macro:`TSK_NO_INIT` option should be supplied to avoid
leaking memory. If :c:macro:`TSK_NO_INIT` is supplied and the tree sequence associated
with the ``dest`` tree is not equal to the tree sequence associated
with ``self``, an error is returned.
The destination tree will keep a reference to the tree sequence object
associated with the source tree, and this tree sequence must be
valid for the full lifetime of the destination tree.
**Options**
- :c:macro:`TSK_NO_INIT`
If :c:macro:`TSK_NO_INIT` is not specified, options for :c:func:`tsk_tree_init`
can be provided and will be passed on.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@param dest A pointer to a tsk_tree_t object. If the TSK_NO_INIT option
is specified, this must be an initialised tree. If not, it must
be an uninitialised tree.
@param options Copy and allocation time options. See the notes above for details.
@return Return 0 on success or a negative value on failure.
*/
int tsk_tree_copy(const tsk_tree_t *self, tsk_tree_t *dest, tsk_flags_t options);
/** @} */
/**
@defgroup TREE_API_SEEKING_GROUP Seeking along the sequence
@{
*/
/** @brief Option to seek by skipping to the target tree, adding and removing as few
edges as possible. If not specified, a linear time algorithm is used instead.
Pass this flag in the ``options`` argument of tsk_tree_seek().
@ingroup TREE_API_SEEKING_GROUP
*/
#define TSK_SEEK_SKIP (1 << 0)
/**
@brief Seek to the first tree in the sequence.
@rst
Set the state of this tree to reflect the first tree in the parent
tree sequence.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@return Return TSK_TREE_OK on success; or a negative value if an error occurs.
*/
int tsk_tree_first(tsk_tree_t *self);
/**
@brief Seek to the last tree in the sequence.
@rst
Set the state of this tree to reflect the last tree in the parent
tree sequence.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@return Return TSK_TREE_OK on success; or a negative value if an error occurs.
*/
int tsk_tree_last(tsk_tree_t *self);
/**
@brief Seek to the next tree in the sequence.
@rst
Set the state of this tree to reflect the next tree in the parent
tree sequence. If the index of the current tree is ``j``,
then after this operation the index will be ``j + 1``.
Calling :c:func:`tsk_tree_next` on a tree in the
:ref:`null state` is equivalent to calling
:c:func:`tsk_tree_first`.
Calling :c:func:`tsk_tree_next` on the last tree in the
sequence will transform it into the
:ref:`null state` (equivalent to
calling :c:func:`tsk_tree_clear`).
Please see the :ref:`sec_c_api_examples_tree_iteration` examples for
recommended usage.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@return Return TSK_TREE_OK on successfully transforming to a
non-null tree; 0 on successfully transforming into the null
tree; or a negative value if an error occurs.
*/
int tsk_tree_next(tsk_tree_t *self);
/**
@brief Seek to the previous tree in the sequence.
@rst
Set the state of this tree to reflect the previous tree in the parent
tree sequence. If the index of the current tree is ``j``,
then after this operation the index will be ``j - 1``.
Calling :c:func:`tsk_tree_prev` on a tree in the
:ref:`null state` is equivalent to calling
:c:func:`tsk_tree_last`.
Calling :c:func:`tsk_tree_prev` on the first tree in the
sequence will transform it into the
:ref:`null state` (equivalent to
calling :c:func:`tsk_tree_clear`).
Please see the :ref:`sec_c_api_examples_tree_iteration` examples for
recommended usage.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@return Return TSK_TREE_OK on successfully transforming to a
non-null tree; 0 on successfully transforming into the null
tree; or a negative value if an error occurs.
*/
int tsk_tree_prev(tsk_tree_t *self);
/**
@brief Set the tree to the null state.
@rst
Transform this tree into the :ref:`null state`.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@return Return 0 on success or a negative value on failure.
*/
int tsk_tree_clear(tsk_tree_t *self);
/**
@brief Seek to a particular position on the genome.
@rst
Set the state of this tree to reflect the tree in the parent
tree sequence covering the specified ``position``. That is, on success
we will have ``tree.interval.left <= position`` and
``position < tree.interval.right``.
Seeking to a position currently covered by the tree is
a constant time operation.
Seeking to a position from a non-null tree uses a linear time
algorithm by default, unless the option :c:macro:`TSK_SEEK_SKIP`
is specified. In this case, a faster algorithm is employed which skips
to the target tree by removing and adding the minimal number of edges
possible. However, this approach does not guarantee that edges are
inserted and removed in time-sorted order.
.. warning:: Using the :c:macro:`TSK_SEEK_SKIP` option
may lead to edges not being inserted or removed in time-sorted order.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@param position The position in genome coordinates.
@param options Seek options. See the notes above for details.
@return Return 0 on success or a negative value on failure.
*/
int tsk_tree_seek(tsk_tree_t *self, double position, tsk_flags_t options);
/**
@brief Seek to a specific tree in a tree sequence.
@rst
Set the state of this tree to reflect the tree in the parent
tree sequence whose index is ``tree``, where ``0 <= tree < num_trees``.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@param tree The target tree index.
@param options Seek options. Currently unused. Set to 0 for compatibility
with future versions of tskit.
@return Return 0 on success or a negative value on failure.
*/
int tsk_tree_seek_index(tsk_tree_t *self, tsk_id_t tree, tsk_flags_t options);
/** @} */
/**
@defgroup TREE_API_TREE_QUERY_GROUP Tree Queries
@{
*/
/**
@brief Returns the number of roots in this tree.
@rst
See the :ref:`sec_data_model_tree_roots` section for more information
on how the roots of a tree are defined.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@return Returns the number of roots in this tree.
*/
tsk_size_t tsk_tree_get_num_roots(const tsk_tree_t *self);
/**
@brief Returns the leftmost root in this tree.
@rst
See the :ref:`sec_data_model_tree_roots` section for more information
on how the roots of a tree are defined.
This function is equivalent to ``tree.left_child[tree.virtual_root]``.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@return Returns the leftmost root in this tree.
*/
tsk_id_t tsk_tree_get_left_root(const tsk_tree_t *self);
/**
@brief Returns the rightmost root in this tree.
@rst
See the :ref:`sec_data_model_tree_roots` section for more information
on how the roots of a tree are defined.
This function is equivalent to ``tree.right_child[tree.virtual_root]``.
@endrst
@param self A pointer to an initialised tsk_tree_t object.
@return Returns the rightmost root in this tree.
*/
tsk_id_t tsk_tree_get_right_root(const tsk_tree_t *self);
/**
@brief Get the list of sites for this tree.
@rst
Gets the list of :c:data:`tsk_site_t` objects in the parent tree sequence
whose position lies within this tree's genomic interval.
The memory pointed to by the ``sites`` parameter is managed by the
``tsk_tree_t`` object and must not be altered or freed by client code.
.. code-block:: c
static void
print_sites(const tsk_tree_t *tree)
{
int ret;
tsk_size_t j, num_sites;
const tsk_site_t *sites;
ret = tsk_tree_get_sites(tree, &sites, &num_sites);
check_tsk_error(ret);
for (j = 0; j < num_sites; j++) {
printf("position = %f\n", sites[j].position);
}
}
This is a constant time operation.
@endrst
@param self A pointer to a tsk_tree_t object.
@param sites The destination pointer for the list of sites.
@param sites_length A pointer to a tsk_size_t value in which the number
of sites is stored.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_get_sites(
const tsk_tree_t *self, const tsk_site_t **sites, tsk_size_t *sites_length);
/**
@brief Return an upper bound on the number of nodes reachable
from the roots of this tree.
@rst
This function provides an upper bound on the number of nodes that
can be reached in tree traversals, and is intended to be used
for memory allocation purposes. If ``num_nodes`` is the number
of nodes visited in a tree traversal from the
:ref:`virtual root`
(e.g., ``tsk_tree_preorder_from(tree, tree->virtual_root, nodes,
&num_nodes)``), the bound ``N`` returned here is guaranteed to
be greater than or equal to ``num_nodes``.
.. warning:: The precise value returned is not defined and should
not be depended on, as it may change from version to version.
@endrst
@param self A pointer to a tsk_tree_t object.
@return An upper bound on the number of nodes reachable from the roots
of this tree, or zero if this tree has not been initialised.
*/
tsk_size_t tsk_tree_get_size_bound(const tsk_tree_t *self);
/**
@brief Print out the state of this tree to the specified stream.
This method is intended for debugging purposes and should not be used
in production code. The format of the output must **not** be relied
upon and may change arbitrarily between versions.
@param self A pointer to a tsk_tree_t object.
@param out The stream to write the summary to.
*/
void tsk_tree_print_state(const tsk_tree_t *self, FILE *out);
/** @} */
/**
@defgroup TREE_API_NODE_QUERY_GROUP Node Queries
@{
*/
/**
@brief Returns the parent of the specified node.
@rst
Equivalent to ``tree.parent[u]`` with bounds checking for the node u.
Performance-sensitive code which can guarantee that the node u is
valid should use the direct array access in preference to this method.
@endrst
@param self A pointer to a tsk_tree_t object.
@param u The tree node.
@param parent A tsk_id_t pointer to store the returned parent node.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_get_parent(const tsk_tree_t *self, tsk_id_t u, tsk_id_t *parent);
/**
@brief Returns the time of the specified node.
@rst
Equivalent to ``tables->nodes.time[u]`` with bounds checking for the node u.
Performance-sensitive code which can guarantee that the node u is
valid should use the direct array access in preference to this method,
for example:
.. code-block:: c
static void
print_times(const tsk_tree_t *tree)
{
int ret;
tsk_size_t num_nodes, j;
const double *node_time = tree->tree_sequence->tables->nodes.time;
tsk_id_t *nodes = malloc(tsk_tree_get_size_bound(tree) * sizeof(*nodes));
if (nodes == NULL) {
errx(EXIT_FAILURE, "Out of memory");
}
ret = tsk_tree_preorder(tree, nodes, &num_nodes);
check_tsk_error(ret);
for (j = 0; j < num_nodes; j++) {
printf("time = %f\n", node_time[nodes[j]]);
}
free(nodes);
}
@endrst
@param self A pointer to a tsk_tree_t object.
@param u The tree node.
@param ret_time A double pointer to store the returned node time.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_get_time(const tsk_tree_t *self, tsk_id_t u, double *ret_time);
/**
@brief Return the number of nodes on the path from the specified node to the root.
@rst
Return the number of nodes on the path from u to the root, not including u.
The depth of a root is therefore zero.
As a special case, the depth of the
:ref:`virtual root` is defined as -1.
@endrst
@param self A pointer to a tsk_tree_t object.
@param u The tree node.
@param ret_depth An int pointer to store the returned node depth.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_get_depth(const tsk_tree_t *self, tsk_id_t u, int *ret_depth);
/**
@brief Return the length of the branch ancestral to the specified node.
@rst
Return the length of the branch ancestral to the specified node.
Branch length is defined as the difference between the time
of a node and that of its parent. The branch length of a root is zero.
@endrst
@param self A pointer to a tsk_tree_t object.
@param u The tree node.
@param ret_branch_length A double pointer to store the returned branch length.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_get_branch_length(
const tsk_tree_t *self, tsk_id_t u, double *ret_branch_length);
/**
@brief Computes the sum of the lengths of all branches reachable from
the specified node, or from all roots if ``u=TSK_NULL``.
@rst
Return the total branch length in a particular subtree or of the
entire tree. If the specified node is :c:macro:`TSK_NULL` (or the
:ref:`virtual root`)
the sum of the lengths of all branches reachable from roots
is returned. Branch length is defined as the difference between the time
of a node and that of its parent. The branch length of a root is zero.
Note that if the specified node is internal, its own branch length is
*not* included, so that, e.g., the total branch length of a
leaf node is zero.
@endrst
@param self A pointer to a tsk_tree_t object.
@param u The root of the subtree of interest, or ``TSK_NULL`` to return the
total branch length of the tree.
@param ret_tbl A double pointer to store the returned total branch length.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_get_total_branch_length(
const tsk_tree_t *self, tsk_id_t u, double *ret_tbl);
/**
@brief Counts the number of samples in the subtree rooted at a node.
@rst
Returns the number of samples descending from the specified node ``u``,
including the node itself.
This is a constant time operation.
@endrst
@param self A pointer to a tsk_tree_t object.
@param u The tree node.
@param ret_num_samples A tsk_size_t pointer to store the returned
number of samples.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_get_num_samples(
const tsk_tree_t *self, tsk_id_t u, tsk_size_t *ret_num_samples);
/**
@brief Compute the most recent common ancestor of two nodes.
@rst
If the two nodes do not share a common ancestor in the current tree, the
MRCA node is :c:macro:`TSK_NULL`.
@endrst
@param self A pointer to a tsk_tree_t object.
@param u A tree node.
@param v A tree node.
@param mrca A tsk_id_t pointer to store the returned most recent common ancestor node.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_get_mrca(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v, tsk_id_t *mrca);
/**
@brief Returns true if u is a descendant of v.
@rst
Returns true if u and v are both valid nodes in the tree sequence
and v lies on the path from u to the root, and false otherwise.
Any node is a descendant of itself.
@endrst
@param self A pointer to a tsk_tree_t object.
@param u The descendant node.
@param v The ancestral node.
@return true if u is a descendant of v, and false otherwise.
*/
bool tsk_tree_is_descendant(const tsk_tree_t *self, tsk_id_t u, tsk_id_t v);
/** @} */
/**
@defgroup TREE_API_TRAVERSAL_GROUP Traversal orders.
@{
*/
/**
@brief Fill an array with the nodes of this tree in preorder.
@rst
Populate an array with the nodes in this tree in preorder. The array
must be pre-allocated and be sufficiently large to hold all of the
nodes visited. The recommended approach is to use the
:c:func:`tsk_tree_get_size_bound` function, as in the following example:
.. code-block:: c
static void
print_preorder(tsk_tree_t *tree)
{
int ret;
tsk_size_t num_nodes, j;
tsk_id_t *nodes = malloc(tsk_tree_get_size_bound(tree) * sizeof(*nodes));
if (nodes == NULL) {
errx(EXIT_FAILURE, "Out of memory");
}
ret = tsk_tree_preorder(tree, nodes, &num_nodes);
check_tsk_error(ret);
for (j = 0; j < num_nodes; j++) {
printf("Visit preorder %lld\n", (long long) nodes[j]);
}
free(nodes);
}
.. seealso::
See the :ref:`sec_c_api_examples_tree_traversals` section for
more examples.
@endrst
@param self A pointer to a tsk_tree_t object.
@param nodes The tsk_id_t array to store nodes in. See notes above for
details.
@param num_nodes A pointer to a tsk_size_t value where we store the number
of nodes in the traversal.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_preorder(const tsk_tree_t *self, tsk_id_t *nodes, tsk_size_t *num_nodes);
/**
@brief Fill an array with the nodes of this tree in preorder, starting from a
particular node.
@rst
As for :c:func:`tsk_tree_preorder` but starting the traversal at a particular node
(which will be the first node in the traversal list). The
:ref:`virtual root` is a valid input for this function
and will be treated like any other tree node. The value ``-1`` is a special case,
in which we visit all nodes reachable from the roots, and is equivalent to
calling :c:func:`tsk_tree_preorder`.
See :c:func:`tsk_tree_preorder` for details of the requirements for the ``nodes``
array.
@endrst
@param self A pointer to a tsk_tree_t object.
@param root The root of the subtree to traverse, or -1 to visit all nodes.
@param nodes The tsk_id_t array to store nodes in. See
:c:func:`tsk_tree_preorder` for more details.
@param num_nodes A pointer to a tsk_size_t value where we store the number
of nodes in the traversal.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_preorder_from(
const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes);
/**
@brief Fill an array with the nodes of this tree in postorder.
@rst
Populate an array with the nodes in this tree in postorder. The array
must be pre-allocated and be sufficiently large to hold all of the
nodes visited. The recommended approach is to use the
:c:func:`tsk_tree_get_size_bound` function, as in the following example:
.. code-block:: c
static void
print_postorder(tsk_tree_t *tree)
{
int ret;
tsk_size_t num_nodes, j;
tsk_id_t *nodes = malloc(tsk_tree_get_size_bound(tree) * sizeof(*nodes));
if (nodes == NULL) {
errx(EXIT_FAILURE, "Out of memory");
}
ret = tsk_tree_postorder(tree, nodes, &num_nodes);
check_tsk_error(ret);
for (j = 0; j < num_nodes; j++) {
printf("Visit postorder %lld\n", (long long) nodes[j]);
}
free(nodes);
}
.. seealso::
See the :ref:`sec_c_api_examples_tree_traversals` section for
more examples.
@endrst
@param self A pointer to a tsk_tree_t object.
@param nodes The tsk_id_t array to store nodes in. See notes above for
details.
@param num_nodes A pointer to a tsk_size_t value where we store the number
of nodes in the traversal.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_postorder(const tsk_tree_t *self, tsk_id_t *nodes, tsk_size_t *num_nodes);
/**
@brief Fill an array with the nodes of this tree in postorder, starting from a
particular node.
@rst
As for :c:func:`tsk_tree_postorder` but starting the traversal at a particular node
(which will be the last node in the traversal list). The
:ref:`virtual root` is a valid input for this function
and will be treated like any other tree node. The value ``-1`` is a special case,
in which we visit all nodes reachable from the roots, and is equivalent to
calling :c:func:`tsk_tree_postorder`.
See :c:func:`tsk_tree_postorder` for details of the requirements for the ``nodes``
array.
@endrst
@param self A pointer to a tsk_tree_t object.
@param root The root of the subtree to traverse, or -1 to visit all nodes.
@param nodes The tsk_id_t array to store nodes in. See
:c:func:`tsk_tree_postorder` for more details.
@param num_nodes A pointer to a tsk_size_t value where we store the number
of nodes in the traversal.
@return 0 on success or a negative value on failure.
*/
int tsk_tree_postorder_from(
const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes);
/** @} */
/* Undocumented for now */
/* Same parameter list as tsk_tree_preorder_from.
 * NOTE(review): presumably only sample nodes are written to ``nodes``; confirm
 * against the implementation. */
int tsk_tree_preorder_samples_from(
const tsk_tree_t *self, tsk_id_t root, tsk_id_t *nodes, tsk_size_t *num_nodes);
/* Root threshold accessors: the setter returns an int status, the getter
 * returns the tsk_size_t value directly. */
int tsk_tree_set_root_threshold(tsk_tree_t *self, tsk_size_t root_threshold);
tsk_size_t tsk_tree_get_root_threshold(const tsk_tree_t *self);
/* NOTE(review): presumably these report which optional per-node sample data
 * the tree was initialised with; confirm which init options enable each. */
bool tsk_tree_has_sample_counts(const tsk_tree_t *self);
bool tsk_tree_has_sample_lists(const tsk_tree_t *self);
/* Tracked samples: query the count for node ``u``, set the tracked set from an
 * array of sample IDs, or (NOTE(review): inferred from the name) track the
 * samples descending from ``node``. */
int tsk_tree_get_num_tracked_samples(
const tsk_tree_t *self, tsk_id_t u, tsk_size_t *num_tracked_samples);
int tsk_tree_set_tracked_samples(
tsk_tree_t *self, tsk_size_t num_tracked_samples, const tsk_id_t *tracked_samples);
int tsk_tree_track_descendant_samples(tsk_tree_t *self, tsk_id_t node);
/* A (node, parent, state) record. tsk_tree_map_mutations takes a
 * ``tsk_state_transition_t **transitions`` argument together with
 * ``num_transitions``.
 * NOTE(review): presumably an array of these records is handed back through
 * that pointer; confirm who allocates and who frees it. */
typedef struct {
/* NOTE(review): presumably the node at which the state changes; confirm. */
tsk_id_t node;
/* NOTE(review): meaning not visible here (tree parent vs. index of a parent
 * transition); confirm against the implementation. */
tsk_id_t parent;
/* The state value, using the same int32_t type as the ``genotypes`` and
 * ``ancestral_state`` arguments of tsk_tree_map_mutations. */
int32_t state;
} tsk_state_transition_t;
/* Maps ``genotypes`` onto this tree, reporting an ancestral state and a list of
 * state transitions through the three output pointers.
 * NOTE(review): summary inferred from the name and parameter list; confirm.
 * NOTE(review): ``self``, ``genotypes`` and ``cost_matrix`` are not
 * const-qualified; check whether the implementation modifies them. */
int tsk_tree_map_mutations(tsk_tree_t *self, int32_t *genotypes, double *cost_matrix,
tsk_flags_t options, int32_t *ancestral_state, tsk_size_t *num_transitions,
tsk_state_transition_t **transitions);
/* Distance between two trees, written to ``result``.
 * NOTE(review): presumably the Kendall-Colijn (KC) metric with weighting
 * parameter ``lambda``; confirm against the implementation. */
int tsk_tree_kc_distance(
const tsk_tree_t *self, const tsk_tree_t *other, double lambda, double *result);
/* Don't document these balance metrics for now so they don't get in the way of
 * C API 1.0; they should be straightforward to document based on the Python
 * docs. */
int tsk_tree_sackin_index(const tsk_tree_t *self, tsk_size_t *result);
int tsk_tree_colless_index(const tsk_tree_t *self, tsk_size_t *result);
int tsk_tree_b1_index(const tsk_tree_t *self, double *result);
/* NOTE: if we document this as part of the C API we'll have to be more careful
 * about the error behaviour on bad log bases. At the moment we just return
 * the resulting value, which can be nan, inf etc., but there are some
 * surprising results: for example, a base of 0 seems to return a finite
 * value. */
int tsk_tree_b2_index(const tsk_tree_t *self, double base, double *result);
/* Not a balance metric.
 * NOTE(review): presumably counts the lineages present in this tree at time
 * ``t``; confirm against the implementation. */
int tsk_tree_num_lineages(const tsk_tree_t *self, double t, tsk_size_t *result);
/* Things to consider removing: */
/* This is redundant, really */
bool tsk_tree_is_sample(const tsk_tree_t *self, tsk_id_t u);
/* Not terribly useful, since the definition is just
 * return (self->tree_sequence == other->tree_sequence) && (self->index == other->index)
 * i.e. two trees are equal when they refer to the same tree sequence object
 * and have the same index. Remove?
 */
bool tsk_tree_equals(const tsk_tree_t *self, const tsk_tree_t *other);
/* tsk_tree_position_t API (undocumented). init/free/print_state and the two
 * seek functions return an int; next/prev return a bool instead.
 * NOTE(review): presumably the int is 0 on success or a negative error code,
 * as for the documented functions in this header, and the bool says whether
 * the position moved to a valid tree; confirm against the implementation. */
int tsk_tree_position_init(
tsk_tree_position_t *self, const tsk_treeseq_t *tree_sequence, tsk_flags_t options);
int tsk_tree_position_free(tsk_tree_position_t *self);
int tsk_tree_position_print_state(const tsk_tree_position_t *self, FILE *out);
bool tsk_tree_position_next(tsk_tree_position_t *self);
bool tsk_tree_position_prev(tsk_tree_position_t *self);
int tsk_tree_position_seek_forward(tsk_tree_position_t *self, tsk_id_t index);
int tsk_tree_position_seek_backward(tsk_tree_position_t *self, tsk_id_t index);
#ifdef __cplusplus
}
#endif
#endif
================================================
FILE: c/tskit.h
================================================
/*
* MIT License
*
* Copyright (c) 2019-2024 Tskit Developers
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* @file tskit.h
* @brief Tskit API.
*/
#ifndef __TSKIT_H__
#define __TSKIT_H__
#include
#include
#include
#include
#include