Full Code of ryanlayer/samplot for AI

master 2929e4a90e54 cached

60 files

15.5 MB

83.6k tokens

133 symbols

1 requests

Download .txt

Showing preview only (333K chars total). Download the full file or copy to clipboard to get everything.

Repository: ryanlayer/samplot
Branch: master
Commit: 2929e4a90e54
Files: 60
Total size: 15.5 MB

Directory structure:
gitextract_qd67atv0/

├── .circleci/
│   ├── config.yml
│   └── setup.sh
├── .gitignore
├── LICENSE
├── README.md
├── requirements.txt
├── runtests.sh
├── samplot/
│   ├── __init__.py
│   ├── __main__.py
│   ├── samplot.py
│   ├── samplot_vcf.py
│   └── templates/
│       └── samplot_vcf.html
├── setup.py
├── ssshtest
└── test/
    ├── README.md
    ├── data/
    │   ├── 2_59305747-59505747_X_151018513-151218513.BND.bam
    │   ├── 2_59305747-59505747_X_151018513-151218513.BND.bam.bai
    │   ├── Alu.2_X.bed.gz.tbi
    │   ├── Alu.2_X.csionly.bed.gz.csi
    │   ├── HG002_10X.bam
    │   ├── HG002_10X.bam.bai
    │   ├── HG002_1_89475845-89478561_DEL.tenx.bam
    │   ├── HG002_1_89475845-89478561_DEL.tenx.bam.bai
    │   ├── HG002_Illumina.bam
    │   ├── HG002_Illumina.bam.bai
    │   ├── HG002_ONT.cram
    │   ├── HG002_ONT.cram.crai
    │   ├── HG002_PacBio.bam
    │   ├── HG002_PacBio.bam.bai
    │   ├── HG003_Illumina.bam
    │   ├── HG003_Illumina.bam.bai
    │   ├── HG004_Illumina.bam
    │   ├── HG004_Illumina.bam.bai
    │   ├── Homo_sapiens.GRCh37.82.sort.2_X.gff3.gz.tbi
    │   ├── Homo_sapiens.GRCh37.csionly.2_X.gff3.gz.csi
    │   ├── NA12878_restricted.bam
    │   ├── NA12878_restricted.bam.bai
    │   ├── NA12889_restricted.bam
    │   ├── NA12889_restricted.bam.bai
    │   ├── NA12890_restricted.bam
    │   ├── NA12890_restricted.bam.bai
    │   ├── README.md
    │   ├── commands.sh
    │   ├── examples.bed
    │   ├── examples_padded.bed
    │   ├── hg19_chr1_58343117_58343622_deletion.bam
    │   ├── hg19_chr1_58343117_58343622_deletion.bam.bai
    │   ├── hg19_chr21_27373431_27375410_inversion.bam
    │   ├── hg19_chr21_27373431_27375410_inversion.bam.bai
    │   ├── nanopore-NA12878.bam
    │   ├── nanopore-NA12878.bam.bai
    │   ├── subset_alignments.sh
    │   ├── test.ped
    │   ├── test.vcf
    │   ├── test_site/
    │   │   ├── README.md
    │   │   └── index.html
    │   └── test_site_cmds.sh
    ├── func/
    │   ├── samplot_test.sh
    │   └── samplot_vcf_test.sh
    └── unit/
        └── samplot_test.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .circleci/config.yml
================================================

version: 2

variables:
  setup_p3: &setup_p3
    run:
      shell: /bin/bash
      name: Setup Samplot python3 dependencies
      command: bash .circleci/setup.sh 3
  run_plot_tests: &run_plot_tests
    run:
      shell: /bin/bash
      name: Functional Tests for Samplot
      command: bash test/func/samplot_test.sh
      no_output_timeout: 1h 
  run_vcf_tests: &run_vcf_tests
    run:
      shell: /bin/bash
      name: Functional Tests for Samplot
      command: bash test/func/samplot_vcf_test.sh
      no_output_timeout: 1h 
  run_unit_tests: &run_unit_tests
    run:
      shell: /bin/bash
      name: Functional Tests for Samplot
      command: python test/unit/samplot_test.py
      no_output_timeout: 1h 
  macos: &macos
    macos:
      xcode: "12.5.1"
  linux: &linux
    machine: ubuntu-2004:202201-02
  install_samplot: &install_samplot
    run:
      name: Install Samplot
      command: python setup.py install



jobs:
  test-linux-python3:
    <<: *linux
    steps:
      - checkout
      - *setup_p3
      - *install_samplot 
      - *run_plot_tests
      - *run_vcf_tests
      - *run_unit_tests
  test-macos-python3:
    <<: *macos
    steps:
      - checkout
      - *setup_p3
      - *install_samplot
      - *run_plot_tests
      - *run_vcf_tests
      - *run_unit_tests


workflows:
  version: 2
  samplot-unit-tests:
    jobs:
      - test-linux-python3
      - test-macos-python3
  samplot-nightly-unit-tests:
    triggers:
      - schedule:
          cron: "0 0 * * *"
          filters:
            branches:
              only:
                - master
    jobs:
      - test-linux-python3
      - test-macos-python3


================================================
FILE: .circleci/setup.sh
================================================
#!/bin/bash

set -exo pipefail

WORKSPACE=$(pwd)

# Set path
echo "export PATH=$WORKSPACE/anaconda/bin:$PATH" >> $BASH_ENV
source $BASH_ENV

## Passed from .circleci/config.yml (Only 3 permited)
pythonversion=$1
if (( $pythonversion != 3 ))
then
    echo -e "\nERROR: Python 3 designation required. Python version $pythonversion was supplied. Please correct and run again\n"
    exit 1   
fi 

# setup conda and dependencies 
if [[ ! -d $WORKSPACE/anaconda ]]; then
    mkdir -p $WORKSPACE

    # step 1: download and install anaconda
    if [[ $OSTYPE == darwin* ]]; then
        tag="MacOSX"
        tag2="darwin"
    elif [[ $OSTYPE == linux* ]]; then
        tag="Linux"
        tag2="linux"
    else
        echo "Unsupported OS: $OSTYPE"
        exit 1
    fi  

    curl -O https://repo.anaconda.com/miniconda/Miniconda$pythonversion-latest-$tag-x86_64.sh
    sudo bash Miniconda$pythonversion-latest-$tag-x86_64.sh -b -p $WORKSPACE/anaconda/
    sudo chown -R $USER $WORKSPACE/anaconda/

    mkdir -p $WORKSPACE/anaconda/conda-bld/$tag-64

    # step 3: setup channels
    conda config --system --add channels defaults
    conda config --system --add channels r
    conda config --system --add channels bioconda
    conda config --system --add channels conda-forge

    # step 3: install Samplot requirements
    conda install -y --file requirements.txt

fi


================================================
FILE: .gitignore
================================================
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.vscode/
.DS_Store


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2019 Ryan Layer

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
[![CircleCI](https://circleci.com/gh/ryanlayer/samplot/tree/master.svg?style=svg)](https://circleci.com/gh/ryanlayer/samplot/tree/master)
[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/samplot/README.html)

<center><img src="/doc/imgs/samplot_logo_v5.png" width="300"/></center>

<center><img src="/doc/imgs/montage.jpg" width="100%"/></center>

`samplot` is a command line tool for rapid, multi-sample structural variant
visualization. `samplot` takes SV coordinates and bam files and produces
high-quality images that highlight any alignment and depth signals that
substantiate the SV.

If you use samplot, please cite https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02380-5


# Usage
<details>
  <summary>samplot plot</summary>
  
  ```
usage: samplot plot [-h] [-n TITLES [TITLES ...]] [-r REFERENCE] [-z Z] -b
                    BAMS [BAMS ...] [-o OUTPUT_FILE] [--output_dir OUTPUT_DIR]
                    -s START -e END -c CHROM [-w WINDOW] [-d MAX_DEPTH]
                    [-t SV_TYPE] [-T TRANSCRIPT_FILE]
                    [--transcript_filename TRANSCRIPT_FILENAME]
                    [--max_coverage_points MAX_COVERAGE_POINTS]
                    [-A ANNOTATION_FILES [ANNOTATION_FILES ...]]
                    [--annotation_filenames ANNOTATION_FILENAMES [ANNOTATION_FILENAMES ...]]
                    [--coverage_tracktype {stack,superimpose,none}] [-a]
                    [-H PLOT_HEIGHT] [-W PLOT_WIDTH] [-q INCLUDE_MQUAL]
                    [--separate_mqual SEPARATE_MQUAL] [-j]
                    [--start_ci START_CI] [--end_ci END_CI]
                    [--long_read LONG_READ] [--ignore_hp]
                    [--min_event_size MIN_EVENT_SIZE]
                    [--xaxis_label_fontsize XAXIS_LABEL_FONTSIZE]
                    [--yaxis_label_fontsize YAXIS_LABEL_FONTSIZE]
                    [--legend_fontsize LEGEND_FONTSIZE]
                    [--annotation_fontsize ANNOTATION_FONTSIZE]
                    [--hide_annotation_labels] [--coverage_only]
                    [--max_coverage MAX_COVERAGE] [--same_yaxis_scales]
                    [--marker_size MARKER_SIZE] [--jitter [JITTER]]
                    [--dpi DPI] [--annotation_scalar ANNOTATION_SCALAR]
                    [--zoom ZOOM] [--debug DEBUG]


options:
  -h, --help            show this help message and exit
  -n TITLES [TITLES ...], --titles TITLES [TITLES ...]
                        Space-delimited list of plot titles. Use quote marks
                        to include spaces (i.e. "plot 1" "plot 2")
  -r REFERENCE, --reference REFERENCE
                        Reference file for CRAM, required if CRAM files used
  -z Z, --z Z           Number of stdevs from the mean (default 4)
  -b BAMS [BAMS ...], --bams BAMS [BAMS ...]
                        Space-delimited list of BAM/CRAM file names
  -o OUTPUT_FILE, --output_file OUTPUT_FILE
                        Output file name/type. Defaults to
                        {type}_{chrom}_{start}_{end}.png
  --output_dir OUTPUT_DIR
                        Output directory name. Defaults to working dir.
                        Ignored if --output_file is set
  -s START, --start START
                        Start position of region/variant (add multiple for
                        translocation/BND events)
  -e END, --end END     End position of region/variant (add multiple for
                        translocation/BND events)
  -c CHROM, --chrom CHROM
                        Chromosome (add multiple for translocation/BND events)
  -w WINDOW, --window WINDOW
                        Window size (count of bases to include in view),
                        default(0.5 * len)
  -d MAX_DEPTH, --max_depth MAX_DEPTH
                        Max number of normal pairs to plot
  -t SV_TYPE, --sv_type SV_TYPE
                        SV type. If omitted, plot is created without variant
                        bar
  -T TRANSCRIPT_FILE, --transcript_file TRANSCRIPT_FILE
                        GFF3 of transcripts
  --transcript_filename TRANSCRIPT_FILENAME
                        Name for transcript track
  --max_coverage_points MAX_COVERAGE_POINTS
                        number of points to plot in coverage axis (downsampled
                        from region size for speed)
  -A ANNOTATION_FILES [ANNOTATION_FILES ...], --annotation_files ANNOTATION_FILES [ANNOTATION_FILES ...]
                        Space-delimited list of bed.gz tabixed files of
                        annotations (such as repeats, mappability, etc.)
  --annotation_filenames ANNOTATION_FILENAMES [ANNOTATION_FILENAMES ...]
                        Space-delimited list of names for the tracks in
                        --annotation_files
  --coverage_tracktype {stack,superimpose,none}
                        type of track to use for low MAPQ coverage plot.
  -a, --print_args      Print commandline arguments to a json file, useful
                        with PlotCritic
  -H PLOT_HEIGHT, --plot_height PLOT_HEIGHT
                        Plot height
  -W PLOT_WIDTH, --plot_width PLOT_WIDTH
                        Plot width
  -q INCLUDE_MQUAL, --include_mqual INCLUDE_MQUAL
                        Min mapping quality of reads to be included in plot
                        (default 1)
  --separate_mqual SEPARATE_MQUAL
                        coverage from reads with MAPQ <= separate_mqual
                        plotted in lighter grey. To disable, pass in negative
                        value
  -j, --json_only       Create only the json file, not the image plot
  --start_ci START_CI   confidence intervals of SV first breakpoint (distance
                        from the breakpoint). Must be a comma-separated pair
                        of ints (i.e. 20,40)
  --end_ci END_CI       confidence intervals of SV end breakpoint (distance
                        from the breakpoint). Must be a comma-separated pair
                        of ints (i.e. 20,40)
  --long_read LONG_READ
                        Min length of a read to be treated as a long-read
                        (default 1000)
  --ignore_hp           Choose to ignore HP tag in alignment files
  --min_event_size MIN_EVENT_SIZE
                        Min size of an event in long-read CIGAR to include
                        (default 20)
  --xaxis_label_fontsize XAXIS_LABEL_FONTSIZE
                        Font size for X-axis labels (default 6)
  --yaxis_label_fontsize YAXIS_LABEL_FONTSIZE
                        Font size for Y-axis labels (default 6)
  --legend_fontsize LEGEND_FONTSIZE
                        Font size for legend labels (default 6)
  --annotation_fontsize ANNOTATION_FONTSIZE
                        Font size for annotation labels (default 6)
  --hide_annotation_labels
                        Hide the label (fourth column text) from annotation
                        files, useful for regions with many annotations
  --coverage_only       Hide all reads and show only coverage
  --max_coverage MAX_COVERAGE
                        apply a maximum coverage cutoff. Unlimited by default
  --same_yaxis_scales   Set the scales of the Y axes to the max of all
  --marker_size MARKER_SIZE
                        Size of marks on pairs and splits (default 3)
  --jitter [JITTER]     Add uniform random noise to insert sizes. This can be
                        helpful to resolve overlapping entries. Either a
                        custom value (<1.0) is supplied or 0.08 will be used.
  --dpi DPI             Dots per inches (pixel count, default 300)
  --annotation_scalar ANNOTATION_SCALAR
                        scaling factor for the optional annotation/trascript
                        tracks
  --zoom ZOOM           Only show +- zoom amount around breakpoints, much
                        faster for large regions. Ignored if region smaller
                        than --zoom (default 500000)
  --debug DEBUG         Print debug statements

```
</details>

## Installing
`Samplot` is available from bioconda and is installable via the conda package manager:
```
conda install -c bioconda samplot 
```

## Examples: 

Samplot requires either BAM files or CRAM files as primary input. If you use
CRAM, you'll also need a reference genome. You can easily acquire a reference genome file with [GGD](https://github.com/gogetdata/ggd-cli), which is also available from conda.

### Basic use case
Using data from NA12878, NA12889, and NA12890 in the 
[1000 Genomes Project](http://www.internationalgenome.org/about) (available in the test/data directory of samplot), we will
inspect a possible deletion in NA12878 at 4:115928726-115931880 with respect
to that same region in two unrelated samples NA12889 and NA12890.

The following command will create an image of that region:
```
time samplot plot \
    -n NA12878 NA12889 NA12890 \
    -b samplot/test/data/NA12878_restricted.bam \
      samplot/test/data/NA12889_restricted.bam \
      samplot/test/data/NA12890_restricted.bam \
    -o 4_115928726_115931880.png \
    -c chr4 \
    -s 115928726 \
    -e 115931880 \
    -t DEL

real	0m3.882s
user	0m3.831s
sys	0m0.328s

```

The arguments used above are:

`-n` The names to be shown for each sample in the plot

`-b` The BAM/CRAM files of the samples (space-delimited)

`-o` The name of the output file containing the plot

`-c` The chromosome of the region of interest

`-s` The start location of the region of interest

`-e` The end location of the region of interest

`-t` The type of the variant of interest

This will create an image file named `4_115928726_115931880.png`, shown below:

<img src="/doc/imgs/4_115928726_115931880.png">

### Gene and other genomic feature annotations

Gene annotations (tabixed, gff3 file) and genome features (tabixed, bgzipped, bed file) can be 
included in the plots.

Get the gene annotations:
```
wget ftp://ftp.ensembl.org/pub/grch37/release-84/gff3/homo_sapiens/Homo_sapiens.GRCh37.82.gff3.gz
bedtools sort -i Homo_sapiens.GRCh37.82.gff3.gz \
| bgzip -c > Homo_sapiens.GRCh37.82.sort.gff3.gz
tabix Homo_sapiens.GRCh37.82.sort.gff3.gz
```

Get genome annotations, in this case Repeat Masker tracks and a mappability track:
```
wget http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig
bigWigToBedGraph wgEncodeDukeMapabilityUniqueness35bp.bigWig wgEncodeDukeMapabilityUniqueness35bp.bed
bgzip wgEncodeDukeMapabilityUniqueness35bp.bed
tabix wgEncodeDukeMapabilityUniqueness35bp.bed.gz

curl http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/rmsk.txt.gz \
| bgzip -d -c \
| cut -f 6,7,8,13 \
| bedtools sort -i stdin \
| bgzip -c > rmsk.bed.gz
tabix rmsk.bed.gz
```

Plot:
```
samplot plot \
    -n NA12878 NA12889 NA12890 \
    -b samplot/test/data/NA12878_restricted.bam \
      samplot/test/data/NA12889_restricted.bam \
      samplot/test/data/NA12890_restricted.bam \
    -o 4_115928726_115931880.d100.genes_reps_map.png \
    -c chr4 \
    -s 115928726 \
    -e 115931880 \
    -t DEL \
    -d 100 \
    -T Homo_sapiens.GRCh37.82.sort.gff3.gz \
    -A rmsk.bed.gz wgEncodeDukeMapabilityUniqueness35bp.bed.gz
```

<img src="/doc/imgs/4_115928726_115931880.d100.genes_reps_map.png">

## Generating images from a VCF file
To plot images from structural variant calls in a VCF file, use samplot's
`vcf` subcommand. This accepts a VCF file and the BAM files of samples
you wish to plot, outputting images and an `index.html` page for review. 

### Usage
<details>
  <summary> samplot vcf </summary>
  
  ```
usage: samplot vcf [-h] [--vcf VCF] [-d OUT_DIR] [--ped PED] [--dn_only]
                   [--min_call_rate MIN_CALL_RATE] [--filter FILTER]
                   [-O {png,pdf,eps,jpg}] [--max_hets MAX_HETS]
                   [--min_entries MIN_ENTRIES] [--max_entries MAX_ENTRIES]
                   [--max_mb MAX_MB] [--min_bp MIN_BP]
                   [--important_regions IMPORTANT_REGIONS] -b BAMS [BAMS ...]
                   [--sample_ids SAMPLE_IDS [SAMPLE_IDS ...]]
                   [--command_file COMMAND_FILE] [--format FORMAT]
                   [--gff3 GFF3] [--downsample DOWNSAMPLE] [--manual_run]
                   [--plot_all] [-t THREADS] [--debug]

options:
  -h, --help            show this help message and exit
  --vcf VCF, -v VCF     VCF file containing structural variants (default:
                        None)
  -d OUT_DIR, --out-dir OUT_DIR
                        path to write output images (default: samplot-out)
  --ped PED             path to ped (or .fam) file (default: None)
  --dn_only             plots only putative de novo variants (PED file
                        required) (default: False)
  --min_call_rate MIN_CALL_RATE
                        only plot variants with at least this call-rate
                        (default: None)
  --filter FILTER       simple filter that samples must meet. Join multiple
                        filters with '&' and specify --filter multiple times
                        for 'or' e.g. DHFFC < 0.7 & SVTYPE = 'DEL' (default:
                        [])
  -O {png,pdf,eps,jpg}, --output_type {png,pdf,eps,jpg}
                        type of output figure (default: png)
  --max_hets MAX_HETS   only plot variants with at most this many
                        heterozygotes (default: None)
  --min_entries MIN_ENTRIES
                        try to include homref samples as controls to get this
                        many samples in plot (default: 6)
  --max_entries MAX_ENTRIES
                        only plot at most this many heterozygotes (default:
                        10)
  --max_mb MAX_MB       skip variants longer than this many megabases
                        (default: None)
  --min_bp MIN_BP       skip variants shorter than this many bases (default:
                        20)
  --important_regions IMPORTANT_REGIONS
                        only report variants that overlap regions in this bed
                        file (default: None)
  -b BAMS [BAMS ...], --bams BAMS [BAMS ...]
                        Space-delimited list of BAM/CRAM file names (default:
                        None)
  --sample_ids SAMPLE_IDS [SAMPLE_IDS ...]
                        Space-delimited list of sample IDs, must have same
                        order as BAM/CRAM file names. BAM RG tag required if
                        this is omitted. (default: None)
  --command_file COMMAND_FILE
                        store commands in this file. (default:
                        samplot_vcf_cmds.tmp)
  --format FORMAT       comma separated list of FORMAT fields to include in
                        sample plot title (default: AS,AP,DHFFC)
  --gff3 GFF3           genomic regions (.gff with .tbi in same directory)
                        used when building HTML table and table filters
                        (default: None)
  --downsample DOWNSAMPLE
                        Number of normal reads/pairs to plot (default: 1)
  --manual_run          disables auto-run for the plotting commands (default:
                        False)
  --plot_all            plots all samples and all variants - limited by any
                        filtering arguments set (default: False)
  -t THREADS, --threads THREADS
                        Number of threads to use to generate plots. Default: 1
  --debug               prints out the reason for skipping any skipped variant
                        entry (default: False)
  ```
</details>

`samplot vcf` can be used to quickly apply some basic filters to variants. Filters are applied via the `--filter` argument, which may be repeated as many times as desired. Each expression specified with the `--filter` option is applied separately in an OR fashion, which `&` characters may be used within a statement for AND operations. 

### Example:
```
samplot vcf \
    --filter "SVTYPE == 'DEL' & SU >= 8" \
    --filter "SVTYPE == 'INV' & SU >= 5" \
    --vcf example.vcf\
    -d test/\
    -O png\
    --important_regions regions.bed\
    -b example.bam > samplot_commands.sh
```
This example will create a directory named test (in the current working directory). A file named `index.html` will be created inside that directory to explore the images created.

**Filters:** The above filters will remove all samples/variants from output except:
* `DUP` variants with at least `SU` of 8
* `INV` variants with `SU` of at least 5

The specific `FORMAT` fields available in your VCF file may be different. I recommend SV VCF annotation with [duphold](https://github.com/brentp/duphold) by [brentp](https://github.com/brentp).

For more complex expression-based VCF filtering, try brentp's [slivar](https://github.com/brentp/slivar), which provides similar but more broad options for filter expressions.

**Region restriction.** Variants can also be filtered by overlap with a set of region (for example, gene coordinates for genes correlated with a disease). The `important_regions` argument provides a BED file of such regions for this example.

**Filtering for de novo SVs** 
Using a [PED](https://gatkforums.broadinstitute.org/gatk/discussion/7696/pedigree-ped-files) file with `samplot vcf` allows filtering for variants that may be spontaneous/de novo variants. This filter is a simple Mendelian violation test. If a sample 1) has valid parent IDs in the PED file, 2) has a non-homref genotype (1/0, 0/1, or 1/1 in VCF), 3) passes filters, and 4) both parents have homref genotypes (0/0 in VCF), the sample may have a de novo variant. Filter parameters are not applied to the parents. The sample is plotted along with both parents, which are labeled as father and mother in the image. 

Example call with the addition of a PED file:

<pre>
samplot vcf \
    --filter "SVTYPE == 'DEL' & SU >= 8" \
    --filter "SVTYPE == 'INV' & SU >= 5" \
    --vcf example.vcf\
    -d test/\
    -O png\
    <b>--ped family.ped\</b>
    --important_regions regions.bed\
    -b example.bam > samplot_commands.sh
</pre>

**Additional notes.** 
* Variants where fewer than 95% of samples have a call (whether reference or alternate) will be excluded by default. This can be altered via the command-line argument `min_call_rate`.
* If you're primarily interested in rare variants, you can use the `max_hets` filter to remove variants that appear in more than `max_hets` samples.
* Large variants can now be plotted easily by samplot through use of `samplot plot`'s `zoom` argument. However, you can still choose to only plot variants larger than a given size using the `max_mb` argument. The `zoom` argument takes an integer parameter and shows only the intervals within +/- that parameter on either side of the breakpoints. A dotted line connects the ends of the variant call bar at the top of the window, showing that the region between breakpoint intervals is not shown.
* By default, if fewer than 6 samples have a variant and additional homref samples are given, control samples will be added from the homref group to reach a total of 6 samples in the plot. This number may be altered using the `min_entries` argument.
* Arguments that are optional in `samplot plot` can by given as arguments to `samplot vcf`. They will be applied to each image generated.


#### CRAM inputs
Samplot also support CRAM input, which requires a reference fasta file for
reading as noted above. Notice that the reference file is not included in this
repository due to size. This time we'll plot an interesting duplication at
X:101055330-101067156.

```
samplot plot \
    -n NA12878 NA12889 NA12890 \
    -b samplot/test/data/NA12878_restricted.cram \
      samplot/test/data/NA12889_restricted.cram \
      samplot/test/data/NA12890_restricted.cram \
    -o cramX_101055330_101067156.png 
    -c chrX \
    -s 101055330 \
    -e 101067156 \
    -t DUP \
    -r hg19.fa
```


The arguments used above are the same as those used for the basic use case, with the addition of the following:

`-r` The reference file used for reading CRAM files

#### Plotting without the SV 
Samplot can also plot genomic regions that are unrelated to an SV. If you do
not pass the SV type option (`-t`) then the top SV bar will go away and only
the region that is given by `-c` `-s` and `-e` will be displayed.

#### Long read (Oxford nanopore and PacBio) and linked read support
Any alignment that is longer than 1000 bp is treated as a long read, and
the plot design will focus on aligned regions and gaps. Aligned regions are in orange, and gaps follow the same DEL/DUP/INV color code used for short reads. The height of the alignment is based on the size of its largest gap.

<img src="/doc/imgs/longread_del.png">

If the bam file has an MI tag, then the reads will be treated as linked reads.
The plots will be similar to short read plots, but all alignments with the same MI is plotted at the same height according to alignment with the largest gap in the group. A green line connects all alignments in a group.

<img src="/doc/imgs/linkedread_del.png">


================================================
FILE: requirements.txt
================================================
matplotlib<3.7
numpy
pysam>=0.15
wget
Jinja2


================================================
FILE: runtests.sh
================================================
echo "running unit tests:"
python test/unit/samplot_test.py
echo "finished unit tests"
echo "running functional tests for \`plot\`:"
bash test/func/samplot_test.sh
printf "\n\nfinished functional tests for \`plot\`:\n"
printf "running functional tests for \`vcf\`:\n"
bash test/func/samplot_vcf_test.sh
echo "finished functional tests for \`vcf\`:"


================================================
FILE: samplot/__init__.py
================================================
#!/usr/bin/env python
__version__ = "1.3.1"


================================================
FILE: samplot/__main__.py
================================================
#!/usr/bin/env python
import argparse
import logging
import sys

from .__init__ import __version__
from .samplot import add_plot
from .samplot_vcf import add_vcf


def main(args=None):
    logging.basicConfig(level=logging.INFO, stream=sys.stderr,
                        format="%(module)s - %(levelname)s: %(message)s")
    
    if args is None:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(
        prog="samplot", formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "-v",
        "--version",
        help="Installed version",
        action="version",
        version="%(prog)s " + str(__version__),
    )
    sub = parser.add_subparsers(title="[sub-commands]", dest="command")
    sub.required = True

    add_plot(sub)
    add_vcf(sub)

    args,extra_args = parser.parse_known_args(args)
    args.func(parser, args, extra_args)


if __name__ == "__main__":
    sys.exit(main() or 0)


================================================
FILE: samplot/samplot.py
================================================
#!/usr/bin/env python
from __future__ import print_function

import logging
import os
import random
import re
import sys
from argparse import SUPPRESS

import matplotlib
matplotlib.use("Agg") #must be before imports of submodules in matplotlib
import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
import matplotlib.path as mpath
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pysam
import warnings
warnings.filterwarnings('ignore', 'FixedFormatter should only be used together with FixedLocator')
from matplotlib.offsetbox import AnchoredText


logger = logging.getLogger(__name__)

INTERCHROM_YAXIS = 5000

COLORS = {
    "Deletion/Normal": "black",
    "Deletion": "black",
    "Duplication": "red",
    "Inversion": "blue",
    "InterChrmInversion": "blue",
    "InterChrm": "black",
}

READ_TYPES_USED = {
    "Deletion/Normal": False,
    "Duplication": False,
    "Inversion": False,
    "Aligned long read": False,
    "Linked read": False,
    "Split-read": False,
    "Paired-end read": False,
}

# pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.cigartuples
CIGAR_MAP = {
    "M": 0,
    "I": 1,
    "D": 2,
    "N": 3,
    "S": 4,
    "H": 5,
    "P": 6,
    "=": 7,
    "X": 8,
    "B": 9,
}

def strip_chr(chrom):
    """
    safer way to replace chr string, to support non-human genomes
    """
    if chrom[:3] == "chr":
        chrom = chrom[3:]
    return chrom

# {{{class plan_step:
class plan_step:
    step_events = ["Align", "ANNOTATION"]

    def __init__(self, start_pos, end_pos, event, info=None):
        self.start_pos = start_pos
        self.end_pos = end_pos
        self.event = event
        self.info = info

    def __str__(self):
        if self.info:
            return (
                "Step("
                + str(self.start_pos)
                + ", "
                + str(self.end_pos)
                + ", "
                + self.event
                + ", "
                + str(self.info)
                + ")"
            )
        else:
            return (
                "Step("
                + str(self.start_pos)
                + ", "
                + str(self.end_pos)
                + ", "
                + self.event
                + ")"
            )

    def __repr__(self):
        return str(self)


# }}}

# {{{class genome_interval:
class genome_interval:
    def __init__(self, chrm, start, end):
        self.chrm = chrm
        self.start = start
        self.end = end

    def __str__(self):
        return "(" + self.chrm + "," + str(self.start) + "," + str(self.end) + ")"

    def __repr__(self):
        return str(self)

    def __eq__(self, gi2):
        return self.chrm == gi2.chrm and self.start == gi2.start and self.end == gi2.end

    """ return -1 if before, 0 if in, 1 if after """

    def intersect(self, gi):
        if strip_chr(gi.chrm) < strip_chr(self.chrm) or gi.end < self.start:
            return -1
        elif strip_chr(gi.chrm) > strip_chr(self.chrm) or gi.start > self.end:
            return 1
        else:
            return 0


# }}}

# {{{def get_range_hit(ranges, chrm, point):
def get_range_hit(ranges, chrm, point):
    for j in range(len(ranges)):
        r = ranges[j]
        if (
            strip_chr(r.chrm) == strip_chr(chrm)
            and r.start <= point
            and r.end >= point
        ):
            return j
    return None


# }}}

# {{{def map_genome_point_to_range_points(ranges, chrm, point):
def map_genome_point_to_range_points(ranges, chrm, point):
    range_hit = get_range_hit(ranges, chrm, point)

    if range_hit == None:
        return None
    p = 1.0 / len(ranges) * range_hit + (1.0 / len(ranges)) * (
        float(point - ranges[range_hit].start)
        / float(ranges[range_hit].end - ranges[range_hit].start)
    )

    return p


# }}}

# {{{def points_in_window(points):
def points_in_window(points):
    """Checks whether these points lie within the window of interest

    Points is a list of one start, one end coordinate (ints)
    """
    if (
        None in points
        or points[0] < -5
        or points[1] < -5
        or points[0] > 5
        or points[1] > 5
    ):
        return False
    return True


# }}}

# {{{ def get_tabix_iter(chrm, start, end, datafile):
def get_tabix_iter(chrm, start, end, datafile):
    """Gets an iterator from a tabix BED/GFF3 file

    Used to avoid chrX vs. X notation issues when extracting data from
    annotation files
    """
    try:
        tbx = pysam.TabixFile(datafile)
    except:
        tbx = pysam.TabixFile(datafile, index=datafile+".csi")


    itr = None
    try:
        itr = tbx.fetch(chrm, max(0, start - 1000), end + 1000)
    except ValueError:
        # try and account for chr/no chr prefix
        if chrm[:3] == "chr":
            chrm = chrm[3:]
        else:
            chrm = "chr" + chrm

        try:
            itr = tbx.fetch(chrm, max(0, start - 1000), end + 1000)
        except ValueError as e:
            logger.warning(
                "Could not fetch {}:{}-{} from {}".format(
                    chrm,
                    start,
                    end,
                    datafile
                )
            )
            print(e)
    return itr


# }}}

##Coverage methods
# {{{def add_coverage(bam_file, read, coverage, separate_mqual):
def add_coverage(read, coverage_matrix, offset, column):
    """Adds a read to the known coverage 

    Coverage from Pysam read is added to coverage_matrix.
    offset defines the start position of the current range
    column specifies which column to add to.
    """

    curr_pos = read.reference_start
    if not read.cigartuples:
        return

    for op, length in read.cigartuples:
        if op in [CIGAR_MAP["M"], CIGAR_MAP["="], CIGAR_MAP["X"]]:
            coverage_matrix[curr_pos - offset: curr_pos + length - offset, column] += 1
            curr_pos += length
        elif op == CIGAR_MAP["I"]:
            curr_pos = curr_pos
        elif op == CIGAR_MAP["D"]:
            curr_pos += length
        elif op == CIGAR_MAP["N"]:
            curr_pos = length
        elif op == CIGAR_MAP["S"]:
            curr_pos = curr_pos
        elif op == CIGAR_MAP["H"]:
            curr_pos = curr_pos
        else:
            curr_pos += length


# }}}

# {{{def plot_coverage(coverage,
def plot_coverage(
    coverage,
    ax,
    ranges,
    hp_count,
    max_coverage,
    tracktype,
    yaxis_label_fontsize,
    max_coverage_points,
):
    """Plots high and low quality coverage for the region

    User may specify a preference between stacked and superimposed 
    superimposed may cause unexpected behavior if low-quality depth is
    greater than high 
    """
    cover_x = []
    cover_y_lowqual = []
    cover_y_highqual = []
    cover_y_all = []

    for i in range(len(ranges)):
        r = ranges[i]
        region_len = r.end-r.start
        downsample = 1
        if region_len > max_coverage_points:
            downsample = int(region_len / max_coverage_points)

        for i,pos in enumerate(range(r.start, r.end + 1)):
            if i%downsample !=  0: 
                continue
            cover_x.append(map_genome_point_to_range_points(ranges, r.chrm, pos))
            if r.chrm in coverage and pos in coverage[r.chrm]:
                cover_y_all.append(coverage[r.chrm][pos][0] + coverage[r.chrm][pos][1])
                cover_y_highqual.append(coverage[r.chrm][pos][0])
                cover_y_lowqual.append(coverage[r.chrm][pos][1])
            else:
                cover_y_lowqual.append(0)
                cover_y_highqual.append(0)
                cover_y_all.append(0)
    cover_y_lowqual = np.array(cover_y_lowqual)
    cover_y_highqual = np.array(cover_y_highqual)
    cover_y_all = np.array(cover_y_all)

    if max_coverage > 0:
        max_plot_depth = max_coverage
    elif cover_y_all.max() > 3 * cover_y_all.mean():
        max_plot_depth = max(
            np.percentile(cover_y_all, 99.5), np.percentile(cover_y_all, 99.5)
        )
    else:
        max_plot_depth = np.percentile(cover_y_all.max(), 99.5)
    ax2 = ax.twinx()
    ax2.set_xlim([0, 1])

    if 0 == max_plot_depth:
        max_plot_depth = 0.01

    ax2.set_ylim([0, max(1, max_plot_depth)])
    bottom_fill = np.zeros(len(cover_y_all))
    if tracktype == "stack":
        ax2.fill_between(
            cover_x,
            cover_y_highqual,
            bottom_fill,
            color="darkgrey",
            step="pre",
            alpha=0.4,
        )

        ax2.fill_between(
            cover_x, cover_y_all, cover_y_highqual, color="grey", step="pre", alpha=0.15
        )

    elif tracktype == "superimpose":
        ax2.fill_between(
            cover_x, cover_y_lowqual, bottom_fill, color="grey", step="pre", alpha=0.15
        )

        ax2.fill_between(
            cover_x,
            cover_y_highqual,
            cover_y_lowqual,
            color="darkgrey",
            step="pre",
            alpha=0.4,
        )

        ax2.fill_between(
            cover_x, cover_y_lowqual, bottom_fill, color="grey", step="pre", alpha=0.15
        )
    ## tracktype==None also allowed

    # number of ticks should be 6 if there's one hp, 3 otherwise
    tick_count = 5 if hp_count == 1 else 2
    tick_count = max(int(max_plot_depth / tick_count), 1)

    # set axis parameters
    #ax2.yaxis.set_major_locator(ticker.FixedLocator(tick_count))
    ax2.yaxis.set_major_locator(ticker.MultipleLocator(tick_count))
    ax2.tick_params(axis="y", colors="grey", labelsize=yaxis_label_fontsize)
    ax2.spines["top"].set_visible(False)
    ax2.spines["bottom"].set_visible(False)
    ax2.spines["left"].set_visible(False)
    ax2.spines["right"].set_visible(False)
    ax2.tick_params(axis="x", length=0)
    ax2.tick_params(axis="y", length=0)

    # break the variant plot when we have multiple ranges
    for i in range(1, len(ranges)):
        ax2.axvline(x=1.0 / len(ranges), color="white", linewidth=5)

    return ax2


# }}}

##Pair End methods
# {{{class PairedEnd:
class PairedEnd:
    """container of paired-end read info

    Contains start(int), end(int), strand(bool True=forward), MI (int
    molecular identifier), HP (int haplotype)
    """

    def __init__(self, chrm, start, end, is_reverse, MI_tag, HP_tag):
        """Create PairedEnd instance

        Genomic interval is defined by start and end integers
        Strand is opposite of is_reverse
        Molecular identifier and Haplotype are integers if present, else
        False
        """
        self.pos = genome_interval(chrm, start, end)
        self.strand = not (is_reverse)
        # molecular identifier - linked reads only
        self.MI = None
        # haplotype - phased reads only
        self.HP = 0

        if MI_tag:
            self.MI = MI_tag
        if HP_tag:
            self.HP = HP_tag

    def __repr__(self):
        return "PairedEnd(%s,%s,%s,%s,%s,%s)" % (
            self.pos.chrm,
            self.pos.start,
            self.pos.end,
            self.strand,
            self.MI,
            self.HP,
        )


# }}}

# {{{ def add_pair_end(bam_file, read, pairs, linked_reads):
def add_pair_end(bam_file, read, pairs, linked_reads, ignore_hp):
    """adds a (mapped, primary, non-supplementary, and paired) read to the
    pairs list

    Pysam read is added as simpified PairedEnd instance to pairs
    Also added to linked_reads list if there is an associated MI tag
    """

    if read.is_unmapped:
        return
    if not (read.is_paired):
        return
    if read.is_secondary:
        return
    if read.is_supplementary:
        return

    MI_tag = False
    HP_tag = False

    if read.has_tag("MI"):
        MI_tag = int(read.get_tag("MI"))
    if not ignore_hp and read.has_tag("HP"):
        HP_tag = int(read.get_tag("HP"))

    pe = PairedEnd(
        bam_file.get_reference_name(read.reference_id),
        read.reference_start,
        read.reference_end,
        read.is_reverse,
        MI_tag,
        HP_tag,
    )

    if pe.HP not in pairs:
        pairs[pe.HP] = {}

    if read.query_name not in pairs[pe.HP]:
        pairs[pe.HP][read.query_name] = []

    if pe.MI:
        if pe.HP not in linked_reads:
            linked_reads[pe.HP] = {}

        if pe.MI not in linked_reads[pe.HP]:
            linked_reads[pe.HP][pe.MI] = [[], []]
        linked_reads[pe.HP][pe.MI][0].append(read.query_name)

    pairs[pe.HP][read.query_name].append(pe)
    pairs[pe.HP][read.query_name].sort(key=lambda x: x.pos.start)


# }}}

# {{{def sample_normal(max_depth, pairs, z):
def sample_normal(max_depth, pairs, z):
    """Downsamples paired-end reads 
    
    Selects max_depth reads
    Does not remove discordant pairs, those with insert distance greater
    than z stdevs from mean

    Returns downsampled pairs list
    """

    sampled_pairs = {}
    plus_minus_pairs = {}

    if max_depth == 0:
        return sampled_pairs

    for read_name in pairs:
        pair = pairs[read_name]
        if len(pair) != 2:
            continue
        if pair[0].strand == True and pair[1].strand == False:
            plus_minus_pairs[read_name] = pair
        else:
            sampled_pairs[read_name] = pair

    if len(plus_minus_pairs) > max_depth:
        lens = np.array(
            [pair[1].pos.end - pair[0].pos.start for pair in plus_minus_pairs.values()]
        )
        mean = np.mean(lens)
        stdev = np.std(lens)

        inside_norm = {}

        for read_name in pairs:
            pair = pairs[read_name]
            if len(pair) != 2:
                continue
            if pair[1].pos.end - pair[0].pos.start >= mean + z * stdev:
                sampled_pairs[read_name] = pair
            else:
                inside_norm[read_name] = pair

        if len(inside_norm) > max_depth:
            for read_name in random.sample(list(inside_norm.keys()), max_depth):
                sampled_pairs[read_name] = inside_norm[read_name]
        else:
            for read_name in inside_norm:
                sampled_pairs[read_name] = inside_norm[read_name]
    else:
        for read_name in plus_minus_pairs:
            sampled_pairs[read_name] = plus_minus_pairs[read_name]

    return sampled_pairs


# }}}

# {{{def get_pairs_insert_sizes(pairs):
def get_pairs_insert_sizes(ranges, pairs):
    """Extracts the integer insert sizes for all pairs

    Return list of integer insert sizes
    """
    pair_insert_sizes = []

    for hp in pairs:
        for read_name in pairs[hp]:
            if len(pairs[hp][read_name]) == 2:
                size = get_pair_insert_size(ranges, pairs[hp][read_name])

                if size:
                    pair_insert_sizes.append(size)

    return pair_insert_sizes


# }}}

# {{{def get_pair_insert_size(ranges, pair):
def get_pair_insert_size(ranges, pair):
    """ Gives the outer distance
    """
    first = pair[0]
    second = pair[1]

    # make sure both sides are in range
    if (
        get_range_hit(ranges, first.pos.chrm, first.pos.start) != None
        or get_range_hit(ranges, first.pos.chrm, first.pos.end) != None
    ) and (
        get_range_hit(ranges, second.pos.chrm, second.pos.start) != None
        or get_range_hit(ranges, second.pos.chrm, second.pos.end) != None
    ):

        if first.pos.chrm == second.pos.chrm:
            return abs(second.pos.end - first.pos.start)
        else:
            return INTERCHROM_YAXIS
    else:
        return None


# }}}

# {{{ def get_pairs_plan(ranges, pairs, linked_plan=False):
def get_pairs_plan(ranges, pairs, linked_plan=False):
    steps = []
    max_event = 0

    insert_sizes = []

    for read_name in pairs:
        pair = pairs[read_name]

        plan = get_pair_plan(ranges, pair)

        if plan:
            insert_size, step = plan
            insert_sizes.append(insert_size)
            steps.append(step)

    if len(insert_sizes) > 0:
        max_event = max(insert_sizes)

    plan = [max_event, steps]

    return plan


# }}}

# {{{def get_pair_plan(ranges, pair, linked_plan=False):
def get_pair_plan(ranges, pair, linked_plan=False):
    if pair == None or len(pair) != 2:
        return None

    first = pair[0]
    second = pair[1]

    # see if they are part of a linked read
    if not linked_plan and (first.MI or second.MI):
        return None

    # make sure both ends are in the plotted region
    first_s_hit = get_range_hit(ranges, first.pos.chrm, first.pos.start)
    first_e_hit = get_range_hit(ranges, first.pos.chrm, first.pos.end)
    second_s_hit = get_range_hit(ranges, second.pos.chrm, second.pos.start)
    second_e_hit = get_range_hit(ranges, second.pos.chrm, second.pos.end)

    if (first_s_hit == None and first_e_hit == None) or (
        second_s_hit == None and second_e_hit == None
    ):
        return None

    insert_size = get_pair_insert_size(ranges, pair)

    first_hit = first_s_hit if first_s_hit != None else first_e_hit
    second_hit = second_e_hit if second_e_hit != None else second_s_hit

    start = genome_interval(
        first.pos.chrm,
        max(first.pos.start, ranges[first_hit].start),
        max(first.pos.start, ranges[first_hit].start),
    )

    end = genome_interval(
        second.pos.chrm,
        min(second.pos.end, ranges[second_hit].end),
        min(second.pos.end, ranges[second_hit].end),
    )

    step = plan_step(start, end, "PAIREND")

    event_type = get_pair_event_type(pair)
    step.info = {"TYPE": event_type, "INSERTSIZE": insert_size}

    return insert_size, step


# }}}

# {{{def get_pair_event_type(pe_read):
def get_pair_event_type(pe_read):
    """Decide what type of event the read supports (del/normal, dup, inv)
    """
    event_by_strand = {
        (True, False): "Deletion/Normal",
        (False, True): "Duplication",
        (False, False): "Inversion",
        (True, True): "Inversion",
    }
    event_type = event_by_strand[pe_read[0].strand, pe_read[1].strand]
    return event_type


# }}}

def jitter(value, bounds: float = 0.1) -> float:
    """
    Offset value by a random value within the defined bounds
    """
    assert 0.0 <= bounds < 1.0
    return value * (1 + bounds * random.uniform(-1, 1))


# {{{def plot_pair_plan(ranges, step, ax):
def plot_pair_plan(ranges, step, ax, marker_size, jitter_bounds):
    p = [
        map_genome_point_to_range_points(
            ranges, step.start_pos.chrm, step.start_pos.start
        ),
        map_genome_point_to_range_points(ranges, step.end_pos.chrm, step.end_pos.end),
    ]

    if None in p:
        return False

    # some points are far outside of the printable area, so we ignore them
    if not points_in_window(p):
        return False

    READ_TYPES_USED["Paired-end read"] = True

    y = step.info["INSERTSIZE"]

    # Offset y-values using jitter to avoid overlapping lines
    y = jitter(y, bounds=jitter_bounds)

    event_type = step.info["TYPE"]
    READ_TYPES_USED[event_type] = True
    color = COLORS[event_type]

    # plot the individual pair
    ax.plot(
        p,
        [y, y],
        "-",
        color=color,
        alpha=0.25,
        lw=0.5,
        marker="s",
        markersize=marker_size,
        zorder=10,
    )

    return True


# }}}

# {{{def plot_pairs(pairs,
def plot_pairs(
    pairs, ax, ranges, curr_min_insert_size, curr_max_insert_size, marker_size, jitter_bounds,
):
    """Plots all PairedEnd reads for the region
    """

    plan = get_pairs_plan(ranges, pairs)

    if not plan:
        [curr_min_insert_size, curr_max_insert_size]

    max_event, steps = plan

    for step in steps:
        plot_pair_plan(ranges, step, ax, marker_size, jitter_bounds)

    if not curr_min_insert_size or curr_min_insert_size > max_event:
        curr_min_insert_size = max_event
    if not curr_max_insert_size or curr_max_insert_size < max_event:
        curr_max_insert_size = max_event

    return [curr_min_insert_size, curr_max_insert_size]


# }}}

##Split Read methods
# {{{class SplitRead:
class SplitRead:
    """container of split read info

    Contains start(int), end(int), strand(bool True=forward), query
    position (int), MI (int molecular identifier), HP (int haplotype)
    """

    def __init__(self, chrm, start, end, strand, query_pos, MI_tag=None, HP_tag=None):
        """Create SplitRead instance

        Genomic interval is defined by start, end, and query_pos integers
        Strand is opposite of is_reverse
        Molecular identifier and Haplotype are integers if present, else
        False
        """
        self.pos = genome_interval(chrm, start, end)
        self.strand = strand
        self.query_pos = query_pos
        # molecular identifier - linked reads only
        self.MI = None
        # haplotype - phased reads only
        self.HP = 0

        if MI_tag:
            self.MI = MI_tag
        if HP_tag:
            self.HP = HP_tag

    def __repr__(self):
        return "SplitRead(%s,%s,%s,%s,%s,%s,%s)" % (
            self.pos.chrm,
            self.pos.start,
            self.pos.end,
            self.strand,
            self.query_pos,
            self.MI,
            self.HP,
        )


# }}}

# {{{def calc_query_pos_from_cigar(cigar, strand):
def calc_query_pos_from_cigar(cigar, strand):
    """Uses the CIGAR string to determine the query position of a read

    The cigar arg is a string like the following: 86M65S
    The strand arg is a boolean, True for forward strand and False for
    reverse

    Returns pair of ints for query start, end positions
    """

    cigar_ops = [[int(op[0]), op[1]] for op in re.findall("(\d+)([A-Za-z])", cigar)]

    order_ops = cigar_ops
    if not strand:  # - strand
        order_ops = order_ops[::-1]

    qs_pos = 0
    qe_pos = 0
    q_len = 0

    for op_position in range(len(cigar_ops)):
        op_len = cigar_ops[op_position][0]
        op_type = cigar_ops[op_position][1]

        if op_position == 0 and (op_type == "H" or op_type == "S"):
            qs_pos += op_len
            qe_pos += op_len
            q_len += op_len
        elif op_type == "H" or op_type == "S":
            q_len += op_len
        elif op_type == "M" or op_type == "I" or op_type == "X":
            qe_pos += op_len
            q_len += op_len

    return qs_pos, qe_pos


# }}}

# {{{def add_split(read, splits, bam_file, linked_reads):
def add_split(read, splits, bam_file, linked_reads, ignore_hp):
    """adds a (primary, non-supplementary) read to the splits list

    Pysam read is added as simpified SplitRead instance to splits
    Also added to linked_reads list if there is an associated MI tag
    """
    if read.is_secondary:
        return
    if read.is_supplementary:
        return
    if not read.has_tag("SA"):
        return

    qs_pos, qe_pos = calc_query_pos_from_cigar(read.cigarstring, (not read.is_reverse))

    HP_tag = False
    MI_tag = False
    if read.has_tag("MI"):
        MI_tag = int(read.get_tag("MI"))

    if not ignore_hp and read.has_tag("HP"):
        HP_tag = int(read.get_tag("HP"))
    sr = SplitRead(
        bam_file.get_reference_name(read.reference_id),
        read.reference_start,
        read.reference_end,
        not (read.is_reverse),
        qs_pos,
        MI_tag,
        HP_tag,
    )

    if sr.MI:
        if sr.HP not in linked_reads:
            linked_reads[sr.HP] = {}
        if sr.MI not in linked_reads[sr.HP]:
            linked_reads[sr.HP][sr.MI] = [[], []]
        linked_reads[sr.HP][sr.MI][1].append(read.query_name)

    if sr.HP not in splits:
        splits[sr.HP] = {}

    splits[sr.HP][read.query_name] = [sr]

    for sa in read.get_tag("SA").split(";"):
        if len(sa) == 0:
            continue
        A = sa.split(",")
        chrm = A[0]
        pos = int(A[1])
        strand = A[2] == "+"
        cigar = A[3]
        #mapq and nm are never used, annotating this for code readability 
        mapq = int(A[4])
        nm = int(A[5])
        qs_pos, qe_pos = calc_query_pos_from_cigar(cigar, strand)
        splits[sr.HP][read.query_name].append(
            SplitRead(chrm, pos, pos + qe_pos, strand, qs_pos)
        )

    if len(splits[sr.HP][read.query_name]) == 1:
        del splits[sr.HP][read.query_name]
    else:
        splits[sr.HP][read.query_name].sort(key=lambda x: x.pos.start)


# }}}


# {{{def get_split_plan(ranges, split):
def get_split_plan(ranges, split, linked_plan=False):
    """
    There can be 2 or more alignments in a split. Plot only those that are in a
    range, and set the insert size to be the largest gap

    A split read acts like a long read, so we will covert the split read
    to a long read, then convert the long read plan back to a split read plan
    """

    alignments = []
    for s in split:
        # see if they are part of a linked read
        if not linked_plan and (s.MI):
            return None
        alignment = Alignment(s.pos.chrm, s.pos.start, s.pos.end, s.strand, s.query_pos)
        alignments.append(alignment)

    long_read = LongRead(alignments)
    long_reads = {}
    long_reads["convert"] = [long_read]
    plan = get_long_read_plan("convert", long_reads, ranges)

    if not plan:
        return None

    max_gap, lr_steps = plan

    if len(lr_steps) < 3:
        return None

    sr_steps = []

    # a split read will include 3 long read steps, align, event, align
    for i in range(0, len(lr_steps), 2):
        if i + 2 > len(lr_steps):
            break
        if (
            lr_steps[i].info["TYPE"] == "Align"
            and lr_steps[i + 1].info["TYPE"] != "Align"
            and lr_steps[i + 2].info["TYPE"] == "Align"
        ):
            start = genome_interval(
                lr_steps[i].end_pos.chrm,
                lr_steps[i].end_pos.end,
                lr_steps[i].end_pos.end,
            )
            end = genome_interval(
                lr_steps[i + 2].start_pos.chrm,
                lr_steps[i + 2].start_pos.start,
                lr_steps[i + 2].start_pos.start,
            )
            sr_steps.append(
                plan_step(
                    start,
                    end,
                    "SPLITREAD",
                    info={"TYPE": lr_steps[i + 1].info["TYPE"], "INSERTSIZE": max_gap},
                )
            )
    return max_gap, sr_steps


# }}}

# {{{def get_splits_plan(ranges, splits, linked_plan=False):
def get_splits_plan(ranges, splits, linked_plan=False):
    steps = []
    max_event = 0

    insert_sizes = []

    for read_name in splits:
        split = splits[read_name]

        plan = get_split_plan(ranges, split)

        if plan:
            insert_size, step = plan
            insert_sizes.append(insert_size)
            steps += step

    if len(insert_sizes) > 0:
        max_event = max(insert_sizes)

    plan = [max_event, steps]

    return plan


# }}}


# {{{def plot_split(split, y, ax, ranges):
def plot_split_plan(ranges, step, ax, marker_size, jitter_bounds):
    p = [
        map_genome_point_to_range_points(
            ranges, step.start_pos.chrm, step.start_pos.start
        ),
        map_genome_point_to_range_points(ranges, step.end_pos.chrm, step.end_pos.end),
    ]

    if None in p:
        return False

    # some points are far outside of the printable area, so we ignore them
    if not points_in_window(p):
        return False

    READ_TYPES_USED["Split-read"] = True

    y = step.info["INSERTSIZE"]

    # Offset y-values using jitter to avoid overlapping lines
    y = jitter(y, bounds=jitter_bounds)

    event_type = step.info["TYPE"]
    READ_TYPES_USED[event_type] = True
    color = COLORS[event_type]

    ax.plot(
        p,
        [y, y],
        ":",
        color=color,
        alpha=0.25,
        lw=1,
        marker="o",
        markersize=marker_size,
    )


# }}}

# {{{def plot_splits(splits,
def plot_splits(
    splits, ax, ranges, curr_min_insert_size, curr_max_insert_size, marker_size, jitter_bounds,
):
    """Plots all SplitReads for the region
    """
    plan = get_splits_plan(ranges, splits)

    if not plan:
        [curr_min_insert_size, curr_max_insert_size]

    max_event, steps = plan

    for step in steps:
        plot_split_plan(ranges, step, ax, marker_size, jitter_bounds)

    if not curr_min_insert_size or curr_min_insert_size > max_event:
        curr_min_insert_size = max_event
    if not curr_max_insert_size or curr_max_insert_size < max_event:
        curr_max_insert_size = max_event

    return [curr_min_insert_size, curr_max_insert_size]


# }}}

##Long Read methods
# {{{class Alignment:
class Alignment:
    """container of alignment info, from CIGAR string

    Contains start(int), end(int), strand(bool True=forward), query
    position (int)
    """

    def __init__(self, chrm, start, end, strand, query_position):
        """Create Alignment instance

        Genomic interval is defined by start, end, and query_pos integers
        Strand is bool (True for forward)
        """
        self.pos = genome_interval(chrm, start, end)
        self.strand = strand
        self.query_position = query_position

    def __str__(self):
        return ",".join(
            [
                str(x)
                for x in [
                    self.pos.chrm,
                    self.pos.start,
                    self.pos.end,
                    self.strand,
                    self.query_position,
                ]
            ]
        )

    def __repr__(self):
        return "Alignment(%s,%s,%s,%s,%s)" % (
            self.pos.chrm,
            self.pos.start,
            self.pos.end,
            self.strand,
            self.query_position,
        )


# }}}

# {{{class LongRead:
class LongRead:
    """container of LongRead info

    Contains start(int), end(int), list of Alignments
    """

    def __init__(self, alignments):
        """Create LongRead instance

        Genomic interval is defined by start, end integers
        List of Alignments set by parameter
        """
        self.alignments = alignments

    def __str__(self):
        return ",".join([str(x) for x in self.alignments])

    def __repr__(self):
        return "LongRead(" + str(self) + ")"


# }}}

# {{{def get_alignments_from_cigar(chrm,
def get_alignments_from_cigar(chrm, curr_pos, strand, cigartuples, reverse=False):
    """Breaks CIGAR string into individual Aignments

    Starting point within genome given by curr_pos and strand
    Set of CIGAR operations and lengths as pairs passed in as cigartuples
    Direction of alignment set to reverse with reverse boolean

    Return list of Alignments
    """
    alignments = []
    q_pos = 0
    if reverse:
        cigartuples = cigartuples[::-1]

    for op, length in cigartuples:
        if op in [CIGAR_MAP["M"], CIGAR_MAP["="], CIGAR_MAP["X"]]:
            alignments.append(
                Alignment(chrm, curr_pos, curr_pos + length, strand, q_pos)
            )
            curr_pos += length
            q_pos += length
        elif op == CIGAR_MAP["I"]:
            q_pos += length
        elif op == CIGAR_MAP["D"]:
            curr_pos += length
        elif op == CIGAR_MAP["N"]:
            curr_pos += length
        elif op == CIGAR_MAP["S"]:
            q_pos += length
    return alignments


# }}}

# {{{def get_cigartuples_from_string(cigarstring):
def get_cigartuples_from_string(cigarstring):
    """Extracts operations,lengths as tuples from cigar string"

    Returns list of tuples of [operation,length]
    """
    cigartuples = []
    for match in re.findall(r"(\d+)([A-Z]{1})", cigarstring):
        length = int(match[0])
        op = match[1]
        cigartuples.append((CIGAR_MAP[op], length))

    return cigartuples


# }}}

# {{{def merge_alignments(min_gap, alignments):
def merge_alignments(min_gap, alignments):
    """Combines previously identified alignments if close together
    Alignments are combined if within min_gap distance
    Returns list of Alignments
    """

    merged_alignments = []

    for alignment in alignments:
        if len(merged_alignments) == 0:
            merged_alignments.append(alignment)
        else:
            if (
                alignment.pos.chrm == merged_alignments[-1].pos.chrm
                and alignment.pos.start < merged_alignments[-1].pos.end + min_gap
            ):
                merged_alignments[-1].pos.end = alignment.pos.end
            else:
                merged_alignments.append(alignment)
    return merged_alignments


# }}}

# {{{def add_long_reads(bam_file, read, long_reads, min_event_size):
def add_long_reads(bam_file, read, long_reads, min_event_size, ignore_hp):
    """Adds a (primary, non-supplementary, long) read to the long_reads list

    Read added to long_reads if within the inteval defined by ranges
    Alignments belonging to the LongRead instance combined if within the
    min_event_size distance apart
    """
    if read.is_supplementary or read.is_secondary:
        return

    hp = 0

    if not ignore_hp and read.has_tag("HP"):
        hp = int(read.get_tag("HP"))

    alignments = get_alignments_from_cigar(
        bam_file.get_reference_name(read.reference_id),
        read.pos,
        not read.is_reverse,
        read.cigartuples,
    )

    min_gap = min_event_size
    merged_alignments = merge_alignments(min_gap, alignments)

    read_strand = not read.is_reverse

    if read.has_tag("SA"):
        for sa in read.get_tag("SA").split(";"):
            if len(sa) == 0:
                continue

            rname, pos, strand, cigar, mapq, nm = sa.split(",")

            sa_pos = int(pos)
            sa_strand = strand == "+"
            strand_match = read_strand != sa_strand
            sa_cigartuples = get_cigartuples_from_string(cigar)
            sa_alignments = get_alignments_from_cigar(
                rname, sa_pos, sa_strand, sa_cigartuples, reverse=strand_match
            )

            sa_merged_alignments = merge_alignments(min_gap, sa_alignments)

            if len(sa_merged_alignments) > 0:
                merged_alignments += sa_merged_alignments

    if hp not in long_reads:
        long_reads[hp] = {}

    if read.query_name not in long_reads[hp]:
        long_reads[hp][read.query_name] = []

    long_reads[hp][read.query_name].append(LongRead(merged_alignments))


# }}}

# {{{def add_align_step(alignment, steps, ranges):
def add_align_step(alignment, steps, ranges):
    # alignment can span ranges
    start_range_hit_i = get_range_hit(ranges, alignment.pos.chrm, alignment.pos.start)
    end_range_hit_i = get_range_hit(ranges, alignment.pos.chrm, alignment.pos.end)

    # neither end is in range, add nothing
    if start_range_hit_i == None and end_range_hit_i == None:
        return

    # start is not in range, use end hit
    if start_range_hit_i == None:
        start = genome_interval(
            alignment.pos.chrm,
            max(alignment.pos.start, ranges[end_range_hit_i].start),
            max(alignment.pos.start, ranges[end_range_hit_i].start),
        )
        end = genome_interval(
            alignment.pos.chrm,
            min(alignment.pos.end, ranges[end_range_hit_i].end),
            min(alignment.pos.end, ranges[end_range_hit_i].end),
        )
        steps.append(plan_step(start, end, "LONGREAD", info={"TYPE": "Align"}))
    # end is not in range, use start hit
    elif end_range_hit_i == None:
        start = genome_interval(
            alignment.pos.chrm,
            max(alignment.pos.start, ranges[start_range_hit_i].start),
            max(alignment.pos.start, ranges[start_range_hit_i].start),
        )
        end = genome_interval(
            alignment.pos.chrm,
            min(alignment.pos.end, ranges[start_range_hit_i].end),
            min(alignment.pos.end, ranges[start_range_hit_i].end),
        )
        steps.append(plan_step(start, end, "LONGREAD", info={"TYPE": "Align"}))
    # both are in the same range
    elif start_range_hit_i == end_range_hit_i:
        start = genome_interval(
            alignment.pos.chrm,
            max(alignment.pos.start, ranges[start_range_hit_i].start),
            max(alignment.pos.start, ranges[start_range_hit_i].start),
        )
        end = genome_interval(
            alignment.pos.chrm,
            min(alignment.pos.end, ranges[end_range_hit_i].end),
            min(alignment.pos.end, ranges[end_range_hit_i].end),
        )
        steps.append(plan_step(start, end, "LONGREAD", info={"TYPE": "Align"}))
    # in different ranges
    else:
        start_1 = genome_interval(
            alignment.pos.chrm,
            max(alignment.pos.start, ranges[start_range_hit_i].start),
            max(alignment.pos.start, ranges[start_range_hit_i].start),
        )
        end_1 = genome_interval(
            alignment.pos.chrm,
            ranges[start_range_hit_i].end,
            ranges[start_range_hit_i].end,
        )
        steps.append(plan_step(start_1, end_1, "LONGREAD", info={"TYPE": "Align"}))

        start_2 = genome_interval(
            alignment.pos.chrm,
            ranges[end_range_hit_i].start,
            ranges[end_range_hit_i].start,
        )
        end_2 = genome_interval(
            alignment.pos.chrm,
            min(alignment.pos.end, ranges[end_range_hit_i].end),
            min(alignment.pos.end, ranges[end_range_hit_i].end),
        )
        steps.append(plan_step(start_2, end_2, "LONGREAD", info={"TYPE": "Align"}))


# }}}

# {{{def get_long_read_plan(read_name, long_reads, ranges):
def get_long_read_plan(read_name, long_reads, ranges):
    """Create a plan to render a long read

    Plan consists of the largest event within the read 
        (used to determine the y-axis position of read)
        and the alignment types for plotting each Alignment within 
        LongRead.alignments Align, Duplication, Deletion, Inversion,
        Inversion,
        InterChrmInversion, InterChrm

    Returns plan
    """

    alignments = []

    # only keep alignments that intersect a range
    seen = {}

    if read_name not in long_reads:
        logger.error("Read name {} not in list of long reads".format(read_name))
        sys.exit(1)

    for long_read in long_reads[read_name]:
        for alignment in long_read.alignments:
            if alignment.query_position in seen:
                continue
            seen[alignment.query_position] = 1
            # check to see if any part of this alignment overlaps a plot
            # range
            in_range = False
            for r in ranges:
                if r.intersect(alignment.pos) == 0:
                    in_range = True
            if in_range:
                alignments.append(alignment)

    if len(alignments) <= 0:
        return None
    alignments.sort(key=lambda x: x.query_position)

    # we set the primary strand to be the one with the longest alignment
    # this will affect which alignment is inverted. There are clearly edge
    # cases here that we will need to address as we get more examples
    # of inversions
    longest_alignment = 0
    longest_alignment_i = -1
    for i in range(len(alignments)):
        l = alignments[i].pos.end - alignments[i].pos.start
        if longest_alignment < l:
            longest_alignment = l
            longest_alignment_i = i
    primary_strand = alignments[longest_alignment_i].strand

    steps = []
    # long aglinments may spill over the edges, so we will clip that starts
    curr = alignments[0]

    add_align_step(curr, steps, ranges)

    for i in range(1, len(alignments)):
        last = alignments[i - 1]
        curr = alignments[i]

        # figure out what the event is

        # INTER CHROM
        if curr.pos.chrm != last.pos.chrm:
            if curr.strand != last.strand:
                start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)

                end = genome_interval(curr.pos.chrm, curr.pos.end, curr.pos.end)

                info = {"TYPE": "InterChrmInversion"}
                steps.append(plan_step(start, end, "LONGREAD", info=info))
            else:
                start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)
                end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)
                info = {"TYPE": "InterChrm"}
                steps.append(plan_step(start, end, "LONGREAD", info=info))

            add_align_step(curr, steps, ranges)
        # Inversion
        elif curr.strand != last.strand:
            # it is possible that we have a complex even that
            # is an inverted Duplication
            if curr.pos.start < last.pos.end:
                start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)
                end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)
                info = {"TYPE": "Deletion"}
                steps.append(plan_step(start, end, "LONGREAD", info=info))
            if curr.strand != primary_strand:
                # last (primary) | curr
                # +++++++++++++++|-------
                #               ^.......^
                #             end           end

                # last (primary) | curr
                # ---------------|+++++++
                #               ^.......^
                #             end           end

                start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)
                end = genome_interval(curr.pos.chrm, curr.pos.end, curr.pos.end)
                info = {"TYPE": "Inversion"}
                steps.append(plan_step(start, end, "LONGREAD", info=info))
            else:
                if curr.pos.start < last.pos.end:
                    start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)
                    end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)
                    info = {"TYPE": "Duplication"}
                    steps.append(plan_step(start, end, "LONGREAD", info=info))

                # last   | curr (primary)
                # +++++++|-------------
                # ^.......^
                # start   start

                # last   | curr (primary)
                # -------|+++++++++++++++
                # ^.......^
                # start   start

                start = genome_interval(last.pos.chrm, last.pos.start, last.pos.start)
                end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)
                info = {"TYPE": "Inversion"}
                steps.append(plan_step(start, end, "LONGREAD", info=info))

            add_align_step(curr, steps, ranges)
        # Duplication
        elif curr.pos.start < last.pos.end:
            start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)
            end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)
            info = {"TYPE": "Duplication"}
            steps.append(plan_step(start, end, "LONGREAD", info=info))
            add_align_step(curr, steps, ranges)
        # Deletion
        else:
            start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)
            end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)
            info = {"TYPE": "Deletion"}
            # steps.append(plan_step(start, end, 'LONGREAD', info=info))
            steps.append(plan_step(start, end, "LONGREAD", info={"TYPE": "Deletion"}))
            add_align_step(curr, steps, ranges)

        # if either end is in a range, then add its gap to the list

    max_gap = None

    chrms = set([s.start_pos.chrm for s in steps] + [s.end_pos.chrm for s in steps])

    # set interchrm dist to 5000
    if len(chrms) > 1:
        max_gap = INTERCHROM_YAXIS
    else:
        step_sizes = [
            abs(step.end_pos.end - step.start_pos.start)
            for step in steps
            if step.info["TYPE"] != "Align"
            and get_range_hit(ranges, step.start_pos.chrm, step.start_pos.start) != None
            and get_range_hit(ranges, step.end_pos.chrm, step.end_pos.end) != None
        ]

        max_gap = max(step_sizes) if len(step_sizes) > 0 else 0

    plan = [max_gap, steps]

    return plan


# }}}


##Variant methods
# {{{def plot_variant(sv, sv_type, ax, ranges):
def plot_variant(sv, sv_type, ax, ranges):
    """Plots the variant bar at the top of the image

    """

    r = [
        map_genome_point_to_range_points(ranges, sv[0].chrm, sv[0].start),
        map_genome_point_to_range_points(ranges, sv[-1].chrm, sv[-1].end),
    ]

    ax.plot(r, [0, 0], "-", color="black", lw=8, solid_capstyle="butt", alpha=0.5)

    ax.set_xlim([0, 1])
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.tick_params(axis="x", length=0)
    ax.tick_params(axis="y", length=0)
    ax.set_xticklabels([])
    ax.set_yticklabels([])

    ## make SV title
    sv_title = ""
    if sv[0].chrm == sv[-1].chrm:
        sv_size = float(sv[0].end) - float(sv[0].start)
        if len(sv) > 1:
            sv_size = abs(int(float(sv[0].end) - float(sv[-1].start)))
        sv_size_unit = "bp"

        if sv_size > 1000000:
            sv_size = "{0:0.2f}".format(sv_size / 1000000.0)
            sv_size_unit = "mb"
        elif sv_size > 1000:
            sv_size = "{0:0.2f}".format(sv_size / 1000.0)
            sv_size_unit = "kb"

        sv_title = str(sv_size) + " " + sv_size_unit + " " + sv_type
    else:
        sv_title = sv_type

    ax.set_title(sv_title, fontsize=8)


# }}}

# {{{def plot_confidence_interval(chrm, breakpoint,ci, ax, ranges):
def plot_confidence_interval(chrm, breakpoint, ci, ax, ranges):
    """Plots a confidence interval on the variant bar
    """

    r = [
        map_genome_point_to_range_points(ranges, chrm, breakpoint - int(ci[0])),
        map_genome_point_to_range_points(ranges, chrm, breakpoint + int(ci[1])),
    ]
    if None in r:
        # confidence intervals are invalid
        return

    ax.plot(r, [0, 0], "-", color="black", lw=0.5, alpha=1)
    ax.axvline(r[0], color="black", lw=0.5, alpha=1, ymin=0.40, ymax=0.60)
    ax.axvline(r[1], color="black", lw=0.5, alpha=1, ymin=0.40, ymax=0.60)

    ax.set_xlim([0, 1])
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.tick_params(axis="x", length=0)
    ax.tick_params(axis="y", length=0)
    ax.set_xticklabels([])
    ax.set_yticklabels([])


# }}}

# {{{def create_variant_plot(grid,
def create_variant_plot(grid, ax_i, sv, sv_type, ranges, start_ci, end_ci):
    """Plots the pieces of the variant bar at the top, including bar and
    confidence intervals 
    """
    ax = plt.subplot(grid[ax_i])
    plot_variant(sv, sv_type, ax, ranges)
    ax_i += 1
    # plot confidence intervals if provided
    if start_ci and start_ci != None:
        plot_confidence_interval(sv[0].chrm, sv[0].start, start_ci, ax, ranges)
    if end_ci and end_ci != None:
        plot_confidence_interval(sv[-1].chrm, sv[-1].end, end_ci, ax, ranges)

    # break the variant plot when we have multiple ranges
    for i in range(1, len(ranges)):
        ax.axvline(x=1.0 / len(ranges), color="white", linewidth=5)
        ax.text(
            1.0 / len(ranges),
            0,
            "...",
            fontsize=6,
            fontdict=None,
            horizontalalignment="center",
        )

    return ax_i


# }}}

# Linked Reads methods
# {{{ def get_linked_plan(ranges, pairs, splits, linked_reads, gem_name):
def get_linked_plan(ranges, pairs, splits, linked_reads, gem_name):
    insert_sizes = []

    gem_poss = [[] for i in range(len(ranges))]

    linked_pair_steps = []
    # collect all the pairs in a gem
    for name in linked_reads[gem_name][0]:
        if name in pairs and len(pairs[name]) == 2:
            pair = pairs[name]
            plan = get_pair_plan(ranges, pair, linked_plan=True)
            if plan:
                insert_size, step = plan
                insert_sizes.append(insert_size)
                linked_pair_steps.append(step)

    # collect all the splits in a gem
    linked_split_steps = []
    for name in linked_reads[gem_name][1]:
        if name in splits:
            split = splits[name]
            plan = get_split_plan(ranges, split, linked_plan=True)
            if plan:
                insert_size, steps = plan
                insert_sizes.append(insert_size)
                linked_split_steps += steps

    if len(linked_split_steps) == 0 and len(linked_pair_steps) == 0:
        return None

    for step in linked_split_steps + linked_pair_steps:
        poss = [
            (step.start_pos.chrm, step.start_pos.start),
            (step.start_pos.chrm, step.start_pos.end),
            (step.end_pos.chrm, step.end_pos.start),
            (step.end_pos.chrm, step.end_pos.end),
        ]
        for pos in poss:
            hit = get_range_hit(ranges, pos[0], pos[1])
            if hit > -1:
                gem_poss[hit].append(pos[1])

    max_event_size = max(insert_sizes)

    gem_steps = []

    for i in range(len(ranges)):
        if len(gem_poss[i]) == 0:
            continue
        start = genome_interval(ranges[i].chrm, min(gem_poss[i]), min(gem_poss[i]))
        end = genome_interval(ranges[i].chrm, max(gem_poss[i]), max(gem_poss[i]))
        gem_steps.append(plan_step(start, end, "LINKED"))

    # if the gem extends beyond the range, then push the end pos to the
    # end/begining of the range
    if len(gem_steps) > 1:
        gem_steps[0].end_pos.start = ranges[0].end
        gem_steps[0].end_pos.end = ranges[0].end

        gem_steps[1].start_pos.start = ranges[1].start
        gem_steps[1].start_pos.end = ranges[1].start

    info = {
        "INSERTSIZE": max_event_size,
        "PAIR_STEPS": linked_pair_steps,
        "SPLIT_STEPS": linked_split_steps,
    }

    gem_steps[0].info = info

    return max(insert_sizes), gem_steps


# }}}

# {{{ def plot_linked_reads(pairs,
def plot_linked_reads(
    pairs,
    splits,
    linked_reads,
    ax,
    ranges,
    curr_min_insert_size,
    curr_max_insert_size,
    marker_size,
    jitter_bounds,
):
    """Plots all LinkedReads for the region
    """
    for linked_read in linked_reads:
        plan = get_linked_plan(ranges, pairs, splits, linked_reads, linked_read)

        if not plan:
            continue

        insert_size, steps = plan

        insert_size = jitter(insert_size, bounds=jitter_bounds)

        if not curr_min_insert_size or curr_min_insert_size > insert_size:
            curr_min_insert_size = insert_size
        if not curr_max_insert_size or curr_max_insert_size < insert_size:
            curr_max_insert_size = insert_size

        for step in steps:
            p = [
                map_genome_point_to_range_points(
                    ranges, step.start_pos.chrm, step.start_pos.start
                ),
                map_genome_point_to_range_points(
                    ranges, step.end_pos.chrm, step.end_pos.end
                ),
            ]
            # ignore points outside window
            if not points_in_window(p):
                continue

            READ_TYPES_USED["Linked read"] = True

            ax.plot(
                p, [insert_size, insert_size], "-", color="green", alpha=0.75, lw=0.25
            )

        for pair_step in steps[0].info["PAIR_STEPS"]:
            pair_step.info["INSERTSIZE"] = insert_size
            plot_pair_plan(ranges, pair_step, ax, marker_size, jitter_bounds)

        for split_step in steps[0].info["SPLIT_STEPS"]:
            split_step.info["INSERTSIZE"] = insert_size
            plot_split_plan(ranges, split_step, ax, marker_size, jitter_bounds)

    return [curr_min_insert_size, curr_max_insert_size]


# }}}

# {{{def plot_long_reads(long_reads,
def plot_long_reads(long_reads, ax, ranges, curr_min_insert_size, curr_max_insert_size, jitter_bounds):
    """Plots all LongReads for the region
    """

    Path = mpath.Path

    colors = {
        "Align": "orange",
        "Deletion": "black",
        "Inversion": "blue",
        "Duplication": "red",
        "InterChrm": "black",
        "InterChrmInversion": "blue",
    }

    for read_name in long_reads:
        long_read_plan = get_long_read_plan(read_name, long_reads, ranges)

        if long_read_plan is None:
            continue
        max_gap = long_read_plan[0]
        steps = long_read_plan[1]
        for step in steps:

            p = [
                map_genome_point_to_range_points(
                    ranges, step.start_pos.chrm, step.start_pos.start
                ),
                map_genome_point_to_range_points(
                    ranges, step.end_pos.chrm, step.end_pos.end
                ),
            ]

            # some points are far outside of the printable area, so we
            # ignore them
            if not points_in_window(p):
                continue

            READ_TYPES_USED["Aligned long read"] = True

            event_type = step.info["TYPE"]
            READ_TYPES_USED[event_type] = True

            if event_type == "Align":
                ax.plot(
                    p,
                    [max_gap, max_gap],
                    "-",
                    color=colors[event_type],
                    alpha=0.25,
                    lw=1,
                )

                curr_max_insert_size = max(curr_max_insert_size, max_gap)
            else:
                x1 = p[0]
                x2 = p[1]
                # get offset to bend the line up
                max_gap_offset = max(jitter(max_gap * 1.1, bounds=jitter_bounds), max_gap)
                pp = mpatches.PathPatch(
                    Path(
                        [
                            (x1, max_gap),
                            (x1, max_gap_offset),
                            (x2, max_gap_offset),
                            (x2, max_gap),
                        ],
                        [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4],
                    ),
                    fc="none",
                    color=colors[event_type],
                    alpha=0.25,
                    lw=1,
                    ls=":",
                )
                ax.add_patch(pp)

                # add some room for the bend line
                curr_max_insert_size = max(curr_max_insert_size, max_gap_offset)

    return [curr_min_insert_size, curr_max_insert_size]


# }}}

##Setup
# {{{def pair(arg):
def pair(arg):
    """Defines behavior for ArgParse pairs 

    Pairs must be comma-separated list of two items
    """
    try:
        parsed_arg = [int(x) for x in arg.split(",")]
        if len(parsed_arg) == 2:
            return parsed_arg
        else:
            logger.error("Invalid number of pair values")
            sys.exit(1)
    except Exception as e:
        logger.error("Invalid pair values")
        print(e, file=sys.stderr)
        sys.exit(1)


# }}}

# {{{def print_arguments(options):
def print_arguments(options):
    """Prints out the arguments to samplot as a json object

    Used as metadata for PlotCritic
    """
    if options.print_args or options.json_only:
        import json

        args_filename = os.path.splitext(options.output_file)[0] + ".json"
        args_info = {
            "titles": options.titles if options.titles else "None",
            "reference": options.reference if options.reference else "None",
            "bams": options.bams,
            "output_file": options.output_file,
            "start": options.start,
            "end": options.end,
            "chrom": options.chrom,
            "window": options.window,
            "max_depth": options.max_depth if options.max_depth else "None",
            "sv_type": options.sv_type,
            "transcript_file": options.transcript_file
            if options.transcript_file
            else "None",
        }
        with open(args_filename, "w") as outfile:
            json.dump(args_info, outfile)


# }}}


# {{{def setup_arguments():
def add_plot(parent_parser):
    """Defines the allowed arguments for plot function
    """
    parser = parent_parser.add_parser(
        "plot",
        help="Plot an image of a genome region from "
        + "CRAM/SAM alignments, "
        + "optimized for structural variant call review",
    )

    parser.add_argument(
        "-n",
        "--titles",
        help="Space-delimited list of plot titles. "
        + "Use quote marks to include spaces "
        + '(i.e. "plot 1" "plot 2")',
        type=str,
        nargs="+",
        required=False,
    )

    parser.add_argument(
        "-r",
        "--reference",
        help="Reference file for CRAM, required if " + "CRAM files used",
        type=str,
        required=False,
    )

    parser.add_argument(
        "-z",
        "--z",
        type=int,
        default=4,
        help="Number of stdevs from the mean (default 4)",
        required=False,
    )

    def bam_file(bam):
        if not os.path.isfile(bam):
            parser.error("alignment file {} does not exist or is not a valid file".format(bam))
        options = ["sam", "bam", "cram"]
        idx_options = ["sai", "bai", "crai", "csi"]
        fields = os.path.splitext(bam)
        ext = fields[1][1:].lower()
        if ext not in options:
            parser.error("alignment file {} is not in SAM/BAM/CRAM format".format(bam))
        idx_type = idx_options[options.index(ext)]
        #try the type-specific index name
        picard_bam = os.path.splitext(bam)[0]
        if (not os.path.isfile(bam + "." + idx_type) and 
                not os.path.isfile(picard_bam + "." + idx_type)):
            idx_type = idx_options[3]
            #try the csi index name
            if not os.path.isfile(bam + "." + idx_type):
                parser.error("alignment file {} has no index".format(bam))
        return bam


    parser.add_argument(
        "-b",
        "--bams",
        type=bam_file,
        nargs="+",
        help="Space-delimited list of BAM/CRAM file names",
        required=True,
    )

    parser.add_argument(
        "-o", 
        "--output_file", 
        type=str, 
        help="Output file name/type. "
        +"Defaults to {type}_{chrom}_{start}_{end}.png",
        required=False,
    )
    
    parser.add_argument(
        "--output_dir",
        type=str,
        default=".",
        help="Output directory name. Defaults to working dir. "
        +"Ignored if --output_file is set",
        required=False,
    )

    parser.add_argument(
        "-s",
        "--start",
        type=int,
        help="Start position of region/variant (add multiple for translocation/BND events)",
        action="append",
        required=True,
    )

    parser.add_argument(
        "-e",
        "--end",
        type=int,
        help="End position of region/variant (add multiple for translocation/BND events)",
        action="append",
        required=True,
    )

    parser.add_argument(
        "-c",
        "--chrom", type=str,
        help="Chromosome (add multiple for translocation/BND events)",
        action="append",
        required=True
    )

    parser.add_argument(
        "-w",
        "--window",
        type=int,
        help="Window size (count of bases to include " + "in view), default(0.5 * len)",
        required=False,
    )

    parser.add_argument(
        "-d",
        "--max_depth",
        type=int,
        help="Max number of normal pairs to plot",
        default=1,
        required=False,
    )

    parser.add_argument(
        "-t",
        "--sv_type",
        type=str,
        help="SV type. If omitted, plot is created " + "without variant bar",
        required=False,
    )
    
    def gff_file(transcript_file):
        if not os.path.isfile(transcript_file):
            parser.error("transcript file {} does not exist or is not a valid file".format(transcript_file))
        options = ["gff", "gff3"]
        fields = os.path.splitext(transcript_file)
        ext = fields[1][1:]
        if ext == "gz":
            ext = os.path.splitext(fields[0])[1][1:]
        ext = ext.lower()
        if ext not in options:
            parser.error("transcript file {} is not in GFF3 format".format(transcript_file))

        idx_file = transcript_file + ".tbi"
        if not os.path.isfile(idx_file):
            idx_file = transcript_file + ".csi"
            if not os.path.isfile(idx_file):
                parser.error("transcript file {} is missing .tbi/.csi index file".format(transcript_file))
        return transcript_file

    parser.add_argument(
        "-T", "--transcript_file",
        help="GFF3 of transcripts",
        required=False,
        type=gff_file,
    )

    parser.add_argument(
        "--transcript_filename",
        help="Name for transcript track",
        required=False,
        type=str,
    )
    
    parser.add_argument(
        "--max_coverage_points",
        help="number of points to plot in coverage axis (downsampled from region size for speed)",
        required=False,
        type=int,
        default=1000,
    )

    def bed_file(annotation_file):
        if not os.path.isfile(annotation_file):
            parser.error("annotation file {} does not exist or is not a valid file".format(annotation_file))
        fields = os.path.splitext(annotation_file)
        ext = fields[1][1:]
        if ext == "gz":
            ext = os.path.splitext(fields[0])[1][1:]
        ext = ext.lower()
        if ext != "bed":
            parser.error("annotation file {} is not in BED format".format(annotation_file))

        idx_file = annotation_file + ".tbi"
        if not os.path.isfile(idx_file):
            idx_file = annotation_file + ".csi"
            if not os.path.isfile(idx_file):
                parser.error("annotation file {} is missing .tbi index file".format(annotation_file))
        return annotation_file

    parser.add_argument(
        "-A",
        "--annotation_files",
        type=bed_file,
        nargs="+",
        help="Space-delimited list of bed.gz tabixed "
        + "files of annotations (such as repeats, "
        + "mappability, etc.)",
        required=False,
    )
    
    parser.add_argument(
        "--annotation_filenames",
        type=str,
        nargs="+",
        help="Space-delimited list of names for the tracks in --annotation_files",
        required=False,
    )

    parser.add_argument(
        "--coverage_tracktype",
        type=str,
        help="type of track to use for low MAPQ " + "coverage plot.",
        choices=["stack", "superimpose", "none"],
        default="stack",
        required=False,
    )

    parser.add_argument(
        "-a",
        "--print_args",
        action="store_true",
        default=False,
        help="Print commandline arguments to a json file, useful with PlotCritic",
        required=False,
    )

    parser.add_argument(
        "-H", "--plot_height", type=int, help="Plot height", required=False
    )

    parser.add_argument(
        "-W", "--plot_width", type=int, help="Plot width", required=False
    )

    parser.add_argument(
        "-q",
        "--include_mqual",
        type=int,
        help="Min mapping quality of reads to be included in plot (default 1)",
        default=1,
        required=False,
    )

    parser.add_argument(
        "--separate_mqual",
        type=int,
        help="coverage from reads with MAPQ <= separate_mqual "
        + "plotted in lighter grey. To disable, "
        + "pass in negative value",
        default=0,
        required=False,
    )

    parser.add_argument(
        "-j",
        "--json_only",
        action="store_true",
        default=False,
        help="Create only the json file, not the " + "image plot",
        required=False,
    )

    parser.add_argument(
        "--start_ci",
        help="confidence intervals of SV first "
        + "breakpoint (distance from the "
        + "breakpoint). Must be a "
        + "comma-separated pair of ints (i.e. 20,40)",
        type=pair,
        required=False,
    )

    parser.add_argument(
        "--end_ci",
        help="confidence intervals of SV end "
        + "breakpoint (distance from the "
        + "breakpoint). Must be a "
        + "comma-separated pair of ints (i.e. 20,40)",
        type=pair,
        required=False,
    )

    parser.add_argument(
        "--long_read",
        type=int,
        default=1000,
        help="Min length of a read to be treated as a " + "long-read (default 1000)",
        required=False,
    )

    parser.add_argument(
        "--ignore_hp",
        action="store_true",
        help="Choose to ignore HP tag in alignment files",
        required=False,
    )
    parser.add_argument(
        "--min_event_size",
        type=int,
        default=20,
        help="Min size of an event in long-read " + "CIGAR to include (default 20)",
        required=False,
    )

    parser.add_argument(
        "--xaxis_label_fontsize",
        type=int,
        default=6,
        help="Font size for X-axis labels (default 6)",
        required=False,
    )

    parser.add_argument(
        "--yaxis_label_fontsize",
        type=int,
        default=6,
        help="Font size for Y-axis labels (default 6)",
        required=False,
    )

    parser.add_argument(
        "--legend_fontsize",
        type=int,
        default=6,
        help="Font size for legend labels (default 6)",
        required=False,
    )

    parser.add_argument(
        "--annotation_fontsize",
        type=int,
        default=6,
        help="Font size for annotation labels (default 6)",
        required=False,
    )

    parser.add_argument(
        "--hide_annotation_labels",
        action="store_true",
        default=False,
        help="Hide the label (fourth column text) "
        + "from annotation files, useful for regions "
        + "with many annotations",
        required=False,
    )

    parser.add_argument(
        "--coverage_only",
        action="store_true",
        default=False,
        help="Hide all reads and show only coverage",
        required=False,
    )

    parser.add_argument(
        "--max_coverage",
        default=0,
        type=int,
        help="apply a maximum coverage cutoff. Unlimited by default",
    )

    parser.add_argument(
        "--same_yaxis_scales",
        action="store_true",
        default=False,
        help="Set the scales of the Y axes to the " + "max of all",
        required=False,
    )

    parser.add_argument(
        "--marker_size",
        type=int,
        default=3,
        help="Size of marks on pairs and splits (default 3)",
        required=False,
    )
    parser.add_argument(
        "--jitter",
        type=float,
        nargs="?",
        const=0.08,
        default=0.0,
        help="Add uniform random noise to insert sizes. This can be helpful "
             "to resolve overlapping entries. Either a custom value (<1.0) is "
             "supplied or %(const)s will be used."
    )
    parser.add_argument(
        "--dpi",
        type=int,
        default=300,
        help="Dots per inches (pixel count, default 300)",
        required=False,
    )
    parser.add_argument(
        "--annotation_scalar",
        type=float,
        default=.3,
        help="scaling factor for the optional annotation/trascript tracks",
        required=False,
    )
    parser.add_argument(
        "--zoom",
        type=int,
        default=500000,
        help="Only show +- zoom amount around breakpoints, "
            +"much faster for large regions. "
            +"Ignored if region smaller than --zoom (default 500000)",
        required=False,
    )

    parser.add_argument(
        "--debug",
        type=str,
        help="Print debug statements",
        required=False
    )
    parser.add_argument(
        "--random-seed",
        type=int,
        default=9999,
        help=SUPPRESS,
    )
    parser.set_defaults(func=plot)


# }}}

# {{{def estimate_fragment_len(bam)
def estimate_fragment_len(bam, reference):
    try:
        if not reference:
            bam_file = pysam.AlignmentFile(bam, "rb")
        else:
            bam_file = pysam.AlignmentFile(bam, "rc", reference_filename=reference)
    except Exception as err:
        logger.error("Error opening file {}".format(bam_file))
        print(err, file=sys.stderr)
        sys.exit(1)

    frag_lens = []

    for i, read in enumerate(bam_file):
        if i >= 10000:
            break
        frag_lens.append(abs(read.tlen))
    if len(frag_lens) >= 5000:
        return np.median(frag_lens)
    else:
        logger.warning(
            "Insufficient reads for fragment length estimate.\nContinuing with unmodified window size"
        )
        return 0


# {{{def set_plot_dimensions(sv,
def set_plot_dimensions(
    sv,
    sv_type,
    arg_plot_height,
    arg_plot_width,
    bams,
    reference,
    annotation_files,
    transcript_file,
    arg_window,
    zoom,
):
    """Chooses appropriate dimensions for the plot

    Includes the number of samples, whether a variant type is included, and
    any annotations in height. Includes the start, end, and window argument
    in width If height and width are chosen by user, these are used instead

    Return plot height, width, and window as integers
    """

    plot_height = 5
    plot_width = 8
    if arg_plot_height:
        plot_height = arg_plot_height
    else:
        num_subplots = len(bams)
        if annotation_files:
            num_subplots += 0.3 * len(annotation_files)
        if transcript_file:
            num_subplots += 0.3
        plot_height = 2 + num_subplots

    if arg_plot_width:
        plot_width = arg_plot_width

    window = 0
    ranges = []
    if arg_window:
        window = arg_window

    """ 
    Several things determine the window size. 
    1) SV is not given, window = 0
    1) SV is given
        1) it is directly set
        2) it is not directly set
           2.1) single interval SV
           2.2) zoom set
           2.3) 2-interval SV
    """
    # if an SV type is given, then expand the window around its bounds
    if sv_type:
        # if the sv has one interval then set the window proportional
        # to sv size and set one range
        if len(sv) == 1:
            if arg_window:
                window = arg_window
            else:
                window = int((sv[0].end - sv[0].start) / 2)
                frag_len = estimate_fragment_len(bams[0], reference)

                if (0 < frag_len) and (window < 1.5 * frag_len):
                    old_window = window
                    window = int(1.5 * frag_len)
                    logger.warning(
                        "Window size is under 1.5x the estimated fragment length "
                        + "and will be resized to {}. Rerun with -w {} to override".format(
                            window, old_window
                        )
                    )

            ranges = [
                genome_interval(
                    sv[0].chrm, max(0, sv[0].start - window), sv[0].end + window
                )
            ]

            # if region is larger than zoom, set window to zoom and set two ranges
            if window >= zoom:
                window = zoom
                ranges = [
                    genome_interval(
                        sv[0].chrm,
                        max(0, sv[0].start - window),
                        sv[0].start + window,
                    ),
                    genome_interval(
                        sv[0].chrm, max(0, sv[0].end - window), sv[0].end + window
                    ),
                ]
        elif len(sv) == 2:
            if arg_window:
                window = arg_window
            elif zoom:
                window = zoom
            else:
                window = 1000

            ranges = [
                genome_interval(
                    sv[0].chrm, max(0, sv[0].start - window), sv[0].start + window
                ),
                genome_interval(
                    sv[1].chrm, max(0, sv[1].end - window), sv[1].end + window
                ),
            ]
        else:
            logger.error("{} genome splits are not supported".format(str(len(sv))))
            sys.exit(1)
    else:
        ranges = [genome_interval(sv[0].chrm, sv[0].start, sv[0].end)]

    return plot_height, plot_width, window, ranges


# }}}

# {{{def get_read_data(ranges,
def get_read_data(
    ranges,
    bams,
    reference,
    separate_mqual,
    include_mqual,
    coverage_only,
    long_read_length,
    min_event_size,
    same_yaxis_scales,
    max_depth,
    z_score,
    ignore_hp,
):
    """Reads alignment files to extract reads for the region

    Region and alignment files given with chrom, start, end, bams
    If CRAM files are used, reference must be provided
    Reads with mapping quality below include_mqual will not be retrieved
    If coverage_only, reads are not kept and used only for checking
    coverage Reads longer than long_read_length will be treated as long
    reads Max coverages values will be set to same value for all samples if
    same_yaxis_scales If max_depth, only max_depth reads will be retrieved,
    although all will be included in coverage If PairedEnd read insert size
    is greater than z_score standard deviations from mean, read will be
    treated as discordant
    """

    all_pairs = []
    all_splits = []
    all_coverages = []
    all_long_reads = []
    all_linked_reads = []

    max_coverage = 0
    haplotypes = [0, 1, 2]

    for bam_file_name in bams:
        bam_file = None
        try:
            if not reference:
                bam_file = pysam.AlignmentFile(bam_file_name, "rb")
            else:
                bam_file = pysam.AlignmentFile(
                    bam_file_name, "rc", reference_filename=reference
                )
        except Exception as err:
            logger.error("This can be caused by issues with the alignment file. "
                    +"Please make sure that it is sorted and indexed before trying again")
            print(err, file=sys.stderr)
            sys.exit(1)

        pairs = {}
        splits = {}
        long_reads = {}
        coverage = {hp: {} for hp in haplotypes}
        linked_reads = {}

        for r in ranges:
            # Define range boundries
            range_start = max(0, r.start - 1000)
            range_end = r.end + 1000

            try:
                bam_iter = bam_file.fetch(r.chrm, range_start, range_end)
            except ValueError:
                chrm = r.chrm
                if chrm[:3] == "chr":
                    chrm = chrm[3:]
                else:
                    chrm = "chr" + chrm
                bam_iter = bam_file.fetch(chrm, range_start, range_end)

            chrm = strip_chr(r.chrm)
            if chrm not in coverage[0]:
                for hp in haplotypes:
                    coverage[hp][chrm] = {}

            # Define a zeros matrix to hold coverage value over the range for all
            # haplotyps. If using separate_mqual the first column will hold the coverage
            # for high quality reads and the second column low quality reads. Otherwise
            # all coverage will be in the second column.
            range_len = range_end - range_start
            range_hp_coverage = {hp: np.zeros((range_len, 2), dtype=int) for hp in haplotypes}

            for read in bam_iter:
                if (
                    read.is_qcfail
                    or read.is_unmapped
                    or read.is_duplicate
                    or int(read.mapping_quality) < include_mqual
                ):
                    continue

                if not coverage_only:
                    if read.query_length >= long_read_length:
                        add_long_reads(bam_file, read, long_reads, min_event_size, ignore_hp)
                    else:
                        add_pair_end(bam_file, read, pairs, linked_reads, ignore_hp)
                        add_split(read, splits, bam_file, linked_reads, ignore_hp)

                # Add read coverage to the specified haplotype and column
                hp = 0 if ignore_hp or not read.has_tag("HP") else read.get_tag("HP")
                column = 0 if separate_mqual and (read.mapping_quality > separate_mqual) else 1
                add_coverage(read, range_hp_coverage[hp], range_start, column)

            # Tally the coverage for each position and updata coverage dict.
            for hp, range_coverage in range_hp_coverage.items():
                # Skip empty haplotypes
                if (range_coverage.sum() == 0).all():
                    continue

                for position in range(range_start, range_end):
                    coverage[hp][chrm][position] = list(range_coverage[position-range_start])

        if (
            len(pairs) == 0
            and len(splits) == 0
            and len(long_reads) == 0
            and len(linked_reads) == 0
        ):
            if not coverage_only:
                logger.warning(
                    "No data returned from fetch in regions {} from {}".format(
                        " ".join([str(r) for r in ranges]),
                        bam_file
                    )
                )

        # Update max_coverage and remove any empty haplotype dict from coverage dict
        for hp in haplotypes:
            hp_covered = False
            for chrm in coverage[hp]:
                sn_coverages = [
                    v for values in coverage[hp][chrm].values() for v in values
                ]
                curr_max = 0
                if len(sn_coverages) > 0:
                    curr_max = np.percentile(sn_coverages, 99.5)
                if curr_max > max_coverage:
                    max_coverage = curr_max

                if sum(sn_coverages) > 0:
                    hp_covered = True

            if not hp_covered:
                del coverage[hp]

        all_coverages.append(coverage)
        all_pairs.append(pairs)
        all_splits.append(splits)
        all_long_reads.append(long_reads)
        all_linked_reads.append(linked_reads)

    read_data = {
        "all_pairs": all_pairs,
        "all_splits": all_splits,
        "all_coverages": all_coverages,
        "all_long_reads": all_long_reads,
        "all_linked_reads": all_linked_reads,
    }

    # Sample +/- pairs in the normal insert size range
    if max_depth:
        read_data["all_pairs"] = downsample_pairs(
            max_depth, z_score, read_data["all_pairs"]
        )
    if not same_yaxis_scales:
        max_coverage = 0
    return read_data, max_coverage


# }}}

# {{{def downsample_pairs(max_depth, z_score, all_pairs):
def downsample_pairs(max_depth, z_score, all_pairs):
    """Downsamples to keep only max_depth normal pairs from all PairedEnd
    reads 
    """
    for bam_i in range(len(all_pairs)):
        for hp_i in all_pairs[bam_i]:
            all_pairs[bam_i][hp_i] = sample_normal(
                max_depth, all_pairs[bam_i][hp_i], z_score
            )
    return all_pairs


# }}}

# {{{def set_haplotypes(curr_coverage):
def set_haplotypes(curr_coverage):
    """Creates a list to manage counting haplotypes for subplots
    """
    hps = sorted(curr_coverage.keys(), reverse=True)
    # if there are multiple haplotypes, must have 0,1,2
    if len(hps) > 1 or (len(hps) == 1 and hps[0] != 0):
        if 0 not in hps:
            hps.append(0)
        if 1 not in hps:
            hps.append(1)
        if 2 not in hps:
            hps.append(2)
    elif 0 not in hps:
        hps.append(0)
    hps.sort(reverse=True)
    return hps


# }}}

# {{{def plot_samples(ranges,
def plot_samples(
    ranges,
    read_data,
    grid,
    ax_i,
    number_of_axes,
    bams,
    chrom,
    coverage_tracktype,
    titles,
    same_yaxis_scales,
    xaxis_label_fontsize,
    yaxis_label_fontsize,
    annotation_files,
    transcript_file,
    max_coverage_points,
    max_coverage,
    marker_size,
    coverage_only,
    jitter_bounds,
):

    """Plots all samples
    """
    max_insert_size = 0

    # If jitter > 0.08 is use we need to shift the ylim a bit to not hide any entires.
    ylim_margin = max(1.02 + jitter_bounds, 1.10)
    for i in range(len(bams)):
        #ax is never used, annotating this for readability
        ax = plt.subplot(grid[ax_i])
        hps = set_haplotypes(read_data["all_coverages"][i])
        inner_axs = gridspec.GridSpecFromSubplotSpec(
            len(hps), 1, subplot_spec=grid[ax_i], wspace=0.0, hspace=0.5
        )
        axs = {}
        for j in range(len(hps)):
            axs[j] = plt.subplot(inner_axs[hps[j]])

        curr_min_insert_size = None
        curr_max_insert_size = 0

        cover_axs = {}
        for hp in hps:
            curr_ax = axs[hp]

            curr_splits = []
            if hp in read_data["all_splits"][i]:
                curr_splits = read_data["all_splits"][i][hp]

            curr_linked_reads = []
            if hp in read_data["all_linked_reads"][i]:
                curr_linked_reads = read_data["all_linked_reads"][i][hp]

            curr_long_reads = []
            if hp in read_data["all_long_reads"][i]:
                curr_long_reads = read_data["all_long_reads"][i][hp]

            curr_pairs = []
            if hp in read_data["all_pairs"][i]:
                curr_pairs = read_data["all_pairs"][i][hp]

            curr_coverage = {}
            if hp in read_data["all_coverages"][i]:
                curr_coverage = read_data["all_coverages"][i][hp]

            cover_ax = plot_coverage(
                curr_coverage,
                curr_ax,
                ranges,
                len(hps),
                max_coverage,
                coverage_tracktype,
                yaxis_label_fontsize,
                max_coverage_points,
            )

            if len(curr_linked_reads) > 0:
                curr_min_insert_size, curr_max_insert_size = plot_linked_reads(
                    curr_pairs,
                    curr_splits,
                    curr_linked_reads,
                    curr_ax,
                    ranges,
                    curr_min_insert_size,
                    curr_max_insert_size,
                    marker_size,
                    jitter_bounds
                )
            elif len(curr_long_reads) > 0:
                curr_min_insert_size, curr_max_insert_size = plot_long_reads(
                    curr_long_reads,
                    curr_ax,
                    ranges,
                    curr_min_insert_size,
                    curr_max_insert_size,
                    jitter_bounds
                )
            else:
                curr_min_insert_size, curr_max_insert_size = plot_pairs(
                    curr_pairs,
                    curr_ax,
                    ranges,
                    curr_min_insert_size,
                    curr_max_insert_size,
                    marker_size,
                    jitter_bounds
                )

                curr_min_insert_size, curr_max_insert_size = plot_splits(
                    curr_splits,
                    curr_ax,
                    ranges,
                    curr_min_insert_size,
                    curr_max_insert_size,
                    marker_size,
                    jitter_bounds,
                )

            cover_axs[hp] = cover_ax
            if curr_max_insert_size and (curr_max_insert_size > max_insert_size):
                max_insert_size = curr_max_insert_size

        # {{{ set axis parameters
        # set the axis title to be either one passed in or filename
        curr_ax = axs[hps[0]]
        if titles and len(titles) == len(bams):
            curr_ax.set_title(titles[i], fontsize=8, loc="left")
        else:
            curr_ax.set_title(os.path.basename(bams[i]), fontsize=8, loc="left")

        if len(axs) > 1:
            for j in axs:
                curr_ax = axs[j]
                fp = dict(size=8, backgroundcolor="white")
                text = "HP: "
                if j == 0:
                    text += "Undef"
                else:
                    text += str(j)
                at = AnchoredText(
                    text, loc=2, prop=fp, borderpad=0, pad=0, frameon=False
                )
                curr_ax.add_artist(at)

        for j in hps:
            curr_ax = axs[j]
            curr_ax.set_xlim([0, 1])
            if same_yaxis_scales:
                curr_ax.set_ylim([0, max(1, max_insert_size * ylim_margin)])
            else:
                curr_ax.set_ylim([0, max(1, curr_max_insert_size * ylim_margin)])
            curr_ax.spines["top"].set_visible(False)
            curr_ax.spines["bottom"].set_visible(False)
            curr_ax.spines["left"].set_visible(False)
            curr_ax.spines["right"].set_visible(False)
            curr_ax.tick_params(axis="y", labelsize=yaxis_label_fontsize)
            # if there's one hp, 6 ticks fit. Otherwise, do 3
            tick_count = 6 if len(hps) == 1 else 3
            curr_ax.yaxis.set_major_locator(ticker.LinearLocator(tick_count))
            curr_ax.ticklabel_format(useOffset=False, style='plain')
            
            curr_ax.tick_params(axis="both", length=0)
            curr_ax.set_xticklabels([])
            if coverage_only:
                curr_ax.yaxis.set_visible(False)

        last_sample_num = number_of_axes - 1
        if annotation_files:
            last_sample_num -= len(annotation_files)
        if transcript_file:
            last_sample_num -= 1

        if ax_i == last_sample_num:
            curr_ax = axs[hps[-1]]

            labels = []
            if len(ranges) == 1:
                labels = [
                    int(ranges[0].start + l * (ranges[0].end - ranges[0].start))
                    for l in curr_ax.xaxis.get_majorticklocs()
                ]
            elif len(ranges) == 2:
                x_ticks = curr_ax.xaxis.get_majorticklocs()
                labels_per_range = int(
                    len(curr_ax.xaxis.get_majorticklocs()) / len(ranges)
                )
                labels = [
                    int(ranges[0].start + l * (ranges[0].end - ranges[0].start))
                    for l in x_ticks[:labels_per_range]
                ]
                try:
                    labels += [
                        int(ranges[-1].start + l * (ranges[-1].end - ranges[-1].start))
                        for l in x_ticks[labels_per_range:]
                    ]
                except Exception as e:
                    logger.error(labels_per_range)
                    print(e, file=sys.stderr)
                    sys.exit(1)
            else:
                logger.error("Ranges greater than 2 are not supported")
                sys.exit(1)
            
            curr_ax.set_xticklabels(labels, fontsize=xaxis_label_fontsize)
            chrms = [x.chrm for x in ranges]
            curr_ax.set_xlabel("Chromosomal position on " + "/".join(chrms), fontsize=8)
    
        curr_ax = axs[hps[int(len(hps) / 2)]]
        curr_ax.set_ylabel("Insert size", fontsize=8)
        cover_ax = cover_axs[hps[int(len(hps) / 2)]]
        cover_ax.set_ylabel("Coverage", fontsize=8)
        # }}}

        ax_i += 1
    return ax_i


# }}}

# {{{def plot_legend(fig, legend_fontsize):
def plot_legend(fig, legend_fontsize, marker_size):
    """Plots the figure legend
    """
    marker_colors = []
    marker_labels = []
    read_colors = {
        "Deletion/Normal": "black",
        "Duplication": "red",
        "Inversion": "blue",
        "Aligned long read": "orange",
        "Linked read": "green",
    }

    for read_type in READ_TYPES_USED:
        if read_type in read_colors:
            color = read_colors[read_type]
            flag = READ_TYPES_USED[read_type]
            if flag:
                marker_colors.append(color)
                marker_labels.append(read_type)
    legend_elements = []

    for color in marker_colors:
        legend_elements += [
            plt.Line2D([0, 0], [0, 1], color=color, linestyle="-", lw=1)
        ]
    if READ_TYPES_USED["Split-read"]:
        marker_labels.append("Split read")
        legend_elements += [
            plt.Line2D(
                [0, 0],
                [0, 1],
                markerfacecolor="None",
                markeredgecolor="grey",
                color="grey",
                marker="o",
                markersize=marker_size,
                linestyle=":",
                lw=1,
            )
        ]

    if READ_TYPES_USED["Paired-end read"]:
        marker_labels.append("Paired-end read")
        legend_elements += [
            plt.Line2D(
                [0, 0],
                [0, 1],
                markerfacecolor="None",
                markeredgecolor="grey",
                color="grey",
                marker="s",
                markersize=marker_size,
                linestyle="-",
                lw=1,
            )
        ]

    fig.legend(
        legend_elements, marker_labels, loc=1, fontsize=legend_fontsize, frameon=False
    )


# }}}

# {{{def create_gridspec(bams, transcript_file, annotation_files, sv_type ):
def create_gridspec(bams, transcript_file, annotation_files, sv_type, read_data, annotation_scalar):
    """Helper function for creation of a correctly-sized GridSpec instance
    """
    # give one axis to display each sample
    num_ax = len(bams)

    # add another if we are displaying the SV
    if sv_type:
        num_ax += 1

    # add another if a annotation file is given
    if transcript_file:
        num_ax += 1

    if annotation_files:
        num_ax += len(annotation_files)

    # set the relative sizes for each
    ratios = []
    if sv_type:
        ratios = [1]

    for i in range(len(bams)):
        ratios.append(len(read_data["all_coverages"][i]) * 3)
        if len(read_data["all_coverages"]) > 0:
            ratios[-1] = 9

    if annotation_files:
        ratios += [annotation_scalar] * len(annotation_files)
    if transcript_file:
        ratios.append(annotation_scalar * 3)

    return gridspec.GridSpec(num_ax, 1, height_ratios=ratios), num_ax


# }}}

##Annotations/Transcript methods
# {{{def get_plot_annotation_plan(ranges, annotation_file):
def get_plot_annotation_plan(ranges, annotation_file):
    annotation_plan = []
    for r in ranges:
        itr = get_tabix_iter(r.chrm, r.start, r.end, annotation_file)
        if not (itr):
            continue
        for row in itr:
            A = row.rstrip().split()
            A[0] = strip_chr(A[0])
            chrm = A[0]
            start = int(A[1])
            end = int(A[2])

            interval = genome_interval(chrm, start, end)

            # check to see if any part of this alignment overlaps a plot
            # range
            in_range = False
            for r in ranges:
                if r.intersect(interval) == 0:
                    in_range = True
            if in_range:
                step = plan_step(
                    genome_interval(chrm, start, start),
                    genome_interval(chrm, end, end),
                    "ANNOTATION",
                )
                if len(A) > 3:
                    try:
                        v = float(A[3])
                        step.event = "FLOAT_ANNOTATION"
                        step.info = v
                    except ValueError:
                        step.event = "STRING_ANNOTATION"
                        step.info = A[3]

                annotation_plan.append(step)
    return annotation_plan


# }}}

# {{{def plot_annotations(annotation_files, chrom, start, end,
def plot_annotations(
    annotation_files, annotation_filenames, ranges, hide_annotation_labels, annotation_fontsize, grid, ax_i, annotation_scalar,
):
    """Plots annotation information from region 
    """
    if not annotation_filenames:
        annotation_filenames = []
        for annotation_file in annotation_files:
            annotation_filenames.append(os.path.basename(annotation_file))

    for i,annotation_file in enumerate(annotation_files):
        annotation_plan = get_plot_annotation_plan(ranges, annotation_file)
        annotation_filename = annotation_filenames[i]

        if len(annotation_plan) == 0:
            continue
        ax = plt.subplot(grid[ax_i])
        ax_i += 1

        for step in annotation_plan:
            p = [
                map_genome_point_to_range_points(
                    ranges, step.start_pos.chrm, step.start_pos.start
                ),
                map_genome_point_to_range_points(
                    ranges, step.end_pos.chrm, step.end_pos.end
                ),
            ]
            # if an annotation lies outside the window, its coordinate will be None, so we trim to the window
            if p[0] is None:
                p[0] = 0
            if p[1] is None:
                p[1] = 1

            if step.event == "ANNOTATION":
                ax.plot(p, [0, 0], "-", color="black", lw=5)
            elif step.event == "FLOAT_ANNOTATION":
                ax.plot(p, [0, 0], "-", color=str(step.info), lw=15)
            elif step.event == "STRING_ANNOTATION":
                ax.plot(p, [0, 0], "-", color="black", lw=15)
                if step.info and not hide_annotation_labels:
                    ax.text(
                        p[0],
                        0.06,
                        step.info,
                        color="black",
                        fontsize=annotation_fontsize,
                    )
            else:
                logger.error("Unsupported annotation type: {}".format(step.event))
                sys.exit(1)

            # set axis parameters
            ax.set_xlim([0, 1])
            ax.spines["top"].set_visible(False)
            ax.spines["bottom"].set_visible(False)
            ax.spines["left"].set_visible(False)
            ax.spines["right"].set_visible(False)
            ax.set_title(annotation_filename, fontsize=8, loc="left")
            ax.tick_params(axis="x", length=0)
            ax.tick_params(axis="y", length=0)
            ax.set_xticklabels([])
            ax.set_yticklabels([])


# }}}

# {{{def get_interval_range_plan_start_end(ranges, interval):
def get_interval_range_plan_start_end(ranges, interval):

    # transcript can span ranges
    start_range_hit_i = get_range_hit(ranges, interval.chrm, interval.start)
    end_range_hit_i = get_range_hit(ranges, interval.chrm, interval.end)

    if start_range_hit_i is None and end_range_hit_i is None:
        for i, range_item in enumerate(ranges):
            if (
                (strip_chr(range_item.chrm) == strip_chr(interval.chrm))
                and (interval.start <= range_item.start <= interval.end)
                and (interval.start <= range_item.end <= interval.end)
            ):
                start_range_hit_i = i
                end_range_hit_i = i

    start = None
    end = None
    # neither end is in range, add nothing
    if start_range_hit_i == None and end_range_hit_i == None:
        return None, None
    # start is in, end is not
    elif end_range_hit_i == None:
        start = genome_interval(
            interval.chrm,
            max(interval.start, ranges[start_range_hit_i].start),
            max(interval.start, ranges[start_range_hit_i].start),
        )
        end = genome_interval(
            interval.chrm, ranges[start_range_hit_i].end, ranges[start_range_hit_i].end
        )
    # end is in, start is not
    elif start_range_hit_i == None:
        start = genome_interval(
            interval.chrm, ranges[end_range_hit_i].start, ranges[end_range_hit_i].start
        )
        end = genome_interval(
            interval.chrm,
            min(interval.end, ranges[end_range_hit_i].end),
            min(interval.end, ranges[end_range_hit_i].end),
        )
    # in same range or in different ranges
    else:
        start = genome_interval(
            interval.chrm,
            max(interval.start, ranges[start_range_hit_i].start),
            max(interval.start, ranges[start_range_hit_i].start),
        )
        end = genome_interval(
            interval.chrm,
            min(interval.end, ranges[end_range_hit_i].end),
            min(interval.end, ranges[end_range_hit_i].end),
        )
    return start, end


# }}}

# {{{def get_transcript_plan(ranges, transcript_file):
def get_transcript_plan(ranges, transcript_file):
    genes = {}
    transcripts = {}
    cdss = {}

    for r in ranges:
        itr = get_tabix_iter(r.chrm, r.start, r.end, transcript_file)
        if not itr:
            continue
        for row in itr:
            gene_annotation = row.rstrip().split()

            if gene_annotation[2] == "gene":
                info = dict(
                    [list(val.split("=")) for val in gene_annotation[8].split(";")]
                )

                info["strand"] = gene_annotation[6] == "+"

                if "Name" not in info:
                    continue

                genes[info["Name"]] = [
                    genome_interval(
                        gene_annotation[0],
                        int(gene_annotation[3]),
                        int(gene_annotation[4]),
                    ),
                    info,
                ]
            elif gene_annotation[2] in ["transcript", "mRNA"]:
                info = dict(
                    [list(val.split("=")) for val in gene_annotation[8].split(";")]
                )
                info["strand"] = gene_annotation[6] == "+"

                if info["Parent"] not in transcripts:
                    transcripts[info["Parent"]] = {}
                transcripts[info["Parent"]][info["ID"]] = [
                    genome_interval(
                        gene_annotation[0],
                        int(gene_annotation[3]),
                        int(gene_annotation[4]),
                    ),
                    info,
                ]
            elif gene_annotation[2] == "CDS":
                info = dict(
                    [list(val.split("=")) for val in gene_annotation[8].split(";")]
                )
                info["strand"] = gene_annotation[6] == "+"

                if info["Parent"] not in cdss:
                    cdss[info["Parent"]] = {}

                if info["ID"] not in cdss[info["Parent"]]:
                    cdss[info["Parent"]][info["ID"]] = []

                cdss[info["Parent"]][info["ID"]].append(
                    genome_interval(
                        gene_annotation[0],
                        int(gene_annotation[3]),
                        int(gene_annotation[4]),
                    )
                )
    transcript_plan = []
    for gene in genes:
        gene_id = genes[gene][1]["ID"]
        if gene_id not in transcripts:
            continue
        for transcript in transcripts[gene_id]:
            interval, info = transcripts[gene_id][transcript]
            start, end = get_interval_range_plan_start_end(ranges, interval)

            if not start or not end:
                continue

            step = plan_step(start, end, "TRANSCRIPT")
            step.info = {"Name": None, "Strand": None, "Exons": None}
            step.info["Name"] = info["Name"]
            step.info["Strand"] = info["strand"]

            exons = []
            if transcript in cdss:
                for cds in cdss[transcript]:
                    for exon in cdss[transcript][cds]:
                        start, end = get_interval_range_plan_start_end(ranges, exon)
                        if start and end:
                            exons.append(plan_step(start, end, "EXON"))
            if len(exons) > 0:
                step.info["Exons"] = exons

            transcript_plan.append(step)
    return transcript_plan


# }}}

# {{{ def plot_transcript(transcript_file, chrom, start, end,
def plot_transcript(
    transcript_file, transcript_filename, ranges, grid, annotation_fontsize, xaxis_label_fontsize, annotation_scalar,
):
    """Plots a transcript file annotation
    """
    if not transcript_filename:
        transcript_filename = os.path.basename(transcript_file)
    transcript_idx = 0
    transcript_idx_max = 0
    currect_transcript_end = 0
    ax = plt.subplot(grid[-1])

    transcript_plan = get_transcript_plan(ranges, transcript_file)

    for step in transcript_plan:
        p = [
            map_genome_point_to_range_points(
                ranges, step.start_pos.chrm, step.start_pos.start
            ),
            map_genome_point_to_range_points(
                ranges, step.end_pos.chrm, step.end_pos.end
            ),
        ]
        # if an annotation lies outside the window, its coordinate will be None, so we trim to the window
        if p[0] is None:
            p[0] = 0
        if p[1] is None:
            p[1] = 0

        # Reset transcript index outside of current stack
        if p[0] > currect_transcript_end:
            transcript_idx = 0

        currect_transcript_end = max(p[1], currect_transcript_end)

        ax.plot(
            p, [transcript_idx, transcript_idx], "-", color="cornflowerblue", lw=0.5,
            solid_capstyle="butt",
        )

        # Print arrows throughout gene to show direction.
        nr_arrows = 2 + int((p[1]-p[0])/0.02)
        arrow_locs = np.linspace(p[0], p[1], nr_arrows)
        arrowprops = dict(arrowstyle="->", color="cornflowerblue", lw=0.5,
                          mutation_aspect=2, mutation_scale=3)

        if step.info["Strand"]:
            # Add left-facing arrows
            for arrow_loc in arrow_locs[1:]:
                ax.annotate(
                    "",
                    xy=(arrow_loc, transcript_idx),
                    xytext=(p[0], transcript_idx),
                    arrowprops=arrowprops,
                    annotation_clip=True,
                )
        else:
            # Add right-facing arrows
            for arrow_loc in arrow_locs[:-1]:
                ax.annotate(
                    "",
                    xy=(arrow_loc, transcript_idx),
                    xytext=(p[1], transcript_idx),
                    arrowprops=arrowprops,
                    annotation_clip=True,
                )

        if step.info["Exons"]:
            for exon in step.info["Exons"]:
                p_exon = [
                    map_genome_point_to_range_points(
                        ranges, exon.start_pos.chrm, exon.start_pos.start
                    ),
                    map_genome_point_to_range_points(
                        ranges, exon.end_pos.chrm, exon.end_pos.end
                    ),
                ]
                if not points_in_window(p_exon):
                    continue

                ax.plot(
                    p_exon,
                    [transcript_idx, transcript_idx],
                    "-",
                    color="cornflowerblue",
                    solid_capstyle="butt",
                    lw=4,
                )

        ax.text(
            sum(p)/2,
            transcript_idx + 0.1,
            step.info["Name"],
            color="blue",
            fontsize=annotation_fontsize,
            ha="center"
        )

        transcript_idx += 1
        transcript_idx_max = max(transcript_idx, transcript_idx_max)

    # set axis parameters
    ax.set_xlim([0, 1])
    ax.set_ylim([transcript_idx_max * -0.1, 0.01+(transcript_idx_max * 1.01)])
    ax.spines["top"].set_visible(False)
    ax.spines["bottom"].set_visible(False)
    ax.spines["left"].set_visible(False)
    ax.spines["right"].set_visible(False)

    ax.tick_params(axis="x", length=0)
    ax.tick_params(axis="y", length=0)
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    ax.set_title(transcript_filename, fontsize=8, loc="left")


# }}}


########################################################################
# main block
########################################################################
def plot(parser, options, extra_args=None):
    """
    To support translocations, the SVs are specified as an array of 
    genome_interval. For now we let that array be size 1 or 2.
    """
    if options.debug:
        logger.setLevel(logging.DEBUG)
    
    random.seed(options.random_seed)
    if options.print_args or options.json_only:
        print_arguments(options)
        if options.json_only:
            sys.exit(0)

    if options.output_file:
        output_file = options.output_file
    else:
        if not os.path.isdir(options.output_dir):
            os.mkdir(options.output_dir)
        name_fields = [
            options.sv_type,
            "-".join(options.chrom),
            "-".join([str(s) for s in options.start]),
            "-".join([str(e) for e in options.end]),
        ]
        if options.sv_type:
            output_file = os.path.join(options.output_dir, "_".join(name_fields))
        else:
            output_file = os.path.join(options.output_dir, "_".join(name_fields[1:]))
    if (options.annotation_files 
            and options.annotation_filenames 
            and len(options.annotation_files) != len(options.annotation_filenames)):
        logger.warning("annotation filenames do not match annotation files")
        sys.exit(1)

    for bam in options.bams:
        if ".cram" in bam:
            if not options.reference:
                logger.error("Missing argument reference (-r/--reference) required for CRAM")
                sys.exit(1)

    if len(options.chrom) != len(options.start) != len(options.end):
        logger.error("The number of chromosomes ({}), starts ({}), and ends ({}) do not match.".format(
            len(options.chrom),
            len(options.start),
            len(options.end)
            )
        )
        sys.exit()

    sv = []
    for i in range(len(options.chrom)):
        options.chrom[i] = strip_chr(options.chrom[i])
        sv.append(genome_interval(options.chrom[i], options.start[i], options.end[i]))
    # set up plot
    plot_height, plot_width, window, ranges = set_plot_dimensions(
        sv,
        options.sv_type,
        options.plot_height,
        options.plot_width,
        options.bams,
        options.reference,
        options.annotation_files,
        options.transcript_file,
        options.window,
        options.zoom,
    )

    marker_size = options.marker_size

    # set up sub plots
    matplotlib.rcParams.update({"font.size": 12})
    fig = plt.figure(figsize=(plot_width, plot_height))

    # read alignment data
    read_data, max_coverage = get_read_data(
        ranges,
        options.bams,
        options.reference,
        options.separate_mqual,
        options.include_mqual,
        options.coverage_only,
        options.long_read,
        options.min_event_size,
        options.same_yaxis_scales,
        options.max_depth,
        options.z,
        options.ignore_hp,
    )

    # set up grid organizer
    grid, num_ax = create_gridspec(
        options.bams,
        options.transcript_file,
        options.annotation_files,
        options.sv_type,
        read_data,
        options.annotation_scalar,
    )
    current_axis_idx = 0

    # plot variant on top
    if options.sv_type:
        current_axis_idx = create_variant_plot(
            grid,
            current_axis_idx,
            sv,
            options.sv_type,
            ranges,
            options.start_ci,
            options.end_ci,
        )
    if options.max_coverage:
        max_coverage = options.max_coverage

    # Plot each sample
    current_axis_idx = plot_samples(
        ranges,
        read_data,
        grid,
        current_axis_idx,
        num_ax,
        options.bams,
        options.chrom,
        options.coverage_tracktype,
        options.titles,
        options.same_yaxis_scales,
        options.xaxis_label_fontsize,
        options.yaxis_label_fontsize,
        options.annotation_files,
        options.transcript_file,
        options.max_coverage_points,
        max_coverage,
        marker_size,
        options.coverage_only,
        options.jitter,
    )
    # plot legend
    plot_legend(fig, options.legend_fontsize, marker_size)

    # Plot annotation files
    if options.annotation_files:
        plot_annotations(
            options.annotation_files,
            options.annotation_filenames,
            ranges,
            options.hide_annotation_labels,
            options.annotation_fontsize,
            grid,
            current_axis_idx,
            options.annotation_scalar,
        )

    # Plot sorted/bgziped/tabixed transcript file
    if options.transcript_file:
        plot_transcript(
            options.transcript_file,
            options.transcript_filename,
            ranges,
            grid,
            options.annotation_fontsize,
            options.xaxis_label_fontsize,
            options.annotation_scalar,
        )

    # save
    matplotlib.rcParams["agg.path.chunksize"] = 100000
    plt.tight_layout(pad=0.8, h_pad=0.1, w_pad=0.1)
    try:
        plt.savefig(output_file, dpi=options.dpi)
    except Exception as e:
        logger.error(
            "Failed to save figure {}".format(output_file)
        )
        print(e)

    plt.close(fig)
# }}}


================================================
FILE: samplot/samplot_vcf.py
================================================
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Create samplot vcf commands to execute and generate
companion HTML image browser.

Note: additional arguments are passed through to samplot plot
"""
from __future__ import print_function

import argparse
from collections import Counter
import logging
import operator
import os
import random
import sys
import re

import pysam
from jinja2 import Environment, FileSystemLoader, select_autoescape

try:
    from shlex import quote
except ImportError:
    from pipes import quote

from .samplot import add_plot


logger = logging.getLogger(__name__)

cmp_lookup = {
    ">": operator.gt,  # e.g. DHFC < 0.5
    "<": operator.lt,
    "<=": operator.le,
    ">=": operator.ge,
    "==": operator.eq,
    "contains": operator.contains,  # e.g. CSQ contains HIGH
    "exists": lambda a, b: True,  # e.g. exists smoove_gene
}


class Sample(object):
    __slots__ = [
        "family_id",
        "id",
        "paternal_id",
        "maternal_id",
        "mom",
        "dad",
        "kids",
        "i",
    ]

    def __init__(self, line):
        toks = line.rstrip().split()
        self.family_id = toks[0]
        self.id = toks[1]
        self.paternal_id = toks[2]
        self.maternal_id = toks[3]
        self.kids = []
        self.i = -1  # index in the vcf.

    def __repr__(self):
        return "Sample(id:{id},paternal_id:{pid},maternal_id:{mid})".format(
            id=self.id, pid=self.paternal_id, mid=self.maternal_id
        )


def flatten(value, sep=","):
    """
    >>> flatten([1,2,3,4])
    '1,2,3,4'
    >>> flatten((5,6))
    '5,6'
    >>> flatten(0.987654321)
    '0.987654'
    >>> flatten(7)
    '7'
    >>> flatten("flatten")
    'flatten'
    """
    flat = None
    # tuple or list
    if isinstance(value, tuple) or isinstance(value, list):
        flat = sep.join([str(i) for i in value])
    # reformats long float values
    elif isinstance(value, float):
        flat = "%.6f" % (value,)
    # string and int
    else:
        flat = str(value)
    return flat


def get_format_fields(ids, variant):
    """
    args:
        ids (list) - list of FORMAT field IDs, e.g. ['AS', 'AP', 'DHFFC']
        variant (pysam.libcbcf.VariantRecord)

    returns:
        list
    """
    sample_format = []
    for i, sample_fields in enumerate(variant.samples.values()):
        for field_id in ids:
            sample_field_val = flatten(sample_fields.get(field_id, ""))
            if sample_field_val:
                if len(sample_format) < i + 1:
                    sample_format.append("")
                else:
                    sample_format[i] += " "
                sample_format[i] += "{}={}".format(field_id, sample_field_val)
    return sample_format


def get_format_title(samples, ids, variant):
    """
    args:
        samples (list) - list of sample IDs in order of VCF annotations
        ids (list) - list of FORMAT field IDs, e.g. ['AS', 'AP', 'DHFFC']
        variant (pysam.libcbcf.VariantRecord)

    returns:
        dict
    """
    fields = get_format_fields(ids, variant)
    return dict(zip(samples, fields))


def make_plot_titles(samples, attr_values):
    """
    keeping this method separate in the event we add more things to the title

    args:
        samples (list) - list of sample IDs
        attr_values (str) - string of VCF FORMAT values

    returns:
        dict

    >>> make_plot_titles(
        ['s1', 's2', 's3'],
            {
                's1': 'AS=0 AP=0',
                's2': 'AS=0 AP=1',
                's3': 'AS=1 AP=1'
            }
        )
    {
        's1': "'s1 AS=0 AP=0'",
        's2': "'s2 AS=0 AP=1'",
        's3': "'s3 AS=1 AP=1'"
    }
    """
    plot_titles = dict()
    for sample in samples:
        if sample in attr_values:
            plot_titles[sample] = quote("%s %s" % (sample, attr_values[sample]))
    return plot_titles


def get_overlap(
    tabix,
    chrom,
    start,
    end,
    priority=["exon", "gene", "transcript", "cds"],
    no_hit="intergenic",
    fix_chr=True,
):
    """
    args:
        tabix (pysam.libctabix.TabixFile) - open TabixFile
        chrom (str)
        start (int)
        end (int)
        priority (Optional[list]) - order of preferred region annotation
        no_hit (Optional[str]) - use this annotation if no matches among priority
        fix_chr (Optional[bool]) - try to fetch a region using both
                            non-'chr' and 'chr' prefix on failures

    returns:
        str
    """
    overlaps = None
    try:
        overlaps = set(
            [i.split("\t")[2].lower() for i in tabix.fetch(chrom, start, end)]
        )
    except IndexError:
        # probably not a gff3
        logger.warning("Invalid annotation file specified for --gff3")
        overlaps = None
    except ValueError:
        if fix_chr:
            # try removing chr
            if chrom.startswith("chr"):
                overlaps = get_overlap(
                    tabix, chrom[3:], start, end, priority, no_hit, False
                )
            # or adding chr
            else:
                overlaps = get_overlap(
                    tabix,
                    "chr{chrom}".format(chrom=chrom),
                    start,
                    end,
                    priority,
                    no_hit,
                    False,
                )
    except:
        # bad regions
        logger.warning(
            "Error fetching {chrom}:{start}-{end}".format(
                chrom=chrom, start=start, end=end
            )
        )
        overlaps = None

    overlap = ""
    if overlaps:
        for feature in priority:
            if feature in overlaps:
                overlap = feature
                break
    else:
        # fetching overlaps failed
        overlap = "unknown"

    if not overlap and no_hit:
        overlap = no_hit
    return overlap


def parse_ped(path, vcf_samples=None):
    if path is None:
        return {}
    samples = []
    look = {}
    for line in open(path):
        samples.append(Sample(line))
        look[samples[-1].id] = samples[-1]

    for s in samples:
        s.dad = look.get(s.paternal_id)
        if s.dad is not None:
            s.dad.kids.append(s)
        s.mom = look.get(s.maternal_id)
        if s.mom is not None:
            s.mom.kids.append(s)
    # match these samples to the ones in the VCF.
    if vcf_samples is not None:
        result = []
        for i, variant_sample in enumerate(vcf_samples):
            if variant_sample not in look:
                continue
            result.append(next(s for s in samples if s.id == variant_sample))
            result[-1].i = i
        samples = result

    return {s.id: s for s in samples}


def get_names_to_bams(bams, name_list=None):
    """
    get mapping from names (read group samples) to bam paths)
    this is useful because the VCF has the names and we'll want the bam paths
    for those samples
    if name_list is passed in as a parameter those will be used instead
    """
    names = {}
    if name_list:
        if len(name_list) != len(bams):
            logger.error("List of sample IDs does not match list of alignment files.")
            sys.exit(1)

        for i, p in enumerate(bams):
            names[name_list[i]] = p
    else:
        for p in bams:
            b = pysam.AlignmentFile(p)
            # TODO - catch specific exception
            try:
                names[b.header["RG"][0]["SM"]] = p
            except Exception as e:
                logger.error("No RG field in alignment file {}".format(p))
                logger.error("Include ordered list of sample IDs to avoid this error")
                print(e, file=sys.stderr)
                sys.exit(1)
    return names


def tryfloat(v):
    try:
        return float(v)
    except:
        return v


def to_exprs(astr):
    """
    an expr is just a 3-tuple of "name", fn, value"
    e.g. "DHFFC", operator.lt, 0.7"
    >>> to_exprs("DHFFC < 0.5 & SVTYPE == 'DEL'")
    [('DHFFC', <built-in function lt>, 0.5), ('SVTYPE', <built-in function eq>, 'DEL')]

    >>> to_exprs("CSQ contains 'HIGH'")
    [('CSQ', <built-in function contains>, 'HIGH')]
    """
    astr = (x.strip() for x in astr.strip().split("&"))
    result = []
    for a in astr:
        a = [x.strip() for x in a.split()]
        if len(a) == 2:
            assert a[1] == "exists", ("bad expression", a)
            a.append("extra_arg")
        assert len(a) == 3, ("bad expression", a)
        assert a[1] in cmp_lookup, (
            "comparison:"
            + a[1]
            + " not supported. must be one of:"
            + ",".join(cmp_lookup.keys())
        )
        result.append((a[0], cmp_lookup[a[1]], tryfloat(a[2].strip("'").strip('"'))))
    return result


def check_expr(vdict, expr):
    """
    >>> check_expr({"CSQ": "asdfHIGHasdf"},
            to_exprs("CSQ contains 'HIGH'"))
    True

    >>> check_expr({"CSQ": "asdfHIGHasdf", "DHFC": 1.1},
            to_exprs("CSQ contains 'HIGH' & DHFC < 0.5"))
    False

    >>> check_expr({"CSQ": "asdfHIGHasdf", "DHFC": 1.1},
            to_exprs("CSQ contains 'HIGH' & DHFC < 1.5"))
    True

    >>> check_expr({"smoove_gene": "asdf"},
            to_exprs("smoove_gene exists"))
    True

    >>> check_expr({"smooe_gene": "asdf"},
            to_exprs("smoove_gene exists"))
    False

    >>> check_expr({"smoove_gene": ""},
            to_exprs("smoove_gene exists"))
    True
    """

    # a single set of exprs must be "anded"
    for name, fcmp, val in expr:
        # NOTE: asking for a missing annotation will return false.
        if name not in vdict:
            return False
        if not fcmp(vdict[name], val):
            return False
    return True


def make_single(vdict):
    """
    >>> d = {"xx": (1,)}
    >>> make_single(d)
    {'xx': 1}
    """
    for k in vdict.keys():
        if isinstance(vdict[k], tuple) and len(vdict[k]) == 1:
            vdict[k] = vdict[k][0]
    return vdict


def get_dn_row(ped_samples):
    for s in ped_samples.values():
        if s.mom is not None and s.dad is not None:
            return '{title:"de novo", field:"dn"}'
    return ""


def read_important_regions(bedfilename):
    if not bedfilename:
        return None
    important_regions = {}
    with open(bedfilename, "r") as bedfile:
        for line in bedfile:
            pos_fields = line.strip().split()
            region_string = "_".join(pos_fields[1:3])
            if pos_fields[0] not in important_regions:
                important_regions[pos_fields[0]] = []
            important_regions[pos_fields[0]].append(region_string)

    return important_regions


def var_in_important_regions(important_regions, chrom, start, end, svtype):
    if not important_regions:
        # if no important regions are set all locations are valid
        return True

    if chrom in important_regions:
        for region in important_regions[chrom]:
            region_st, region_end = [int(x) for x in region.split("_")]
            if (
                region_st <= start <= region_end
                or region_st <= end <= region_end
                or start <= region_st <= end
            ):
                return True

    logger.debug(
        "Skipping {} at {}:{}-{}, outside important_regions coordinates".format(
            svtype, chrom, start, end
        )
    )
    return False


def cram_input(bams):
    for bam in bams:
        if bam.endswith(".cram"):
            return True
    return False


def above_call_rate(gts, sample_count, min_call_rate, svtype, chrom, start, end):
    """
    skips variants with call rate below min_call_rate if set
    """
    if not min_call_rate:
        return True

    call_rate = (sample_count - sum(None in g for g in gts)) / sample_count
    if min_call_rate and (call_rate < min_call_rate):
        logger.debug(
            (
                "Skipping {} at {}:{}-{}, call rate of variant "
                + "({}) below min_call_rate"
            ).format(svtype, chrom, start, end, call_rate),
        )
        return False
    return True


def below_max_hets(gts, max_hets, svtype, chrom, start, end):
    """
    skips variants with more than max_hets heterozygotes
    if max_hets is set
    """
    if not max_hets:
        return False

    # requisite hets/hom-alts
    het_count = sum(sum(x) >= 1 for x in gts if None not in x)
    if het_count > max_hets:
        logger.debug(
            "Skipping {} at {}:{}-{}, more than max_hets heterozygotes".format(
                svtype, chrom, start, end
            )
        )
        return False
    return True


def no_variant_found(gts, svtype, chrom, start, end):
    """
    skips variants with no non-ref samples
    """
    if not any(sum(x) > 0 for x in gts if None not in x):
        logger.debug(
            "Skipping {} at {}:{}-{}, no samples have non-ref genotypes".format(
                svtype, chrom, start, end
            )
        )
        return True
    return False


def get_plottable_samples(
    gts, variant, plot_all, filters, svtype, chrom, start, end,
):
    """
    gets the samples and indices for all those which need to be plotted,
    which means passing filters and, if not plot_all, having a nonref genotype
    """
    if plot_all:
        test_idxs = [i for i, gt in enumerate(gts)]
        if len(test_idxs) == 0:
            logger.debug(
                "No samples found for {} at {}:{}-{}".format(svtype, chrom, start, end)
            )
    else:
        test_idxs = [i for i, gt in enumerate(gts) if None not in gt and sum(gt) > 0]
        if len(test_idxs) == 0:
            logger.debug(
                "No non-reference samples found for {} at {}:{}-{}".format(
                    svtype, chrom, start, end
                )
            )

    test_samples = [s for i, s in enumerate(variant.samples.values()) if i in test_idxs]

    # apply filters if set
    if len(filters) == 0:
        idxs = test_idxs
    else:
        idxs = []
        odict = make_single(dict(variant.info.items()))
        for i, ts in enumerate(test_samples):
            vdict = odict.copy()
            vdict.update(make_single(dict(ts.items())))

            if any(check_expr(vdict, fs) for fs in filters):
                idxs.append(test_idxs[i])
    if len(idxs) == 0:
        logger.debug(
            "No samples pass filters for {} at {}:{}-{}".format(
                svtype, chrom, start, end
            )
        )
    return idxs, test_samples


def get_variant_samples(
    idxs, vcf_samples, names_to_bams, svtype, chrom, start, end,
):
    """
    gets the samples that need to be plotted and have alignment files assigned
    """
    variant_samples = []
    for i in idxs:
        if vcf_samples[i] in names_to_bams:
            variant_samples.append(vcf_samples[i])
    if len(variant_samples) == 0:
        logger.debug(
            (
                "Skipping {} at {}:{}-{}, no plottable samples "
                + "with matched alignment files"
            ).format(svtype, chrom, start, end),
        )
    return variant_samples


def get_denovos(
    denovo_row,
    test_samples,
    variant_samples,
    ped_samples,
    svtype,
    chrom,
    start,
    end,
    dn_only,
):
    """
    we call it a de novo if the sample passed the filters but the mom and
    dad had homref genotypes before filtering.
    so stringent filtering on the kid and lenient on parents.
    """
    denovo_svs = []
    if denovo_row != "":
        test_sample_names = {s.name for s in test_samples}
        for variant_sample in variant_samples:
            sample = ped_samples[variant_sample]
            if sample.mom is None or sample.dad is None:
                continue
            if (
                sample.mom.id not in test_sample_names
                and sample.dad.id not in test_sample_names
            ):
                denovo_svs.append(sample.id)

    if len(denovo_svs) <= 0 and dn_only:
        logger.debug(
            "Skipping {} at {}:{}-{}, dn_only selected and no de novos found".format(
                svtype, chrom, start, end
            ),
        )
    return denovo_svs


def get_family_controls(
    ped,
    denovo_svs,
    variant_samples,
    ped_samples,
    max_hets,
    bams,
    names_to_bams,
    vcf_samples_set,
):
    """
    tries to find family members to use as controls for putative de novos
    """
    # do DN samples first so we can see parents.
    # TODO also need to do the non-denovos as they seem to have been forgotten
    for variant_sample in denovo_svs + [
        x for x in variant_samples if x not in denovo_svs
    ]:
        sample = ped_samples.get(variant_sample)
        if sample is None:
            continue
        if (
            sample.mom is not None
            and sample.mom.id not in variant_samples
            and sample.mom.id in vcf_samples_set
        ):
            variant_samples.append("mom-of-%s[%s]" % (variant_sample, sample.mom.id))
            bams.append(names_to_bams[sample.mom.id])
        if (
            sample.dad is not None
            and sample.dad.id not in variant_samples
            and sample.dad.id in vcf_samples_set
        ):
            variant_samples.append("dad-of-%s[%s]" % (variant_sample, sample.dad.id))
            bams.append(names_to_bams[sample.dad.id])
        for kid in sample.kids:
            if kid.id not in variant_samples and kid.id in vcf_samples_set:
                variant_samples.append("kid-of-%s[%s]" % (variant_sample, kid.id))
                bams.append(names_to_bams[kid.id])
            if max_hets:
                if len(bams) > 1.5 * max_hets:
                    break
        if max_hets:
            if len(bams) > 1.5 * max_hets:
                break
    return variant_samples, bams


def get_nonfamily_controls(
    gts, vcf_samples, variant_samples, names_to_bams, min_entries, bams
):
    # extend with some controls:
    hom_ref_idxs = [
        i for i, gt in enumerate(gts) if len(gt) == 2 and gt[0] == 0 and gt[1] == 0
    ]

    if len(hom_ref_idxs) > 3:
        random.shuffle(hom_ref_idxs)

    hom_ref_samples = []
    for i in hom_ref_idxs:
        if vcf_samples[i] in names_to_bams:
            hom_ref_samples.append(vcf_samples[i])

    to_add_count = min_entries - len(bams)
    bams.extend(names_to_bams[s] for s in hom_ref_samples[:to_add_count])
    variant_samples += ["control-sample:" + s for s in hom_ref_samples[:to_add_count]]
    return variant_samples, bams


def create_metadata(
    variant,
    translocation_chrom,
    svtype,
    sample_str,
    n_samples,
    annotations,
    denovo_row,
    denovo_svs,
):
    """
    creates a dict with the info about the SV
    that will be used in the website
    """
    data_dict = {
        "chrom": variant.chrom,
        "chrom2": translocation_chrom,
        "start": variant.start,
        "end": variant.stop,
        "svtype": svtype,
        "svlength": variant.stop - variant.start,
        "samples": sample_str,
        "nsamples": n_samples,
    }
    if annotations:
        data_dict["overlaps"] = get_overlap(
            annotations, variant.chrom, variant.start, variant.stop
        )
    if denovo_row != "":
        data_dict["dn"] = ",".join(denovo_svs)
    return data_dict


def format_template(
    variant,
    data_dict,
    max_entries,
    bams,
    variant_samples,
    plot_titles,
    out_dir,
    output_type,
    svtype,
    downsample,
    pass_through_args,
):
    """
    formates the template string for generation of the final command
    """
    if data_dict["chrom2"] is None:
        figname_template = "{svtype}_{chrom}_{start}_{end}.{itype}"
    else:
        figname_template = "{svtype}_{chrom}_{start}_{chrom2}_{end}.{itype}"

    fig_path = os.path.join(
        out_dir, figname_template.format(itype=output_type, **data_dict),
    )

    if "CIPOS" in variant.info:
        v = variant.info["CIPOS"]
        cipos = "--start_ci '%s,%s'" % (abs(int(v[0])), abs(int(v[1])))
    else:
        cipos = ""
    if "CIEND" in variant.info:
        v = variant.info["CIEND"]
        ciend = "--end_ci '%s,%s'" % (abs(int(v[0])), abs(int(v[1])))
    else:
        ciend = ""
    # dynamically set Z to speed drawing and remove noise for larger events
    z = 3
    if variant.stop - variant.start > 2000:
        z = 4
    if variant.stop - variant.start > 10000:
        z = 6
    if variant.stop - variant.start > 20000:
        z = 9
    if data_dict["chrom2"] is None:
        z = 3

    if max_entries:
        bams = bams[:max_entries]
        variant_samples = variant_samples[:max_entries]

    # update titles based on FORMAT fields requested
    title_list = list()
    for variant_sample in variant_samples:
        if variant_sample in plot_titles:
            title_list.append(plot_titles[variant_sample])
        else:
            title_list.append(variant_sample)

    start = variant.start
    stop = variant.stop
    start2 = None
    stop2 = None

    if data_dict["chrom2"] is None:
        template = (
            "samplot plot {extra_args} -z {z} -n {titles} "
            + "{cipos} {ciend} {svtype} -c {chrom} -s {start} "
            + "-e {end} -o {fig_path} -d {downsample} -b {bams}"
        )
    else:
        template = (
            "samplot plot {extra_args} -z {z} -n {titles} "
            + "{cipos} {ciend} {svtype} -c {chrom} -s {start} "
            + "-e {end} -o {fig_path} -d {downsample} -b {bams} "
            + "-c {chrom2} -s {start2} -e {end2}"
        )
        # For interchromosomal variants the 2nd breakpoint position should 
        # not be encoded in INFO/END tag although some callers still do this. 
        # Currently it is unclear if there is a good replacement. Delly uses 
        # INFO/POS2 for this, GATK-SV uses INFO/END2, dysgu uses INFO/CHR2_POS.
        # see:  https://github.com/dellytools/delly/issues/159
        # see: https://gatk.broadinstitute.org/hc/en-us/articles/5334587352219-How-to-interpret-SV-VCFs
        # TODO - if the SV breakpoints are specified in the ALT field one 
        #        could use this info to get the 2nd breakpoint position
        if "POS2" in variant.info:
            start2 = variant.info["POS2"]
        elif "END2" in variant.info:
            start2 = variant.info["END2"]
        elif "CHR2_POS" in variant.info:
            start2 = variant.info["CHR2_POS"]
        else:
            start2 = stop
            # Update stop if INFO/END denotes the 2nd breakpoint
            stop = start + 1

        stop2 = start2 + 1
        

    command = template.format(
        extra_args=" ".join(pass_through_args),
        bams=" ".join(bams),
        titles=" ".join(title_list),
        z=z,
        cipos=cipos,
        ciend=ciend,
        svtype="-t " + svtype if svtype != "SV" else "",
        fig_path=fig_path,
        chrom=variant.chrom,
        start=start,
        end=stop,
        downsample=downsample,
        chrom2=data_dict["chrom2"],
        start2=start2,
        end2=stop2,
    ) + "\n"
    return command


def write_site(table_data, out_dir, output_type, annotations, denovo_row):
    # grab the template
    env = Environment(
        loader=FileSystemLoader(os.path.join(os.path.dirname(__file__), "templates")),
        autoescape=select_autoescape(["html"]),
    )
    html_template = env.get_template("samplot_vcf.html")
    # write index.html
    with open("{out_dir}/index.html".format(out_dir=out_dir), "w") as fh:
        print(
            html_template.render(
                data=table_data,
                plot_type=output_type,
                gff3="true" if annotations else "false",
                denovo="true" if denovo_row else "false",
            ),
            file=fh,
        )


def is_simply_skippable(
    variant,
    vcf_samples,
    gts,
    important_regions,
    max_mb,
    min_bp,
    min_call_rate,
    max_hets,
    plot_all,
    translocation_chrom,
):
    """
    checks several basic terms that could filter this variant out
    specifically, if the variant type is INS,
    or fails the important regions,
    max_mb, min_bp, min_call_rate, or max_hets filters
    """
    svtype = variant.info.get("SVTYPE", "SV")

    # skips variants outside important regions if those are set
    if not var_in_important_regions(
        important_regions, variant.chrom, variant.start, variant.stop, svtype,
    ):
        return True

    # skips insertions
    if svtype in ("INS"):
        logger.debug(
            "Skipping {} at {}:{}-{}, INS type not supported".format(
                svtype, variant.chrom, variant.start, variant.stop
            )
        )
        return True

    # skips variants over max_mb length, if set
    if max_mb and (variant.stop - variant.start > max_mb * 1000000):
        logger.debug(
            "Skipping {} at {}:{}-{}, variant length greater than max_mb".format(
                svtype, variant.chrom, variant.start, variant.stop
            )
        )
        return True
    
    # skips variants under min_bp, if set
    if (variant.stop - variant.start < min_bp) and translocation_chrom is None:
        logger.debug(
            "Skipping {} at {}:{}-{}, variant length shorter than min_bp".format(
                svtype, variant.chrom, variant.start, variant.stop
            )
        )
        return True

    # skips variants if the call rate is below min_call_rate, if set
    if not above_call_rate(
        gts,
        len(vcf_samples),
        min_call_rate,
        svtype,
        variant.chrom,
        variant.start,
        variant.stop,
    ):
        return True

    # skips variants if there are more hets than max_hets, if set
    if below_max_hets(
        gts, max_hets, svtype, variant.chrom, variant.start, variant.stop
    ):
        return True

    # skips variants where no sample is non-ref, if plot_all is not set
    if not plot_all:
        if no_variant_found(
            gts, svtype, variant.chrom, variant.start, variant.stop
        ):
            return True

    return False


def generate_commands(
    vcf,
    plot_all,
    max_mb,
    min_bp,
    min_call_rate,
    max_hets,
    dn_only,
    ped,
    important_regions,
    format_field_ids,
    min_entries,
    max_entries,
    out_dir,
    output_type,
    downsample,
    filters,
    ped_samples,
    denovo_row,
    names_to_bams,
    annotations,
    pass_through_args,
):
    """
    for every variant in vcf, process and output plot
    command - if and only if it passes filters
    """
    commands = []
    table_data = []
    vcf_samples = vcf.header.samples
    vcf_samples_set = set(vcf_samples)
    vcf_samples_list = list(vcf_samples)
    vcf_stats = Counter()

    # Check if VCF samples match BAMs
    if vcf_samples_set != set(names_to_bams):
        missing_vcf_samples = vcf_samples_set - set(names_to_bams)
        missing_bam_samples = set(names_to_bams) - vcf_samples_set
        logger.warning(
            "VCF samples and BAMs do not match. "
            "This may be due to different sample names in the VCF and BAMs."
        )
        if missing_vcf_samples:
            logger.warning(
                "VCF samples missing from BAMs: {}".format(", ".join(missing_vcf_samples))
            )
        if missing_bam_samples:
            logger.warning(
                "BAMs missing from VCF samples: {}".format(", ".join(missing_bam_samples))
            )

    for var_count, variant in enumerate(vcf):
        translocation_chrom = None
        svtype = variant.info.get("SVTYPE", "SV")

        # get genotypes
        gts = [s.get("GT", (None, None)) for s in variant.samples.values()]

        # handle translocations
        if svtype in ["BND", "TRA"]:
            try:
                translocation_chrom = variant.info.get("CHR2")
            except (KeyError, ValueError) as e:
                logger.debug(e)
                logger.info(f"Translocation {svtype} on {variant.chrom}:{variant.start}"
                              "skipped due to missing CHR2 INFO field.")

        if is_simply_skippable(
            variant,
            vcf_samples,
            gts,
            important_regions,
            max_mb,
            min_bp,
            min_call_rate,
            max_hets,
            plot_all,
            translocation_chrom,
        ):
            vcf_stats["Skipped"] += 1
            continue

        # gets the list of samples to plot
        # skips ref samples if plot_all isn't set
        # and applies user-defined filters
        idxs, test_samples = get_plottable_samples(
            gts,
            variant,
            plot_all,
            filters,
            svtype,
            variant.chrom,
            variant.start,
            variant.stop,
        )
        if len(idxs) == 0:
            vcf_stats["No plottable samples"] += 1
            continue

        # matches alignment files to variant samples
        variant_samples = get_variant_samples(
            idxs,
            vcf_samples,
            names_to_bams,
            svtype,
            variant.chrom,
            variant.start,
            variant.stop,
        )
        if len(variant_samples) <= 0:
            vcf_stats["No plottable samples with matched BAM"] += 1
            continue

        bams = [names_to_bams[s] for s in variant_samples]

        # finds putative de novo variants
        denovo_svs = get_denovos(
            denovo_row,
            test_samples,
            variant_samples,
            ped_samples,
            svtype,
            variant.chrom,
            variant.start,
            variant.stop,
            dn_only,
        )
        if dn_only and (len(denovo_svs) <= 0):
            vcf_stats["Non de novo ('--dn_only' specified)"] += 1
            continue

        # save fields for the html.
        n_samples = len(variant_samples)
        # semi-colon delimited eases CSV export from HTML
        sample_str = ";".join(variant_samples)
        # dict holding sample to FORMAT title string
        plot_titles = dict()
        if format_field_ids:
            format_attrs = get_format_title(vcf_samples_list, format_field_ids, variant)
            plot_titles = make_plot_titles(variant_samples, format_attrs)

        # get control samples if possible
        # try to get family members if ped is set
        # and reference samples is ped is not set
        if ped is not None:
            variant_samples, bams = get_family_controls(
                ped,
                denovo_svs,
                variant_samples,
                ped_samples,
                max_hets,
                bams,
                names_to_bams,
                vcf_samples_set,
            )
        elif min_entries and len(bams) < min_entries:
            variant_samples, bams = get_nonfamily_controls(
                gts, vcf_samples, variant_samples, names_to_bams, min_entries, bams
            )

        data_dict = create_metadata(
            variant,
            translocation_chrom,
            svtype,
            sample_str,
            n_samples,
            annotations,
            denovo_row,
            denovo_svs,
        )
        table_data.append(data_dict)

        command = format_template(
            variant,
            data_dict,
            max_entries,
            bams,
            variant_samples,
            plot_titles,
            out_dir,
            output_type,
            svtype,
            downsample,
            pass_through_args,
        )
        commands.append(command)


    logger.debug("VCF entry count: {}".format(var_count + 1))
    if vcf_stats:
        logger.debug("VCF entrys filtered out: {}".format(sum(vcf_stats.values())))
        for reason, count in vcf_stats.items():
            logger.debug(" - {}: {}".format(reason, count))

    return commands, table_data


def run_plot_command(command_string: str):
    # Setup a parser for translating the command_string
    parent_parser = argparse.ArgumentParser()
    sub_parser = parent_parser.add_subparsers(title="[sub-commands]", dest="command")
    add_plot(sub_parser)

    # Convert command_string to list and remove leading 'samplot' argument
    # Taken from https://stackoverflow.com/a/524796.
    # NOTE: If python2 is dropped, `shlex.split` could be used for simpler syntax
    command = [p.strip("'") for p in re.split("( |\\\".*?\\\"|'.*?')", command_string.strip()) if p.strip()]
    command = command[1:]

    # Skipped parse_known_args here since extra_args are not used in `samplot plot`.
    # This means that any fauly extra arguments given to `samplot vcf` will raise
    # and error here
    args = parent_parser.parse_args(command)
    args.func(parent_parser, args)


def vcf(parser, args, pass_through_args):
    """
    Generate commands and html for plotting/reviewing variants from VCF
    """
    if args.debug:
        logger.setLevel(logging.DEBUG)

    if args.dn_only and not args.ped:
        logger.error("Missing --ped, required when using --dn_only")
        sys.exit(1)

    if cram_input(args.bams):
        if "-r" not in pass_through_args and "--reference" not in pass_through_args:
            logger.error(
                "ERROR: missing reference file required for CRAM. "
                + "Use -r option. (Run `samplot.py -h` for more help)"
            )
            sys.exit(1)

    vcf = pysam.VariantFile(args.vcf)
    vcf_samples = vcf.header.samples

    annotations = None
    if args.gff3:
        annotations = pysam.TabixFile(args.gff3)

    filters = [to_exprs(f) for f in args.filter]

    ped_samples = parse_ped(args.ped, vcf_samples)

    # this is empty unless we have a sample with both parents defined.
    denovo_row = get_dn_row(ped_samples)

    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)

    # connect the sample IDs to bam files
    names_to_bams = get_names_to_bams(args.bams, args.sample_ids)

    # check that at least one sample is can be plotted  
    if not any(vcf_sample in names_to_bams for vcf_sample in vcf_samples):
        other = "'--sample_ids'" if args.sample_ids else "BAM"
        logger.error("Samples in VCF do not match samples specified in {}".format(other))
        logger.error("VCF samples: {}".format(', '.join(vcf_samples)))
        logger.error("{} samples: {}".format(other, ', '.join(vcf_samples)))
        sys.exit(1)

    # if important regions are included, load those intervals
    # and only show SVs inside them
    important_regions = read_important_regions(args.important_regions)

    # user-requested FORMAT fields to add to plot title
    format_field_ids = None
    if args.format:
        format_field_ids = args.format.split(",")

    # for every variant in vcf, process and output plot
    # command - if and only if it passes filters
    commands, table_data = generate_commands(
        vcf,
        args.plot_all,
        args.max_mb,
        args.min_bp,
        args.min_call_rate,
        args.max_hets,
        args.dn_only,
        args.ped,
        important_regions,
        format_field_ids,
        args.min_entries,
        args.max_entries,
        args.out_dir,
        args.output_type,
        args.downsample,
        filters,
        ped_samples,
        denovo_row,
        names_to_bams,
        annotations,
        pass_through_args,
    )

    write_site(table_data, args.out_dir, args.output_type, annotations, denovo_row)

    if args.manual_run:
        with open(args.command_file, "w") as outfile:
            outfile.writelines(commands)
    else:
        if args.threads == 1:
            for command in commands:
                run_plot_command(command)
        else:
            from multiprocessing import Pool
            with Pool(processes=args.threads) as pool:
                pool.map(run_plot_command, commands)


def add_vcf(parent_parser):
    """Defines allowed arguments for samplot's vcf plotter
    """
    import doctest

    parser = parent_parser.add_parser(
        "vcf",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        help="Generates commands to plot images with `samplot plot`,"
        + " using VCF file to define regions",
    )

    if len(sys.argv) > 1 and sys.argv[1] == "test":
        r = doctest.testmod()
        print(r)
        sys.exit(r.failed)

    parser.add_argument("--vcf", "-v", help="VCF file containing structural variants")
    parser.add_argument(
        "-d", "--out-dir", help="path to write output images", default="samplot-out",
    )
    parser.add_argument(
        "--ped", help="path to ped (or .fam) file",
    )
    parser.add_argument(
        "--dn_only",
        help="plots only putative de novo variants (PED file required)",
        action="store_true",
    )
    parser.add_argument(
        "--min_call_rate",
        type=float,
        help="only plot variants with at least this call-rate",
        required=False,
    )
    parser.add_argument(
        "--filter",
        action="append",
        help="simple filter that samples"
        + " must meet. Join multiple filters with '&' "
        + "and specify --filter multiple times for 'or'"
        + " e.g. DHFFC < 0.7 & SVTYPE = 'DEL'",
        default=[],
    )
    parser.add_argument(
        "-O",
        "--output_type",
        choices=("png", "pdf", "eps", "jpg"),
        help="type of output figure",
        default="png",
    )
    parser.add_argument(
        "--max_hets",
        type=int,
        help="only plot variants with at most this many heterozygotes",
        required=False,
    )
    parser.add_argument(
        "--min_entries",
        type=int,
        help="try to include homref samples as controls to get this many samples in plot",
        default=6,
        required=False,
    )
    parser.add_argument(
        "--max_entries",
        type=int,
        help="only plot at most this many heterozygotes",
        default=10,
        required=False,
    )
    parser.add_argument(
        "--max_mb",
        type=int,
        help="skip variants longer than this many megabases",
        required=False,
    )
    parser.add_argument(
        "--min_bp",
        type=int,
        help="skip variants shorter than this many bases",
        default=20,
    )
    parser.add_argument(
        "--important_regions",
        help="only report variants that overlap regions in this bed file",
        required=False,
    )
    parser.add_argument(
        "-b",
        "--bams",
        type=str,
        nargs="+",
        help="Space-delimited list of BAM/CRAM file names",
        required=True,
    )
    parser.add_argument(
        "--sample_ids",
        type=str,
        nargs="+",
        help="Space-delimited list of sample IDs, "
        + "must have same order as BAM/CRAM file names. "
        + "BAM RG tag required if this is omitted.",
        required=False,
    )
    parser.add_argument(
        "--command_file",
        help="store commands in this file.",
        default="samplot_vcf_cmds.tmp",
        required=False,
    )
    parser.add_argument(
        "--format",
        default="AS,AP,DHFFC",
        help="comma separated list of FORMAT fields to include in sample plot title",
        required=False,
    )
    parser.add_argument(
        "--gff3",
        help="genomic regions (.gff with .tbi in same directory) "
        + "used when building HTML table and table filters",
        required=False,
    )
    parser.add_argument(
        "--downsample", help="Number of normal reads/pairs to plot", default=1, type=int
    )
    parser.add_argument(
        "--manual_run",
        help="disables auto-run for the plotting commands",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--plot_all",
        help="plots all samples and all variants - "
        + "limited by any filtering arguments set",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "-t", "--threads",
        type=int,
        default=1,
        help="Number of threads to use to generate plots. Default: %(default)s",
    )
    parser.add_argument(
        "--debug",
        help="prints out the reason for skipping any skipped variant entry",
        default=False,
        action="store_true",
    )

    parser.set_defaults(func=vcf)


if __name__ == "__main__":
    print("Run as samplot module with `samplot vcf`")


================================================
FILE: samplot/templates/samplot_vcf.html
================================================
<!DOCTYPE html>
<html lang='en'>

<head>
    <meta charset='utf-8'>
    <title>samplot</title>

    <script src="https://cdnjs.cloudflare.com/ajax/libs/d3/5.9.2/d3.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/crossfilter2/1.4.7/crossfilter.min.js"
        type="text/javascript"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/dc/3.0.12/dc.min.js" type="text/javascript"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" type="text/javascript"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/js/bootstrap.bundle.min.js"
        type="text/javascript"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/viewerjs/1.5.0/viewer.min.js" type="text/javascript"></script>
    <script src="https://cdn.datatables.net/v/bs4/dt-1.10.20/b-1.6.1/b-html5-1.6.1/sc-2.0.1/sl-1.3.1/datatables.min.js"
        type="text/javascript"></script>

    <link href="https://cdnjs.cloudflare.com/ajax/libs/viewerjs/1.5.0/viewer.min.css" rel="stylesheet"
        type="text/css" />
    <link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.8.2/css/all.min.css" rel="stylesheet"
        type="text/css" />
    <link href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/css/bootstrap.min.css" rel="stylesheet"
        type="text/css" />
    <link href="https://cdnjs.cloudflare.com/ajax/libs/dc/3.0.12/dc.min.css" rel="stylesheet" type="text/css" />
    <link href="https://cdn.datatables.net/v/bs4/dt-1.10.20/b-1.6.1/b-html5-1.6.1/sc-2.0.1/sl-1.3.1/datatables.min.css"
        rel="stylesheet" type="text/css" />

    <style type="text/css">
        #filter-menu .dropdown-menu {
            min-height: 100px;
            max-height: 100vh;
            overflow-y: auto;
            overflow-x: hidden;
            background-color: #edf0f2;
        }

        span.no-show {
            display: none;
        }

        span.show-ellipsis:after {
            content: "...";
        }

        .datatable-info {
            font-size: .9em;
        }

        #variant-table_info {
            padding-top: 8px;
        }

        table.dataTable thead th.sorting:after,
        table.dataTable thead th.sorting_asc:after,
        table.dataTable thead th.sorting_desc:after,
        table.dataTable thead th.sorting:before,
        table.dataTable thead th.sorting_asc:before,
        table.dataTable thead th.sorting_desc:before {
            font-family: FontAwesome !important;
        }

        .modal-content {
            width: 610px;
        }

        h7 {
            font-size: .95rem;
        }

        body {
            height: 100vh
        }

        div.dts div.dataTables_scrollBody {
            background: white;
        }
    </style>
</head>

<body>
    <nav class="navbar navbar-dark bg-dark p-0 pl-2">
        <a class="navbar-brand text-light p-0" href="https://github.com/ryanlayer/samplot">samplot</a>
    </nav>

    <div class="modal fade" id="filter-modal" tabindex="-1" role="dialog" aria-labelledby="filter-modal"
        aria-hidden="true">
        <div class="modal-dialog" role="document">
            <div class="modal-content">
                <div class="modal-header">
                    <div class="flex-column">
                        <h5 class="modal-title" id="filter-modal">Filters</h5>
                        <h7 class="pl-2 text-secondary" id="variant-count">
                            <a href="javascript:dc.filterAll(); dc.renderAll();">Reset All</a>
                        </h7>
                    </div>
                </div>
                <div class="modal-body">
                    <div class="container">
                        <div class="row pt-2">
                            <div class="col">
                                <h5>Sample</h5>
                            </div>
                        </div>
                        <div class="row pb-3">
                            <div class="col-12">
                                <div id="sample-search"></div>
                            </div>
                        </div>
                        <div class="row" id="nsamples-chart">
                            <div class="col-4">
                                <h5># of Samples</h5>
                            </div>
                            <div class="col-8 text-right">
                                <span class="reset text-muted" style="display: none;">[<span
                                        class="filter"></span>]</span>
                                <a class="reset" href="javascript:nsamplesChart.filterAll();dc.redrawAll();"
                                    style="display: none;">Reset</a>
                            </div>
                        </div>
                        <div class="row" id="size-chart">
                            <div class="col-4">
                                <h5>Size</h5>
                            </div>
                            <div class="col-8 text-right">
                                <span class="reset text-muted" style="display: none;">[<span
                                        class="filter"></span>]</span>
                                <a class="reset" href="javascript:sizeChart.filterAll();dc.redrawAll();"
                                    style="display: none;">Reset</a>
                            </div>
                        </div>
                        <div class="row" id="type-chart">
                            <div class="col-4">
                                <h5>SV Type</h5>
                            </div>
                            <div class="col-8 text-right">
                                <span class="reset text-muted" style="display: none;">[<span
                                        class="filter"></span>]</span>
                                <a class="reset" href="javascript:typeChart.filterAll();dc.redrawAll();"
                                    style="display: none;">Reset</a>
                            </div>
                        </div>
                        <div class="row" id="chrom-chart">
                            <div class="col-4">
                                <h5>Chromosome</h5>
                            </div>
                            <div class="col-8 text-right">
                                <span class="reset text-muted" style="display: none;">[<span
                                        class="filter"></span>]</span>
                                <a class="reset" href="javascript:chromChart.filterAll();dc.redrawAll();"
                                    style="display: none;">Reset</a>
                            </div>
                        </div>
                        <div class="row" id="overlaps-chart" hidden>
                            <div class="col-4">
                                <h5>SV Overlaps</h5>
                            </div>
                            <div class="col-8 text-right">
                                <span class="reset text-muted" style="display: none;">[<span
                                        class="filter"></span>]</span>
                                <a class="reset" href="javascript:overlapsChart.filterAll();dc.redrawAll();"
                                    style="display: none;">Reset</a>
                            </div>
                        </div>
                    </div>
                </div>
                <div class="modal-footer">
                    <button type="button" class="btn btn-outline-secondary" data-dismiss="modal"
                        onclick="javascript:dc.filterAll(); dc.renderAll();"
                        title="Clear selection and close">Cancel</button>
                    <button type="button" class="btn btn-primary" data-dismiss="modal"
                        title="Apply filters">Apply</button>
                </div>
            </div>
        </div>
    </div>

    <div class="container-fluid h-90">
        <div class="row" id="variant-table-placeholder">
            <div class="col-12">
                <div style="height:415px">
                    <div class="d-flex justify-content-center align-items-center text-muted h-100">
                        <div class="d-flex flex-column">
                            <i class="fas fa-10x fa-table"></i>
                        </div>
                    </div>
                </div>
            </div>
        </div>

        <div class="row pb-1" id="variant-table-div" hidden>
            <div class="col-12">
                <div class="table-responsive">
                    <table id="variant-table" class="table table-hover display nowrap" width="100%"></table>
                </div>
            </div>
        </div>
    </div>
</body>

<script>
    const data = {{ data|tojson }}
    const plot_type = "{{plot_type}}"
    const annotation = {{ gff3 }}
    const denovo = {{ denovo }}

    dc.config.defaultColors(d3.schemeSet1)

    // plot constraints
    const plotw = 585
    const ploth = 150

    // table filters
    var searchInput = dc.textFilterWidget("#sample-search")
    var nsamplesChart = dc.barChart("#nsamples-chart")
    var sizeChart = dc.barChart("#size-chart")
    var typeChart = dc.barChart("#type-chart")
    var chromChart = dc.barChart("#chrom-chart")
    var overlapsChart
    // shows filter impact in modal header
    var variantCount = dc.dataCount("#variant-count")

    // used to access filtered table data
    var chromDimension
    // datatables obj
    var variant_table
    // crossfilter obj
    var ndx

    $('#filter-modal').on('hidden.bs.modal', function () {
        update_table()
    })

    const table_click = (selection, table) => {
        table.$('tr.selected').removeClass('selected')
        $(selection).addClass('selected')
        let current = $('tr.selected')
        let next = current.next()
        let prev = current.prev()

        let row = table.rows('.selected').data()[0]
        let img = new Image()
        img.src = `${row.svtype}_${row.chrom}_${row.start}_${row.end}.${plot_type}`
        if ( row.chrom2) {
            img.src = `${row.svtype}_${row.chrom}_${row.start}_${row.chrom2}_${row.end}.${plot_type}`
        }
        img.onerror = function(){
           alert(`${img.src} not found`);
        }
        let viewer = new Viewer(img, {
            hidden: function () {
                viewer.destroy()
            },
            title: function () {
                return `${row.svtype} on chromosome ${row.chrom} at ${row.start}-${row.end}`
            },
            toolbar: {
                zoomIn: 4,
                zoomOut: 4,
                oneToOne: 4,
                reset: 4,
                prev: {
                    show: prev.length > 0 ? true : false,
                    size: "large",
                    click: function () {
                        viewer.destroy()
                        table_click(prev, table)
                    }
                },
                play: { show: false },
                next: {
                    show: next.length > 0 ? true : false,
                    size: "large",
                    click: function () {
                        viewer.destroy()
                        table_click(next, table)
                    }
                },
                rotateLeft: { show: false },
                rotateRight: { show: false },
                flipHorizontal: { show: false },
                flipVertical: { show: false },
            },
            transition: false,
            navbar: false,
        })
        viewer.show()
    }

    function build_table(data) {
        // hide the placeholder and show the datatable
        d3.select('#variant-table-placeholder').property("hidden", true)
        d3.select('#variant-table-div').property("hidden", false)

        let cols = [
            { data: 'chrom', title: 'Chrom' },
            { data: 'start', title: 'Start' },
            { data: 'end', title: 'End' },
            { data: 'svlength', title: 'Size' },
            { data: 'svtype', title: 'SV Type' },
            { data: 'nsamples', title: '# of Samples' },
            { data: 'samples', title: 'Samples' },
        ]
        if (annotation) {
            d3.select('#overlaps-chart').property("hidden", false)
            cols.push({ data: 'overlaps', title: 'Overlaps' })
        }
        if (denovo) {
            cols.push({ data: 'dn', title: 'De novo' })
        }

        variant_table = $("#variant-table").DataTable({
            data: data,
            columns: cols,
            deferRender: true,
            scrollY: '80vh',
            scrollCollapse: true,
            scroller: true,
            info: true,
            buttons: [
                'copyHtml5', 'csvHtml5'
            ],
            infoCallback: (oSettings, iStart, iEnd, iMax, iTotal, sPre) => {
                return `
            <span class="datatable-info">
                <span class="pr-2">Showing <b>${iStart}</b> - <b>${iEnd}</b> of <b>${iTotal}</b> records</span>
                <button type="button" class="btn btn-primary btn-sm" data-toggle="modal" data-target="#filter-modal" title="Show filters">
                    <span class="fas fa-filter"></span>
                </button>
                <span class="dropup">
                    <button type="button" class="btn btn-sm btn-primary dropdown-toggle" id="download-menu" title="Save table" data-toggle="dropdown" aria-haspopup="true" aria-expanded="false">
                        <span class="fas fa-save"></span>
                    </button>
                    <span class="dropdown-menu" aria-labelledby="download-menu">
                        <h6 class="dropdown-header">Save ${iTotal} rows as:</h6>
                        <button class="dropdown-item" type="button" id="csv-button-download" onclick="csv_button_click()">
                            CSV
                        </button>
                        <button class="dropdown-item" type="button" id="copy-button-download" onclick="copy_button_click()">
                            Copy
                        </button>
                    </span>
                </span>
            </span>
            `
            },
            columnDefs: [
                {
                    targets: (annotation ? [0, 1, 2, 3, 4, 5, 7] : [0, 1, 2, 3, 4, 5]),
                    width: '15%'
                },
                // https://datatables.net/blog/2016-02-26
                {
                    targets: 6,
                    render: function (data, type, row) {
                        if (type === 'display' && data != null) {
                            data = data.replace(/<(?:.|\n)*?>/gm, '');
                            if (data.length > 40) {
                                return '<span class="show-ellipsis">' + data.substr(0, 40) + '</span><span class="no-show">' + data.substr(40) + '</span>';
                            } else {
                                return data;
                            }
                        } else {
                            return data;
                        }
                    }
                }
            ],
            // search is applied using crossfilter
            searching: false,
            lengthChange: false,
            order: [[0, 'asc'], [1, 'asc']],
        })

        // register table clicks on sample_column
        variant_table.on('click', 'tr', function () {
            table_click(this, variant_table)
        })
    }

    function csv_button_click() {
        variant_table.button('.buttons-csv').trigger()
    }

    function copy_button_click() {
        variant_table.button('.buttons-copy').trigger()
    }

    function update_table() {
        variant_table.clear()
        variant_table.rows.add(chromDimension.top(Infinity))
        variant_table.draw()
    }

    function remove_empty_bins(source_group) {
        return {
            all: function () {
                return source_group.all().filter(function (d) {
                    return d.value != 0
                })
            }
        }
    }

    // https://jsfiddle.net/gordonwoodhull/g34Ldwaz/8/
    // https://github.com/dc-js/dc.js/issues/348
    function index_group(group) {
        return {
            all: function () {
                return group.all().map(function (kv, i) {
                    return { key: i, value: kv.value }
                })
            }
        }
    }

    $(document).ready(function () {

        ndx = crossfilter(data)
        var all = ndx.groupAll()

        chromDimension = ndx.dimension((d) => { return d.chrom })
        build_table(chromDimension.top(Infinity))
        var chromGroup = chromDimension.group().reduceCount()
        var nonEmptyChromGroup = remove_empty_bins(chromGroup)

        var searchDimension = ndx.dimension(function (d) {
            return d.samples
        })
        searchInput
            .dimension(searchDimension)
            .on('renderlet', function () {
                d3.selectAll(".dc-text-filter-input")
                    .classed("form-control", true)
                d3.selectAll("#sample-search.dc-chart")
                    .classed("col-12", true)
            })

        var sizeDimension = ndx.dimension(function (d) {
            var round
            if (d.svlength < 100) {
                round = 100
            } else if (d.svlength < 1000) {
                round = 100
            } else if (d.svlength < 10000) {
                round = 1000
            } else if (d.svlength < 100000) {
                round = 10000
            } else if (d.svlength < 1000000) {
                round = 100000
            } else if (d.svlength < 10000000) {
                round = 1000000
            } else {
                round = 10000000
            }
            return Math.round(d.svlength / round) * round
        })
        var sizeGroup = sizeDimension.group().reduceCount()
        var nonEmptySizeGroup = remove_empty_bins(sizeGroup)
        // for brushing, need to track keys at numeric indexes
        var sizeKeys = nonEmptySizeGroup.all().map(dc.pluck('key')).slice()

        var typeDimension = ndx.dimension((d) => { return d.svtype })
        var typeGroup = typeDimension.group().reduceCount()
        var nonEmptyTypeGroup = remove_empty_bins(typeGroup)

        var nsamplesDimension = ndx.dimension((d) => { return d.nsamples })
        var nsamplesDimension = ndx.dimension(function (d) {
            var round
            if (d.nsamples < 10) {
                round = 1
            } else if (d.nsamples < 100) {
                round = 10
            } else if (d.nsamples < 1000) {
                round = 100
            } else if (d.nsamples < 10000) {
                round = 1000
            } else {
                round = 10000
            }
            return Math.round(d.nsamples / round) * round
        })
        var nsamplesGroup = nsamplesDimension.group().reduceCount()
        var nonEmptyNsamplesGroup = remove_empty_bins(nsamplesGroup)
        var nsamplesKeys = nonEmptyNsamplesGroup.all().map(dc.pluck('key')).slice()

        // number of samples
        nsamplesChart
            .width(plotw).height(ploth).gap(1)
            .margins({ top: 10, right: 50, bottom: 30, left: 40 })
            .x(d3.scaleLinear().domain([0, nsamplesKeys.length]))
            .round(Math.floor)
            .brushOn(true)
            .elasticX(true)
            .dimension(nsamplesDimension)
            .group(index_group(nonEmptyNsamplesGroup))
            .elasticY(true)
            .yAxisLabel('Count')
            .filterPrinter(function (filters) {
                var filter = filters[0]
                return nsamplesKeys[filter[0]] + ' - ' + nsamplesKeys[filter[1]]
            })
        // limit the number of labels along x-axis
        nsamplesChart.xAxis().ticks(20)
        nsamplesChart.yAxis().ticks(5)
        // update labels from keys
        nsamplesChart.xAxis().tickFormat(function (v) {
            return nsamplesKeys[v]
        })
        nsamplesChart.filterHandler(function (dimension, filters) {
            if (filters.length === 0) {
                // the empty case (no filtering)
                dimension.filter(null)
            } else {
                dimension.filterRange([nsamplesKeys[filters[0][0]], nsamplesKeys[filters[0][1]]])
            }
            return filters
        })

        // SV length
        sizeChart
            .width(plotw).height(ploth).gap(1)
            .margins({ top: 10, right: 50, bottom: 30, left: 40 })
            .x(d3.scaleLinear().domain([0, sizeKeys.length]))
            .round(Math.floor)
            .brushOn(true)
            .elasticX(true)
            .dimension(sizeDimension)
            .group(index_group(nonEmptySizeGroup))
            .elasticY(true)
            .yAxisLabel('Count')
            .filterPrinter(function

Download .txt

gitextract_qd67atv0/

├── .circleci/
│   ├── config.yml
│   └── setup.sh
├── .gitignore
├── LICENSE
├── README.md
├── requirements.txt
├── runtests.sh
├── samplot/
│   ├── __init__.py
│   ├── __main__.py
│   ├── samplot.py
│   ├── samplot_vcf.py
│   └── templates/
│       └── samplot_vcf.html
├── setup.py
├── ssshtest
└── test/
    ├── README.md
    ├── data/
    │   ├── 2_59305747-59505747_X_151018513-151218513.BND.bam
    │   ├── 2_59305747-59505747_X_151018513-151218513.BND.bam.bai
    │   ├── Alu.2_X.bed.gz.tbi
    │   ├── Alu.2_X.csionly.bed.gz.csi
    │   ├── HG002_10X.bam
    │   ├── HG002_10X.bam.bai
    │   ├── HG002_1_89475845-89478561_DEL.tenx.bam
    │   ├── HG002_1_89475845-89478561_DEL.tenx.bam.bai
    │   ├── HG002_Illumina.bam
    │   ├── HG002_Illumina.bam.bai
    │   ├── HG002_ONT.cram
    │   ├── HG002_ONT.cram.crai
    │   ├── HG002_PacBio.bam
    │   ├── HG002_PacBio.bam.bai
    │   ├── HG003_Illumina.bam
    │   ├── HG003_Illumina.bam.bai
    │   ├── HG004_Illumina.bam
    │   ├── HG004_Illumina.bam.bai
    │   ├── Homo_sapiens.GRCh37.82.sort.2_X.gff3.gz.tbi
    │   ├── Homo_sapiens.GRCh37.csionly.2_X.gff3.gz.csi
    │   ├── NA12878_restricted.bam
    │   ├── NA12878_restricted.bam.bai
    │   ├── NA12889_restricted.bam
    │   ├── NA12889_restricted.bam.bai
    │   ├── NA12890_restricted.bam
    │   ├── NA12890_restricted.bam.bai
    │   ├── README.md
    │   ├── commands.sh
    │   ├── examples.bed
    │   ├── examples_padded.bed
    │   ├── hg19_chr1_58343117_58343622_deletion.bam
    │   ├── hg19_chr1_58343117_58343622_deletion.bam.bai
    │   ├── hg19_chr21_27373431_27375410_inversion.bam
    │   ├── hg19_chr21_27373431_27375410_inversion.bam.bai
    │   ├── nanopore-NA12878.bam
    │   ├── nanopore-NA12878.bam.bai
    │   ├── subset_alignments.sh
    │   ├── test.ped
    │   ├── test.vcf
    │   ├── test_site/
    │   │   ├── README.md
    │   │   └── index.html
    │   └── test_site_cmds.sh
    ├── func/
    │   ├── samplot_test.sh
    │   └── samplot_vcf_test.sh
    └── unit/
        └── samplot_test.py

Download .txt

SYMBOL INDEX (133 symbols across 4 files)

FILE: samplot/__main__.py
  function main (line 11) | def main(args=None):

FILE: samplot/samplot.py
  function strip_chr (line 62) | def strip_chr(chrom):
  class plan_step (line 71) | class plan_step:
    method __init__ (line 74) | def __init__(self, start_pos, end_pos, event, info=None):
    method __str__ (line 80) | def __str__(self):
    method __repr__ (line 104) | def __repr__(self):
  class genome_interval (line 111) | class genome_interval:
    method __init__ (line 112) | def __init__(self, chrm, start, end):
    method __str__ (line 117) | def __str__(self):
    method __repr__ (line 120) | def __repr__(self):
    method __eq__ (line 123) | def __eq__(self, gi2):
    method intersect (line 128) | def intersect(self, gi):
  function get_range_hit (line 140) | def get_range_hit(ranges, chrm, point):
  function map_genome_point_to_range_points (line 155) | def map_genome_point_to_range_points(ranges, chrm, point):
  function points_in_window (line 171) | def points_in_window(points):
  function get_tabix_iter (line 190) | def get_tabix_iter(chrm, start, end, datafile):
  function add_coverage (line 231) | def add_coverage(read, coverage_matrix, offset, column):
  function plot_coverage (line 264) | def plot_coverage(
  class PairedEnd (line 383) | class PairedEnd:
    method __init__ (line 390) | def __init__(self, chrm, start, end, is_reverse, MI_tag, HP_tag):
    method __repr__ (line 410) | def __repr__(self):
  function add_pair_end (line 424) | def add_pair_end(bam_file, read, pairs, linked_reads, ignore_hp):
  function sample_normal (line 479) | def sample_normal(max_depth, pairs, z):
  function get_pairs_insert_sizes (line 538) | def get_pairs_insert_sizes(ranges, pairs):
  function get_pair_insert_size (line 559) | def get_pair_insert_size(ranges, pair):
  function get_pairs_plan (line 585) | def get_pairs_plan(ranges, pairs, linked_plan=False):
  function get_pair_plan (line 612) | def get_pair_plan(ranges, pair, linked_plan=False):
  function get_pair_event_type (line 662) | def get_pair_event_type(pe_read):
  function jitter (line 677) | def jitter(value, bounds: float = 0.1) -> float:
  function plot_pair_plan (line 686) | def plot_pair_plan(ranges, step, ax, marker_size, jitter_bounds):
  function plot_pairs (line 731) | def plot_pairs(
  class SplitRead (line 759) | class SplitRead:
    method __init__ (line 766) | def __init__(self, chrm, start, end, strand, query_pos, MI_tag=None, H...
    method __repr__ (line 787) | def __repr__(self):
  function calc_query_pos_from_cigar (line 802) | def calc_query_pos_from_cigar(cigar, strand):
  function add_split (line 842) | def add_split(read, splits, bam_file, linked_reads, ignore_hp):
  function get_split_plan (line 912) | def get_split_plan(ranges, split, linked_plan=False):
  function get_splits_plan (line 977) | def get_splits_plan(ranges, splits, linked_plan=False):
  function plot_split_plan (line 1005) | def plot_split_plan(ranges, step, ax, marker_size, jitter_bounds):
  function plot_splits (line 1046) | def plot_splits(
  class Alignment (line 1073) | class Alignment:
    method __init__ (line 1080) | def __init__(self, chrm, start, end, strand, query_position):
    method __str__ (line 1090) | def __str__(self):
    method __repr__ (line 1104) | def __repr__(self):
  class LongRead (line 1117) | class LongRead:
    method __init__ (line 1123) | def __init__(self, alignments):
    method __str__ (line 1131) | def __str__(self):
    method __repr__ (line 1134) | def __repr__(self):
  function get_alignments_from_cigar (line 1141) | def get_alignments_from_cigar(chrm, curr_pos, strand, cigartuples, rever...
  function get_cigartuples_from_string (line 1176) | def get_cigartuples_from_string(cigarstring):
  function merge_alignments (line 1193) | def merge_alignments(min_gap, alignments):
  function add_long_reads (line 1218) | def add_long_reads(bam_file, read, long_reads, min_event_size, ignore_hp):
  function add_align_step (line 1277) | def add_align_step(alignment, steps, ranges):
  function get_long_read_plan (line 1355) | def get_long_read_plan(read_name, long_reads, ranges):
  function plot_variant (line 1529) | def plot_variant(sv, sv_type, ax, ranges):
  function plot_confidence_interval (line 1576) | def plot_confidence_interval(chrm, breakpoint, ci, ax, ranges):
  function create_variant_plot (line 1606) | def create_variant_plot(grid, ax_i, sv, sv_type, ranges, start_ci, end_ci):
  function get_linked_plan (line 1638) | def get_linked_plan(ranges, pairs, splits, linked_reads, gem_name):
  function plot_linked_reads (line 1714) | def plot_linked_reads(
  function plot_long_reads (line 1775) | def plot_long_reads(long_reads, ax, ranges, curr_min_insert_size, curr_m...
  function pair (line 1862) | def pair(arg):
  function print_arguments (line 1883) | def print_arguments(options):
  function add_plot (line 1915) | def add_plot(parent_parser):
  function estimate_fragment_len (line 2350) | def estimate_fragment_len(bam, reference):
  function set_plot_dimensions (line 2377) | def set_plot_dimensions(
  function get_read_data (line 2496) | def get_read_data(
  function downsample_pairs (line 2671) | def downsample_pairs(max_depth, z_score, all_pairs):
  function set_haplotypes (line 2686) | def set_haplotypes(curr_coverage):
  function plot_samples (line 2707) | def plot_samples(
  function plot_legend (line 2928) | def plot_legend(fig, legend_fontsize, marker_size):
  function create_gridspec (line 2994) | def create_gridspec(bams, transcript_file, annotation_files, sv_type, re...
  function get_plot_annotation_plan (line 3033) | def get_plot_annotation_plan(ranges, annotation_file):
  function plot_annotations (line 3076) | def plot_annotations(
  function get_interval_range_plan_start_end (line 3144) | def get_interval_range_plan_start_end(ranges, interval):
  function get_transcript_plan (line 3203) | def get_transcript_plan(ranges, transcript_file):
  function plot_transcript (line 3302) | def plot_transcript(
  function plot (line 3424) | def plot(parser, options, extra_args=None):

FILE: samplot/samplot_vcf.py
  class Sample (line 44) | class Sample(object):
    method __init__ (line 56) | def __init__(self, line):
    method __repr__ (line 65) | def __repr__(self):
  function flatten (line 71) | def flatten(value, sep=","):
  function get_format_fields (line 97) | def get_format_fields(ids, variant):
  function get_format_title (line 119) | def get_format_title(samples, ids, variant):
  function make_plot_titles (line 133) | def make_plot_titles(samples, attr_values):
  function get_overlap (line 165) | def get_overlap(
  function parse_ped (line 239) | def parse_ped(path, vcf_samples=None):
  function get_names_to_bams (line 268) | def get_names_to_bams(bams, name_list=None):
  function tryfloat (line 297) | def tryfloat(v):
  function to_exprs (line 304) | def to_exprs(astr):
  function check_expr (line 332) | def check_expr(vdict, expr):
  function make_single (line 369) | def make_single(vdict):
  function get_dn_row (line 381) | def get_dn_row(ped_samples):
  function read_important_regions (line 388) | def read_important_regions(bedfilename):
  function var_in_important_regions (line 403) | def var_in_important_regions(important_regions, chrom, start, end, svtype):
  function cram_input (line 426) | def cram_input(bams):
  function above_call_rate (line 433) | def above_call_rate(gts, sample_count, min_call_rate, svtype, chrom, sta...
  function below_max_hets (line 452) | def below_max_hets(gts, max_hets, svtype, chrom, start, end):
  function no_variant_found (line 472) | def no_variant_found(gts, svtype, chrom, start, end):
  function get_plottable_samples (line 486) | def get_plottable_samples(
  function get_variant_samples (line 531) | def get_variant_samples(
  function get_denovos (line 551) | def get_denovos(
  function get_family_controls (line 589) | def get_family_controls(
  function get_nonfamily_controls (line 637) | def get_nonfamily_controls(
  function create_metadata (line 659) | def create_metadata(
  function format_template (line 692) | def format_template(
  function write_site (line 810) | def write_site(table_data, out_dir, output_type, annotations, denovo_row):
  function is_simply_skippable (line 830) | def is_simply_skippable(
  function generate_commands (line 911) | def generate_commands(
  function run_plot_command (line 1108) | def run_plot_command(command_string: str):
  function vcf (line 1127) | def vcf(parser, args, pass_through_args):
  function add_vcf (line 1224) | def add_vcf(parent_parser):

FILE: test/unit/samplot_test.py
  class Test_set_plot_dimensions (line 19) | class Test_set_plot_dimensions(unittest.TestCase):
    method test_set_plot_dimensions (line 21) | def test_set_plot_dimensions(self):
    method test_get_read_data (line 144) | def test_get_read_data(self):
  class Test_genome_interval (line 211) | class Test_genome_interval(unittest.TestCase):
    method test_init (line 213) | def test_init(self):
    method test_intersect (line 221) | def test_intersect(self):
    method test_get_range_hit (line 260) | def test_get_range_hit(self):
    method test_map_genome_point_to_range_points (line 278) | def test_map_genome_point_to_range_points(self):
  class Test_long_read_plan (line 419) | class Test_long_read_plan(unittest.TestCase):
    method test_add_align_step (line 421) | def test_add_align_step(self):
    method test_get_alignments_from_cigar (line 523) | def test_get_alignments_from_cigar(self):
    method test_get_long_read_plan (line 565) | def test_get_long_read_plan(self):
  class Test_annotation_plan (line 707) | class Test_annotation_plan(unittest.TestCase):
    method test_get_alignments_from_cigar (line 709) | def test_get_alignments_from_cigar(self):
  class Test_splits (line 784) | class Test_splits(unittest.TestCase):
    method test_get_split_plan (line 786) | def test_get_split_plan(self):
    method test_get_splits_plan (line 953) | def test_get_splits_plan(self):
  class Test_pairs (line 1002) | class Test_pairs(unittest.TestCase):
    method test_get_pair_insert_size (line 1004) | def test_get_pair_insert_size(self):
    method test_get_pair_plan (line 1093) | def test_get_pair_plan(self):
  class Test_linked (line 1122) | class Test_linked(unittest.TestCase):
    method test_get_linked_plan (line 1124) | def test_get_linked_plan(self):

Download .json

Condensed preview — 60 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (342K chars).

[
  {
    "path": ".circleci/config.yml",
    "chars": 1648,
    "preview": "\nversion: 2\n\nvariables:\n  setup_p3: &setup_p3\n    run:\n      shell: /bin/bash\n      name: Setup Samplot python3 dependen"
  },
  {
    "path": ".circleci/setup.sh",
    "chars": 1366,
    "preview": "#!/bin/bash\n\nset -exo pipefail\n\nWORKSPACE=$(pwd)\n\n# Set path\necho \"export PATH=$WORKSPACE/anaconda/bin:$PATH\" >> $BASH_E"
  },
  {
    "path": ".gitignore",
    "chars": 1222,
    "preview": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packagi"
  },
  {
    "path": "LICENSE",
    "chars": 1067,
    "preview": "MIT License\n\nCopyright (c) 2019 Ryan Layer\n\nPermission is hereby granted, free of charge, to any person obtaining a copy"
  },
  {
    "path": "README.md",
    "chars": 21066,
    "preview": "[![CircleCI](https://circleci.com/gh/ryanlayer/samplot/tree/master.svg?style=svg)](https://circleci.com/gh/ryanlayer/sam"
  },
  {
    "path": "requirements.txt",
    "chars": 45,
    "preview": "matplotlib<3.7\nnumpy\npysam>=0.15\nwget\nJinja2\n"
  },
  {
    "path": "runtests.sh",
    "chars": 349,
    "preview": "echo \"running unit tests:\"\npython test/unit/samplot_test.py\necho \"finished unit tests\"\necho \"running functional tests fo"
  },
  {
    "path": "samplot/__init__.py",
    "chars": 44,
    "preview": "#!/usr/bin/env python\n__version__ = \"1.3.1\"\n"
  },
  {
    "path": "samplot/__main__.py",
    "chars": 954,
    "preview": "#!/usr/bin/env python\nimport argparse\nimport logging\nimport sys\n\nfrom .__init__ import __version__\nfrom .samplot import "
  },
  {
    "path": "samplot/samplot.py",
    "chars": 109289,
    "preview": "#!/usr/bin/env python\nfrom __future__ import print_function\n\nimport logging\nimport os\nimport random\nimport re\nimport sys"
  },
  {
    "path": "samplot/samplot_vcf.py",
    "chars": 40430,
    "preview": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n\"\"\"\nCreate samplot vcf commands to execute and generate\ncompanion HTML ima"
  },
  {
    "path": "samplot/templates/samplot_vcf.html",
    "chars": 24630,
    "preview": "<!DOCTYPE html>\n<html lang='en'>\n\n<head>\n    <meta charset='utf-8'>\n    <title>samplot</title>\n\n    <script src=\"https:/"
  },
  {
    "path": "setup.py",
    "chars": 1252,
    "preview": "import re\n\nfrom setuptools import find_packages, setup\n\n\nwith open(\"README.md\", \"r\") as fh:\n    long_description = fh.re"
  },
  {
    "path": "ssshtest",
    "chars": 11743,
    "preview": "#!/bin/bash\n\n############################################################\n#  Program: ssshtest\n#  Authors : Ryan M Layer"
  },
  {
    "path": "test/README.md",
    "chars": 446,
    "preview": "These BAM files contain reads that align to the small set of SV-containing regions below.\n\nRegions included in BAM files"
  },
  {
    "path": "test/data/README.md",
    "chars": 423,
    "preview": "This directory contains data and scripts for the download of that data. Alignments are from Genome in a Bottle public re"
  },
  {
    "path": "test/data/commands.sh",
    "chars": 2418,
    "preview": "set -e\n\n#download hg19 reference for cram\nFILE=\"hg19.fa.gz\"\nif [ ! -f $FILE ]; then\n    wget http://hgdownload.cse.ucsc."
  },
  {
    "path": "test/data/examples.bed",
    "chars": 170,
    "preview": "1\t24804398\t24807302\tDEL\tHET\n1\t43059290\t43059950\tNA\tHOM\n4\t99813787\t99817098\tDUP\tHOM\n11\t67974432\t67975639\tDUP\tHET\n12\t12544"
  },
  {
    "path": "test/data/examples_padded.bed",
    "chars": 170,
    "preview": "1\t24802398\t24809302\tDEL\tHET\n1\t43057290\t43061950\tNA\tHOM\n4\t99811787\t99819098\tDUP\tHOM\n11\t67972432\t67977639\tDUP\tHET\n12\t12542"
  },
  {
    "path": "test/data/subset_alignments.sh",
    "chars": 1792,
    "preview": "#download example regions from GIAB 300X Illumina Ashkenazi Trio\nsamtools view -h -b -L examples_padded.bed ftp://ftp-tr"
  },
  {
    "path": "test/data/test.ped",
    "chars": 86,
    "preview": "0001    HG004   0   0   0\n0001    HG003   0   0   1\n0001    HG002   HG003   HG004   0\n"
  },
  {
    "path": "test/data/test.vcf",
    "chars": 8139,
    "preview": "##fileformat=VCFv4.1\n##fileDate=20170929\n##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase"
  },
  {
    "path": "test/data/test_site/README.md",
    "chars": 204,
    "preview": "Site generated using:\n\n```\nsamplot vcf --vcf test/data/test.vcf -b test/data/HG002_Illumina.bam test/data/HG003_Illumina"
  },
  {
    "path": "test/data/test_site/index.html",
    "chars": 25027,
    "preview": "<!DOCTYPE html>\n<html lang='en'>\n\n<head>\n    <meta charset='utf-8'>\n    <title>samplot</title>\n\n    <script src=\"https:/"
  },
  {
    "path": "test/data/test_site_cmds.sh",
    "chars": 1121,
    "preview": "samplot -z 4 --minq 0 -n HG002 HG004 control-sample:HG003 --start_ci '0,0' --end_ci '0,0' -t DEL -c 1 -s 24804397 -e 248"
  },
  {
    "path": "test/func/samplot_test.sh",
    "chars": 12081,
    "preview": "#!/bin/bash\n\ntest -e ssshtest || wget -q https://raw.githubusercontent.com/ryanlayer/ssshtest/master/ssshtest\n. ssshtest"
  },
  {
    "path": "test/func/samplot_vcf_test.sh",
    "chars": 10026,
    "preview": "#!/bin/bash\n\ntest -e ssshtest || wget -q https://raw.githubusercontent.com/ryanlayer/ssshtest/master/ssshtest\n. ssshtest"
  },
  {
    "path": "test/unit/samplot_test.py",
    "chars": 49583,
    "preview": "import unittest\nimport sys\n\nfrom samplot import samplot\n\n\nbam_1 = 'test/data/NA12878_restricted.bam'\nbam_2 = 'test/data/"
  }
]

// ... and 32 more files (download for full content)

About this extraction

This page contains the full source code of the ryanlayer/samplot GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 60 files (15.5 MB), approximately 83.6k tokens, and a symbol index with 133 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo