[
  {
    "path": ".circleci/config.yml",
    "content": "\nversion: 2\n\nvariables:\n  setup_p3: &setup_p3\n    run:\n      shell: /bin/bash\n      name: Setup Samplot python3 dependencies\n      command: bash .circleci/setup.sh 3\n  run_plot_tests: &run_plot_tests\n    run:\n      shell: /bin/bash\n      name: Functional Tests for Samplot\n      command: bash test/func/samplot_test.sh\n      no_output_timeout: 1h \n  run_vcf_tests: &run_vcf_tests\n    run:\n      shell: /bin/bash\n      name: Functional Tests for Samplot\n      command: bash test/func/samplot_vcf_test.sh\n      no_output_timeout: 1h \n  run_unit_tests: &run_unit_tests\n    run:\n      shell: /bin/bash\n      name: Functional Tests for Samplot\n      command: python test/unit/samplot_test.py\n      no_output_timeout: 1h \n  macos: &macos\n    macos:\n      xcode: \"12.5.1\"\n  linux: &linux\n    machine: ubuntu-2004:202201-02\n  install_samplot: &install_samplot\n    run:\n      name: Install Samplot\n      command: python setup.py install\n\n\n\njobs:\n  test-linux-python3:\n    <<: *linux\n    steps:\n      - checkout\n      - *setup_p3\n      - *install_samplot \n      - *run_plot_tests\n      - *run_vcf_tests\n      - *run_unit_tests\n  test-macos-python3:\n    <<: *macos\n    steps:\n      - checkout\n      - *setup_p3\n      - *install_samplot\n      - *run_plot_tests\n      - *run_vcf_tests\n      - *run_unit_tests\n\n\nworkflows:\n  version: 2\n  samplot-unit-tests:\n    jobs:\n      - test-linux-python3\n      - test-macos-python3\n  samplot-nightly-unit-tests:\n    triggers:\n      - schedule:\n          cron: \"0 0 * * *\"\n          filters:\n            branches:\n              only:\n                - master\n    jobs:\n      - test-linux-python3\n      - test-macos-python3\n"
  },
  {
    "path": ".circleci/setup.sh",
    "content": "#!/bin/bash\n\nset -exo pipefail\n\nWORKSPACE=$(pwd)\n\n# Set path\necho \"export PATH=$WORKSPACE/anaconda/bin:$PATH\" >> $BASH_ENV\nsource $BASH_ENV\n\n## Passed from .circleci/config.yml (Only 3 permited)\npythonversion=$1\nif (( $pythonversion != 3 ))\nthen\n    echo -e \"\\nERROR: Python 3 designation required. Python version $pythonversion was supplied. Please correct and run again\\n\"\n    exit 1   \nfi \n\n# setup conda and dependencies \nif [[ ! -d $WORKSPACE/anaconda ]]; then\n    mkdir -p $WORKSPACE\n\n    # step 1: download and install anaconda\n    if [[ $OSTYPE == darwin* ]]; then\n        tag=\"MacOSX\"\n        tag2=\"darwin\"\n    elif [[ $OSTYPE == linux* ]]; then\n        tag=\"Linux\"\n        tag2=\"linux\"\n    else\n        echo \"Unsupported OS: $OSTYPE\"\n        exit 1\n    fi  \n\n    curl -O https://repo.anaconda.com/miniconda/Miniconda$pythonversion-latest-$tag-x86_64.sh\n    sudo bash Miniconda$pythonversion-latest-$tag-x86_64.sh -b -p $WORKSPACE/anaconda/\n    sudo chown -R $USER $WORKSPACE/anaconda/\n\n    mkdir -p $WORKSPACE/anaconda/conda-bld/$tag-64\n\n    # step 3: setup channels\n    conda config --system --add channels defaults\n    conda config --system --add channels r\n    conda config --system --add channels bioconda\n    conda config --system --add channels conda-forge\n\n    # step 3: install Samplot requirements\n    conda install -y --file requirements.txt\n\nfi\n"
  },
  {
    "path": ".gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n.vscode/\n.DS_Store\n"
  },
  {
    "path": "LICENSE",
    "content": "MIT License\n\nCopyright (c) 2019 Ryan Layer\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all\ncopies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\nSOFTWARE.\n"
  },
  {
    "path": "README.md",
    "content": "[![CircleCI](https://circleci.com/gh/ryanlayer/samplot/tree/master.svg?style=svg)](https://circleci.com/gh/ryanlayer/samplot/tree/master)\n[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/samplot/README.html)\n\n<center><img src=\"/doc/imgs/samplot_logo_v5.png\" width=\"300\"/></center>\n\n<center><img src=\"/doc/imgs/montage.jpg\" width=\"100%\"/></center>\n\n`samplot` is a command line tool for rapid, multi-sample structural variant\nvisualization. `samplot` takes SV coordinates and bam files and produces\nhigh-quality images that highlight any alignment and depth signals that\nsubstantiate the SV.\n\nIf you use samplot, please cite https://genomebiology.biomedcentral.com/articles/10.1186/s13059-021-02380-5\n\n\n# Usage\n<details>\n  <summary>samplot plot</summary>\n  \n  ```\nusage: samplot plot [-h] [-n TITLES [TITLES ...]] [-r REFERENCE] [-z Z] -b\n                    BAMS [BAMS ...] 
[-o OUTPUT_FILE] [--output_dir OUTPUT_DIR]\n                    -s START -e END -c CHROM [-w WINDOW] [-d MAX_DEPTH]\n                    [-t SV_TYPE] [-T TRANSCRIPT_FILE]\n                    [--transcript_filename TRANSCRIPT_FILENAME]\n                    [--max_coverage_points MAX_COVERAGE_POINTS]\n                    [-A ANNOTATION_FILES [ANNOTATION_FILES ...]]\n                    [--annotation_filenames ANNOTATION_FILENAMES [ANNOTATION_FILENAMES ...]]\n                    [--coverage_tracktype {stack,superimpose,none}] [-a]\n                    [-H PLOT_HEIGHT] [-W PLOT_WIDTH] [-q INCLUDE_MQUAL]\n                    [--separate_mqual SEPARATE_MQUAL] [-j]\n                    [--start_ci START_CI] [--end_ci END_CI]\n                    [--long_read LONG_READ] [--ignore_hp]\n                    [--min_event_size MIN_EVENT_SIZE]\n                    [--xaxis_label_fontsize XAXIS_LABEL_FONTSIZE]\n                    [--yaxis_label_fontsize YAXIS_LABEL_FONTSIZE]\n                    [--legend_fontsize LEGEND_FONTSIZE]\n                    [--annotation_fontsize ANNOTATION_FONTSIZE]\n                    [--hide_annotation_labels] [--coverage_only]\n                    [--max_coverage MAX_COVERAGE] [--same_yaxis_scales]\n                    [--marker_size MARKER_SIZE] [--jitter [JITTER]]\n                    [--dpi DPI] [--annotation_scalar ANNOTATION_SCALAR]\n                    [--zoom ZOOM] [--debug DEBUG]\n\n\noptions:\n  -h, --help            show this help message and exit\n  -n TITLES [TITLES ...], --titles TITLES [TITLES ...]\n                        Space-delimited list of plot titles. Use quote marks\n                        to include spaces (i.e. 
\"plot 1\" \"plot 2\")\n  -r REFERENCE, --reference REFERENCE\n                        Reference file for CRAM, required if CRAM files used\n  -z Z, --z Z           Number of stdevs from the mean (default 4)\n  -b BAMS [BAMS ...], --bams BAMS [BAMS ...]\n                        Space-delimited list of BAM/CRAM file names\n  -o OUTPUT_FILE, --output_file OUTPUT_FILE\n                        Output file name/type. Defaults to\n                        {type}_{chrom}_{start}_{end}.png\n  --output_dir OUTPUT_DIR\n                        Output directory name. Defaults to working dir.\n                        Ignored if --output_file is set\n  -s START, --start START\n                        Start position of region/variant (add multiple for\n                        translocation/BND events)\n  -e END, --end END     End position of region/variant (add multiple for\n                        translocation/BND events)\n  -c CHROM, --chrom CHROM\n                        Chromosome (add multiple for translocation/BND events)\n  -w WINDOW, --window WINDOW\n                        Window size (count of bases to include in view),\n                        default(0.5 * len)\n  -d MAX_DEPTH, --max_depth MAX_DEPTH\n                        Max number of normal pairs to plot\n  -t SV_TYPE, --sv_type SV_TYPE\n                        SV type. 
If omitted, plot is created without variant\n                        bar\n  -T TRANSCRIPT_FILE, --transcript_file TRANSCRIPT_FILE\n                        GFF3 of transcripts\n  --transcript_filename TRANSCRIPT_FILENAME\n                        Name for transcript track\n  --max_coverage_points MAX_COVERAGE_POINTS\n                        number of points to plot in coverage axis (downsampled\n                        from region size for speed)\n  -A ANNOTATION_FILES [ANNOTATION_FILES ...], --annotation_files ANNOTATION_FILES [ANNOTATION_FILES ...]\n                        Space-delimited list of bed.gz tabixed files of\n                        annotations (such as repeats, mappability, etc.)\n  --annotation_filenames ANNOTATION_FILENAMES [ANNOTATION_FILENAMES ...]\n                        Space-delimited list of names for the tracks in\n                        --annotation_files\n  --coverage_tracktype {stack,superimpose,none}\n                        type of track to use for low MAPQ coverage plot.\n  -a, --print_args      Print commandline arguments to a json file, useful\n                        with PlotCritic\n  -H PLOT_HEIGHT, --plot_height PLOT_HEIGHT\n                        Plot height\n  -W PLOT_WIDTH, --plot_width PLOT_WIDTH\n                        Plot width\n  -q INCLUDE_MQUAL, --include_mqual INCLUDE_MQUAL\n                        Min mapping quality of reads to be included in plot\n                        (default 1)\n  --separate_mqual SEPARATE_MQUAL\n                        coverage from reads with MAPQ <= separate_mqual\n                        plotted in lighter grey. To disable, pass in negative\n                        value\n  -j, --json_only       Create only the json file, not the image plot\n  --start_ci START_CI   confidence intervals of SV first breakpoint (distance\n                        from the breakpoint). Must be a comma-separated pair\n                        of ints (i.e. 
20,40)\n  --end_ci END_CI       confidence intervals of SV end breakpoint (distance\n                        from the breakpoint). Must be a comma-separated pair\n                        of ints (i.e. 20,40)\n  --long_read LONG_READ\n                        Min length of a read to be treated as a long-read\n                        (default 1000)\n  --ignore_hp           Choose to ignore HP tag in alignment files\n  --min_event_size MIN_EVENT_SIZE\n                        Min size of an event in long-read CIGAR to include\n                        (default 20)\n  --xaxis_label_fontsize XAXIS_LABEL_FONTSIZE\n                        Font size for X-axis labels (default 6)\n  --yaxis_label_fontsize YAXIS_LABEL_FONTSIZE\n                        Font size for Y-axis labels (default 6)\n  --legend_fontsize LEGEND_FONTSIZE\n                        Font size for legend labels (default 6)\n  --annotation_fontsize ANNOTATION_FONTSIZE\n                        Font size for annotation labels (default 6)\n  --hide_annotation_labels\n                        Hide the label (fourth column text) from annotation\n                        files, useful for regions with many annotations\n  --coverage_only       Hide all reads and show only coverage\n  --max_coverage MAX_COVERAGE\n                        apply a maximum coverage cutoff. Unlimited by default\n  --same_yaxis_scales   Set the scales of the Y axes to the max of all\n  --marker_size MARKER_SIZE\n                        Size of marks on pairs and splits (default 3)\n  --jitter [JITTER]     Add uniform random noise to insert sizes. This can be\n                        helpful to resolve overlapping entries. 
Either a\n                        custom value (<1.0) is supplied or 0.08 will be used.\n  --dpi DPI             Dots per inches (pixel count, default 300)\n  --annotation_scalar ANNOTATION_SCALAR\n                        scaling factor for the optional annotation/trascript\n                        tracks\n  --zoom ZOOM           Only show +- zoom amount around breakpoints, much\n                        faster for large regions. Ignored if region smaller\n                        than --zoom (default 500000)\n  --debug DEBUG         Print debug statements\n\n```\n</details>\n\n## Installing\n`Samplot` is available from bioconda and is installable via the conda package manager:\n```\nconda install -c bioconda samplot \n```\n\n## Examples: \n\nSamplot requires either BAM files or CRAM files as primary input. If you use\nCRAM, you'll also need a reference genome. You can easily acquire a reference genome file with [GGD](https://github.com/gogetdata/ggd-cli), which is also available from conda.\n\n### Basic use case\nUsing data from NA12878, NA12889, and NA12890 in the \n[1000 Genomes Project](http://www.internationalgenome.org/about) (available in the test/data directory of samplot), we will\ninspect a possible deletion in NA12878 at 4:115928726-115931880 with respect\nto that same region in two unrelated samples NA12889 and NA12890.\n\nThe following command will create an image of that region:\n```\ntime samplot plot \\\n    -n NA12878 NA12889 NA12890 \\\n    -b samplot/test/data/NA12878_restricted.bam \\\n      samplot/test/data/NA12889_restricted.bam \\\n      samplot/test/data/NA12890_restricted.bam \\\n    -o 4_115928726_115931880.png \\\n    -c chr4 \\\n    -s 115928726 \\\n    -e 115931880 \\\n    -t DEL\n\nreal\t0m3.882s\nuser\t0m3.831s\nsys\t0m0.328s\n\n```\n\nThe arguments used above are:\n\n`-n` The names to be shown for each sample in the plot\n\n`-b` The BAM/CRAM files of the samples (space-delimited)\n\n`-o` The name of the output file containing the 
plot\n\n`-c` The chromosome of the region of interest\n\n`-s` The start location of the region of interest\n\n`-e` The end location of the region of interest\n\n`-t` The type of the variant of interest\n\nThis will create an image file named `4_115928726_115931880.png`, shown below:\n\n<img src=\"/doc/imgs/4_115928726_115931880.png\">\n\n### Gene and other genomic feature annotations\n\nGene annotations (tabixed, gff3 file) and genome features (tabixed, bgzipped, bed file) can be \nincluded in the plots.\n\nGet the gene annotations:\n```\nwget ftp://ftp.ensembl.org/pub/grch37/release-84/gff3/homo_sapiens/Homo_sapiens.GRCh37.82.gff3.gz\nbedtools sort -i Homo_sapiens.GRCh37.82.gff3.gz \\\n| bgzip -c > Homo_sapiens.GRCh37.82.sort.gff3.gz\ntabix Homo_sapiens.GRCh37.82.sort.gff3.gz\n```\n\nGet genome annotations, in this case Repeat Masker tracks and a mappability track:\n```\nwget http://hgdownload.cse.ucsc.edu/goldenpath/hg19/encodeDCC/wgEncodeMapability/wgEncodeDukeMapabilityUniqueness35bp.bigWig\nbigWigToBedGraph wgEncodeDukeMapabilityUniqueness35bp.bigWig wgEncodeDukeMapabilityUniqueness35bp.bed\nbgzip wgEncodeDukeMapabilityUniqueness35bp.bed\ntabix wgEncodeDukeMapabilityUniqueness35bp.bed.gz\n\ncurl http://hgdownload.soe.ucsc.edu/goldenPath/hg19/database/rmsk.txt.gz \\\n| bgzip -d -c \\\n| cut -f 6,7,8,13 \\\n| bedtools sort -i stdin \\\n| bgzip -c > rmsk.bed.gz\ntabix rmsk.bed.gz\n```\n\nPlot:\n```\nsamplot plot \\\n    -n NA12878 NA12889 NA12890 \\\n    -b samplot/test/data/NA12878_restricted.bam \\\n      samplot/test/data/NA12889_restricted.bam \\\n      samplot/test/data/NA12890_restricted.bam \\\n    -o 4_115928726_115931880.d100.genes_reps_map.png \\\n    -c chr4 \\\n    -s 115928726 \\\n    -e 115931880 \\\n    -t DEL \\\n    -d 100 \\\n    -T Homo_sapiens.GRCh37.82.sort.gff3.gz \\\n    -A rmsk.bed.gz wgEncodeDukeMapabilityUniqueness35bp.bed.gz\n```\n\n<img src=\"/doc/imgs/4_115928726_115931880.d100.genes_reps_map.png\">\n\n## Generating images from a VCF 
file\nTo plot images from structural variant calls in a VCF file, use samplot's\n`vcf` subcommand. This accepts a VCF file and the BAM files of samples\nyou wish to plot, outputting images and an `index.html` page for review. \n\n### Usage\n<details>\n  <summary> samplot vcf </summary>\n  \n  ```\nusage: samplot vcf [-h] [--vcf VCF] [-d OUT_DIR] [--ped PED] [--dn_only]\n                   [--min_call_rate MIN_CALL_RATE] [--filter FILTER]\n                   [-O {png,pdf,eps,jpg}] [--max_hets MAX_HETS]\n                   [--min_entries MIN_ENTRIES] [--max_entries MAX_ENTRIES]\n                   [--max_mb MAX_MB] [--min_bp MIN_BP]\n                   [--important_regions IMPORTANT_REGIONS] -b BAMS [BAMS ...]\n                   [--sample_ids SAMPLE_IDS [SAMPLE_IDS ...]]\n                   [--command_file COMMAND_FILE] [--format FORMAT]\n                   [--gff3 GFF3] [--downsample DOWNSAMPLE] [--manual_run]\n                   [--plot_all] [-t THREADS] [--debug]\n\noptions:\n  -h, --help            show this help message and exit\n  --vcf VCF, -v VCF     VCF file containing structural variants (default:\n                        None)\n  -d OUT_DIR, --out-dir OUT_DIR\n                        path to write output images (default: samplot-out)\n  --ped PED             path to ped (or .fam) file (default: None)\n  --dn_only             plots only putative de novo variants (PED file\n                        required) (default: False)\n  --min_call_rate MIN_CALL_RATE\n                        only plot variants with at least this call-rate\n                        (default: None)\n  --filter FILTER       simple filter that samples must meet. Join multiple\n                        filters with '&' and specify --filter multiple times\n                        for 'or' e.g. 
DHFFC < 0.7 & SVTYPE = 'DEL' (default:\n                        [])\n  -O {png,pdf,eps,jpg}, --output_type {png,pdf,eps,jpg}\n                        type of output figure (default: png)\n  --max_hets MAX_HETS   only plot variants with at most this many\n                        heterozygotes (default: None)\n  --min_entries MIN_ENTRIES\n                        try to include homref samples as controls to get this\n                        many samples in plot (default: 6)\n  --max_entries MAX_ENTRIES\n                        only plot at most this many heterozygotes (default:\n                        10)\n  --max_mb MAX_MB       skip variants longer than this many megabases\n                        (default: None)\n  --min_bp MIN_BP       skip variants shorter than this many bases (default:\n                        20)\n  --important_regions IMPORTANT_REGIONS\n                        only report variants that overlap regions in this bed\n                        file (default: None)\n  -b BAMS [BAMS ...], --bams BAMS [BAMS ...]\n                        Space-delimited list of BAM/CRAM file names (default:\n                        None)\n  --sample_ids SAMPLE_IDS [SAMPLE_IDS ...]\n                        Space-delimited list of sample IDs, must have same\n                        order as BAM/CRAM file names. BAM RG tag required if\n                        this is omitted. (default: None)\n  --command_file COMMAND_FILE\n                        store commands in this file. 
(default:\n                        samplot_vcf_cmds.tmp)\n  --format FORMAT       comma separated list of FORMAT fields to include in\n                        sample plot title (default: AS,AP,DHFFC)\n  --gff3 GFF3           genomic regions (.gff with .tbi in same directory)\n                        used when building HTML table and table filters\n                        (default: None)\n  --downsample DOWNSAMPLE\n                        Number of normal reads/pairs to plot (default: 1)\n  --manual_run          disables auto-run for the plotting commands (default:\n                        False)\n  --plot_all            plots all samples and all variants - limited by any\n                        filtering arguments set (default: False)\n  -t THREADS, --threads THREADS\n                        Number of threads to use to generate plots. Default: 1\n  --debug               prints out the reason for skipping any skipped variant\n                        entry (default: False)\n  ```\n</details>\n\n`samplot vcf` can be used to quickly apply some basic filters to variants. Filters are applied via the `--filter` argument, which may be repeated as many times as desired. Each expression specified with the `--filter` option is applied separately in an OR fashion, while `&` characters may be used within a statement for AND operations. \n\n### Example:\n```\nsamplot vcf \\\n    --filter \"SVTYPE == 'DEL' & SU >= 8\" \\\n    --filter \"SVTYPE == 'INV' & SU >= 5\" \\\n    --vcf example.vcf\\\n    -d test/\\\n    -O png\\\n    --important_regions regions.bed\\\n    -b example.bam > samplot_commands.sh\n```\nThis example will create a directory named test (in the current working directory). 
A file named `index.html` will be created inside that directory to explore the images created.\n\n**Filters:** The above filters will remove all samples/variants from output except:\n* `DEL` variants with at least `SU` of 8\n* `INV` variants with `SU` of at least 5\n\nThe specific `FORMAT` fields available in your VCF file may be different. I recommend SV VCF annotation with [duphold](https://github.com/brentp/duphold) by [brentp](https://github.com/brentp).\n\nFor more complex expression-based VCF filtering, try brentp's [slivar](https://github.com/brentp/slivar), which provides similar but broader options for filter expressions.\n\n**Region restriction.** Variants can also be filtered by overlap with a set of regions (for example, gene coordinates for genes correlated with a disease). The `important_regions` argument provides a BED file of such regions for this example.\n\n**Filtering for de novo SVs** \nUsing a [PED](https://gatkforums.broadinstitute.org/gatk/discussion/7696/pedigree-ped-files) file with `samplot vcf` allows filtering for variants that may be spontaneous/de novo variants. This filter is a simple Mendelian violation test. If a sample 1) has valid parent IDs in the PED file, 2) has a non-homref genotype (1/0, 0/1, or 1/1 in VCF), 3) passes filters, and 4) both parents have homref genotypes (0/0 in VCF), the sample may have a de novo variant. Filter parameters are not applied to the parents. The sample is plotted along with both parents, which are labeled as father and mother in the image. 
\n\nExample call with the addition of a PED file:\n\n<pre>\nsamplot vcf \\\n    --filter \"SVTYPE == 'DEL' & SU >= 8\" \\\n    --filter \"SVTYPE == 'INV' & SU >= 5\" \\\n    --vcf example.vcf\\\n    -d test/\\\n    -O png\\\n    <b>--ped family.ped\\</b>\n    --important_regions regions.bed\\\n    -b example.bam > samplot_commands.sh\n</pre>\n\n**Additional notes.** \n* Variants where fewer than 95% of samples have a call (whether reference or alternate) will be excluded by default. This can be altered via the command-line argument `min_call_rate`.\n* If you're primarily interested in rare variants, you can use the `max_hets` filter to remove variants that appear in more than `max_hets` samples.\n* Large variants can now be plotted easily by samplot through use of `samplot plot`'s `zoom` argument. However, you can still choose to only plot variants smaller than a given size using the `max_mb` argument. The `zoom` argument takes an integer parameter and shows only the intervals within +/- that parameter on either side of the breakpoints. A dotted line connects the ends of the variant call bar at the top of the window, showing that the region between breakpoint intervals is not shown.\n* By default, if fewer than 6 samples have a variant and additional homref samples are given, control samples will be added from the homref group to reach a total of 6 samples in the plot. This number may be altered using the `min_entries` argument.\n* Arguments that are optional in `samplot plot` can be given as arguments to `samplot vcf`. They will be applied to each image generated.\n\n\n#### CRAM inputs\nSamplot also supports CRAM input, which requires a reference fasta file for\nreading as noted above. Notice that the reference file is not included in this\nrepository due to size. 
This time we'll plot an interesting duplication at\nX:101055330-101067156.\n\n```\nsamplot plot \\\n    -n NA12878 NA12889 NA12890 \\\n    -b samplot/test/data/NA12878_restricted.cram \\\n      samplot/test/data/NA12889_restricted.cram \\\n      samplot/test/data/NA12890_restricted.cram \\\n    -o cramX_101055330_101067156.png \\\n    -c chrX \\\n    -s 101055330 \\\n    -e 101067156 \\\n    -t DUP \\\n    -r hg19.fa\n```\n\n\nThe arguments used above are the same as those used for the basic use case, with the addition of the following:\n\n`-r` The reference file used for reading CRAM files\n\n#### Plotting without the SV \nSamplot can also plot genomic regions that are unrelated to an SV. If you do\nnot pass the SV type option (`-t`) then the top SV bar will go away and only\nthe region that is given by `-c` `-s` and `-e` will be displayed.\n\n#### Long read (Oxford Nanopore and PacBio) and linked read support\nAny alignment that is longer than 1000 bp is treated as a long read, and\nthe plot design will focus on aligned regions and gaps. Aligned regions are in orange, and gaps follow the same DEL/DUP/INV color code used for short reads. The height of the alignment is based on the size of its largest gap.\n\n<img src=\"/doc/imgs/longread_del.png\">\n\nIf the bam file has an MI tag, then the reads will be treated as linked reads.\nThe plots will be similar to short read plots, but all alignments with the same MI are plotted at the same height according to the alignment with the largest gap in the group. A green line connects all alignments in a group.\n\n<img src=\"/doc/imgs/linkedread_del.png\">\n"
  },
  {
    "path": "requirements.txt",
    "content": "matplotlib<3.7\nnumpy\npysam>=0.15\nwget\nJinja2\n"
  },
  {
    "path": "runtests.sh",
    "content": "echo \"running unit tests:\"\npython test/unit/samplot_test.py\necho \"finished unit tests\"\necho \"running functional tests for \\`plot\\`:\"\nbash test/func/samplot_test.sh\nprintf \"\\n\\nfinished functional tests for \\`plot\\`:\\n\"\nprintf \"running functional tests for \\`vcf\\`:\\n\"\nbash test/func/samplot_vcf_test.sh\necho \"finished functional tests for \\`vcf\\`:\"\n"
  },
  {
    "path": "samplot/__init__.py",
    "content": "#!/usr/bin/env python\n__version__ = \"1.3.1\"\n"
  },
  {
    "path": "samplot/__main__.py",
    "content": "#!/usr/bin/env python\nimport argparse\nimport logging\nimport sys\n\nfrom .__init__ import __version__\nfrom .samplot import add_plot\nfrom .samplot_vcf import add_vcf\n\n\ndef main(args=None):\n    logging.basicConfig(level=logging.INFO, stream=sys.stderr,\n                        format=\"%(module)s - %(levelname)s: %(message)s\")\n    \n    if args is None:\n        args = sys.argv[1:]\n\n    parser = argparse.ArgumentParser(\n        prog=\"samplot\", formatter_class=argparse.ArgumentDefaultsHelpFormatter\n    )\n    parser.add_argument(\n        \"-v\",\n        \"--version\",\n        help=\"Installed version\",\n        action=\"version\",\n        version=\"%(prog)s \" + str(__version__),\n    )\n    sub = parser.add_subparsers(title=\"[sub-commands]\", dest=\"command\")\n    sub.required = True\n\n    add_plot(sub)\n    add_vcf(sub)\n\n    args,extra_args = parser.parse_known_args(args)\n    args.func(parser, args, extra_args)\n\n\nif __name__ == \"__main__\":\n    sys.exit(main() or 0)\n"
  },
  {
    "path": "samplot/samplot.py",
    "content": "#!/usr/bin/env python\nfrom __future__ import print_function\n\nimport logging\nimport os\nimport random\nimport re\nimport sys\nfrom argparse import SUPPRESS\n\nimport matplotlib\nmatplotlib.use(\"Agg\") #must be before imports of submodules in matplotlib\nimport matplotlib.gridspec as gridspec\nimport matplotlib.patches as mpatches\nimport matplotlib.path as mpath\nimport matplotlib.pyplot as plt\nimport matplotlib.ticker as ticker\nimport numpy as np\nimport pysam\nimport warnings\nwarnings.filterwarnings('ignore', 'FixedFormatter should only be used together with FixedLocator')\nfrom matplotlib.offsetbox import AnchoredText\n\n\nlogger = logging.getLogger(__name__)\n\nINTERCHROM_YAXIS = 5000\n\nCOLORS = {\n    \"Deletion/Normal\": \"black\",\n    \"Deletion\": \"black\",\n    \"Duplication\": \"red\",\n    \"Inversion\": \"blue\",\n    \"InterChrmInversion\": \"blue\",\n    \"InterChrm\": \"black\",\n}\n\nREAD_TYPES_USED = {\n    \"Deletion/Normal\": False,\n    \"Duplication\": False,\n    \"Inversion\": False,\n    \"Aligned long read\": False,\n    \"Linked read\": False,\n    \"Split-read\": False,\n    \"Paired-end read\": False,\n}\n\n# pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.cigartuples\nCIGAR_MAP = {\n    \"M\": 0,\n    \"I\": 1,\n    \"D\": 2,\n    \"N\": 3,\n    \"S\": 4,\n    \"H\": 5,\n    \"P\": 6,\n    \"=\": 7,\n    \"X\": 8,\n    \"B\": 9,\n}\n\ndef strip_chr(chrom):\n    \"\"\"\n    safer way to replace chr string, to support non-human genomes\n    \"\"\"\n    if chrom[:3] == \"chr\":\n        chrom = chrom[3:]\n    return chrom\n\n# {{{class plan_step:\nclass plan_step:\n    step_events = [\"Align\", \"ANNOTATION\"]\n\n    def __init__(self, start_pos, end_pos, event, info=None):\n        self.start_pos = start_pos\n        self.end_pos = end_pos\n        self.event = event\n        self.info = info\n\n    def __str__(self):\n        if self.info:\n            return (\n                \"Step(\"\n           
     + str(self.start_pos)\n                + \", \"\n                + str(self.end_pos)\n                + \", \"\n                + self.event\n                + \", \"\n                + str(self.info)\n                + \")\"\n            )\n        else:\n            return (\n                \"Step(\"\n                + str(self.start_pos)\n                + \", \"\n                + str(self.end_pos)\n                + \", \"\n                + self.event\n                + \")\"\n            )\n\n    def __repr__(self):\n        return str(self)\n\n\n# }}}\n\n# {{{class genome_interval:\nclass genome_interval:\n    def __init__(self, chrm, start, end):\n        self.chrm = chrm\n        self.start = start\n        self.end = end\n\n    def __str__(self):\n        return \"(\" + self.chrm + \",\" + str(self.start) + \",\" + str(self.end) + \")\"\n\n    def __repr__(self):\n        return str(self)\n\n    def __eq__(self, gi2):\n        return self.chrm == gi2.chrm and self.start == gi2.start and self.end == gi2.end\n\n    \"\"\" return -1 if before, 0 if in, 1 if after \"\"\"\n\n    def intersect(self, gi):\n        if strip_chr(gi.chrm) < strip_chr(self.chrm) or gi.end < self.start:\n            return -1\n        elif strip_chr(gi.chrm) > strip_chr(self.chrm) or gi.start > self.end:\n            return 1\n        else:\n            return 0\n\n\n# }}}\n\n# {{{def get_range_hit(ranges, chrm, point):\ndef get_range_hit(ranges, chrm, point):\n    for j in range(len(ranges)):\n        r = ranges[j]\n        if (\n            strip_chr(r.chrm) == strip_chr(chrm)\n            and r.start <= point\n            and r.end >= point\n        ):\n            return j\n    return None\n\n\n# }}}\n\n# {{{def map_genome_point_to_range_points(ranges, chrm, point):\ndef map_genome_point_to_range_points(ranges, chrm, point):\n    range_hit = get_range_hit(ranges, chrm, point)\n\n    if range_hit == None:\n        return None\n    p = 1.0 / len(ranges) * range_hit + (1.0 / 
len(ranges)) * (\n        float(point - ranges[range_hit].start)\n        / float(ranges[range_hit].end - ranges[range_hit].start)\n    )\n\n    return p\n\n\n# }}}\n\n# {{{def points_in_window(points):\ndef points_in_window(points):\n    \"\"\"Checks whether these points lie within the window of interest\n\n    Points is a list of one start, one end coordinate (ints)\n    \"\"\"\n    if (\n        None in points\n        or points[0] < -5\n        or points[1] < -5\n        or points[0] > 5\n        or points[1] > 5\n    ):\n        return False\n    return True\n\n\n# }}}\n\n# {{{ def get_tabix_iter(chrm, start, end, datafile):\ndef get_tabix_iter(chrm, start, end, datafile):\n    \"\"\"Gets an iterator from a tabix BED/GFF3 file\n\n    Used to avoid chrX vs. X notation issues when extracting data from\n    annotation files\n    \"\"\"\n    try:\n        tbx = pysam.TabixFile(datafile)\n    except:\n        tbx = pysam.TabixFile(datafile, index=datafile+\".csi\")\n\n\n    itr = None\n    try:\n        itr = tbx.fetch(chrm, max(0, start - 1000), end + 1000)\n    except ValueError:\n        # try and account for chr/no chr prefix\n        if chrm[:3] == \"chr\":\n            chrm = chrm[3:]\n        else:\n            chrm = \"chr\" + chrm\n\n        try:\n            itr = tbx.fetch(chrm, max(0, start - 1000), end + 1000)\n        except ValueError as e:\n            logger.warning(\n                \"Could not fetch {}:{}-{} from {}\".format(\n                    chrm,\n                    start,\n                    end,\n                    datafile\n                )\n            )\n            print(e)\n    return itr\n\n\n# }}}\n\n##Coverage methods\n# {{{def add_coverage(bam_file, read, coverage, separate_mqual):\ndef add_coverage(read, coverage_matrix, offset, column):\n    \"\"\"Adds a read to the known coverage \n\n    Coverage from Pysam read is added to coverage_matrix.\n    offset defines the start position of the current range\n    column specifies 
which column to add to.\n    \"\"\"\n\n    curr_pos = read.reference_start\n    if not read.cigartuples:\n        return\n\n    for op, length in read.cigartuples:\n        if op in [CIGAR_MAP[\"M\"], CIGAR_MAP[\"=\"], CIGAR_MAP[\"X\"]]:\n            coverage_matrix[curr_pos - offset: curr_pos + length - offset, column] += 1\n            curr_pos += length\n        elif op == CIGAR_MAP[\"I\"]:\n            curr_pos = curr_pos\n        elif op == CIGAR_MAP[\"D\"]:\n            curr_pos += length\n        elif op == CIGAR_MAP[\"N\"]:\n            curr_pos = length\n        elif op == CIGAR_MAP[\"S\"]:\n            curr_pos = curr_pos\n        elif op == CIGAR_MAP[\"H\"]:\n            curr_pos = curr_pos\n        else:\n            curr_pos += length\n\n\n# }}}\n\n# {{{def plot_coverage(coverage,\ndef plot_coverage(\n    coverage,\n    ax,\n    ranges,\n    hp_count,\n    max_coverage,\n    tracktype,\n    yaxis_label_fontsize,\n    max_coverage_points,\n):\n    \"\"\"Plots high and low quality coverage for the region\n\n    User may specify a preference between stacked and superimposed \n    superimposed may cause unexpected behavior if low-quality depth is\n    greater than high \n    \"\"\"\n    cover_x = []\n    cover_y_lowqual = []\n    cover_y_highqual = []\n    cover_y_all = []\n\n    for i in range(len(ranges)):\n        r = ranges[i]\n        region_len = r.end-r.start\n        downsample = 1\n        if region_len > max_coverage_points:\n            downsample = int(region_len / max_coverage_points)\n\n        for i,pos in enumerate(range(r.start, r.end + 1)):\n            if i%downsample !=  0: \n                continue\n            cover_x.append(map_genome_point_to_range_points(ranges, r.chrm, pos))\n            if r.chrm in coverage and pos in coverage[r.chrm]:\n                cover_y_all.append(coverage[r.chrm][pos][0] + coverage[r.chrm][pos][1])\n                cover_y_highqual.append(coverage[r.chrm][pos][0])\n                
cover_y_lowqual.append(coverage[r.chrm][pos][1])\n            else:\n                cover_y_lowqual.append(0)\n                cover_y_highqual.append(0)\n                cover_y_all.append(0)\n    cover_y_lowqual = np.array(cover_y_lowqual)\n    cover_y_highqual = np.array(cover_y_highqual)\n    cover_y_all = np.array(cover_y_all)\n\n    if max_coverage > 0:\n        max_plot_depth = max_coverage\n    elif cover_y_all.max() > 3 * cover_y_all.mean():\n        max_plot_depth = max(\n            np.percentile(cover_y_all, 99.5), np.percentile(cover_y_all, 99.5)\n        )\n    else:\n        max_plot_depth = np.percentile(cover_y_all.max(), 99.5)\n    ax2 = ax.twinx()\n    ax2.set_xlim([0, 1])\n\n    if 0 == max_plot_depth:\n        max_plot_depth = 0.01\n\n    ax2.set_ylim([0, max(1, max_plot_depth)])\n    bottom_fill = np.zeros(len(cover_y_all))\n    if tracktype == \"stack\":\n        ax2.fill_between(\n            cover_x,\n            cover_y_highqual,\n            bottom_fill,\n            color=\"darkgrey\",\n            step=\"pre\",\n            alpha=0.4,\n        )\n\n        ax2.fill_between(\n            cover_x, cover_y_all, cover_y_highqual, color=\"grey\", step=\"pre\", alpha=0.15\n        )\n\n    elif tracktype == \"superimpose\":\n        ax2.fill_between(\n            cover_x, cover_y_lowqual, bottom_fill, color=\"grey\", step=\"pre\", alpha=0.15\n        )\n\n        ax2.fill_between(\n            cover_x,\n            cover_y_highqual,\n            cover_y_lowqual,\n            color=\"darkgrey\",\n            step=\"pre\",\n            alpha=0.4,\n        )\n\n        ax2.fill_between(\n            cover_x, cover_y_lowqual, bottom_fill, color=\"grey\", step=\"pre\", alpha=0.15\n        )\n    ## tracktype==None also allowed\n\n    # number of ticks should be 6 if there's one hp, 3 otherwise\n    tick_count = 5 if hp_count == 1 else 2\n    tick_count = max(int(max_plot_depth / tick_count), 1)\n\n    # set axis parameters\n    
#ax2.yaxis.set_major_locator(ticker.FixedLocator(tick_count))\n    ax2.yaxis.set_major_locator(ticker.MultipleLocator(tick_count))\n    ax2.tick_params(axis=\"y\", colors=\"grey\", labelsize=yaxis_label_fontsize)\n    ax2.spines[\"top\"].set_visible(False)\n    ax2.spines[\"bottom\"].set_visible(False)\n    ax2.spines[\"left\"].set_visible(False)\n    ax2.spines[\"right\"].set_visible(False)\n    ax2.tick_params(axis=\"x\", length=0)\n    ax2.tick_params(axis=\"y\", length=0)\n\n    # break the variant plot when we have multiple ranges\n    for i in range(1, len(ranges)):\n        ax2.axvline(x=1.0 / len(ranges), color=\"white\", linewidth=5)\n\n    return ax2\n\n\n# }}}\n\n##Pair End methods\n# {{{class PairedEnd:\nclass PairedEnd:\n    \"\"\"container of paired-end read info\n\n    Contains start(int), end(int), strand(bool True=forward), MI (int\n    molecular identifier), HP (int haplotype)\n    \"\"\"\n\n    def __init__(self, chrm, start, end, is_reverse, MI_tag, HP_tag):\n        \"\"\"Create PairedEnd instance\n\n        Genomic interval is defined by start and end integers\n        Strand is opposite of is_reverse\n        Molecular identifier and Haplotype are integers if present, else\n        False\n        \"\"\"\n        self.pos = genome_interval(chrm, start, end)\n        self.strand = not (is_reverse)\n        # molecular identifier - linked reads only\n        self.MI = None\n        # haplotype - phased reads only\n        self.HP = 0\n\n        if MI_tag:\n            self.MI = MI_tag\n        if HP_tag:\n            self.HP = HP_tag\n\n    def __repr__(self):\n        return \"PairedEnd(%s,%s,%s,%s,%s,%s)\" % (\n            self.pos.chrm,\n            self.pos.start,\n            self.pos.end,\n            self.strand,\n            self.MI,\n            self.HP,\n        )\n\n\n# }}}\n\n# {{{ def add_pair_end(bam_file, read, pairs, linked_reads):\ndef add_pair_end(bam_file, read, pairs, linked_reads, ignore_hp):\n    \"\"\"adds a (mapped, 
primary, non-supplementary, and paired) read to the\n    pairs list\n\n    Pysam read is added as simpified PairedEnd instance to pairs\n    Also added to linked_reads list if there is an associated MI tag\n    \"\"\"\n\n    if read.is_unmapped:\n        return\n    if not (read.is_paired):\n        return\n    if read.is_secondary:\n        return\n    if read.is_supplementary:\n        return\n\n    MI_tag = False\n    HP_tag = False\n\n    if read.has_tag(\"MI\"):\n        MI_tag = int(read.get_tag(\"MI\"))\n    if not ignore_hp and read.has_tag(\"HP\"):\n        HP_tag = int(read.get_tag(\"HP\"))\n\n    pe = PairedEnd(\n        bam_file.get_reference_name(read.reference_id),\n        read.reference_start,\n        read.reference_end,\n        read.is_reverse,\n        MI_tag,\n        HP_tag,\n    )\n\n    if pe.HP not in pairs:\n        pairs[pe.HP] = {}\n\n    if read.query_name not in pairs[pe.HP]:\n        pairs[pe.HP][read.query_name] = []\n\n    if pe.MI:\n        if pe.HP not in linked_reads:\n            linked_reads[pe.HP] = {}\n\n        if pe.MI not in linked_reads[pe.HP]:\n            linked_reads[pe.HP][pe.MI] = [[], []]\n        linked_reads[pe.HP][pe.MI][0].append(read.query_name)\n\n    pairs[pe.HP][read.query_name].append(pe)\n    pairs[pe.HP][read.query_name].sort(key=lambda x: x.pos.start)\n\n\n# }}}\n\n# {{{def sample_normal(max_depth, pairs, z):\ndef sample_normal(max_depth, pairs, z):\n    \"\"\"Downsamples paired-end reads \n    \n    Selects max_depth reads\n    Does not remove discordant pairs, those with insert distance greater\n    than z stdevs from mean\n\n    Returns downsampled pairs list\n    \"\"\"\n\n    sampled_pairs = {}\n    plus_minus_pairs = {}\n\n    if max_depth == 0:\n        return sampled_pairs\n\n    for read_name in pairs:\n        pair = pairs[read_name]\n        if len(pair) != 2:\n            continue\n        if pair[0].strand == True and pair[1].strand == False:\n            plus_minus_pairs[read_name] = pair\n 
       else:\n            sampled_pairs[read_name] = pair\n\n    if len(plus_minus_pairs) > max_depth:\n        lens = np.array(\n            [pair[1].pos.end - pair[0].pos.start for pair in plus_minus_pairs.values()]\n        )\n        mean = np.mean(lens)\n        stdev = np.std(lens)\n\n        inside_norm = {}\n\n        for read_name in pairs:\n            pair = pairs[read_name]\n            if len(pair) != 2:\n                continue\n            if pair[1].pos.end - pair[0].pos.start >= mean + z * stdev:\n                sampled_pairs[read_name] = pair\n            else:\n                inside_norm[read_name] = pair\n\n        if len(inside_norm) > max_depth:\n            for read_name in random.sample(list(inside_norm.keys()), max_depth):\n                sampled_pairs[read_name] = inside_norm[read_name]\n        else:\n            for read_name in inside_norm:\n                sampled_pairs[read_name] = inside_norm[read_name]\n    else:\n        for read_name in plus_minus_pairs:\n            sampled_pairs[read_name] = plus_minus_pairs[read_name]\n\n    return sampled_pairs\n\n\n# }}}\n\n# {{{def get_pairs_insert_sizes(pairs):\ndef get_pairs_insert_sizes(ranges, pairs):\n    \"\"\"Extracts the integer insert sizes for all pairs\n\n    Return list of integer insert sizes\n    \"\"\"\n    pair_insert_sizes = []\n\n    for hp in pairs:\n        for read_name in pairs[hp]:\n            if len(pairs[hp][read_name]) == 2:\n                size = get_pair_insert_size(ranges, pairs[hp][read_name])\n\n                if size:\n                    pair_insert_sizes.append(size)\n\n    return pair_insert_sizes\n\n\n# }}}\n\n# {{{def get_pair_insert_size(ranges, pair):\ndef get_pair_insert_size(ranges, pair):\n    \"\"\" Gives the outer distance\n    \"\"\"\n    first = pair[0]\n    second = pair[1]\n\n    # make sure both sides are in range\n    if (\n        get_range_hit(ranges, first.pos.chrm, first.pos.start) != None\n        or get_range_hit(ranges, 
first.pos.chrm, first.pos.end) != None\n    ) and (\n        get_range_hit(ranges, second.pos.chrm, second.pos.start) != None\n        or get_range_hit(ranges, second.pos.chrm, second.pos.end) != None\n    ):\n\n        if first.pos.chrm == second.pos.chrm:\n            return abs(second.pos.end - first.pos.start)\n        else:\n            return INTERCHROM_YAXIS\n    else:\n        return None\n\n\n# }}}\n\n# {{{ def get_pairs_plan(ranges, pairs, linked_plan=False):\ndef get_pairs_plan(ranges, pairs, linked_plan=False):\n    steps = []\n    max_event = 0\n\n    insert_sizes = []\n\n    for read_name in pairs:\n        pair = pairs[read_name]\n\n        plan = get_pair_plan(ranges, pair)\n\n        if plan:\n            insert_size, step = plan\n            insert_sizes.append(insert_size)\n            steps.append(step)\n\n    if len(insert_sizes) > 0:\n        max_event = max(insert_sizes)\n\n    plan = [max_event, steps]\n\n    return plan\n\n\n# }}}\n\n# {{{def get_pair_plan(ranges, pair, linked_plan=False):\ndef get_pair_plan(ranges, pair, linked_plan=False):\n    if pair == None or len(pair) != 2:\n        return None\n\n    first = pair[0]\n    second = pair[1]\n\n    # see if they are part of a linked read\n    if not linked_plan and (first.MI or second.MI):\n        return None\n\n    # make sure both ends are in the plotted region\n    first_s_hit = get_range_hit(ranges, first.pos.chrm, first.pos.start)\n    first_e_hit = get_range_hit(ranges, first.pos.chrm, first.pos.end)\n    second_s_hit = get_range_hit(ranges, second.pos.chrm, second.pos.start)\n    second_e_hit = get_range_hit(ranges, second.pos.chrm, second.pos.end)\n\n    if (first_s_hit == None and first_e_hit == None) or (\n        second_s_hit == None and second_e_hit == None\n    ):\n        return None\n\n    insert_size = get_pair_insert_size(ranges, pair)\n\n    first_hit = first_s_hit if first_s_hit != None else first_e_hit\n    second_hit = second_e_hit if second_e_hit != None else 
second_s_hit\n\n    start = genome_interval(\n        first.pos.chrm,\n        max(first.pos.start, ranges[first_hit].start),\n        max(first.pos.start, ranges[first_hit].start),\n    )\n\n    end = genome_interval(\n        second.pos.chrm,\n        min(second.pos.end, ranges[second_hit].end),\n        min(second.pos.end, ranges[second_hit].end),\n    )\n\n    step = plan_step(start, end, \"PAIREND\")\n\n    event_type = get_pair_event_type(pair)\n    step.info = {\"TYPE\": event_type, \"INSERTSIZE\": insert_size}\n\n    return insert_size, step\n\n\n# }}}\n\n# {{{def get_pair_event_type(pe_read):\ndef get_pair_event_type(pe_read):\n    \"\"\"Decide what type of event the read supports (del/normal, dup, inv)\n    \"\"\"\n    event_by_strand = {\n        (True, False): \"Deletion/Normal\",\n        (False, True): \"Duplication\",\n        (False, False): \"Inversion\",\n        (True, True): \"Inversion\",\n    }\n    event_type = event_by_strand[pe_read[0].strand, pe_read[1].strand]\n    return event_type\n\n\n# }}}\n\ndef jitter(value, bounds: float = 0.1) -> float:\n    \"\"\"\n    Offset value by a random value within the defined bounds\n    \"\"\"\n    assert 0.0 <= bounds < 1.0\n    return value * (1 + bounds * random.uniform(-1, 1))\n\n\n# {{{def plot_pair_plan(ranges, step, ax):\ndef plot_pair_plan(ranges, step, ax, marker_size, jitter_bounds):\n    p = [\n        map_genome_point_to_range_points(\n            ranges, step.start_pos.chrm, step.start_pos.start\n        ),\n        map_genome_point_to_range_points(ranges, step.end_pos.chrm, step.end_pos.end),\n    ]\n\n    if None in p:\n        return False\n\n    # some points are far outside of the printable area, so we ignore them\n    if not points_in_window(p):\n        return False\n\n    READ_TYPES_USED[\"Paired-end read\"] = True\n\n    y = step.info[\"INSERTSIZE\"]\n\n    # Offset y-values using jitter to avoid overlapping lines\n    y = jitter(y, bounds=jitter_bounds)\n\n    event_type = 
step.info[\"TYPE\"]\n    READ_TYPES_USED[event_type] = True\n    color = COLORS[event_type]\n\n    # plot the individual pair\n    ax.plot(\n        p,\n        [y, y],\n        \"-\",\n        color=color,\n        alpha=0.25,\n        lw=0.5,\n        marker=\"s\",\n        markersize=marker_size,\n        zorder=10,\n    )\n\n    return True\n\n\n# }}}\n\n# {{{def plot_pairs(pairs,\ndef plot_pairs(\n    pairs, ax, ranges, curr_min_insert_size, curr_max_insert_size, marker_size, jitter_bounds,\n):\n    \"\"\"Plots all PairedEnd reads for the region\n    \"\"\"\n\n    plan = get_pairs_plan(ranges, pairs)\n\n    if not plan:\n        [curr_min_insert_size, curr_max_insert_size]\n\n    max_event, steps = plan\n\n    for step in steps:\n        plot_pair_plan(ranges, step, ax, marker_size, jitter_bounds)\n\n    if not curr_min_insert_size or curr_min_insert_size > max_event:\n        curr_min_insert_size = max_event\n    if not curr_max_insert_size or curr_max_insert_size < max_event:\n        curr_max_insert_size = max_event\n\n    return [curr_min_insert_size, curr_max_insert_size]\n\n\n# }}}\n\n##Split Read methods\n# {{{class SplitRead:\nclass SplitRead:\n    \"\"\"container of split read info\n\n    Contains start(int), end(int), strand(bool True=forward), query\n    position (int), MI (int molecular identifier), HP (int haplotype)\n    \"\"\"\n\n    def __init__(self, chrm, start, end, strand, query_pos, MI_tag=None, HP_tag=None):\n        \"\"\"Create SplitRead instance\n\n        Genomic interval is defined by start, end, and query_pos integers\n        Strand is opposite of is_reverse\n        Molecular identifier and Haplotype are integers if present, else\n        False\n        \"\"\"\n        self.pos = genome_interval(chrm, start, end)\n        self.strand = strand\n        self.query_pos = query_pos\n        # molecular identifier - linked reads only\n        self.MI = None\n        # haplotype - phased reads only\n        self.HP = 0\n\n        if 
MI_tag:\n            self.MI = MI_tag\n        if HP_tag:\n            self.HP = HP_tag\n\n    def __repr__(self):\n        return \"SplitRead(%s,%s,%s,%s,%s,%s,%s)\" % (\n            self.pos.chrm,\n            self.pos.start,\n            self.pos.end,\n            self.strand,\n            self.query_pos,\n            self.MI,\n            self.HP,\n        )\n\n\n# }}}\n\n# {{{def calc_query_pos_from_cigar(cigar, strand):\ndef calc_query_pos_from_cigar(cigar, strand):\n    \"\"\"Uses the CIGAR string to determine the query position of a read\n\n    The cigar arg is a string like the following: 86M65S\n    The strand arg is a boolean, True for forward strand and False for\n    reverse\n\n    Returns pair of ints for query start, end positions\n    \"\"\"\n\n    cigar_ops = [[int(op[0]), op[1]] for op in re.findall(\"(\\d+)([A-Za-z])\", cigar)]\n\n    order_ops = cigar_ops\n    if not strand:  # - strand\n        order_ops = order_ops[::-1]\n\n    qs_pos = 0\n    qe_pos = 0\n    q_len = 0\n\n    for op_position in range(len(cigar_ops)):\n        op_len = cigar_ops[op_position][0]\n        op_type = cigar_ops[op_position][1]\n\n        if op_position == 0 and (op_type == \"H\" or op_type == \"S\"):\n            qs_pos += op_len\n            qe_pos += op_len\n            q_len += op_len\n        elif op_type == \"H\" or op_type == \"S\":\n            q_len += op_len\n        elif op_type == \"M\" or op_type == \"I\" or op_type == \"X\":\n            qe_pos += op_len\n            q_len += op_len\n\n    return qs_pos, qe_pos\n\n\n# }}}\n\n# {{{def add_split(read, splits, bam_file, linked_reads):\ndef add_split(read, splits, bam_file, linked_reads, ignore_hp):\n    \"\"\"adds a (primary, non-supplementary) read to the splits list\n\n    Pysam read is added as simpified SplitRead instance to splits\n    Also added to linked_reads list if there is an associated MI tag\n    \"\"\"\n    if read.is_secondary:\n        return\n    if read.is_supplementary:\n        return\n 
   if not read.has_tag(\"SA\"):\n        return\n\n    qs_pos, qe_pos = calc_query_pos_from_cigar(read.cigarstring, (not read.is_reverse))\n\n    HP_tag = False\n    MI_tag = False\n    if read.has_tag(\"MI\"):\n        MI_tag = int(read.get_tag(\"MI\"))\n\n    if not ignore_hp and read.has_tag(\"HP\"):\n        HP_tag = int(read.get_tag(\"HP\"))\n    sr = SplitRead(\n        bam_file.get_reference_name(read.reference_id),\n        read.reference_start,\n        read.reference_end,\n        not (read.is_reverse),\n        qs_pos,\n        MI_tag,\n        HP_tag,\n    )\n\n    if sr.MI:\n        if sr.HP not in linked_reads:\n            linked_reads[sr.HP] = {}\n        if sr.MI not in linked_reads[sr.HP]:\n            linked_reads[sr.HP][sr.MI] = [[], []]\n        linked_reads[sr.HP][sr.MI][1].append(read.query_name)\n\n    if sr.HP not in splits:\n        splits[sr.HP] = {}\n\n    splits[sr.HP][read.query_name] = [sr]\n\n    for sa in read.get_tag(\"SA\").split(\";\"):\n        if len(sa) == 0:\n            continue\n        A = sa.split(\",\")\n        chrm = A[0]\n        pos = int(A[1])\n        strand = A[2] == \"+\"\n        cigar = A[3]\n        #mapq and nm are never used, annotating this for code readability \n        mapq = int(A[4])\n        nm = int(A[5])\n        qs_pos, qe_pos = calc_query_pos_from_cigar(cigar, strand)\n        splits[sr.HP][read.query_name].append(\n            SplitRead(chrm, pos, pos + qe_pos, strand, qs_pos)\n        )\n\n    if len(splits[sr.HP][read.query_name]) == 1:\n        del splits[sr.HP][read.query_name]\n    else:\n        splits[sr.HP][read.query_name].sort(key=lambda x: x.pos.start)\n\n\n# }}}\n\n\n# {{{def get_split_plan(ranges, split):\ndef get_split_plan(ranges, split, linked_plan=False):\n    \"\"\"\n    There can be 2 or more alignments in a split. 
Plot only those that are in a\n    range, and set the insert size to be the largest gap\n\n    A split read acts like a long read, so we will covert the split read\n    to a long read, then convert the long read plan back to a split read plan\n    \"\"\"\n\n    alignments = []\n    for s in split:\n        # see if they are part of a linked read\n        if not linked_plan and (s.MI):\n            return None\n        alignment = Alignment(s.pos.chrm, s.pos.start, s.pos.end, s.strand, s.query_pos)\n        alignments.append(alignment)\n\n    long_read = LongRead(alignments)\n    long_reads = {}\n    long_reads[\"convert\"] = [long_read]\n    plan = get_long_read_plan(\"convert\", long_reads, ranges)\n\n    if not plan:\n        return None\n\n    max_gap, lr_steps = plan\n\n    if len(lr_steps) < 3:\n        return None\n\n    sr_steps = []\n\n    # a split read will include 3 long read steps, align, event, align\n    for i in range(0, len(lr_steps), 2):\n        if i + 2 > len(lr_steps):\n            break\n        if (\n            lr_steps[i].info[\"TYPE\"] == \"Align\"\n            and lr_steps[i + 1].info[\"TYPE\"] != \"Align\"\n            and lr_steps[i + 2].info[\"TYPE\"] == \"Align\"\n        ):\n            start = genome_interval(\n                lr_steps[i].end_pos.chrm,\n                lr_steps[i].end_pos.end,\n                lr_steps[i].end_pos.end,\n            )\n            end = genome_interval(\n                lr_steps[i + 2].start_pos.chrm,\n                lr_steps[i + 2].start_pos.start,\n                lr_steps[i + 2].start_pos.start,\n            )\n            sr_steps.append(\n                plan_step(\n                    start,\n                    end,\n                    \"SPLITREAD\",\n                    info={\"TYPE\": lr_steps[i + 1].info[\"TYPE\"], \"INSERTSIZE\": max_gap},\n                )\n            )\n    return max_gap, sr_steps\n\n\n# }}}\n\n# {{{def get_splits_plan(ranges, splits, linked_plan=False):\ndef 
get_splits_plan(ranges, splits, linked_plan=False):\n    steps = []\n    max_event = 0\n\n    insert_sizes = []\n\n    for read_name in splits:\n        split = splits[read_name]\n\n        plan = get_split_plan(ranges, split)\n\n        if plan:\n            insert_size, step = plan\n            insert_sizes.append(insert_size)\n            steps += step\n\n    if len(insert_sizes) > 0:\n        max_event = max(insert_sizes)\n\n    plan = [max_event, steps]\n\n    return plan\n\n\n# }}}\n\n\n# {{{def plot_split(split, y, ax, ranges):\ndef plot_split_plan(ranges, step, ax, marker_size, jitter_bounds):\n    p = [\n        map_genome_point_to_range_points(\n            ranges, step.start_pos.chrm, step.start_pos.start\n        ),\n        map_genome_point_to_range_points(ranges, step.end_pos.chrm, step.end_pos.end),\n    ]\n\n    if None in p:\n        return False\n\n    # some points are far outside of the printable area, so we ignore them\n    if not points_in_window(p):\n        return False\n\n    READ_TYPES_USED[\"Split-read\"] = True\n\n    y = step.info[\"INSERTSIZE\"]\n\n    # Offset y-values using jitter to avoid overlapping lines\n    y = jitter(y, bounds=jitter_bounds)\n\n    event_type = step.info[\"TYPE\"]\n    READ_TYPES_USED[event_type] = True\n    color = COLORS[event_type]\n\n    ax.plot(\n        p,\n        [y, y],\n        \":\",\n        color=color,\n        alpha=0.25,\n        lw=1,\n        marker=\"o\",\n        markersize=marker_size,\n    )\n\n\n# }}}\n\n# {{{def plot_splits(splits,\ndef plot_splits(\n    splits, ax, ranges, curr_min_insert_size, curr_max_insert_size, marker_size, jitter_bounds,\n):\n    \"\"\"Plots all SplitReads for the region\n    \"\"\"\n    plan = get_splits_plan(ranges, splits)\n\n    if not plan:\n        [curr_min_insert_size, curr_max_insert_size]\n\n    max_event, steps = plan\n\n    for step in steps:\n        plot_split_plan(ranges, step, ax, marker_size, jitter_bounds)\n\n    if not curr_min_insert_size or 
curr_min_insert_size > max_event:\n        curr_min_insert_size = max_event\n    if not curr_max_insert_size or curr_max_insert_size < max_event:\n        curr_max_insert_size = max_event\n\n    return [curr_min_insert_size, curr_max_insert_size]\n\n\n# }}}\n\n##Long Read methods\n# {{{class Alignment:\nclass Alignment:\n    \"\"\"container of alignment info, from CIGAR string\n\n    Contains start(int), end(int), strand(bool True=forward), query\n    position (int)\n    \"\"\"\n\n    def __init__(self, chrm, start, end, strand, query_position):\n        \"\"\"Create Alignment instance\n\n        Genomic interval is defined by start, end, and query_pos integers\n        Strand is bool (True for forward)\n        \"\"\"\n        self.pos = genome_interval(chrm, start, end)\n        self.strand = strand\n        self.query_position = query_position\n\n    def __str__(self):\n        return \",\".join(\n            [\n                str(x)\n                for x in [\n                    self.pos.chrm,\n                    self.pos.start,\n                    self.pos.end,\n                    self.strand,\n                    self.query_position,\n                ]\n            ]\n        )\n\n    def __repr__(self):\n        return \"Alignment(%s,%s,%s,%s,%s)\" % (\n            self.pos.chrm,\n            self.pos.start,\n            self.pos.end,\n            self.strand,\n            self.query_position,\n        )\n\n\n# }}}\n\n# {{{class LongRead:\nclass LongRead:\n    \"\"\"container of LongRead info\n\n    Contains start(int), end(int), list of Alignments\n    \"\"\"\n\n    def __init__(self, alignments):\n        \"\"\"Create LongRead instance\n\n        Genomic interval is defined by start, end integers\n        List of Alignments set by parameter\n        \"\"\"\n        self.alignments = alignments\n\n    def __str__(self):\n        return \",\".join([str(x) for x in self.alignments])\n\n    def __repr__(self):\n        return \"LongRead(\" + str(self) + 
\")\"\n\n\n# }}}\n\n# {{{def get_alignments_from_cigar(chrm,\ndef get_alignments_from_cigar(chrm, curr_pos, strand, cigartuples, reverse=False):\n    \"\"\"Breaks CIGAR string into individual Aignments\n\n    Starting point within genome given by curr_pos and strand\n    Set of CIGAR operations and lengths as pairs passed in as cigartuples\n    Direction of alignment set to reverse with reverse boolean\n\n    Return list of Alignments\n    \"\"\"\n    alignments = []\n    q_pos = 0\n    if reverse:\n        cigartuples = cigartuples[::-1]\n\n    for op, length in cigartuples:\n        if op in [CIGAR_MAP[\"M\"], CIGAR_MAP[\"=\"], CIGAR_MAP[\"X\"]]:\n            alignments.append(\n                Alignment(chrm, curr_pos, curr_pos + length, strand, q_pos)\n            )\n            curr_pos += length\n            q_pos += length\n        elif op == CIGAR_MAP[\"I\"]:\n            q_pos += length\n        elif op == CIGAR_MAP[\"D\"]:\n            curr_pos += length\n        elif op == CIGAR_MAP[\"N\"]:\n            curr_pos += length\n        elif op == CIGAR_MAP[\"S\"]:\n            q_pos += length\n    return alignments\n\n\n# }}}\n\n# {{{def get_cigartuples_from_string(cigarstring):\ndef get_cigartuples_from_string(cigarstring):\n    \"\"\"Extracts operations,lengths as tuples from cigar string\"\n\n    Returns list of tuples of [operation,length]\n    \"\"\"\n    cigartuples = []\n    for match in re.findall(r\"(\\d+)([A-Z]{1})\", cigarstring):\n        length = int(match[0])\n        op = match[1]\n        cigartuples.append((CIGAR_MAP[op], length))\n\n    return cigartuples\n\n\n# }}}\n\n# {{{def merge_alignments(min_gap, alignments):\ndef merge_alignments(min_gap, alignments):\n    \"\"\"Combines previously identified alignments if close together\n    Alignments are combined if within min_gap distance\n    Returns list of Alignments\n    \"\"\"\n\n    merged_alignments = []\n\n    for alignment in alignments:\n        if len(merged_alignments) == 0:\n         
   merged_alignments.append(alignment)\n        else:\n            if (\n                alignment.pos.chrm == merged_alignments[-1].pos.chrm\n                and alignment.pos.start < merged_alignments[-1].pos.end + min_gap\n            ):\n                merged_alignments[-1].pos.end = alignment.pos.end\n            else:\n                merged_alignments.append(alignment)\n    return merged_alignments\n\n\n# }}}\n\n# {{{def add_long_reads(bam_file, read, long_reads, min_event_size):\ndef add_long_reads(bam_file, read, long_reads, min_event_size, ignore_hp):\n    \"\"\"Adds a (primary, non-supplementary, long) read to the long_reads list\n\n    Read added to long_reads if within the inteval defined by ranges\n    Alignments belonging to the LongRead instance combined if within the\n    min_event_size distance apart\n    \"\"\"\n    if read.is_supplementary or read.is_secondary:\n        return\n\n    hp = 0\n\n    if not ignore_hp and read.has_tag(\"HP\"):\n        hp = int(read.get_tag(\"HP\"))\n\n    alignments = get_alignments_from_cigar(\n        bam_file.get_reference_name(read.reference_id),\n        read.pos,\n        not read.is_reverse,\n        read.cigartuples,\n    )\n\n    min_gap = min_event_size\n    merged_alignments = merge_alignments(min_gap, alignments)\n\n    read_strand = not read.is_reverse\n\n    if read.has_tag(\"SA\"):\n        for sa in read.get_tag(\"SA\").split(\";\"):\n            if len(sa) == 0:\n                continue\n\n            rname, pos, strand, cigar, mapq, nm = sa.split(\",\")\n\n            sa_pos = int(pos)\n            sa_strand = strand == \"+\"\n            strand_match = read_strand != sa_strand\n            sa_cigartuples = get_cigartuples_from_string(cigar)\n            sa_alignments = get_alignments_from_cigar(\n                rname, sa_pos, sa_strand, sa_cigartuples, reverse=strand_match\n            )\n\n            sa_merged_alignments = merge_alignments(min_gap, sa_alignments)\n\n            if 
len(sa_merged_alignments) > 0:\n                merged_alignments += sa_merged_alignments\n\n    if hp not in long_reads:\n        long_reads[hp] = {}\n\n    if read.query_name not in long_reads[hp]:\n        long_reads[hp][read.query_name] = []\n\n    long_reads[hp][read.query_name].append(LongRead(merged_alignments))\n\n\n# }}}\n\n# {{{def add_align_step(alignment, steps, ranges):\ndef add_align_step(alignment, steps, ranges):\n    # alignment can span ranges\n    start_range_hit_i = get_range_hit(ranges, alignment.pos.chrm, alignment.pos.start)\n    end_range_hit_i = get_range_hit(ranges, alignment.pos.chrm, alignment.pos.end)\n\n    # neither end is in range, add nothing\n    if start_range_hit_i == None and end_range_hit_i == None:\n        return\n\n    # start is not in range, use end hit\n    if start_range_hit_i == None:\n        start = genome_interval(\n            alignment.pos.chrm,\n            max(alignment.pos.start, ranges[end_range_hit_i].start),\n            max(alignment.pos.start, ranges[end_range_hit_i].start),\n        )\n        end = genome_interval(\n            alignment.pos.chrm,\n            min(alignment.pos.end, ranges[end_range_hit_i].end),\n            min(alignment.pos.end, ranges[end_range_hit_i].end),\n        )\n        steps.append(plan_step(start, end, \"LONGREAD\", info={\"TYPE\": \"Align\"}))\n    # end is not in range, use start hit\n    elif end_range_hit_i == None:\n        start = genome_interval(\n            alignment.pos.chrm,\n            max(alignment.pos.start, ranges[start_range_hit_i].start),\n            max(alignment.pos.start, ranges[start_range_hit_i].start),\n        )\n        end = genome_interval(\n            alignment.pos.chrm,\n            min(alignment.pos.end, ranges[start_range_hit_i].end),\n            min(alignment.pos.end, ranges[start_range_hit_i].end),\n        )\n        steps.append(plan_step(start, end, \"LONGREAD\", info={\"TYPE\": \"Align\"}))\n    # both are in the same range\n    elif 
start_range_hit_i == end_range_hit_i:\n        start = genome_interval(\n            alignment.pos.chrm,\n            max(alignment.pos.start, ranges[start_range_hit_i].start),\n            max(alignment.pos.start, ranges[start_range_hit_i].start),\n        )\n        end = genome_interval(\n            alignment.pos.chrm,\n            min(alignment.pos.end, ranges[end_range_hit_i].end),\n            min(alignment.pos.end, ranges[end_range_hit_i].end),\n        )\n        steps.append(plan_step(start, end, \"LONGREAD\", info={\"TYPE\": \"Align\"}))\n    # in different ranges\n    else:\n        start_1 = genome_interval(\n            alignment.pos.chrm,\n            max(alignment.pos.start, ranges[start_range_hit_i].start),\n            max(alignment.pos.start, ranges[start_range_hit_i].start),\n        )\n        end_1 = genome_interval(\n            alignment.pos.chrm,\n            ranges[start_range_hit_i].end,\n            ranges[start_range_hit_i].end,\n        )\n        steps.append(plan_step(start_1, end_1, \"LONGREAD\", info={\"TYPE\": \"Align\"}))\n\n        start_2 = genome_interval(\n            alignment.pos.chrm,\n            ranges[end_range_hit_i].start,\n            ranges[end_range_hit_i].start,\n        )\n        end_2 = genome_interval(\n            alignment.pos.chrm,\n            min(alignment.pos.end, ranges[end_range_hit_i].end),\n            min(alignment.pos.end, ranges[end_range_hit_i].end),\n        )\n        steps.append(plan_step(start_2, end_2, \"LONGREAD\", info={\"TYPE\": \"Align\"}))\n\n\n# }}}\n\n# {{{def get_long_read_plan(read_name, long_reads, ranges):\ndef get_long_read_plan(read_name, long_reads, ranges):\n    \"\"\"Create a plan to render a long read\n\n    Plan consists of the largest event within the read \n        (used to determine the y-axis position of read)\n        and the alignment types for plotting each Alignment within \n        LongRead.alignments Align, Duplication, Deletion, Inversion,\n        Inversion,\n  
      InterChrmInversion, InterChrm\n\n    Returns plan\n    \"\"\"\n\n    alignments = []\n\n    # only keep alignments that intersect a range\n    seen = {}\n\n    if read_name not in long_reads:\n        logger.error(\"Read name {} not in list of long reads\".format(read_name))\n        sys.exit(1)\n\n    for long_read in long_reads[read_name]:\n        for alignment in long_read.alignments:\n            if alignment.query_position in seen:\n                continue\n            seen[alignment.query_position] = 1\n            # check to see if any part of this alignment overlaps a plot\n            # range\n            in_range = False\n            for r in ranges:\n                if r.intersect(alignment.pos) == 0:\n                    in_range = True\n            if in_range:\n                alignments.append(alignment)\n\n    if len(alignments) <= 0:\n        return None\n    alignments.sort(key=lambda x: x.query_position)\n\n    # we set the primary strand to be the one with the longest alignment\n    # this will affect which alignment is inverted. 
There are clearly edge\n    # cases here that we will need to address as we get more examples\n    # of inversions\n    longest_alignment = 0\n    longest_alignment_i = -1\n    for i in range(len(alignments)):\n        l = alignments[i].pos.end - alignments[i].pos.start\n        if longest_alignment < l:\n            longest_alignment = l\n            longest_alignment_i = i\n    primary_strand = alignments[longest_alignment_i].strand\n\n    steps = []\n    # long aglinments may spill over the edges, so we will clip that starts\n    curr = alignments[0]\n\n    add_align_step(curr, steps, ranges)\n\n    for i in range(1, len(alignments)):\n        last = alignments[i - 1]\n        curr = alignments[i]\n\n        # figure out what the event is\n\n        # INTER CHROM\n        if curr.pos.chrm != last.pos.chrm:\n            if curr.strand != last.strand:\n                start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)\n\n                end = genome_interval(curr.pos.chrm, curr.pos.end, curr.pos.end)\n\n                info = {\"TYPE\": \"InterChrmInversion\"}\n                steps.append(plan_step(start, end, \"LONGREAD\", info=info))\n            else:\n                start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)\n                end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)\n                info = {\"TYPE\": \"InterChrm\"}\n                steps.append(plan_step(start, end, \"LONGREAD\", info=info))\n\n            add_align_step(curr, steps, ranges)\n        # Inversion\n        elif curr.strand != last.strand:\n            # it is possible that we have a complex even that\n            # is an inverted Duplication\n            if curr.pos.start < last.pos.end:\n                start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)\n                end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)\n                info = {\"TYPE\": \"Deletion\"}\n                
steps.append(plan_step(start, end, \"LONGREAD\", info=info))\n            if curr.strand != primary_strand:\n                # last (primary) | curr\n                # +++++++++++++++|-------\n                #               ^.......^\n                #             end           end\n\n                # last (primary) | curr\n                # ---------------|+++++++\n                #               ^.......^\n                #             end           end\n\n                start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)\n                end = genome_interval(curr.pos.chrm, curr.pos.end, curr.pos.end)\n                info = {\"TYPE\": \"Inversion\"}\n                steps.append(plan_step(start, end, \"LONGREAD\", info=info))\n            else:\n                if curr.pos.start < last.pos.end:\n                    start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)\n                    end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)\n                    info = {\"TYPE\": \"Duplication\"}\n                    steps.append(plan_step(start, end, \"LONGREAD\", info=info))\n\n                # last   | curr (primary)\n                # +++++++|-------------\n                # ^.......^\n                # start   start\n\n                # last   | curr (primary)\n                # -------|+++++++++++++++\n                # ^.......^\n                # start   start\n\n                start = genome_interval(last.pos.chrm, last.pos.start, last.pos.start)\n                end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)\n                info = {\"TYPE\": \"Inversion\"}\n                steps.append(plan_step(start, end, \"LONGREAD\", info=info))\n\n            add_align_step(curr, steps, ranges)\n        # Duplication\n        elif curr.pos.start < last.pos.end:\n            start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)\n            end = genome_interval(curr.pos.chrm, 
curr.pos.start, curr.pos.start)\n            info = {\"TYPE\": \"Duplication\"}\n            steps.append(plan_step(start, end, \"LONGREAD\", info=info))\n            add_align_step(curr, steps, ranges)\n        # Deletion\n        else:\n            start = genome_interval(last.pos.chrm, last.pos.end, last.pos.end)\n            end = genome_interval(curr.pos.chrm, curr.pos.start, curr.pos.start)\n            info = {\"TYPE\": \"Deletion\"}\n            # steps.append(plan_step(start, end, 'LONGREAD', info=info))\n            steps.append(plan_step(start, end, \"LONGREAD\", info={\"TYPE\": \"Deletion\"}))\n            add_align_step(curr, steps, ranges)\n\n        # if either end is in a range, then add its gap to the list\n\n    max_gap = None\n\n    chrms = set([s.start_pos.chrm for s in steps] + [s.end_pos.chrm for s in steps])\n\n    # set interchrm dist to 5000\n    if len(chrms) > 1:\n        max_gap = INTERCHROM_YAXIS\n    else:\n        step_sizes = [\n            abs(step.end_pos.end - step.start_pos.start)\n            for step in steps\n            if step.info[\"TYPE\"] != \"Align\"\n            and get_range_hit(ranges, step.start_pos.chrm, step.start_pos.start) != None\n            and get_range_hit(ranges, step.end_pos.chrm, step.end_pos.end) != None\n        ]\n\n        max_gap = max(step_sizes) if len(step_sizes) > 0 else 0\n\n    plan = [max_gap, steps]\n\n    return plan\n\n\n# }}}\n\n\n##Variant methods\n# {{{def plot_variant(sv, sv_type, ax, ranges):\ndef plot_variant(sv, sv_type, ax, ranges):\n    \"\"\"Plots the variant bar at the top of the image\n\n    \"\"\"\n\n    r = [\n        map_genome_point_to_range_points(ranges, sv[0].chrm, sv[0].start),\n        map_genome_point_to_range_points(ranges, sv[-1].chrm, sv[-1].end),\n    ]\n\n    ax.plot(r, [0, 0], \"-\", color=\"black\", lw=8, solid_capstyle=\"butt\", alpha=0.5)\n\n    ax.set_xlim([0, 1])\n    ax.spines[\"top\"].set_visible(False)\n    ax.spines[\"bottom\"].set_visible(False)\n    
ax.spines[\"left\"].set_visible(False)\n    ax.spines[\"right\"].set_visible(False)\n    ax.tick_params(axis=\"x\", length=0)\n    ax.tick_params(axis=\"y\", length=0)\n    ax.set_xticklabels([])\n    ax.set_yticklabels([])\n\n    ## make SV title\n    sv_title = \"\"\n    if sv[0].chrm == sv[-1].chrm:\n        sv_size = float(sv[0].end) - float(sv[0].start)\n        if len(sv) > 1:\n            sv_size = abs(int(float(sv[0].end) - float(sv[-1].start)))\n        sv_size_unit = \"bp\"\n\n        if sv_size > 1000000:\n            sv_size = \"{0:0.2f}\".format(sv_size / 1000000.0)\n            sv_size_unit = \"mb\"\n        elif sv_size > 1000:\n            sv_size = \"{0:0.2f}\".format(sv_size / 1000.0)\n            sv_size_unit = \"kb\"\n\n        sv_title = str(sv_size) + \" \" + sv_size_unit + \" \" + sv_type\n    else:\n        sv_title = sv_type\n\n    ax.set_title(sv_title, fontsize=8)\n\n\n# }}}\n\n# {{{def plot_confidence_interval(chrm, breakpoint,ci, ax, ranges):\ndef plot_confidence_interval(chrm, breakpoint, ci, ax, ranges):\n    \"\"\"Plots a confidence interval on the variant bar\n    \"\"\"\n\n    r = [\n        map_genome_point_to_range_points(ranges, chrm, breakpoint - int(ci[0])),\n        map_genome_point_to_range_points(ranges, chrm, breakpoint + int(ci[1])),\n    ]\n    if None in r:\n        # confidence intervals are invalid\n        return\n\n    ax.plot(r, [0, 0], \"-\", color=\"black\", lw=0.5, alpha=1)\n    ax.axvline(r[0], color=\"black\", lw=0.5, alpha=1, ymin=0.40, ymax=0.60)\n    ax.axvline(r[1], color=\"black\", lw=0.5, alpha=1, ymin=0.40, ymax=0.60)\n\n    ax.set_xlim([0, 1])\n    ax.spines[\"top\"].set_visible(False)\n    ax.spines[\"bottom\"].set_visible(False)\n    ax.spines[\"left\"].set_visible(False)\n    ax.spines[\"right\"].set_visible(False)\n    ax.tick_params(axis=\"x\", length=0)\n    ax.tick_params(axis=\"y\", length=0)\n    ax.set_xticklabels([])\n    ax.set_yticklabels([])\n\n\n# }}}\n\n# {{{def 
create_variant_plot(grid,\ndef create_variant_plot(grid, ax_i, sv, sv_type, ranges, start_ci, end_ci):\n    \"\"\"Plots the pieces of the variant bar at the top, including bar and\n    confidence intervals \n    \"\"\"\n    ax = plt.subplot(grid[ax_i])\n    plot_variant(sv, sv_type, ax, ranges)\n    ax_i += 1\n    # plot confidence intervals if provided\n    if start_ci and start_ci != None:\n        plot_confidence_interval(sv[0].chrm, sv[0].start, start_ci, ax, ranges)\n    if end_ci and end_ci != None:\n        plot_confidence_interval(sv[-1].chrm, sv[-1].end, end_ci, ax, ranges)\n\n    # break the variant plot when we have multiple ranges\n    for i in range(1, len(ranges)):\n        ax.axvline(x=1.0 / len(ranges), color=\"white\", linewidth=5)\n        ax.text(\n            1.0 / len(ranges),\n            0,\n            \"...\",\n            fontsize=6,\n            fontdict=None,\n            horizontalalignment=\"center\",\n        )\n\n    return ax_i\n\n\n# }}}\n\n# Linked Reads methods\n# {{{ def get_linked_plan(ranges, pairs, splits, linked_reads, gem_name):\ndef get_linked_plan(ranges, pairs, splits, linked_reads, gem_name):\n    insert_sizes = []\n\n    gem_poss = [[] for i in range(len(ranges))]\n\n    linked_pair_steps = []\n    # collect all the pairs in a gem\n    for name in linked_reads[gem_name][0]:\n        if name in pairs and len(pairs[name]) == 2:\n            pair = pairs[name]\n            plan = get_pair_plan(ranges, pair, linked_plan=True)\n            if plan:\n                insert_size, step = plan\n                insert_sizes.append(insert_size)\n                linked_pair_steps.append(step)\n\n    # collect all the splits in a gem\n    linked_split_steps = []\n    for name in linked_reads[gem_name][1]:\n        if name in splits:\n            split = splits[name]\n            plan = get_split_plan(ranges, split, linked_plan=True)\n            if plan:\n                insert_size, steps = plan\n                
insert_sizes.append(insert_size)\n                linked_split_steps += steps\n\n    if len(linked_split_steps) == 0 and len(linked_pair_steps) == 0:\n        return None\n\n    for step in linked_split_steps + linked_pair_steps:\n        poss = [\n            (step.start_pos.chrm, step.start_pos.start),\n            (step.start_pos.chrm, step.start_pos.end),\n            (step.end_pos.chrm, step.end_pos.start),\n            (step.end_pos.chrm, step.end_pos.end),\n        ]\n        for pos in poss:\n            hit = get_range_hit(ranges, pos[0], pos[1])\n            if hit > -1:\n                gem_poss[hit].append(pos[1])\n\n    max_event_size = max(insert_sizes)\n\n    gem_steps = []\n\n    for i in range(len(ranges)):\n        if len(gem_poss[i]) == 0:\n            continue\n        start = genome_interval(ranges[i].chrm, min(gem_poss[i]), min(gem_poss[i]))\n        end = genome_interval(ranges[i].chrm, max(gem_poss[i]), max(gem_poss[i]))\n        gem_steps.append(plan_step(start, end, \"LINKED\"))\n\n    # if the gem extends beyond the range, then push the end pos to the\n    # end/begining of the range\n    if len(gem_steps) > 1:\n        gem_steps[0].end_pos.start = ranges[0].end\n        gem_steps[0].end_pos.end = ranges[0].end\n\n        gem_steps[1].start_pos.start = ranges[1].start\n        gem_steps[1].start_pos.end = ranges[1].start\n\n    info = {\n        \"INSERTSIZE\": max_event_size,\n        \"PAIR_STEPS\": linked_pair_steps,\n        \"SPLIT_STEPS\": linked_split_steps,\n    }\n\n    gem_steps[0].info = info\n\n    return max(insert_sizes), gem_steps\n\n\n# }}}\n\n# {{{ def plot_linked_reads(pairs,\ndef plot_linked_reads(\n    pairs,\n    splits,\n    linked_reads,\n    ax,\n    ranges,\n    curr_min_insert_size,\n    curr_max_insert_size,\n    marker_size,\n    jitter_bounds,\n):\n    \"\"\"Plots all LinkedReads for the region\n    \"\"\"\n    for linked_read in linked_reads:\n        plan = get_linked_plan(ranges, pairs, splits, 
linked_reads, linked_read)\n\n        if not plan:\n            continue\n\n        insert_size, steps = plan\n\n        insert_size = jitter(insert_size, bounds=jitter_bounds)\n\n        if not curr_min_insert_size or curr_min_insert_size > insert_size:\n            curr_min_insert_size = insert_size\n        if not curr_max_insert_size or curr_max_insert_size < insert_size:\n            curr_max_insert_size = insert_size\n\n        for step in steps:\n            p = [\n                map_genome_point_to_range_points(\n                    ranges, step.start_pos.chrm, step.start_pos.start\n                ),\n                map_genome_point_to_range_points(\n                    ranges, step.end_pos.chrm, step.end_pos.end\n                ),\n            ]\n            # ignore points outside window\n            if not points_in_window(p):\n                continue\n\n            READ_TYPES_USED[\"Linked read\"] = True\n\n            ax.plot(\n                p, [insert_size, insert_size], \"-\", color=\"green\", alpha=0.75, lw=0.25\n            )\n\n        for pair_step in steps[0].info[\"PAIR_STEPS\"]:\n            pair_step.info[\"INSERTSIZE\"] = insert_size\n            plot_pair_plan(ranges, pair_step, ax, marker_size, jitter_bounds)\n\n        for split_step in steps[0].info[\"SPLIT_STEPS\"]:\n            split_step.info[\"INSERTSIZE\"] = insert_size\n            plot_split_plan(ranges, split_step, ax, marker_size, jitter_bounds)\n\n    return [curr_min_insert_size, curr_max_insert_size]\n\n\n# }}}\n\n# {{{def plot_long_reads(long_reads,\ndef plot_long_reads(long_reads, ax, ranges, curr_min_insert_size, curr_max_insert_size, jitter_bounds):\n    \"\"\"Plots all LongReads for the region\n    \"\"\"\n\n    Path = mpath.Path\n\n    colors = {\n        \"Align\": \"orange\",\n        \"Deletion\": \"black\",\n        \"Inversion\": \"blue\",\n        \"Duplication\": \"red\",\n        \"InterChrm\": \"black\",\n        \"InterChrmInversion\": \"blue\",\n    
}\n\n    for read_name in long_reads:\n        long_read_plan = get_long_read_plan(read_name, long_reads, ranges)\n\n        if long_read_plan is None:\n            continue\n        max_gap = long_read_plan[0]\n        steps = long_read_plan[1]\n        for step in steps:\n\n            p = [\n                map_genome_point_to_range_points(\n                    ranges, step.start_pos.chrm, step.start_pos.start\n                ),\n                map_genome_point_to_range_points(\n                    ranges, step.end_pos.chrm, step.end_pos.end\n                ),\n            ]\n\n            # some points are far outside of the printable area, so we\n            # ignore them\n            if not points_in_window(p):\n                continue\n\n            READ_TYPES_USED[\"Aligned long read\"] = True\n\n            event_type = step.info[\"TYPE\"]\n            READ_TYPES_USED[event_type] = True\n\n            if event_type == \"Align\":\n                ax.plot(\n                    p,\n                    [max_gap, max_gap],\n                    \"-\",\n                    color=colors[event_type],\n                    alpha=0.25,\n                    lw=1,\n                )\n\n                curr_max_insert_size = max(curr_max_insert_size, max_gap)\n            else:\n                x1 = p[0]\n                x2 = p[1]\n                # get offset to bend the line up\n                max_gap_offset = max(jitter(max_gap * 1.1, bounds=jitter_bounds), max_gap)\n                pp = mpatches.PathPatch(\n                    Path(\n                        [\n                            (x1, max_gap),\n                            (x1, max_gap_offset),\n                            (x2, max_gap_offset),\n                            (x2, max_gap),\n                        ],\n                        [Path.MOVETO, Path.CURVE4, Path.CURVE4, Path.CURVE4],\n                    ),\n                    fc=\"none\",\n                    color=colors[event_type],\n        
            alpha=0.25,\n                    lw=1,\n                    ls=\":\",\n                )\n                ax.add_patch(pp)\n\n                # add some room for the bend line\n                curr_max_insert_size = max(curr_max_insert_size, max_gap_offset)\n\n    return [curr_min_insert_size, curr_max_insert_size]\n\n\n# }}}\n\n##Setup\n# {{{def pair(arg):\ndef pair(arg):\n    \"\"\"Defines behavior for ArgParse pairs \n\n    Pairs must be comma-separated list of two items\n    \"\"\"\n    try:\n        parsed_arg = [int(x) for x in arg.split(\",\")]\n        if len(parsed_arg) == 2:\n            return parsed_arg\n        else:\n            logger.error(\"Invalid number of pair values\")\n            sys.exit(1)\n    except Exception as e:\n        logger.error(\"Invalid pair values\")\n        print(e, file=sys.stderr)\n        sys.exit(1)\n\n\n# }}}\n\n# {{{def print_arguments(options):\ndef print_arguments(options):\n    \"\"\"Prints out the arguments to samplot as a json object\n\n    Used as metadata for PlotCritic\n    \"\"\"\n    if options.print_args or options.json_only:\n        import json\n\n        args_filename = os.path.splitext(options.output_file)[0] + \".json\"\n        args_info = {\n            \"titles\": options.titles if options.titles else \"None\",\n            \"reference\": options.reference if options.reference else \"None\",\n            \"bams\": options.bams,\n            \"output_file\": options.output_file,\n            \"start\": options.start,\n            \"end\": options.end,\n            \"chrom\": options.chrom,\n            \"window\": options.window,\n            \"max_depth\": options.max_depth if options.max_depth else \"None\",\n            \"sv_type\": options.sv_type,\n            \"transcript_file\": options.transcript_file\n            if options.transcript_file\n            else \"None\",\n        }\n        with open(args_filename, \"w\") as outfile:\n            json.dump(args_info, outfile)\n\n\n# 
}}}\n\n\n# {{{def setup_arguments():\ndef add_plot(parent_parser):\n    \"\"\"Defines the allowed arguments for plot function\n    \"\"\"\n    parser = parent_parser.add_parser(\n        \"plot\",\n        help=\"Plot an image of a genome region from \"\n        + \"CRAM/SAM alignments, \"\n        + \"optimized for structural variant call review\",\n    )\n\n    parser.add_argument(\n        \"-n\",\n        \"--titles\",\n        help=\"Space-delimited list of plot titles. \"\n        + \"Use quote marks to include spaces \"\n        + '(i.e. \"plot 1\" \"plot 2\")',\n        type=str,\n        nargs=\"+\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"-r\",\n        \"--reference\",\n        help=\"Reference file for CRAM, required if \" + \"CRAM files used\",\n        type=str,\n        required=False,\n    )\n\n    parser.add_argument(\n        \"-z\",\n        \"--z\",\n        type=int,\n        default=4,\n        help=\"Number of stdevs from the mean (default 4)\",\n        required=False,\n    )\n\n    def bam_file(bam):\n        if not os.path.isfile(bam):\n            parser.error(\"alignment file {} does not exist or is not a valid file\".format(bam))\n        options = [\"sam\", \"bam\", \"cram\"]\n        idx_options = [\"sai\", \"bai\", \"crai\", \"csi\"]\n        fields = os.path.splitext(bam)\n        ext = fields[1][1:].lower()\n        if ext not in options:\n            parser.error(\"alignment file {} is not in SAM/BAM/CRAM format\".format(bam))\n        idx_type = idx_options[options.index(ext)]\n        #try the type-specific index name\n        picard_bam = os.path.splitext(bam)[0]\n        if (not os.path.isfile(bam + \".\" + idx_type) and \n                not os.path.isfile(picard_bam + \".\" + idx_type)):\n            idx_type = idx_options[3]\n            #try the csi index name\n            if not os.path.isfile(bam + \".\" + idx_type):\n                parser.error(\"alignment file {} has no 
index\".format(bam))\n        return bam\n\n\n    parser.add_argument(\n        \"-b\",\n        \"--bams\",\n        type=bam_file,\n        nargs=\"+\",\n        help=\"Space-delimited list of BAM/CRAM file names\",\n        required=True,\n    )\n\n    parser.add_argument(\n        \"-o\", \n        \"--output_file\", \n        type=str, \n        help=\"Output file name/type. \"\n        +\"Defaults to {type}_{chrom}_{start}_{end}.png\",\n        required=False,\n    )\n    \n    parser.add_argument(\n        \"--output_dir\",\n        type=str,\n        default=\".\",\n        help=\"Output directory name. Defaults to working dir. \"\n        +\"Ignored if --output_file is set\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"-s\",\n        \"--start\",\n        type=int,\n        help=\"Start position of region/variant (add multiple for translocation/BND events)\",\n        action=\"append\",\n        required=True,\n    )\n\n    parser.add_argument(\n        \"-e\",\n        \"--end\",\n        type=int,\n        help=\"End position of region/variant (add multiple for translocation/BND events)\",\n        action=\"append\",\n        required=True,\n    )\n\n    parser.add_argument(\n        \"-c\",\n        \"--chrom\", type=str,\n        help=\"Chromosome (add multiple for translocation/BND events)\",\n        action=\"append\",\n        required=True\n    )\n\n    parser.add_argument(\n        \"-w\",\n        \"--window\",\n        type=int,\n        help=\"Window size (count of bases to include \" + \"in view), default(0.5 * len)\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"-d\",\n        \"--max_depth\",\n        type=int,\n        help=\"Max number of normal pairs to plot\",\n        default=1,\n        required=False,\n    )\n\n    parser.add_argument(\n        \"-t\",\n        \"--sv_type\",\n        type=str,\n        help=\"SV type. 
If omitted, plot is created \" + \"without variant bar\",\n        required=False,\n    )\n    \n    def gff_file(transcript_file):\n        if not os.path.isfile(transcript_file):\n            parser.error(\"transcript file {} does not exist or is not a valid file\".format(transcript_file))\n        options = [\"gff\", \"gff3\"]\n        fields = os.path.splitext(transcript_file)\n        ext = fields[1][1:]\n        if ext == \"gz\":\n            ext = os.path.splitext(fields[0])[1][1:]\n        ext = ext.lower()\n        if ext not in options:\n            parser.error(\"transcript file {} is not in GFF3 format\".format(transcript_file))\n\n        idx_file = transcript_file + \".tbi\"\n        if not os.path.isfile(idx_file):\n            idx_file = transcript_file + \".csi\"\n            if not os.path.isfile(idx_file):\n                parser.error(\"transcript file {} is missing .tbi/.csi index file\".format(transcript_file))\n        return transcript_file\n\n    parser.add_argument(\n        \"-T\", \"--transcript_file\",\n        help=\"GFF3 of transcripts\",\n        required=False,\n        type=gff_file,\n    )\n\n    parser.add_argument(\n        \"--transcript_filename\",\n        help=\"Name for transcript track\",\n        required=False,\n        type=str,\n    )\n    \n    parser.add_argument(\n        \"--max_coverage_points\",\n        help=\"number of points to plot in coverage axis (downsampled from region size for speed)\",\n        required=False,\n        type=int,\n        default=1000,\n    )\n\n    def bed_file(annotation_file):\n        if not os.path.isfile(annotation_file):\n            parser.error(\"annotation file {} does not exist or is not a valid file\".format(annotation_file))\n        fields = os.path.splitext(annotation_file)\n        ext = fields[1][1:]\n        if ext == \"gz\":\n            ext = os.path.splitext(fields[0])[1][1:]\n        ext = ext.lower()\n        if ext != \"bed\":\n            
parser.error(\"annotation file {} is not in BED format\".format(annotation_file))\n\n        idx_file = annotation_file + \".tbi\"\n        if not os.path.isfile(idx_file):\n            idx_file = annotation_file + \".csi\"\n            if not os.path.isfile(idx_file):\n                parser.error(\"annotation file {} is missing .tbi index file\".format(annotation_file))\n        return annotation_file\n\n    parser.add_argument(\n        \"-A\",\n        \"--annotation_files\",\n        type=bed_file,\n        nargs=\"+\",\n        help=\"Space-delimited list of bed.gz tabixed \"\n        + \"files of annotations (such as repeats, \"\n        + \"mappability, etc.)\",\n        required=False,\n    )\n    \n    parser.add_argument(\n        \"--annotation_filenames\",\n        type=str,\n        nargs=\"+\",\n        help=\"Space-delimited list of names for the tracks in --annotation_files\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--coverage_tracktype\",\n        type=str,\n        help=\"type of track to use for low MAPQ \" + \"coverage plot.\",\n        choices=[\"stack\", \"superimpose\", \"none\"],\n        default=\"stack\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"-a\",\n        \"--print_args\",\n        action=\"store_true\",\n        default=False,\n        help=\"Print commandline arguments to a json file, useful with PlotCritic\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"-H\", \"--plot_height\", type=int, help=\"Plot height\", required=False\n    )\n\n    parser.add_argument(\n        \"-W\", \"--plot_width\", type=int, help=\"Plot width\", required=False\n    )\n\n    parser.add_argument(\n        \"-q\",\n        \"--include_mqual\",\n        type=int,\n        help=\"Min mapping quality of reads to be included in plot (default 1)\",\n        default=1,\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--separate_mqual\",\n        
type=int,\n        help=\"coverage from reads with MAPQ <= separate_mqual \"\n        + \"plotted in lighter grey. To disable, \"\n        + \"pass in negative value\",\n        default=0,\n        required=False,\n    )\n\n    parser.add_argument(\n        \"-j\",\n        \"--json_only\",\n        action=\"store_true\",\n        default=False,\n        help=\"Create only the json file, not the \" + \"image plot\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--start_ci\",\n        help=\"confidence intervals of SV first \"\n        + \"breakpoint (distance from the \"\n        + \"breakpoint). Must be a \"\n        + \"comma-separated pair of ints (i.e. 20,40)\",\n        type=pair,\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--end_ci\",\n        help=\"confidence intervals of SV end \"\n        + \"breakpoint (distance from the \"\n        + \"breakpoint). Must be a \"\n        + \"comma-separated pair of ints (i.e. 20,40)\",\n        type=pair,\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--long_read\",\n        type=int,\n        default=1000,\n        help=\"Min length of a read to be treated as a \" + \"long-read (default 1000)\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--ignore_hp\",\n        action=\"store_true\",\n        help=\"Choose to ignore HP tag in alignment files\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--min_event_size\",\n        type=int,\n        default=20,\n        help=\"Min size of an event in long-read \" + \"CIGAR to include (default 20)\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--xaxis_label_fontsize\",\n        type=int,\n        default=6,\n        help=\"Font size for X-axis labels (default 6)\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--yaxis_label_fontsize\",\n        type=int,\n        default=6,\n        help=\"Font size for 
Y-axis labels (default 6)\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--legend_fontsize\",\n        type=int,\n        default=6,\n        help=\"Font size for legend labels (default 6)\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--annotation_fontsize\",\n        type=int,\n        default=6,\n        help=\"Font size for annotation labels (default 6)\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--hide_annotation_labels\",\n        action=\"store_true\",\n        default=False,\n        help=\"Hide the label (fourth column text) \"\n        + \"from annotation files, useful for regions \"\n        + \"with many annotations\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--coverage_only\",\n        action=\"store_true\",\n        default=False,\n        help=\"Hide all reads and show only coverage\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--max_coverage\",\n        default=0,\n        type=int,\n        help=\"apply a maximum coverage cutoff. Unlimited by default\",\n    )\n\n    parser.add_argument(\n        \"--same_yaxis_scales\",\n        action=\"store_true\",\n        default=False,\n        help=\"Set the scales of the Y axes to the \" + \"max of all\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--marker_size\",\n        type=int,\n        default=3,\n        help=\"Size of marks on pairs and splits (default 3)\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--jitter\",\n        type=float,\n        nargs=\"?\",\n        const=0.08,\n        default=0.0,\n        help=\"Add uniform random noise to insert sizes. This can be helpful \"\n             \"to resolve overlapping entries. 
Either a custom value (<1.0) is \"\n             \"supplied or %(const)s will be used.\"\n    )\n    parser.add_argument(\n        \"--dpi\",\n        type=int,\n        default=300,\n        help=\"Dots per inches (pixel count, default 300)\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--annotation_scalar\",\n        type=float,\n        default=.3,\n        help=\"scaling factor for the optional annotation/trascript tracks\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--zoom\",\n        type=int,\n        default=500000,\n        help=\"Only show +- zoom amount around breakpoints, \"\n            +\"much faster for large regions. \"\n            +\"Ignored if region smaller than --zoom (default 500000)\",\n        required=False,\n    )\n\n    parser.add_argument(\n        \"--debug\",\n        type=str,\n        help=\"Print debug statements\",\n        required=False\n    )\n    parser.add_argument(\n        \"--random-seed\",\n        type=int,\n        default=9999,\n        help=SUPPRESS,\n    )\n    parser.set_defaults(func=plot)\n\n\n# }}}\n\n# {{{def estimate_fragment_len(bam)\ndef estimate_fragment_len(bam, reference):\n    try:\n        if not reference:\n            bam_file = pysam.AlignmentFile(bam, \"rb\")\n        else:\n            bam_file = pysam.AlignmentFile(bam, \"rc\", reference_filename=reference)\n    except Exception as err:\n        logger.error(\"Error opening file {}\".format(bam_file))\n        print(err, file=sys.stderr)\n        sys.exit(1)\n\n    frag_lens = []\n\n    for i, read in enumerate(bam_file):\n        if i >= 10000:\n            break\n        frag_lens.append(abs(read.tlen))\n    if len(frag_lens) >= 5000:\n        return np.median(frag_lens)\n    else:\n        logger.warning(\n            \"Insufficient reads for fragment length estimate.\\nContinuing with unmodified window size\"\n        )\n        return 0\n\n\n# {{{def set_plot_dimensions(sv,\ndef 
set_plot_dimensions(\n    sv,\n    sv_type,\n    arg_plot_height,\n    arg_plot_width,\n    bams,\n    reference,\n    annotation_files,\n    transcript_file,\n    arg_window,\n    zoom,\n):\n    \"\"\"Chooses appropriate dimensions for the plot\n\n    Includes the number of samples, whether a variant type is included, and\n    any annotations in height. Includes the start, end, and window argument\n    in width If height and width are chosen by user, these are used instead\n\n    Return plot height, width, and window as integers\n    \"\"\"\n\n    plot_height = 5\n    plot_width = 8\n    if arg_plot_height:\n        plot_height = arg_plot_height\n    else:\n        num_subplots = len(bams)\n        if annotation_files:\n            num_subplots += 0.3 * len(annotation_files)\n        if transcript_file:\n            num_subplots += 0.3\n        plot_height = 2 + num_subplots\n\n    if arg_plot_width:\n        plot_width = arg_plot_width\n\n    window = 0\n    ranges = []\n    if arg_window:\n        window = arg_window\n\n    \"\"\" \n    Several things determine the window size. 
\n    1) SV is not given, window = 0\n    2) SV is given\n        1) it is directly set\n        2) it is not directly set\n           2.1) single interval SV\n           2.2) zoom set\n           2.3) 2-interval SV\n    \"\"\"\n    # if an SV type is given, then expand the window around its bounds\n    if sv_type:\n        # if the sv has one interval then set the window proportional\n        # to sv size and set one range\n        if len(sv) == 1:\n            if arg_window:\n                window = arg_window\n            else:\n                window = int((sv[0].end - sv[0].start) / 2)\n                frag_len = estimate_fragment_len(bams[0], reference)\n\n                if (0 < frag_len) and (window < 1.5 * frag_len):\n                    old_window = window\n                    window = int(1.5 * frag_len)\n                    logger.warning(\n                        \"Window size is under 1.5x the estimated fragment length \"\n                        + \"and will be resized to {}. 
Rerun with -w {} to override\".format(\n                            window, old_window\n                        )\n                    )\n\n            ranges = [\n                genome_interval(\n                    sv[0].chrm, max(0, sv[0].start - window), sv[0].end + window\n                )\n            ]\n\n            # if region is larger than zoom, set window to zoom and set two ranges\n            if window >= zoom:\n                window = zoom\n                ranges = [\n                    genome_interval(\n                        sv[0].chrm,\n                        max(0, sv[0].start - window),\n                        sv[0].start + window,\n                    ),\n                    genome_interval(\n                        sv[0].chrm, max(0, sv[0].end - window), sv[0].end + window\n                    ),\n                ]\n        elif len(sv) == 2:\n            if arg_window:\n                window = arg_window\n            elif zoom:\n                window = zoom\n            else:\n                window = 1000\n\n            ranges = [\n                genome_interval(\n                    sv[0].chrm, max(0, sv[0].start - window), sv[0].start + window\n                ),\n                genome_interval(\n                    sv[1].chrm, max(0, sv[1].end - window), sv[1].end + window\n                ),\n            ]\n        else:\n            logger.error(\"{} genome splits are not supported\".format(str(len(sv))))\n            sys.exit(1)\n    else:\n        ranges = [genome_interval(sv[0].chrm, sv[0].start, sv[0].end)]\n\n    return plot_height, plot_width, window, ranges\n\n\n# }}}\n\n# {{{def get_read_data(ranges,\ndef get_read_data(\n    ranges,\n    bams,\n    reference,\n    separate_mqual,\n    include_mqual,\n    coverage_only,\n    long_read_length,\n    min_event_size,\n    same_yaxis_scales,\n    max_depth,\n    z_score,\n    ignore_hp,\n):\n    \"\"\"Reads alignment files to extract reads for the region\n\n    Region and 
alignment files given with chrom, start, end, bams.\n    If CRAM files are used, reference must be provided.\n    Reads with mapping quality below include_mqual will not be retrieved.\n    If coverage_only, reads are not kept and used only for checking\n    coverage. Reads longer than long_read_length will be treated as long\n    reads. Max coverage values will be set to the same value for all samples if\n    same_yaxis_scales. If max_depth, only max_depth reads will be retrieved,\n    although all will be included in coverage. If PairedEnd read insert size\n    is greater than z_score standard deviations from mean, read will be\n    treated as discordant.\n    \"\"\"\n\n    all_pairs = []\n    all_splits = []\n    all_coverages = []\n    all_long_reads = []\n    all_linked_reads = []\n\n    max_coverage = 0\n    haplotypes = [0, 1, 2]\n\n    for bam_file_name in bams:\n        bam_file = None\n        try:\n            if not reference:\n                bam_file = pysam.AlignmentFile(bam_file_name, \"rb\")\n            else:\n                bam_file = pysam.AlignmentFile(\n                    bam_file_name, \"rc\", reference_filename=reference\n                )\n        except Exception as err:\n            logger.error(\"This can be caused by issues with the alignment file. 
\"\n                    +\"Please make sure that it is sorted and indexed before trying again\")\n            print(err, file=sys.stderr)\n            sys.exit(1)\n\n        pairs = {}\n        splits = {}\n        long_reads = {}\n        coverage = {hp: {} for hp in haplotypes}\n        linked_reads = {}\n\n        for r in ranges:\n            # Define range boundaries\n            range_start = max(0, r.start - 1000)\n            range_end = r.end + 1000\n\n            try:\n                bam_iter = bam_file.fetch(r.chrm, range_start, range_end)\n            except ValueError:\n                chrm = r.chrm\n                if chrm[:3] == \"chr\":\n                    chrm = chrm[3:]\n                else:\n                    chrm = \"chr\" + chrm\n                bam_iter = bam_file.fetch(chrm, range_start, range_end)\n\n            chrm = strip_chr(r.chrm)\n            if chrm not in coverage[0]:\n                for hp in haplotypes:\n                    coverage[hp][chrm] = {}\n\n            # Define a zeros matrix to hold coverage value over the range for all\n            # haplotypes. If using separate_mqual the first column will hold the coverage\n            # for high quality reads and the second column low quality reads. 
Otherwise\n            # all coverage will be in the second column.\n            range_len = range_end - range_start\n            range_hp_coverage = {hp: np.zeros((range_len, 2), dtype=int) for hp in haplotypes}\n\n            for read in bam_iter:\n                if (\n                    read.is_qcfail\n                    or read.is_unmapped\n                    or read.is_duplicate\n                    or int(read.mapping_quality) < include_mqual\n                ):\n                    continue\n\n                if not coverage_only:\n                    if read.query_length >= long_read_length:\n                        add_long_reads(bam_file, read, long_reads, min_event_size, ignore_hp)\n                    else:\n                        add_pair_end(bam_file, read, pairs, linked_reads, ignore_hp)\n                        add_split(read, splits, bam_file, linked_reads, ignore_hp)\n\n                # Add read coverage to the specified haplotype and column\n                hp = 0 if ignore_hp or not read.has_tag(\"HP\") else read.get_tag(\"HP\")\n                column = 0 if separate_mqual and (read.mapping_quality > separate_mqual) else 1\n                add_coverage(read, range_hp_coverage[hp], range_start, column)\n\n            # Tally the coverage for each position and update the coverage dict.\n            for hp, range_coverage in range_hp_coverage.items():\n                # Skip empty haplotypes\n                if (range_coverage.sum() == 0).all():\n                    continue\n\n                for position in range(range_start, range_end):\n                    coverage[hp][chrm][position] = list(range_coverage[position-range_start])\n\n        if (\n            len(pairs) == 0\n            and len(splits) == 0\n            and len(long_reads) == 0\n            and len(linked_reads) == 0\n        ):\n            if not coverage_only:\n                logger.warning(\n                    \"No data returned from fetch in regions {} from 
{}\".format(\n                        \" \".join([str(r) for r in ranges]),\n                        bam_file\n                    )\n                )\n\n        # Update max_coverage and remove any empty haplotype dict from coverage dict\n        for hp in haplotypes:\n            hp_covered = False\n            for chrm in coverage[hp]:\n                sn_coverages = [\n                    v for values in coverage[hp][chrm].values() for v in values\n                ]\n                curr_max = 0\n                if len(sn_coverages) > 0:\n                    curr_max = np.percentile(sn_coverages, 99.5)\n                if curr_max > max_coverage:\n                    max_coverage = curr_max\n\n                if sum(sn_coverages) > 0:\n                    hp_covered = True\n\n            if not hp_covered:\n                del coverage[hp]\n\n        all_coverages.append(coverage)\n        all_pairs.append(pairs)\n        all_splits.append(splits)\n        all_long_reads.append(long_reads)\n        all_linked_reads.append(linked_reads)\n\n    read_data = {\n        \"all_pairs\": all_pairs,\n        \"all_splits\": all_splits,\n        \"all_coverages\": all_coverages,\n        \"all_long_reads\": all_long_reads,\n        \"all_linked_reads\": all_linked_reads,\n    }\n\n    # Sample +/- pairs in the normal insert size range\n    if max_depth:\n        read_data[\"all_pairs\"] = downsample_pairs(\n            max_depth, z_score, read_data[\"all_pairs\"]\n        )\n    if not same_yaxis_scales:\n        max_coverage = 0\n    return read_data, max_coverage\n\n\n# }}}\n\n# {{{def downsample_pairs(max_depth, z_score, all_pairs):\ndef downsample_pairs(max_depth, z_score, all_pairs):\n    \"\"\"Downsamples to keep only max_depth normal pairs from all PairedEnd\n    reads \n    \"\"\"\n    for bam_i in range(len(all_pairs)):\n        for hp_i in all_pairs[bam_i]:\n            all_pairs[bam_i][hp_i] = sample_normal(\n                max_depth, all_pairs[bam_i][hp_i], 
z_score\n            )\n    return all_pairs\n\n\n# }}}\n\n# {{{def set_haplotypes(curr_coverage):\ndef set_haplotypes(curr_coverage):\n    \"\"\"Creates a list to manage counting haplotypes for subplots\n    \"\"\"\n    hps = sorted(curr_coverage.keys(), reverse=True)\n    # if there are multiple haplotypes, must have 0,1,2\n    if len(hps) > 1 or (len(hps) == 1 and hps[0] != 0):\n        if 0 not in hps:\n            hps.append(0)\n        if 1 not in hps:\n            hps.append(1)\n        if 2 not in hps:\n            hps.append(2)\n    elif 0 not in hps:\n        hps.append(0)\n    hps.sort(reverse=True)\n    return hps\n\n\n# }}}\n\n# {{{def plot_samples(ranges,\ndef plot_samples(\n    ranges,\n    read_data,\n    grid,\n    ax_i,\n    number_of_axes,\n    bams,\n    chrom,\n    coverage_tracktype,\n    titles,\n    same_yaxis_scales,\n    xaxis_label_fontsize,\n    yaxis_label_fontsize,\n    annotation_files,\n    transcript_file,\n    max_coverage_points,\n    max_coverage,\n    marker_size,\n    coverage_only,\n    jitter_bounds,\n):\n\n    \"\"\"Plots all samples\n    \"\"\"\n    max_insert_size = 0\n\n    # If jitter > 0.08 is used we need to shift the ylim a bit to not hide any entries.\n    ylim_margin = max(1.02 + jitter_bounds, 1.10)\n    for i in range(len(bams)):\n        #ax is never used, annotating this for readability\n        ax = plt.subplot(grid[ax_i])\n        hps = set_haplotypes(read_data[\"all_coverages\"][i])\n        inner_axs = gridspec.GridSpecFromSubplotSpec(\n            len(hps), 1, subplot_spec=grid[ax_i], wspace=0.0, hspace=0.5\n        )\n        axs = {}\n        for j in range(len(hps)):\n            axs[j] = plt.subplot(inner_axs[hps[j]])\n\n        curr_min_insert_size = None\n        curr_max_insert_size = 0\n\n        cover_axs = {}\n        for hp in hps:\n            curr_ax = axs[hp]\n\n            curr_splits = []\n            if hp in read_data[\"all_splits\"][i]:\n                curr_splits = 
read_data[\"all_splits\"][i][hp]\n\n            curr_linked_reads = []\n            if hp in read_data[\"all_linked_reads\"][i]:\n                curr_linked_reads = read_data[\"all_linked_reads\"][i][hp]\n\n            curr_long_reads = []\n            if hp in read_data[\"all_long_reads\"][i]:\n                curr_long_reads = read_data[\"all_long_reads\"][i][hp]\n\n            curr_pairs = []\n            if hp in read_data[\"all_pairs\"][i]:\n                curr_pairs = read_data[\"all_pairs\"][i][hp]\n\n            curr_coverage = {}\n            if hp in read_data[\"all_coverages\"][i]:\n                curr_coverage = read_data[\"all_coverages\"][i][hp]\n\n            cover_ax = plot_coverage(\n                curr_coverage,\n                curr_ax,\n                ranges,\n                len(hps),\n                max_coverage,\n                coverage_tracktype,\n                yaxis_label_fontsize,\n                max_coverage_points,\n            )\n\n            if len(curr_linked_reads) > 0:\n                curr_min_insert_size, curr_max_insert_size = plot_linked_reads(\n                    curr_pairs,\n                    curr_splits,\n                    curr_linked_reads,\n                    curr_ax,\n                    ranges,\n                    curr_min_insert_size,\n                    curr_max_insert_size,\n                    marker_size,\n                    jitter_bounds\n                )\n            elif len(curr_long_reads) > 0:\n                curr_min_insert_size, curr_max_insert_size = plot_long_reads(\n                    curr_long_reads,\n                    curr_ax,\n                    ranges,\n                    curr_min_insert_size,\n                    curr_max_insert_size,\n                    jitter_bounds\n                )\n            else:\n                curr_min_insert_size, curr_max_insert_size = plot_pairs(\n                    curr_pairs,\n                    curr_ax,\n                    ranges,\n     
               curr_min_insert_size,\n                    curr_max_insert_size,\n                    marker_size,\n                    jitter_bounds\n                )\n\n                curr_min_insert_size, curr_max_insert_size = plot_splits(\n                    curr_splits,\n                    curr_ax,\n                    ranges,\n                    curr_min_insert_size,\n                    curr_max_insert_size,\n                    marker_size,\n                    jitter_bounds,\n                )\n\n            cover_axs[hp] = cover_ax\n            if curr_max_insert_size and (curr_max_insert_size > max_insert_size):\n                max_insert_size = curr_max_insert_size\n\n        # {{{ set axis parameters\n        # set the axis title to be either one passed in or filename\n        curr_ax = axs[hps[0]]\n        if titles and len(titles) == len(bams):\n            curr_ax.set_title(titles[i], fontsize=8, loc=\"left\")\n        else:\n            curr_ax.set_title(os.path.basename(bams[i]), fontsize=8, loc=\"left\")\n\n        if len(axs) > 1:\n            for j in axs:\n                curr_ax = axs[j]\n                fp = dict(size=8, backgroundcolor=\"white\")\n                text = \"HP: \"\n                if j == 0:\n                    text += \"Undef\"\n                else:\n                    text += str(j)\n                at = AnchoredText(\n                    text, loc=2, prop=fp, borderpad=0, pad=0, frameon=False\n                )\n                curr_ax.add_artist(at)\n\n        for j in hps:\n            curr_ax = axs[j]\n            curr_ax.set_xlim([0, 1])\n            if same_yaxis_scales:\n                curr_ax.set_ylim([0, max(1, max_insert_size * ylim_margin)])\n            else:\n                curr_ax.set_ylim([0, max(1, curr_max_insert_size * ylim_margin)])\n            curr_ax.spines[\"top\"].set_visible(False)\n            curr_ax.spines[\"bottom\"].set_visible(False)\n            
curr_ax.spines[\"left\"].set_visible(False)\n            curr_ax.spines[\"right\"].set_visible(False)\n            curr_ax.tick_params(axis=\"y\", labelsize=yaxis_label_fontsize)\n            # if there's one hp, 6 ticks fit. Otherwise, do 3\n            tick_count = 6 if len(hps) == 1 else 3\n            curr_ax.yaxis.set_major_locator(ticker.LinearLocator(tick_count))\n            curr_ax.ticklabel_format(useOffset=False, style='plain')\n            \n            curr_ax.tick_params(axis=\"both\", length=0)\n            curr_ax.set_xticklabels([])\n            if coverage_only:\n                curr_ax.yaxis.set_visible(False)\n\n        last_sample_num = number_of_axes - 1\n        if annotation_files:\n            last_sample_num -= len(annotation_files)\n        if transcript_file:\n            last_sample_num -= 1\n\n        if ax_i == last_sample_num:\n            curr_ax = axs[hps[-1]]\n\n            labels = []\n            if len(ranges) == 1:\n                labels = [\n                    int(ranges[0].start + l * (ranges[0].end - ranges[0].start))\n                    for l in curr_ax.xaxis.get_majorticklocs()\n                ]\n            elif len(ranges) == 2:\n                x_ticks = curr_ax.xaxis.get_majorticklocs()\n                labels_per_range = int(\n                    len(curr_ax.xaxis.get_majorticklocs()) / len(ranges)\n                )\n                labels = [\n                    int(ranges[0].start + l * (ranges[0].end - ranges[0].start))\n                    for l in x_ticks[:labels_per_range]\n                ]\n                try:\n                    labels += [\n                        int(ranges[-1].start + l * (ranges[-1].end - ranges[-1].start))\n                        for l in x_ticks[labels_per_range:]\n                    ]\n                except Exception as e:\n                    logger.error(labels_per_range)\n                    print(e, file=sys.stderr)\n                    sys.exit(1)\n            else:\n  
              logger.error(\"Ranges greater than 2 are not supported\")\n                sys.exit(1)\n            \n            curr_ax.set_xticklabels(labels, fontsize=xaxis_label_fontsize)\n            chrms = [x.chrm for x in ranges]\n            curr_ax.set_xlabel(\"Chromosomal position on \" + \"/\".join(chrms), fontsize=8)\n    \n        curr_ax = axs[hps[int(len(hps) / 2)]]\n        curr_ax.set_ylabel(\"Insert size\", fontsize=8)\n        cover_ax = cover_axs[hps[int(len(hps) / 2)]]\n        cover_ax.set_ylabel(\"Coverage\", fontsize=8)\n        # }}}\n\n        ax_i += 1\n    return ax_i\n\n\n# }}}\n\n# {{{def plot_legend(fig, legend_fontsize):\ndef plot_legend(fig, legend_fontsize, marker_size):\n    \"\"\"Plots the figure legend\n    \"\"\"\n    marker_colors = []\n    marker_labels = []\n    read_colors = {\n        \"Deletion/Normal\": \"black\",\n        \"Duplication\": \"red\",\n        \"Inversion\": \"blue\",\n        \"Aligned long read\": \"orange\",\n        \"Linked read\": \"green\",\n    }\n\n    for read_type in READ_TYPES_USED:\n        if read_type in read_colors:\n            color = read_colors[read_type]\n            flag = READ_TYPES_USED[read_type]\n            if flag:\n                marker_colors.append(color)\n                marker_labels.append(read_type)\n    legend_elements = []\n\n    for color in marker_colors:\n        legend_elements += [\n            plt.Line2D([0, 0], [0, 1], color=color, linestyle=\"-\", lw=1)\n        ]\n    if READ_TYPES_USED[\"Split-read\"]:\n        marker_labels.append(\"Split read\")\n        legend_elements += [\n            plt.Line2D(\n                [0, 0],\n                [0, 1],\n                markerfacecolor=\"None\",\n                markeredgecolor=\"grey\",\n                color=\"grey\",\n                marker=\"o\",\n                markersize=marker_size,\n                linestyle=\":\",\n                lw=1,\n            )\n        ]\n\n    if READ_TYPES_USED[\"Paired-end 
read\"]:\n        marker_labels.append(\"Paired-end read\")\n        legend_elements += [\n            plt.Line2D(\n                [0, 0],\n                [0, 1],\n                markerfacecolor=\"None\",\n                markeredgecolor=\"grey\",\n                color=\"grey\",\n                marker=\"s\",\n                markersize=marker_size,\n                linestyle=\"-\",\n                lw=1,\n            )\n        ]\n\n    fig.legend(\n        legend_elements, marker_labels, loc=1, fontsize=legend_fontsize, frameon=False\n    )\n\n\n# }}}\n\n# {{{def create_gridspec(bams, transcript_file, annotation_files, sv_type ):\ndef create_gridspec(bams, transcript_file, annotation_files, sv_type, read_data, annotation_scalar):\n    \"\"\"Helper function for creation of a correctly-sized GridSpec instance\n    \"\"\"\n    # give one axis to display each sample\n    num_ax = len(bams)\n\n    # add another if we are displaying the SV\n    if sv_type:\n        num_ax += 1\n\n    # add another if a annotation file is given\n    if transcript_file:\n        num_ax += 1\n\n    if annotation_files:\n        num_ax += len(annotation_files)\n\n    # set the relative sizes for each\n    ratios = []\n    if sv_type:\n        ratios = [1]\n\n    for i in range(len(bams)):\n        ratios.append(len(read_data[\"all_coverages\"][i]) * 3)\n        if len(read_data[\"all_coverages\"]) > 0:\n            ratios[-1] = 9\n\n    if annotation_files:\n        ratios += [annotation_scalar] * len(annotation_files)\n    if transcript_file:\n        ratios.append(annotation_scalar * 3)\n\n    return gridspec.GridSpec(num_ax, 1, height_ratios=ratios), num_ax\n\n\n# }}}\n\n##Annotations/Transcript methods\n# {{{def get_plot_annotation_plan(ranges, annotation_file):\ndef get_plot_annotation_plan(ranges, annotation_file):\n    annotation_plan = []\n    for r in ranges:\n        itr = get_tabix_iter(r.chrm, r.start, r.end, annotation_file)\n        if not (itr):\n            continue\n   
     for row in itr:\n            A = row.rstrip().split()\n            A[0] = strip_chr(A[0])\n            chrm = A[0]\n            start = int(A[1])\n            end = int(A[2])\n\n            interval = genome_interval(chrm, start, end)\n\n            # check to see if any part of this alignment overlaps a plot\n            # range\n            in_range = False\n            for r in ranges:\n                if r.intersect(interval) == 0:\n                    in_range = True\n            if in_range:\n                step = plan_step(\n                    genome_interval(chrm, start, start),\n                    genome_interval(chrm, end, end),\n                    \"ANNOTATION\",\n                )\n                if len(A) > 3:\n                    try:\n                        v = float(A[3])\n                        step.event = \"FLOAT_ANNOTATION\"\n                        step.info = v\n                    except ValueError:\n                        step.event = \"STRING_ANNOTATION\"\n                        step.info = A[3]\n\n                annotation_plan.append(step)\n    return annotation_plan\n\n\n# }}}\n\n# {{{def plot_annotations(annotation_files, chrom, start, end,\ndef plot_annotations(\n    annotation_files, annotation_filenames, ranges, hide_annotation_labels, annotation_fontsize, grid, ax_i, annotation_scalar,\n):\n    \"\"\"Plots annotation information from region \n    \"\"\"\n    if not annotation_filenames:\n        annotation_filenames = []\n        for annotation_file in annotation_files:\n            annotation_filenames.append(os.path.basename(annotation_file))\n\n    for i,annotation_file in enumerate(annotation_files):\n        annotation_plan = get_plot_annotation_plan(ranges, annotation_file)\n        annotation_filename = annotation_filenames[i]\n\n        if len(annotation_plan) == 0:\n            continue\n        ax = plt.subplot(grid[ax_i])\n        ax_i += 1\n\n        for step in annotation_plan:\n            p = [\n        
        map_genome_point_to_range_points(\n                    ranges, step.start_pos.chrm, step.start_pos.start\n                ),\n                map_genome_point_to_range_points(\n                    ranges, step.end_pos.chrm, step.end_pos.end\n                ),\n            ]\n            # if an annotation lies outside the window, its coordinate will be None, so we trim to the window\n            if p[0] is None:\n                p[0] = 0\n            if p[1] is None:\n                p[1] = 1\n\n            if step.event == \"ANNOTATION\":\n                ax.plot(p, [0, 0], \"-\", color=\"black\", lw=5)\n            elif step.event == \"FLOAT_ANNOTATION\":\n                ax.plot(p, [0, 0], \"-\", color=str(step.info), lw=15)\n            elif step.event == \"STRING_ANNOTATION\":\n                ax.plot(p, [0, 0], \"-\", color=\"black\", lw=15)\n                if step.info and not hide_annotation_labels:\n                    ax.text(\n                        p[0],\n                        0.06,\n                        step.info,\n                        color=\"black\",\n                        fontsize=annotation_fontsize,\n                    )\n            else:\n                logger.error(\"Unsupported annotation type: {}\".format(step.event))\n                sys.exit(1)\n\n            # set axis parameters\n            ax.set_xlim([0, 1])\n            ax.spines[\"top\"].set_visible(False)\n            ax.spines[\"bottom\"].set_visible(False)\n            ax.spines[\"left\"].set_visible(False)\n            ax.spines[\"right\"].set_visible(False)\n            ax.set_title(annotation_filename, fontsize=8, loc=\"left\")\n            ax.tick_params(axis=\"x\", length=0)\n            ax.tick_params(axis=\"y\", length=0)\n            ax.set_xticklabels([])\n            ax.set_yticklabels([])\n\n\n# }}}\n\n# {{{def get_interval_range_plan_start_end(ranges, interval):\ndef get_interval_range_plan_start_end(ranges, interval):\n\n    # transcript can 
span ranges\n    start_range_hit_i = get_range_hit(ranges, interval.chrm, interval.start)\n    end_range_hit_i = get_range_hit(ranges, interval.chrm, interval.end)\n\n    if start_range_hit_i is None and end_range_hit_i is None:\n        for i, range_item in enumerate(ranges):\n            if (\n                (strip_chr(range_item.chrm) == strip_chr(interval.chrm))\n                and (interval.start <= range_item.start <= interval.end)\n                and (interval.start <= range_item.end <= interval.end)\n            ):\n                start_range_hit_i = i\n                end_range_hit_i = i\n\n    start = None\n    end = None\n    # neither end is in range, add nothing\n    if start_range_hit_i == None and end_range_hit_i == None:\n        return None, None\n    # start is in, end is not\n    elif end_range_hit_i == None:\n        start = genome_interval(\n            interval.chrm,\n            max(interval.start, ranges[start_range_hit_i].start),\n            max(interval.start, ranges[start_range_hit_i].start),\n        )\n        end = genome_interval(\n            interval.chrm, ranges[start_range_hit_i].end, ranges[start_range_hit_i].end\n        )\n    # end is in, start is not\n    elif start_range_hit_i == None:\n        start = genome_interval(\n            interval.chrm, ranges[end_range_hit_i].start, ranges[end_range_hit_i].start\n        )\n        end = genome_interval(\n            interval.chrm,\n            min(interval.end, ranges[end_range_hit_i].end),\n            min(interval.end, ranges[end_range_hit_i].end),\n        )\n    # in same range or in different ranges\n    else:\n        start = genome_interval(\n            interval.chrm,\n            max(interval.start, ranges[start_range_hit_i].start),\n            max(interval.start, ranges[start_range_hit_i].start),\n        )\n        end = genome_interval(\n            interval.chrm,\n            min(interval.end, ranges[end_range_hit_i].end),\n            min(interval.end, 
ranges[end_range_hit_i].end),\n        )\n    return start, end\n\n\n# }}}\n\n# {{{def get_transcript_plan(ranges, transcript_file):\ndef get_transcript_plan(ranges, transcript_file):\n    genes = {}\n    transcripts = {}\n    cdss = {}\n\n    for r in ranges:\n        itr = get_tabix_iter(r.chrm, r.start, r.end, transcript_file)\n        if not itr:\n            continue\n        for row in itr:\n            gene_annotation = row.rstrip().split()\n\n            if gene_annotation[2] == \"gene\":\n                info = dict(\n                    [list(val.split(\"=\")) for val in gene_annotation[8].split(\";\")]\n                )\n\n                info[\"strand\"] = gene_annotation[6] == \"+\"\n\n                if \"Name\" not in info:\n                    continue\n\n                genes[info[\"Name\"]] = [\n                    genome_interval(\n                        gene_annotation[0],\n                        int(gene_annotation[3]),\n                        int(gene_annotation[4]),\n                    ),\n                    info,\n                ]\n            elif gene_annotation[2] in [\"transcript\", \"mRNA\"]:\n                info = dict(\n                    [list(val.split(\"=\")) for val in gene_annotation[8].split(\";\")]\n                )\n                info[\"strand\"] = gene_annotation[6] == \"+\"\n\n                if info[\"Parent\"] not in transcripts:\n                    transcripts[info[\"Parent\"]] = {}\n                transcripts[info[\"Parent\"]][info[\"ID\"]] = [\n                    genome_interval(\n                        gene_annotation[0],\n                        int(gene_annotation[3]),\n                        int(gene_annotation[4]),\n                    ),\n                    info,\n                ]\n            elif gene_annotation[2] == \"CDS\":\n                info = dict(\n                    [list(val.split(\"=\")) for val in gene_annotation[8].split(\";\")]\n                )\n                
info[\"strand\"] = gene_annotation[6] == \"+\"\n\n                if info[\"Parent\"] not in cdss:\n                    cdss[info[\"Parent\"]] = {}\n\n                if info[\"ID\"] not in cdss[info[\"Parent\"]]:\n                    cdss[info[\"Parent\"]][info[\"ID\"]] = []\n\n                cdss[info[\"Parent\"]][info[\"ID\"]].append(\n                    genome_interval(\n                        gene_annotation[0],\n                        int(gene_annotation[3]),\n                        int(gene_annotation[4]),\n                    )\n                )\n    transcript_plan = []\n    for gene in genes:\n        gene_id = genes[gene][1][\"ID\"]\n        if gene_id not in transcripts:\n            continue\n        for transcript in transcripts[gene_id]:\n            interval, info = transcripts[gene_id][transcript]\n            start, end = get_interval_range_plan_start_end(ranges, interval)\n\n            if not start or not end:\n                continue\n\n            step = plan_step(start, end, \"TRANSCRIPT\")\n            step.info = {\"Name\": None, \"Strand\": None, \"Exons\": None}\n            step.info[\"Name\"] = info[\"Name\"]\n            step.info[\"Strand\"] = info[\"strand\"]\n\n            exons = []\n            if transcript in cdss:\n                for cds in cdss[transcript]:\n                    for exon in cdss[transcript][cds]:\n                        start, end = get_interval_range_plan_start_end(ranges, exon)\n                        if start and end:\n                            exons.append(plan_step(start, end, \"EXON\"))\n            if len(exons) > 0:\n                step.info[\"Exons\"] = exons\n\n            transcript_plan.append(step)\n    return transcript_plan\n\n\n# }}}\n\n# {{{ def plot_transcript(transcript_file, chrom, start, end,\ndef plot_transcript(\n    transcript_file, transcript_filename, ranges, grid, annotation_fontsize, xaxis_label_fontsize, annotation_scalar,\n):\n    \"\"\"Plots a transcript file 
annotation\n    \"\"\"\n    if not transcript_filename:\n        transcript_filename = os.path.basename(transcript_file)\n    transcript_idx = 0\n    transcript_idx_max = 0\n    currect_transcript_end = 0\n    ax = plt.subplot(grid[-1])\n\n    transcript_plan = get_transcript_plan(ranges, transcript_file)\n\n    for step in transcript_plan:\n        p = [\n            map_genome_point_to_range_points(\n                ranges, step.start_pos.chrm, step.start_pos.start\n            ),\n            map_genome_point_to_range_points(\n                ranges, step.end_pos.chrm, step.end_pos.end\n            ),\n        ]\n        # if an annotation lies outside the window, its coordinate will be None, so we trim to the window\n        if p[0] is None:\n            p[0] = 0\n        if p[1] is None:\n            p[1] = 0\n\n        # Reset transcript index outside of current stack\n        if p[0] > currect_transcript_end:\n            transcript_idx = 0\n\n        currect_transcript_end = max(p[1], currect_transcript_end)\n\n        ax.plot(\n            p, [transcript_idx, transcript_idx], \"-\", color=\"cornflowerblue\", lw=0.5,\n            solid_capstyle=\"butt\",\n        )\n\n        # Print arrows throughout gene to show direction.\n        nr_arrows = 2 + int((p[1]-p[0])/0.02)\n        arrow_locs = np.linspace(p[0], p[1], nr_arrows)\n        arrowprops = dict(arrowstyle=\"->\", color=\"cornflowerblue\", lw=0.5,\n                          mutation_aspect=2, mutation_scale=3)\n\n        if step.info[\"Strand\"]:\n            # Add left-facing arrows\n            for arrow_loc in arrow_locs[1:]:\n                ax.annotate(\n                    \"\",\n                    xy=(arrow_loc, transcript_idx),\n                    xytext=(p[0], transcript_idx),\n                    arrowprops=arrowprops,\n                    annotation_clip=True,\n                )\n        else:\n            # Add right-facing arrows\n            for arrow_loc in arrow_locs[:-1]:\n        
        ax.annotate(\n                    \"\",\n                    xy=(arrow_loc, transcript_idx),\n                    xytext=(p[1], transcript_idx),\n                    arrowprops=arrowprops,\n                    annotation_clip=True,\n                )\n\n        if step.info[\"Exons\"]:\n            for exon in step.info[\"Exons\"]:\n                p_exon = [\n                    map_genome_point_to_range_points(\n                        ranges, exon.start_pos.chrm, exon.start_pos.start\n                    ),\n                    map_genome_point_to_range_points(\n                        ranges, exon.end_pos.chrm, exon.end_pos.end\n                    ),\n                ]\n                if not points_in_window(p_exon):\n                    continue\n\n                ax.plot(\n                    p_exon,\n                    [transcript_idx, transcript_idx],\n                    \"-\",\n                    color=\"cornflowerblue\",\n                    solid_capstyle=\"butt\",\n                    lw=4,\n                )\n\n        ax.text(\n            sum(p)/2,\n            transcript_idx + 0.1,\n            step.info[\"Name\"],\n            color=\"blue\",\n            fontsize=annotation_fontsize,\n            ha=\"center\"\n        )\n\n        transcript_idx += 1\n        transcript_idx_max = max(transcript_idx, transcript_idx_max)\n\n    # set axis parameters\n    ax.set_xlim([0, 1])\n    ax.set_ylim([transcript_idx_max * -0.1, 0.01+(transcript_idx_max * 1.01)])\n    ax.spines[\"top\"].set_visible(False)\n    ax.spines[\"bottom\"].set_visible(False)\n    ax.spines[\"left\"].set_visible(False)\n    ax.spines[\"right\"].set_visible(False)\n\n    ax.tick_params(axis=\"x\", length=0)\n    ax.tick_params(axis=\"y\", length=0)\n    ax.set_xticklabels([])\n    ax.set_yticklabels([])\n    ax.set_title(transcript_filename, fontsize=8, loc=\"left\")\n\n\n# }}}\n\n\n########################################################################\n# main 
block\n########################################################################\ndef plot(parser, options, extra_args=None):\n    \"\"\"\n    To support translocations, the SVs are specified as an array of \n    genome_interval. For now we let that array be size 1 or 2.\n    \"\"\"\n    if options.debug:\n        logger.setLevel(logging.DEBUG)\n    \n    random.seed(options.random_seed)\n    if options.print_args or options.json_only:\n        print_arguments(options)\n        if options.json_only:\n            sys.exit(0)\n\n    if options.output_file:\n        output_file = options.output_file\n    else:\n        if not os.path.isdir(options.output_dir):\n            os.mkdir(options.output_dir)\n        name_fields = [\n            options.sv_type,\n            \"-\".join(options.chrom),\n            \"-\".join([str(s) for s in options.start]),\n            \"-\".join([str(e) for e in options.end]),\n        ]\n        if options.sv_type:\n            output_file = os.path.join(options.output_dir, \"_\".join(name_fields))\n        else:\n            output_file = os.path.join(options.output_dir, \"_\".join(name_fields[1:]))\n    if (options.annotation_files \n            and options.annotation_filenames \n            and len(options.annotation_files) != len(options.annotation_filenames)):\n        logger.warning(\"annotation filenames do not match annotation files\")\n        sys.exit(1)\n\n    for bam in options.bams:\n        if \".cram\" in bam:\n            if not options.reference:\n                logger.error(\"Missing argument reference (-r/--reference) required for CRAM\")\n                sys.exit(1)\n\n    if len(options.chrom) != len(options.start) != len(options.end):\n        logger.error(\"The number of chromosomes ({}), starts ({}), and ends ({}) do not match.\".format(\n            len(options.chrom),\n            len(options.start),\n            len(options.end)\n            )\n        )\n        sys.exit()\n\n    sv = []\n    for i in 
range(len(options.chrom)):\n        options.chrom[i] = strip_chr(options.chrom[i])\n        sv.append(genome_interval(options.chrom[i], options.start[i], options.end[i]))\n    # set up plot\n    plot_height, plot_width, window, ranges = set_plot_dimensions(\n        sv,\n        options.sv_type,\n        options.plot_height,\n        options.plot_width,\n        options.bams,\n        options.reference,\n        options.annotation_files,\n        options.transcript_file,\n        options.window,\n        options.zoom,\n    )\n\n    marker_size = options.marker_size\n\n    # set up sub plots\n    matplotlib.rcParams.update({\"font.size\": 12})\n    fig = plt.figure(figsize=(plot_width, plot_height))\n\n    # read alignment data\n    read_data, max_coverage = get_read_data(\n        ranges,\n        options.bams,\n        options.reference,\n        options.separate_mqual,\n        options.include_mqual,\n        options.coverage_only,\n        options.long_read,\n        options.min_event_size,\n        options.same_yaxis_scales,\n        options.max_depth,\n        options.z,\n        options.ignore_hp,\n    )\n\n    # set up grid organizer\n    grid, num_ax = create_gridspec(\n        options.bams,\n        options.transcript_file,\n        options.annotation_files,\n        options.sv_type,\n        read_data,\n        options.annotation_scalar,\n    )\n    current_axis_idx = 0\n\n    # plot variant on top\n    if options.sv_type:\n        current_axis_idx = create_variant_plot(\n            grid,\n            current_axis_idx,\n            sv,\n            options.sv_type,\n            ranges,\n            options.start_ci,\n            options.end_ci,\n        )\n    if options.max_coverage:\n        max_coverage = options.max_coverage\n\n    # Plot each sample\n    current_axis_idx = plot_samples(\n        ranges,\n        read_data,\n        grid,\n        current_axis_idx,\n        num_ax,\n        options.bams,\n        options.chrom,\n        
options.coverage_tracktype,\n        options.titles,\n        options.same_yaxis_scales,\n        options.xaxis_label_fontsize,\n        options.yaxis_label_fontsize,\n        options.annotation_files,\n        options.transcript_file,\n        options.max_coverage_points,\n        max_coverage,\n        marker_size,\n        options.coverage_only,\n        options.jitter,\n    )\n    # plot legend\n    plot_legend(fig, options.legend_fontsize, marker_size)\n\n    # Plot annotation files\n    if options.annotation_files:\n        plot_annotations(\n            options.annotation_files,\n            options.annotation_filenames,\n            ranges,\n            options.hide_annotation_labels,\n            options.annotation_fontsize,\n            grid,\n            current_axis_idx,\n            options.annotation_scalar,\n        )\n\n    # Plot sorted/bgziped/tabixed transcript file\n    if options.transcript_file:\n        plot_transcript(\n            options.transcript_file,\n            options.transcript_filename,\n            ranges,\n            grid,\n            options.annotation_fontsize,\n            options.xaxis_label_fontsize,\n            options.annotation_scalar,\n        )\n\n    # save\n    matplotlib.rcParams[\"agg.path.chunksize\"] = 100000\n    plt.tight_layout(pad=0.8, h_pad=0.1, w_pad=0.1)\n    try:\n        plt.savefig(output_file, dpi=options.dpi)\n    except Exception as e:\n        logger.error(\n            \"Failed to save figure {}\".format(output_file)\n        )\n        print(e)\n\n    plt.close(fig)\n# }}}\n"
  },
  {
    "path": "samplot/samplot_vcf.py",
    "content": "#!/usr/bin/env python\n# -*- coding: utf-8 -*-\n\"\"\"\nCreate samplot vcf commands to execute and generate\ncompanion HTML image browser.\n\nNote: additional arguments are passed through to samplot plot\n\"\"\"\nfrom __future__ import print_function\n\nimport argparse\nfrom collections import Counter\nimport logging\nimport operator\nimport os\nimport random\nimport sys\nimport re\n\nimport pysam\nfrom jinja2 import Environment, FileSystemLoader, select_autoescape\n\ntry:\n    from shlex import quote\nexcept ImportError:\n    from pipes import quote\n\nfrom .samplot import add_plot\n\n\nlogger = logging.getLogger(__name__)\n\ncmp_lookup = {\n    \">\": operator.gt,  # e.g. DHFC < 0.5\n    \"<\": operator.lt,\n    \"<=\": operator.le,\n    \">=\": operator.ge,\n    \"==\": operator.eq,\n    \"contains\": operator.contains,  # e.g. CSQ contains HIGH\n    \"exists\": lambda a, b: True,  # e.g. exists smoove_gene\n}\n\n\nclass Sample(object):\n    __slots__ = [\n        \"family_id\",\n        \"id\",\n        \"paternal_id\",\n        \"maternal_id\",\n        \"mom\",\n        \"dad\",\n        \"kids\",\n        \"i\",\n    ]\n\n    def __init__(self, line):\n        toks = line.rstrip().split()\n        self.family_id = toks[0]\n        self.id = toks[1]\n        self.paternal_id = toks[2]\n        self.maternal_id = toks[3]\n        self.kids = []\n        self.i = -1  # index in the vcf.\n\n    def __repr__(self):\n        return \"Sample(id:{id},paternal_id:{pid},maternal_id:{mid})\".format(\n            id=self.id, pid=self.paternal_id, mid=self.maternal_id\n        )\n\n\ndef flatten(value, sep=\",\"):\n    \"\"\"\n    >>> flatten([1,2,3,4])\n    '1,2,3,4'\n    >>> flatten((5,6))\n    '5,6'\n    >>> flatten(0.987654321)\n    '0.987654'\n    >>> flatten(7)\n    '7'\n    >>> flatten(\"flatten\")\n    'flatten'\n    \"\"\"\n    flat = None\n    # tuple or list\n    if isinstance(value, tuple) or isinstance(value, list):\n        flat = 
sep.join([str(i) for i in value])\n    # reformats long float values\n    elif isinstance(value, float):\n        flat = \"%.6f\" % (value,)\n    # string and int\n    else:\n        flat = str(value)\n    return flat\n\n\ndef get_format_fields(ids, variant):\n    \"\"\"\n    args:\n        ids (list) - list of FORMAT field IDs, e.g. ['AS', 'AP', 'DHFFC']\n        variant (pysam.libcbcf.VariantRecord)\n\n    returns:\n        list\n    \"\"\"\n    sample_format = []\n    for i, sample_fields in enumerate(variant.samples.values()):\n        for field_id in ids:\n            sample_field_val = flatten(sample_fields.get(field_id, \"\"))\n            if sample_field_val:\n                if len(sample_format) < i + 1:\n                    sample_format.append(\"\")\n                else:\n                    sample_format[i] += \" \"\n                sample_format[i] += \"{}={}\".format(field_id, sample_field_val)\n    return sample_format\n\n\ndef get_format_title(samples, ids, variant):\n    \"\"\"\n    args:\n        samples (list) - list of sample IDs in order of VCF annotations\n        ids (list) - list of FORMAT field IDs, e.g. 
['AS', 'AP', 'DHFFC']\n        variant (pysam.libcbcf.VariantRecord)\n\n    returns:\n        dict\n    \"\"\"\n    fields = get_format_fields(ids, variant)\n    return dict(zip(samples, fields))\n\n\ndef make_plot_titles(samples, attr_values):\n    \"\"\"\n    keeping this method separate in the event we add more things to the title\n\n    args:\n        samples (list) - list of sample IDs\n        attr_values (str) - string of VCF FORMAT values\n\n    returns:\n        dict\n\n    >>> make_plot_titles(\n        ['s1', 's2', 's3'],\n            {\n                's1': 'AS=0 AP=0',\n                's2': 'AS=0 AP=1',\n                's3': 'AS=1 AP=1'\n            }\n        )\n    {\n        's1': \"'s1 AS=0 AP=0'\",\n        's2': \"'s2 AS=0 AP=1'\",\n        's3': \"'s3 AS=1 AP=1'\"\n    }\n    \"\"\"\n    plot_titles = dict()\n    for sample in samples:\n        if sample in attr_values:\n            plot_titles[sample] = quote(\"%s %s\" % (sample, attr_values[sample]))\n    return plot_titles\n\n\ndef get_overlap(\n    tabix,\n    chrom,\n    start,\n    end,\n    priority=[\"exon\", \"gene\", \"transcript\", \"cds\"],\n    no_hit=\"intergenic\",\n    fix_chr=True,\n):\n    \"\"\"\n    args:\n        tabix (pysam.libctabix.TabixFile) - open TabixFile\n        chrom (str)\n        start (int)\n        end (int)\n        priority (Optional[list]) - order of preferred region annotation\n        no_hit (Optional[str]) - use this annotation if no matches among priority\n        fix_chr (Optional[bool]) - try to fetch a region using both\n                            non-'chr' and 'chr' prefix on failures\n\n    returns:\n        str\n    \"\"\"\n    overlaps = None\n    try:\n        overlaps = set(\n            [i.split(\"\\t\")[2].lower() for i in tabix.fetch(chrom, start, end)]\n        )\n    except IndexError:\n        # probably not a gff3\n        logger.warning(\"Invalid annotation file specified for --gff3\")\n        overlaps = None\n    except 
ValueError:\n        if fix_chr:\n            # try removing chr\n            if chrom.startswith(\"chr\"):\n                overlaps = get_overlap(\n                    tabix, chrom[3:], start, end, priority, no_hit, False\n                )\n            # or adding chr\n            else:\n                overlaps = get_overlap(\n                    tabix,\n                    \"chr{chrom}\".format(chrom=chrom),\n                    start,\n                    end,\n                    priority,\n                    no_hit,\n                    False,\n                )\n    except:\n        # bad regions\n        logger.warning(\n            \"Error fetching {chrom}:{start}-{end}\".format(\n                chrom=chrom, start=start, end=end\n            )\n        )\n        overlaps = None\n\n    overlap = \"\"\n    if overlaps:\n        for feature in priority:\n            if feature in overlaps:\n                overlap = feature\n                break\n    else:\n        # fetching overlaps failed\n        overlap = \"unknown\"\n\n    if not overlap and no_hit:\n        overlap = no_hit\n    return overlap\n\n\ndef parse_ped(path, vcf_samples=None):\n    if path is None:\n        return {}\n    samples = []\n    look = {}\n    for line in open(path):\n        samples.append(Sample(line))\n        look[samples[-1].id] = samples[-1]\n\n    for s in samples:\n        s.dad = look.get(s.paternal_id)\n        if s.dad is not None:\n            s.dad.kids.append(s)\n        s.mom = look.get(s.maternal_id)\n        if s.mom is not None:\n            s.mom.kids.append(s)\n    # match these samples to the ones in the VCF.\n    if vcf_samples is not None:\n        result = []\n        for i, variant_sample in enumerate(vcf_samples):\n            if variant_sample not in look:\n                continue\n            result.append(next(s for s in samples if s.id == variant_sample))\n            result[-1].i = i\n        samples = result\n\n    return {s.id: s for s in 
samples}\n\n\ndef get_names_to_bams(bams, name_list=None):\n    \"\"\"\n    get mapping from names (read group samples) to bam paths)\n    this is useful because the VCF has the names and we'll want the bam paths\n    for those samples\n    if name_list is passed in as a parameter those will be used instead\n    \"\"\"\n    names = {}\n    if name_list:\n        if len(name_list) != len(bams):\n            logger.error(\"List of sample IDs does not match list of alignment files.\")\n            sys.exit(1)\n\n        for i, p in enumerate(bams):\n            names[name_list[i]] = p\n    else:\n        for p in bams:\n            b = pysam.AlignmentFile(p)\n            # TODO - catch specific exception\n            try:\n                names[b.header[\"RG\"][0][\"SM\"]] = p\n            except Exception as e:\n                logger.error(\"No RG field in alignment file {}\".format(p))\n                logger.error(\"Include ordered list of sample IDs to avoid this error\")\n                print(e, file=sys.stderr)\n                sys.exit(1)\n    return names\n\n\ndef tryfloat(v):\n    try:\n        return float(v)\n    except:\n        return v\n\n\ndef to_exprs(astr):\n    \"\"\"\n    an expr is just a 3-tuple of \"name\", fn, value\"\n    e.g. \"DHFFC\", operator.lt, 0.7\"\n    >>> to_exprs(\"DHFFC < 0.5 & SVTYPE == 'DEL'\")\n    [('DHFFC', <built-in function lt>, 0.5), ('SVTYPE', <built-in function eq>, 'DEL')]\n\n    >>> to_exprs(\"CSQ contains 'HIGH'\")\n    [('CSQ', <built-in function contains>, 'HIGH')]\n    \"\"\"\n    astr = (x.strip() for x in astr.strip().split(\"&\"))\n    result = []\n    for a in astr:\n        a = [x.strip() for x in a.split()]\n        if len(a) == 2:\n            assert a[1] == \"exists\", (\"bad expression\", a)\n            a.append(\"extra_arg\")\n        assert len(a) == 3, (\"bad expression\", a)\n        assert a[1] in cmp_lookup, (\n            \"comparison:\"\n            + a[1]\n            + \" not supported. 
must be one of:\"\n            + \",\".join(cmp_lookup.keys())\n        )\n        result.append((a[0], cmp_lookup[a[1]], tryfloat(a[2].strip(\"'\").strip('\"'))))\n    return result\n\n\ndef check_expr(vdict, expr):\n    \"\"\"\n    >>> check_expr({\"CSQ\": \"asdfHIGHasdf\"},\n            to_exprs(\"CSQ contains 'HIGH'\"))\n    True\n\n    >>> check_expr({\"CSQ\": \"asdfHIGHasdf\", \"DHFC\": 1.1},\n            to_exprs(\"CSQ contains 'HIGH' & DHFC < 0.5\"))\n    False\n\n    >>> check_expr({\"CSQ\": \"asdfHIGHasdf\", \"DHFC\": 1.1},\n            to_exprs(\"CSQ contains 'HIGH' & DHFC < 1.5\"))\n    True\n\n    >>> check_expr({\"smoove_gene\": \"asdf\"},\n            to_exprs(\"smoove_gene exists\"))\n    True\n\n    >>> check_expr({\"smooe_gene\": \"asdf\"},\n            to_exprs(\"smoove_gene exists\"))\n    False\n\n    >>> check_expr({\"smoove_gene\": \"\"},\n            to_exprs(\"smoove_gene exists\"))\n    True\n    \"\"\"\n\n    # a single set of exprs must be \"anded\"\n    for name, fcmp, val in expr:\n        # NOTE: asking for a missing annotation will return false.\n        if name not in vdict:\n            return False\n        if not fcmp(vdict[name], val):\n            return False\n    return True\n\n\ndef make_single(vdict):\n    \"\"\"\n    >>> d = {\"xx\": (1,)}\n    >>> make_single(d)\n    {'xx': 1}\n    \"\"\"\n    for k in vdict.keys():\n        if isinstance(vdict[k], tuple) and len(vdict[k]) == 1:\n            vdict[k] = vdict[k][0]\n    return vdict\n\n\ndef get_dn_row(ped_samples):\n    for s in ped_samples.values():\n        if s.mom is not None and s.dad is not None:\n            return '{title:\"de novo\", field:\"dn\"}'\n    return \"\"\n\n\ndef read_important_regions(bedfilename):\n    if not bedfilename:\n        return None\n    important_regions = {}\n    with open(bedfilename, \"r\") as bedfile:\n        for line in bedfile:\n            pos_fields = line.strip().split()\n            region_string = \"_\".join(pos_fields[1:3])\n  
          if pos_fields[0] not in important_regions:\n                important_regions[pos_fields[0]] = []\n            important_regions[pos_fields[0]].append(region_string)\n\n    return important_regions\n\n\ndef var_in_important_regions(important_regions, chrom, start, end, svtype):\n    if not important_regions:\n        # if no important regions are set all locations are valid\n        return True\n\n    if chrom in important_regions:\n        for region in important_regions[chrom]:\n            region_st, region_end = [int(x) for x in region.split(\"_\")]\n            if (\n                region_st <= start <= region_end\n                or region_st <= end <= region_end\n                or start <= region_st <= end\n            ):\n                return True\n\n    logger.debug(\n        \"Skipping {} at {}:{}-{}, outside important_regions coordinates\".format(\n            svtype, chrom, start, end\n        )\n    )\n    return False\n\n\ndef cram_input(bams):\n    for bam in bams:\n        if bam.endswith(\".cram\"):\n            return True\n    return False\n\n\ndef above_call_rate(gts, sample_count, min_call_rate, svtype, chrom, start, end):\n    \"\"\"\n    skips variants with call rate below min_call_rate if set\n    \"\"\"\n    if not min_call_rate:\n        return True\n\n    call_rate = (sample_count - sum(None in g for g in gts)) / sample_count\n    if min_call_rate and (call_rate < min_call_rate):\n        logger.debug(\n            (\n                \"Skipping {} at {}:{}-{}, call rate of variant \"\n                + \"({}) below min_call_rate\"\n            ).format(svtype, chrom, start, end, call_rate),\n        )\n        return False\n    return True\n\n\ndef below_max_hets(gts, max_hets, svtype, chrom, start, end):\n    \"\"\"\n    skips variants with more than max_hets heterozygotes\n    if max_hets is set\n    \"\"\"\n    if not max_hets:\n        return False\n\n    # requisite hets/hom-alts\n    het_count = sum(sum(x) >= 1 for x 
in gts if None not in x)\n    if het_count > max_hets:\n        logger.debug(\n            \"Skipping {} at {}:{}-{}, more than max_hets heterozygotes\".format(\n                svtype, chrom, start, end\n            )\n        )\n        return False\n    return True\n\n\ndef no_variant_found(gts, svtype, chrom, start, end):\n    \"\"\"\n    skips variants with no non-ref samples\n    \"\"\"\n    if not any(sum(x) > 0 for x in gts if None not in x):\n        logger.debug(\n            \"Skipping {} at {}:{}-{}, no samples have non-ref genotypes\".format(\n                svtype, chrom, start, end\n            )\n        )\n        return True\n    return False\n\n\ndef get_plottable_samples(\n    gts, variant, plot_all, filters, svtype, chrom, start, end,\n):\n    \"\"\"\n    gets the samples and indices for all those which need to be plotted,\n    which means passing filters and, if not plot_all, having a nonref genotype\n    \"\"\"\n    if plot_all:\n        test_idxs = [i for i, gt in enumerate(gts)]\n        if len(test_idxs) == 0:\n            logger.debug(\n                \"No samples found for {} at {}:{}-{}\".format(svtype, chrom, start, end)\n            )\n    else:\n        test_idxs = [i for i, gt in enumerate(gts) if None not in gt and sum(gt) > 0]\n        if len(test_idxs) == 0:\n            logger.debug(\n                \"No non-reference samples found for {} at {}:{}-{}\".format(\n                    svtype, chrom, start, end\n                )\n            )\n\n    test_samples = [s for i, s in enumerate(variant.samples.values()) if i in test_idxs]\n\n    # apply filters if set\n    if len(filters) == 0:\n        idxs = test_idxs\n    else:\n        idxs = []\n        odict = make_single(dict(variant.info.items()))\n        for i, ts in enumerate(test_samples):\n            vdict = odict.copy()\n            vdict.update(make_single(dict(ts.items())))\n\n            if any(check_expr(vdict, fs) for fs in filters):\n                
idxs.append(test_idxs[i])\n    if len(idxs) == 0:\n        logger.debug(\n            \"No samples pass filters for {} at {}:{}-{}\".format(\n                svtype, chrom, start, end\n            )\n        )\n    return idxs, test_samples\n\n\ndef get_variant_samples(\n    idxs, vcf_samples, names_to_bams, svtype, chrom, start, end,\n):\n    \"\"\"\n    gets the samples that need to be plotted and have alignment files assigned\n    \"\"\"\n    variant_samples = []\n    for i in idxs:\n        if vcf_samples[i] in names_to_bams:\n            variant_samples.append(vcf_samples[i])\n    if len(variant_samples) == 0:\n        logger.debug(\n            (\n                \"Skipping {} at {}:{}-{}, no plottable samples \"\n                + \"with matched alignment files\"\n            ).format(svtype, chrom, start, end),\n        )\n    return variant_samples\n\n\ndef get_denovos(\n    denovo_row,\n    test_samples,\n    variant_samples,\n    ped_samples,\n    svtype,\n    chrom,\n    start,\n    end,\n    dn_only,\n):\n    \"\"\"\n    we call it a de novo if the sample passed the filters but the mom and\n    dad had homref genotypes before filtering.\n    so stringent filtering on the kid and lenient on parents.\n    \"\"\"\n    denovo_svs = []\n    if denovo_row != \"\":\n        test_sample_names = {s.name for s in test_samples}\n        for variant_sample in variant_samples:\n            sample = ped_samples[variant_sample]\n            if sample.mom is None or sample.dad is None:\n                continue\n            if (\n                sample.mom.id not in test_sample_names\n                and sample.dad.id not in test_sample_names\n            ):\n                denovo_svs.append(sample.id)\n\n    if len(denovo_svs) <= 0 and dn_only:\n        logger.debug(\n            \"Skipping {} at {}:{}-{}, dn_only selected and no de novos found\".format(\n                svtype, chrom, start, end\n            ),\n        )\n    return denovo_svs\n\n\ndef 
get_family_controls(\n    ped,\n    denovo_svs,\n    variant_samples,\n    ped_samples,\n    max_hets,\n    bams,\n    names_to_bams,\n    vcf_samples_set,\n):\n    \"\"\"\n    tries to find family members to use as controls for putative de novos\n    \"\"\"\n    # do DN samples first so we can see parents.\n    # TODO also need to do the non-denovos as they seem to have been forgotten\n    for variant_sample in denovo_svs + [\n        x for x in variant_samples if x not in denovo_svs\n    ]:\n        sample = ped_samples.get(variant_sample)\n        if sample is None:\n            continue\n        if (\n            sample.mom is not None\n            and sample.mom.id not in variant_samples\n            and sample.mom.id in vcf_samples_set\n        ):\n            variant_samples.append(\"mom-of-%s[%s]\" % (variant_sample, sample.mom.id))\n            bams.append(names_to_bams[sample.mom.id])\n        if (\n            sample.dad is not None\n            and sample.dad.id not in variant_samples\n            and sample.dad.id in vcf_samples_set\n        ):\n            variant_samples.append(\"dad-of-%s[%s]\" % (variant_sample, sample.dad.id))\n            bams.append(names_to_bams[sample.dad.id])\n        for kid in sample.kids:\n            if kid.id not in variant_samples and kid.id in vcf_samples_set:\n                variant_samples.append(\"kid-of-%s[%s]\" % (variant_sample, kid.id))\n                bams.append(names_to_bams[kid.id])\n            if max_hets:\n                if len(bams) > 1.5 * max_hets:\n                    break\n        if max_hets:\n            if len(bams) > 1.5 * max_hets:\n                break\n    return variant_samples, bams\n\n\ndef get_nonfamily_controls(\n    gts, vcf_samples, variant_samples, names_to_bams, min_entries, bams\n):\n    # extend with some controls:\n    hom_ref_idxs = [\n        i for i, gt in enumerate(gts) if len(gt) == 2 and gt[0] == 0 and gt[1] == 0\n    ]\n\n    if len(hom_ref_idxs) > 3:\n        
random.shuffle(hom_ref_idxs)\n\n    hom_ref_samples = []\n    for i in hom_ref_idxs:\n        if vcf_samples[i] in names_to_bams:\n            hom_ref_samples.append(vcf_samples[i])\n\n    to_add_count = min_entries - len(bams)\n    bams.extend(names_to_bams[s] for s in hom_ref_samples[:to_add_count])\n    variant_samples += [\"control-sample:\" + s for s in hom_ref_samples[:to_add_count]]\n    return variant_samples, bams\n\n\ndef create_metadata(\n    variant,\n    translocation_chrom,\n    svtype,\n    sample_str,\n    n_samples,\n    annotations,\n    denovo_row,\n    denovo_svs,\n):\n    \"\"\"\n    creates a dict with the info about the SV\n    that will be used in the website\n    \"\"\"\n    data_dict = {\n        \"chrom\": variant.chrom,\n        \"chrom2\": translocation_chrom,\n        \"start\": variant.start,\n        \"end\": variant.stop,\n        \"svtype\": svtype,\n        \"svlength\": variant.stop - variant.start,\n        \"samples\": sample_str,\n        \"nsamples\": n_samples,\n    }\n    if annotations:\n        data_dict[\"overlaps\"] = get_overlap(\n            annotations, variant.chrom, variant.start, variant.stop\n        )\n    if denovo_row != \"\":\n        data_dict[\"dn\"] = \",\".join(denovo_svs)\n    return data_dict\n\n\ndef format_template(\n    variant,\n    data_dict,\n    max_entries,\n    bams,\n    variant_samples,\n    plot_titles,\n    out_dir,\n    output_type,\n    svtype,\n    downsample,\n    pass_through_args,\n):\n    \"\"\"\n    formates the template string for generation of the final command\n    \"\"\"\n    if data_dict[\"chrom2\"] is None:\n        figname_template = \"{svtype}_{chrom}_{start}_{end}.{itype}\"\n    else:\n        figname_template = \"{svtype}_{chrom}_{start}_{chrom2}_{end}.{itype}\"\n\n    fig_path = os.path.join(\n        out_dir, figname_template.format(itype=output_type, **data_dict),\n    )\n\n    if \"CIPOS\" in variant.info:\n        v = variant.info[\"CIPOS\"]\n        cipos = 
\"--start_ci '%s,%s'\" % (abs(int(v[0])), abs(int(v[1])))\n    else:\n        cipos = \"\"\n    if \"CIEND\" in variant.info:\n        v = variant.info[\"CIEND\"]\n        ciend = \"--end_ci '%s,%s'\" % (abs(int(v[0])), abs(int(v[1])))\n    else:\n        ciend = \"\"\n    # dynamically set Z to speed drawing and remove noise for larger events\n    z = 3\n    if variant.stop - variant.start > 2000:\n        z = 4\n    if variant.stop - variant.start > 10000:\n        z = 6\n    if variant.stop - variant.start > 20000:\n        z = 9\n    if data_dict[\"chrom2\"] is None:\n        z = 3\n\n    if max_entries:\n        bams = bams[:max_entries]\n        variant_samples = variant_samples[:max_entries]\n\n    # update titles based on FORMAT fields requested\n    title_list = list()\n    for variant_sample in variant_samples:\n        if variant_sample in plot_titles:\n            title_list.append(plot_titles[variant_sample])\n        else:\n            title_list.append(variant_sample)\n\n    start = variant.start\n    stop = variant.stop\n    start2 = None\n    stop2 = None\n\n    if data_dict[\"chrom2\"] is None:\n        template = (\n            \"samplot plot {extra_args} -z {z} -n {titles} \"\n            + \"{cipos} {ciend} {svtype} -c {chrom} -s {start} \"\n            + \"-e {end} -o {fig_path} -d {downsample} -b {bams}\"\n        )\n    else:\n        template = (\n            \"samplot plot {extra_args} -z {z} -n {titles} \"\n            + \"{cipos} {ciend} {svtype} -c {chrom} -s {start} \"\n            + \"-e {end} -o {fig_path} -d {downsample} -b {bams} \"\n            + \"-c {chrom2} -s {start2} -e {end2}\"\n        )\n        # For interchromosomal variants the 2nd breakpoint position should \n        # not be encoded in INFO/END tag although some callers still do this. \n        # Currently it is unclear if there is a good replacement. 
Delly uses \n        # INFO/POS2 for this, GATK-SV uses INFO/END2, dysgu uses INFO/CHR2_POS.\n        # see:  https://github.com/dellytools/delly/issues/159\n        # see: https://gatk.broadinstitute.org/hc/en-us/articles/5334587352219-How-to-interpret-SV-VCFs\n        # TODO - if the SV breakpoints are specified in the ALT field one \n        #        could use this info to get the 2nd breakpoint position\n        if \"POS2\" in variant.info:\n            start2 = variant.info[\"POS2\"]\n        elif \"END2\" in variant.info:\n            start2 = variant.info[\"END2\"]\n        elif \"CHR2_POS\" in variant.info:\n            start2 = variant.info[\"CHR2_POS\"]\n        else:\n            start2 = stop\n            # Update stop if INFO/END denotes the 2nd breakpoint\n            stop = start + 1\n\n        stop2 = start2 + 1\n        \n\n    command = template.format(\n        extra_args=\" \".join(pass_through_args),\n        bams=\" \".join(bams),\n        titles=\" \".join(title_list),\n        z=z,\n        cipos=cipos,\n        ciend=ciend,\n        svtype=\"-t \" + svtype if svtype != \"SV\" else \"\",\n        fig_path=fig_path,\n        chrom=variant.chrom,\n        start=start,\n        end=stop,\n        downsample=downsample,\n        chrom2=data_dict[\"chrom2\"],\n        start2=start2,\n        end2=stop2,\n    ) + \"\\n\"\n    return command\n\n\ndef write_site(table_data, out_dir, output_type, annotations, denovo_row):\n    # grab the template\n    env = Environment(\n        loader=FileSystemLoader(os.path.join(os.path.dirname(__file__), \"templates\")),\n        autoescape=select_autoescape([\"html\"]),\n    )\n    html_template = env.get_template(\"samplot_vcf.html\")\n    # write index.html\n    with open(\"{out_dir}/index.html\".format(out_dir=out_dir), \"w\") as fh:\n        print(\n            html_template.render(\n                data=table_data,\n                plot_type=output_type,\n                gff3=\"true\" if annotations else 
\"false\",\n                denovo=\"true\" if denovo_row else \"false\",\n            ),\n            file=fh,\n        )\n\n\ndef is_simply_skippable(\n    variant,\n    vcf_samples,\n    gts,\n    important_regions,\n    max_mb,\n    min_bp,\n    min_call_rate,\n    max_hets,\n    plot_all,\n    translocation_chrom,\n):\n    \"\"\"\n    checks several basic terms that could filter this variant out\n    specifically, if the variant type is INS,\n    or fails the important regions,\n    max_mb, min_bp, min_call_rate, or max_hets filters\n    \"\"\"\n    svtype = variant.info.get(\"SVTYPE\", \"SV\")\n\n    # skips variants outside important regions if those are set\n    if not var_in_important_regions(\n        important_regions, variant.chrom, variant.start, variant.stop, svtype,\n    ):\n        return True\n\n    # skips insertions\n    if svtype in (\"INS\"):\n        logger.debug(\n            \"Skipping {} at {}:{}-{}, INS type not supported\".format(\n                svtype, variant.chrom, variant.start, variant.stop\n            )\n        )\n        return True\n\n    # skips variants over max_mb length, if set\n    if max_mb and (variant.stop - variant.start > max_mb * 1000000):\n        logger.debug(\n            \"Skipping {} at {}:{}-{}, variant length greater than max_mb\".format(\n                svtype, variant.chrom, variant.start, variant.stop\n            )\n        )\n        return True\n    \n    # skips variants under min_bp, if set\n    if (variant.stop - variant.start < min_bp) and translocation_chrom is None:\n        logger.debug(\n            \"Skipping {} at {}:{}-{}, variant length shorter than min_bp\".format(\n                svtype, variant.chrom, variant.start, variant.stop\n            )\n        )\n        return True\n\n    # skips variants if the call rate is below min_call_rate, if set\n    if not above_call_rate(\n        gts,\n        len(vcf_samples),\n        min_call_rate,\n        svtype,\n        variant.chrom,\n        
variant.start,\n        variant.stop,\n    ):\n        return True\n\n    # skips variants if there are more hets than max_hets, if set\n    if below_max_hets(\n        gts, max_hets, svtype, variant.chrom, variant.start, variant.stop\n    ):\n        return True\n\n    # skips variants where no sample is non-ref, if plot_all is not set\n    if not plot_all:\n        if no_variant_found(\n            gts, svtype, variant.chrom, variant.start, variant.stop\n        ):\n            return True\n\n    return False\n\n\ndef generate_commands(\n    vcf,\n    plot_all,\n    max_mb,\n    min_bp,\n    min_call_rate,\n    max_hets,\n    dn_only,\n    ped,\n    important_regions,\n    format_field_ids,\n    min_entries,\n    max_entries,\n    out_dir,\n    output_type,\n    downsample,\n    filters,\n    ped_samples,\n    denovo_row,\n    names_to_bams,\n    annotations,\n    pass_through_args,\n):\n    \"\"\"\n    for every variant in vcf, process and output plot\n    command - if and only if it passes filters\n    \"\"\"\n    commands = []\n    table_data = []\n    vcf_samples = vcf.header.samples\n    vcf_samples_set = set(vcf_samples)\n    vcf_samples_list = list(vcf_samples)\n    vcf_stats = Counter()\n\n    # Check if VCF samples match BAMs\n    if vcf_samples_set != set(names_to_bams):\n        missing_vcf_samples = vcf_samples_set - set(names_to_bams)\n        missing_bam_samples = set(names_to_bams) - vcf_samples_set\n        logger.warning(\n            \"VCF samples and BAMs do not match. 
\"\n            \"This may be due to different sample names in the VCF and BAMs.\"\n        )\n        if missing_vcf_samples:\n            logger.warning(\n                \"VCF samples missing from BAMs: {}\".format(\", \".join(missing_vcf_samples))\n            )\n        if missing_bam_samples:\n            logger.warning(\n                \"BAMs missing from VCF samples: {}\".format(\", \".join(missing_bam_samples))\n            )\n\n    for var_count, variant in enumerate(vcf):\n        translocation_chrom = None\n        svtype = variant.info.get(\"SVTYPE\", \"SV\")\n\n        # get genotypes\n        gts = [s.get(\"GT\", (None, None)) for s in variant.samples.values()]\n\n        # handle translocations\n        if svtype in [\"BND\", \"TRA\"]:\n            try:\n                translocation_chrom = variant.info.get(\"CHR2\")\n            except (KeyError, ValueError) as e:\n                logger.debug(e)\n                logger.info(f\"Translocation {svtype} on {variant.chrom}:{variant.start}\"\n                              \"skipped due to missing CHR2 INFO field.\")\n\n        if is_simply_skippable(\n            variant,\n            vcf_samples,\n            gts,\n            important_regions,\n            max_mb,\n            min_bp,\n            min_call_rate,\n            max_hets,\n            plot_all,\n            translocation_chrom,\n        ):\n            vcf_stats[\"Skipped\"] += 1\n            continue\n\n        # gets the list of samples to plot\n        # skips ref samples if plot_all isn't set\n        # and applies user-defined filters\n        idxs, test_samples = get_plottable_samples(\n            gts,\n            variant,\n            plot_all,\n            filters,\n            svtype,\n            variant.chrom,\n            variant.start,\n            variant.stop,\n        )\n        if len(idxs) == 0:\n            vcf_stats[\"No plottable samples\"] += 1\n            continue\n\n        # matches alignment files to 
variant samples\n        variant_samples = get_variant_samples(\n            idxs,\n            vcf_samples,\n            names_to_bams,\n            svtype,\n            variant.chrom,\n            variant.start,\n            variant.stop,\n        )\n        if len(variant_samples) <= 0:\n            vcf_stats[\"No plottable samples with matched BAM\"] += 1\n            continue\n\n        bams = [names_to_bams[s] for s in variant_samples]\n\n        # finds putative de novo variants\n        denovo_svs = get_denovos(\n            denovo_row,\n            test_samples,\n            variant_samples,\n            ped_samples,\n            svtype,\n            variant.chrom,\n            variant.start,\n            variant.stop,\n            dn_only,\n        )\n        if dn_only and (len(denovo_svs) <= 0):\n            vcf_stats[\"Non de novo ('--dn_only' specified)\"] += 1\n            continue\n\n        # save fields for the html.\n        n_samples = len(variant_samples)\n        # semi-colon delimited eases CSV export from HTML\n        sample_str = \";\".join(variant_samples)\n        # dict holding sample to FORMAT title string\n        plot_titles = dict()\n        if format_field_ids:\n            format_attrs = get_format_title(vcf_samples_list, format_field_ids, variant)\n            plot_titles = make_plot_titles(variant_samples, format_attrs)\n\n        # get control samples if possible\n        # try to get family members if ped is set\n        # and reference samples if ped is not set\n        if ped is not None:\n            variant_samples, bams = get_family_controls(\n                ped,\n                denovo_svs,\n                variant_samples,\n                ped_samples,\n                max_hets,\n                bams,\n                names_to_bams,\n                vcf_samples_set,\n            )\n        elif min_entries and len(bams) < min_entries:\n            variant_samples, bams = get_nonfamily_controls(\n                gts, 
vcf_samples, variant_samples, names_to_bams, min_entries, bams\n            )\n\n        data_dict = create_metadata(\n            variant,\n            translocation_chrom,\n            svtype,\n            sample_str,\n            n_samples,\n            annotations,\n            denovo_row,\n            denovo_svs,\n        )\n        table_data.append(data_dict)\n\n        command = format_template(\n            variant,\n            data_dict,\n            max_entries,\n            bams,\n            variant_samples,\n            plot_titles,\n            out_dir,\n            output_type,\n            svtype,\n            downsample,\n            pass_through_args,\n        )\n        commands.append(command)\n\n\n    logger.debug(\"VCF entry count: {}\".format(var_count + 1))\n    if vcf_stats:\n        logger.debug(\"VCF entrys filtered out: {}\".format(sum(vcf_stats.values())))\n        for reason, count in vcf_stats.items():\n            logger.debug(\" - {}: {}\".format(reason, count))\n\n    return commands, table_data\n\n\ndef run_plot_command(command_string: str):\n    # Setup a parser for translating the command_string\n    parent_parser = argparse.ArgumentParser()\n    sub_parser = parent_parser.add_subparsers(title=\"[sub-commands]\", dest=\"command\")\n    add_plot(sub_parser)\n\n    # Convert command_string to list and remove leading 'samplot' argument\n    # Taken from https://stackoverflow.com/a/524796.\n    # NOTE: If python2 is dropped, `shlex.split` could be used for simpler syntax\n    command = [p.strip(\"'\") for p in re.split(\"( |\\\\\\\".*?\\\\\\\"|'.*?')\", command_string.strip()) if p.strip()]\n    command = command[1:]\n\n    # Skipped parse_known_args here since extra_args are not used in `samplot plot`.\n    # This means that any faulty extra arguments given to `samplot vcf` will raise\n    # an error here\n    args = parent_parser.parse_args(command)\n    args.func(parent_parser, args)\n\n\ndef vcf(parser, args, 
pass_through_args):\n    \"\"\"\n    Generate commands and html for plotting/reviewing variants from VCF\n    \"\"\"\n    if args.debug:\n        logger.setLevel(logging.DEBUG)\n\n    if args.dn_only and not args.ped:\n        logger.error(\"Missing --ped, required when using --dn_only\")\n        sys.exit(1)\n\n    if cram_input(args.bams):\n        if \"-r\" not in pass_through_args and \"--reference\" not in pass_through_args:\n            logger.error(\n                \"ERROR: missing reference file required for CRAM. \"\n                + \"Use -r option. (Run `samplot.py -h` for more help)\"\n            )\n            sys.exit(1)\n\n    vcf = pysam.VariantFile(args.vcf)\n    vcf_samples = vcf.header.samples\n\n    annotations = None\n    if args.gff3:\n        annotations = pysam.TabixFile(args.gff3)\n\n    filters = [to_exprs(f) for f in args.filter]\n\n    ped_samples = parse_ped(args.ped, vcf_samples)\n\n    # this is empty unless we have a sample with both parents defined.\n    denovo_row = get_dn_row(ped_samples)\n\n    if not os.path.exists(args.out_dir):\n        os.makedirs(args.out_dir)\n\n    # connect the sample IDs to bam files\n    names_to_bams = get_names_to_bams(args.bams, args.sample_ids)\n\n    # check that at least one sample is can be plotted  \n    if not any(vcf_sample in names_to_bams for vcf_sample in vcf_samples):\n        other = \"'--sample_ids'\" if args.sample_ids else \"BAM\"\n        logger.error(\"Samples in VCF do not match samples specified in {}\".format(other))\n        logger.error(\"VCF samples: {}\".format(', '.join(vcf_samples)))\n        logger.error(\"{} samples: {}\".format(other, ', '.join(vcf_samples)))\n        sys.exit(1)\n\n    # if important regions are included, load those intervals\n    # and only show SVs inside them\n    important_regions = read_important_regions(args.important_regions)\n\n    # user-requested FORMAT fields to add to plot title\n    format_field_ids = None\n    if args.format:\n        
format_field_ids = args.format.split(\",\")\n\n    # for every variant in vcf, process and output plot\n    # command - if and only if it passes filters\n    commands, table_data = generate_commands(\n        vcf,\n        args.plot_all,\n        args.max_mb,\n        args.min_bp,\n        args.min_call_rate,\n        args.max_hets,\n        args.dn_only,\n        args.ped,\n        important_regions,\n        format_field_ids,\n        args.min_entries,\n        args.max_entries,\n        args.out_dir,\n        args.output_type,\n        args.downsample,\n        filters,\n        ped_samples,\n        denovo_row,\n        names_to_bams,\n        annotations,\n        pass_through_args,\n    )\n\n    write_site(table_data, args.out_dir, args.output_type, annotations, denovo_row)\n\n    if args.manual_run:\n        with open(args.command_file, \"w\") as outfile:\n            outfile.writelines(commands)\n    else:\n        if args.threads == 1:\n            for command in commands:\n                run_plot_command(command)\n        else:\n            from multiprocessing import Pool\n            with Pool(processes=args.threads) as pool:\n                pool.map(run_plot_command, commands)\n\n\ndef add_vcf(parent_parser):\n    \"\"\"Defines allowed arguments for samplot's vcf plotter\n    \"\"\"\n    import doctest\n\n    parser = parent_parser.add_parser(\n        \"vcf\",\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter,\n        help=\"Generates commands to plot images with `samplot plot`,\"\n        + \" using VCF file to define regions\",\n    )\n\n    if len(sys.argv) > 1 and sys.argv[1] == \"test\":\n        r = doctest.testmod()\n        print(r)\n        sys.exit(r.failed)\n\n    parser.add_argument(\"--vcf\", \"-v\", help=\"VCF file containing structural variants\")\n    parser.add_argument(\n        \"-d\", \"--out-dir\", help=\"path to write output images\", default=\"samplot-out\",\n    )\n    parser.add_argument(\n        \"--ped\", 
help=\"path to ped (or .fam) file\",\n    )\n    parser.add_argument(\n        \"--dn_only\",\n        help=\"plots only putative de novo variants (PED file required)\",\n        action=\"store_true\",\n    )\n    parser.add_argument(\n        \"--min_call_rate\",\n        type=float,\n        help=\"only plot variants with at least this call-rate\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--filter\",\n        action=\"append\",\n        help=\"simple filter that samples\"\n        + \" must meet. Join multiple filters with '&' \"\n        + \"and specify --filter multiple times for 'or'\"\n        + \" e.g. DHFFC < 0.7 & SVTYPE = 'DEL'\",\n        default=[],\n    )\n    parser.add_argument(\n        \"-O\",\n        \"--output_type\",\n        choices=(\"png\", \"pdf\", \"eps\", \"jpg\"),\n        help=\"type of output figure\",\n        default=\"png\",\n    )\n    parser.add_argument(\n        \"--max_hets\",\n        type=int,\n        help=\"only plot variants with at most this many heterozygotes\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--min_entries\",\n        type=int,\n        help=\"try to include homref samples as controls to get this many samples in plot\",\n        default=6,\n        required=False,\n    )\n    parser.add_argument(\n        \"--max_entries\",\n        type=int,\n        help=\"only plot at most this many heterozygotes\",\n        default=10,\n        required=False,\n    )\n    parser.add_argument(\n        \"--max_mb\",\n        type=int,\n        help=\"skip variants longer than this many megabases\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--min_bp\",\n        type=int,\n        help=\"skip variants shorter than this many bases\",\n        default=20,\n    )\n    parser.add_argument(\n        \"--important_regions\",\n        help=\"only report variants that overlap regions in this bed file\",\n        required=False,\n    )\n    
parser.add_argument(\n        \"-b\",\n        \"--bams\",\n        type=str,\n        nargs=\"+\",\n        help=\"Space-delimited list of BAM/CRAM file names\",\n        required=True,\n    )\n    parser.add_argument(\n        \"--sample_ids\",\n        type=str,\n        nargs=\"+\",\n        help=\"Space-delimited list of sample IDs, \"\n        + \"must have same order as BAM/CRAM file names. \"\n        + \"BAM RG tag required if this is omitted.\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--command_file\",\n        help=\"store commands in this file.\",\n        default=\"samplot_vcf_cmds.tmp\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--format\",\n        default=\"AS,AP,DHFFC\",\n        help=\"comma separated list of FORMAT fields to include in sample plot title\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--gff3\",\n        help=\"genomic regions (.gff with .tbi in same directory) \"\n        + \"used when building HTML table and table filters\",\n        required=False,\n    )\n    parser.add_argument(\n        \"--downsample\", help=\"Number of normal reads/pairs to plot\", default=1, type=int\n    )\n    parser.add_argument(\n        \"--manual_run\",\n        help=\"disables auto-run for the plotting commands\",\n        default=False,\n        action=\"store_true\",\n    )\n    parser.add_argument(\n        \"--plot_all\",\n        help=\"plots all samples and all variants - \"\n        + \"limited by any filtering arguments set\",\n        default=False,\n        action=\"store_true\",\n    )\n    parser.add_argument(\n        \"-t\", \"--threads\",\n        type=int,\n        default=1,\n        help=\"Number of threads to use to generate plots. 
Default: %(default)s\",\n    )\n    parser.add_argument(\n        \"--debug\",\n        help=\"prints out the reason for skipping any skipped variant entry\",\n        default=False,\n        action=\"store_true\",\n    )\n\n    parser.set_defaults(func=vcf)\n\n\nif __name__ == \"__main__\":\n    print(\"Run as samplot module with `samplot vcf`\")\n"
  },
  {
    "path": "samplot/templates/samplot_vcf.html",
    "content": "<!DOCTYPE html>\n<html lang='en'>\n\n<head>\n    <meta charset='utf-8'>\n    <title>samplot</title>\n\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/d3/5.9.2/d3.min.js\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/crossfilter2/1.4.7/crossfilter.min.js\"\n        type=\"text/javascript\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/dc/3.0.12/dc.min.js\" type=\"text/javascript\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js\" type=\"text/javascript\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/js/bootstrap.bundle.min.js\"\n        type=\"text/javascript\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/viewerjs/1.5.0/viewer.min.js\" type=\"text/javascript\"></script>\n    <script src=\"https://cdn.datatables.net/v/bs4/dt-1.10.20/b-1.6.1/b-html5-1.6.1/sc-2.0.1/sl-1.3.1/datatables.min.js\"\n        type=\"text/javascript\"></script>\n\n    <link href=\"https://cdnjs.cloudflare.com/ajax/libs/viewerjs/1.5.0/viewer.min.css\" rel=\"stylesheet\"\n        type=\"text/css\" />\n    <link href=\"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.8.2/css/all.min.css\" rel=\"stylesheet\"\n        type=\"text/css\" />\n    <link href=\"https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/css/bootstrap.min.css\" rel=\"stylesheet\"\n        type=\"text/css\" />\n    <link href=\"https://cdnjs.cloudflare.com/ajax/libs/dc/3.0.12/dc.min.css\" rel=\"stylesheet\" type=\"text/css\" />\n    <link href=\"https://cdn.datatables.net/v/bs4/dt-1.10.20/b-1.6.1/b-html5-1.6.1/sc-2.0.1/sl-1.3.1/datatables.min.css\"\n        rel=\"stylesheet\" type=\"text/css\" />\n\n    <style type=\"text/css\">\n        #filter-menu .dropdown-menu {\n            min-height: 100px;\n            max-height: 100vh;\n            overflow-y: auto;\n            overflow-x: hidden;\n            background-color: 
#edf0f2;\n        }\n\n        span.no-show {\n            display: none;\n        }\n\n        span.show-ellipsis:after {\n            content: \"...\";\n        }\n\n        .datatable-info {\n            font-size: .9em;\n        }\n\n        #variant-table_info {\n            padding-top: 8px;\n        }\n\n        table.dataTable thead th.sorting:after,\n        table.dataTable thead th.sorting_asc:after,\n        table.dataTable thead th.sorting_desc:after,\n        table.dataTable thead th.sorting:before,\n        table.dataTable thead th.sorting_asc:before,\n        table.dataTable thead th.sorting_desc:before {\n            font-family: FontAwesome !important;\n        }\n\n        .modal-content {\n            width: 610px;\n        }\n\n        h7 {\n            font-size: .95rem;\n        }\n\n        body {\n            height: 100vh\n        }\n\n        div.dts div.dataTables_scrollBody {\n            background: white;\n        }\n    </style>\n</head>\n\n<body>\n    <nav class=\"navbar navbar-dark bg-dark p-0 pl-2\">\n        <a class=\"navbar-brand text-light p-0\" href=\"https://github.com/ryanlayer/samplot\">samplot</a>\n    </nav>\n\n    <div class=\"modal fade\" id=\"filter-modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"filter-modal\"\n        aria-hidden=\"true\">\n        <div class=\"modal-dialog\" role=\"document\">\n            <div class=\"modal-content\">\n                <div class=\"modal-header\">\n                    <div class=\"flex-column\">\n                        <h5 class=\"modal-title\" id=\"filter-modal\">Filters</h5>\n                        <h7 class=\"pl-2 text-secondary\" id=\"variant-count\">\n                            <a href=\"javascript:dc.filterAll(); dc.renderAll();\">Reset All</a>\n                        </h7>\n                    </div>\n                </div>\n                <div class=\"modal-body\">\n                    <div class=\"container\">\n                        <div class=\"row 
pt-2\">\n                            <div class=\"col\">\n                                <h5>Sample</h5>\n                            </div>\n                        </div>\n                        <div class=\"row pb-3\">\n                            <div class=\"col-12\">\n                                <div id=\"sample-search\"></div>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"nsamples-chart\">\n                            <div class=\"col-4\">\n                                <h5># of Samples</h5>\n                            </div>\n                            <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:nsamplesChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"size-chart\">\n                            <div class=\"col-4\">\n                                <h5>Size</h5>\n                            </div>\n                            <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:sizeChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"type-chart\">\n                            <div class=\"col-4\">\n                                <h5>SV Type</h5>\n                            </div>\n           
                 <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:typeChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"chrom-chart\">\n                            <div class=\"col-4\">\n                                <h5>Chromosome</h5>\n                            </div>\n                            <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:chromChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"overlaps-chart\" hidden>\n                            <div class=\"col-4\">\n                                <h5>SV Overlaps</h5>\n                            </div>\n                            <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:overlapsChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                    </div>\n                </div>\n                <div class=\"modal-footer\">\n                    <button 
type=\"button\" class=\"btn btn-outline-secondary\" data-dismiss=\"modal\"\n                        onclick=\"javascript:dc.filterAll(); dc.renderAll();\"\n                        title=\"Clear selection and close\">Cancel</button>\n                    <button type=\"button\" class=\"btn btn-primary\" data-dismiss=\"modal\"\n                        title=\"Apply filters\">Apply</button>\n                </div>\n            </div>\n        </div>\n    </div>\n\n    <div class=\"container-fluid h-90\">\n        <div class=\"row\" id=\"variant-table-placeholder\">\n            <div class=\"col-12\">\n                <div style=\"height:415px\">\n                    <div class=\"d-flex justify-content-center align-items-center text-muted h-100\">\n                        <div class=\"d-flex flex-column\">\n                            <i class=\"fas fa-10x fa-table\"></i>\n                        </div>\n                    </div>\n                </div>\n            </div>\n        </div>\n\n        <div class=\"row pb-1\" id=\"variant-table-div\" hidden>\n            <div class=\"col-12\">\n                <div class=\"table-responsive\">\n                    <table id=\"variant-table\" class=\"table table-hover display nowrap\" width=\"100%\"></table>\n                </div>\n            </div>\n        </div>\n    </div>\n</body>\n\n<script>\n    const data = {{ data|tojson }}\n    const plot_type = \"{{plot_type}}\"\n    const annotation = {{ gff3 }}\n    const denovo = {{ denovo }}\n\n    dc.config.defaultColors(d3.schemeSet1)\n\n    // plot constraints\n    const plotw = 585\n    const ploth = 150\n\n    // table filters\n    var searchInput = dc.textFilterWidget(\"#sample-search\")\n    var nsamplesChart = dc.barChart(\"#nsamples-chart\")\n    var sizeChart = dc.barChart(\"#size-chart\")\n    var typeChart = dc.barChart(\"#type-chart\")\n    var chromChart = dc.barChart(\"#chrom-chart\")\n    var overlapsChart\n    // shows filter impact in modal header\n    var 
variantCount = dc.dataCount(\"#variant-count\")\n\n    // used to access filtered table data\n    var chromDimension\n    // datatables obj\n    var variant_table\n    // crossfilter obj\n    var ndx\n\n    $('#filter-modal').on('hidden.bs.modal', function () {\n        update_table()\n    })\n\n    const table_click = (selection, table) => {\n        table.$('tr.selected').removeClass('selected')\n        $(selection).addClass('selected')\n        let current = $('tr.selected')\n        let next = current.next()\n        let prev = current.prev()\n\n        let row = table.rows('.selected').data()[0]\n        let img = new Image()\n        img.src = `${row.svtype}_${row.chrom}_${row.start}_${row.end}.${plot_type}`\n        if ( row.chrom2) {\n            img.src = `${row.svtype}_${row.chrom}_${row.start}_${row.chrom2}_${row.end}.${plot_type}`\n        }\n        img.onerror = function(){\n           alert(`${img.src} not found`);\n        }\n        let viewer = new Viewer(img, {\n            hidden: function () {\n                viewer.destroy()\n            },\n            title: function () {\n                return `${row.svtype} on chromosome ${row.chrom} at ${row.start}-${row.end}`\n            },\n            toolbar: {\n                zoomIn: 4,\n                zoomOut: 4,\n                oneToOne: 4,\n                reset: 4,\n                prev: {\n                    show: prev.length > 0 ? true : false,\n                    size: \"large\",\n                    click: function () {\n                        viewer.destroy()\n                        table_click(prev, table)\n                    }\n                },\n                play: { show: false },\n                next: {\n                    show: next.length > 0 ? 
true : false,\n                    size: \"large\",\n                    click: function () {\n                        viewer.destroy()\n                        table_click(next, table)\n                    }\n                },\n                rotateLeft: { show: false },\n                rotateRight: { show: false },\n                flipHorizontal: { show: false },\n                flipVertical: { show: false },\n            },\n            transition: false,\n            navbar: false,\n        })\n        viewer.show()\n    }\n\n    function build_table(data) {\n        // hide the placeholder and show the datatable\n        d3.select('#variant-table-placeholder').property(\"hidden\", true)\n        d3.select('#variant-table-div').property(\"hidden\", false)\n\n        let cols = [\n            { data: 'chrom', title: 'Chrom' },\n            { data: 'start', title: 'Start' },\n            { data: 'end', title: 'End' },\n            { data: 'svlength', title: 'Size' },\n            { data: 'svtype', title: 'SV Type' },\n            { data: 'nsamples', title: '# of Samples' },\n            { data: 'samples', title: 'Samples' },\n        ]\n        if (annotation) {\n            d3.select('#overlaps-chart').property(\"hidden\", false)\n            cols.push({ data: 'overlaps', title: 'Overlaps' })\n        }\n        if (denovo) {\n            cols.push({ data: 'dn', title: 'De novo' })\n        }\n\n        variant_table = $(\"#variant-table\").DataTable({\n            data: data,\n            columns: cols,\n            deferRender: true,\n            scrollY: '80vh',\n            scrollCollapse: true,\n            scroller: true,\n            info: true,\n            buttons: [\n                'copyHtml5', 'csvHtml5'\n            ],\n            infoCallback: (oSettings, iStart, iEnd, iMax, iTotal, sPre) => {\n                return `\n            <span class=\"datatable-info\">\n                <span class=\"pr-2\">Showing <b>${iStart}</b> - <b>${iEnd}</b> 
of <b>${iTotal}</b> records</span>\n                <button type=\"button\" class=\"btn btn-primary btn-sm\" data-toggle=\"modal\" data-target=\"#filter-modal\" title=\"Show filters\">\n                    <span class=\"fas fa-filter\"></span>\n                </button>\n                <span class=\"dropup\">\n                    <button type=\"button\" class=\"btn btn-sm btn-primary dropdown-toggle\" id=\"download-menu\" title=\"Save table\" data-toggle=\"dropdown\" aria-haspopup=\"true\" aria-expanded=\"false\">\n                        <span class=\"fas fa-save\"></span>\n                    </button>\n                    <span class=\"dropdown-menu\" aria-labelledby=\"download-menu\">\n                        <h6 class=\"dropdown-header\">Save ${iTotal} rows as:</h6>\n                        <button class=\"dropdown-item\" type=\"button\" id=\"csv-button-download\" onclick=\"csv_button_click()\">\n                            CSV\n                        </button>\n                        <button class=\"dropdown-item\" type=\"button\" id=\"copy-button-download\" onclick=\"copy_button_click()\">\n                            Copy\n                        </button>\n                    </span>\n                </span>\n            </span>\n            `\n            },\n            columnDefs: [\n                {\n                    targets: (annotation ? 
[0, 1, 2, 3, 4, 5, 7] : [0, 1, 2, 3, 4, 5]),\n                    width: '15%'\n                },\n                // https://datatables.net/blog/2016-02-26\n                {\n                    targets: 6,\n                    render: function (data, type, row) {\n                        if (type === 'display' && data != null) {\n                            data = data.replace(/<(?:.|\\n)*?>/gm, '');\n                            if (data.length > 40) {\n                                return '<span class=\"show-ellipsis\">' + data.substr(0, 40) + '</span><span class=\"no-show\">' + data.substr(40) + '</span>';\n                            } else {\n                                return data;\n                            }\n                        } else {\n                            return data;\n                        }\n                    }\n                }\n            ],\n            // search is applied using crossfilter\n            searching: false,\n            lengthChange: false,\n            order: [[0, 'asc'], [1, 'asc']],\n        })\n\n        // register table clicks on sample_column\n        variant_table.on('click', 'tr', function () {\n            table_click(this, variant_table)\n        })\n    }\n\n    function csv_button_click() {\n        variant_table.button('.buttons-csv').trigger()\n    }\n\n    function copy_button_click() {\n        variant_table.button('.buttons-copy').trigger()\n    }\n\n    function update_table() {\n        variant_table.clear()\n        variant_table.rows.add(chromDimension.top(Infinity))\n        variant_table.draw()\n    }\n\n    function remove_empty_bins(source_group) {\n        return {\n            all: function () {\n                return source_group.all().filter(function (d) {\n                    return d.value != 0\n                })\n            }\n        }\n    }\n\n    // https://jsfiddle.net/gordonwoodhull/g34Ldwaz/8/\n    // https://github.com/dc-js/dc.js/issues/348\n    function 
index_group(group) {\n        return {\n            all: function () {\n                return group.all().map(function (kv, i) {\n                    return { key: i, value: kv.value }\n                })\n            }\n        }\n    }\n\n    $(document).ready(function () {\n\n        ndx = crossfilter(data)\n        var all = ndx.groupAll()\n\n        chromDimension = ndx.dimension((d) => { return d.chrom })\n        build_table(chromDimension.top(Infinity))\n        var chromGroup = chromDimension.group().reduceCount()\n        var nonEmptyChromGroup = remove_empty_bins(chromGroup)\n\n        var searchDimension = ndx.dimension(function (d) {\n            return d.samples\n        })\n        searchInput\n            .dimension(searchDimension)\n            .on('renderlet', function () {\n                d3.selectAll(\".dc-text-filter-input\")\n                    .classed(\"form-control\", true)\n                d3.selectAll(\"#sample-search.dc-chart\")\n                    .classed(\"col-12\", true)\n            })\n\n        var sizeDimension = ndx.dimension(function (d) {\n            var round\n            if (d.svlength < 100) {\n                round = 100\n            } else if (d.svlength < 1000) {\n                round = 100\n            } else if (d.svlength < 10000) {\n                round = 1000\n            } else if (d.svlength < 100000) {\n                round = 10000\n            } else if (d.svlength < 1000000) {\n                round = 100000\n            } else if (d.svlength < 10000000) {\n                round = 1000000\n            } else {\n                round = 10000000\n            }\n            return Math.round(d.svlength / round) * round\n        })\n        var sizeGroup = sizeDimension.group().reduceCount()\n        var nonEmptySizeGroup = remove_empty_bins(sizeGroup)\n        // for brushing, need to track keys at numeric indexes\n        var sizeKeys = nonEmptySizeGroup.all().map(dc.pluck('key')).slice()\n\n        var 
typeDimension = ndx.dimension((d) => { return d.svtype })\n        var typeGroup = typeDimension.group().reduceCount()\n        var nonEmptyTypeGroup = remove_empty_bins(typeGroup)\n\n        var nsamplesDimension = ndx.dimension((d) => { return d.nsamples })\n        var nsamplesDimension = ndx.dimension(function (d) {\n            var round\n            if (d.nsamples < 10) {\n                round = 1\n            } else if (d.nsamples < 100) {\n                round = 10\n            } else if (d.nsamples < 1000) {\n                round = 100\n            } else if (d.nsamples < 10000) {\n                round = 1000\n            } else {\n                round = 10000\n            }\n            return Math.round(d.nsamples / round) * round\n        })\n        var nsamplesGroup = nsamplesDimension.group().reduceCount()\n        var nonEmptyNsamplesGroup = remove_empty_bins(nsamplesGroup)\n        var nsamplesKeys = nonEmptyNsamplesGroup.all().map(dc.pluck('key')).slice()\n\n        // number of samples\n        nsamplesChart\n            .width(plotw).height(ploth).gap(1)\n            .margins({ top: 10, right: 50, bottom: 30, left: 40 })\n            .x(d3.scaleLinear().domain([0, nsamplesKeys.length]))\n            .round(Math.floor)\n            .brushOn(true)\n            .elasticX(true)\n            .dimension(nsamplesDimension)\n            .group(index_group(nonEmptyNsamplesGroup))\n            .elasticY(true)\n            .yAxisLabel('Count')\n            .filterPrinter(function (filters) {\n                var filter = filters[0]\n                return nsamplesKeys[filter[0]] + ' - ' + nsamplesKeys[filter[1]]\n            })\n        // limit the number of labels along x-axis\n        nsamplesChart.xAxis().ticks(20)\n        nsamplesChart.yAxis().ticks(5)\n        // update labels from keys\n        nsamplesChart.xAxis().tickFormat(function (v) {\n            return nsamplesKeys[v]\n        })\n        nsamplesChart.filterHandler(function 
(dimension, filters) {\n            if (filters.length === 0) {\n                // the empty case (no filtering)\n                dimension.filter(null)\n            } else {\n                dimension.filterRange([nsamplesKeys[filters[0][0]], nsamplesKeys[filters[0][1]]])\n            }\n            return filters\n        })\n\n        // SV length\n        sizeChart\n            .width(plotw).height(ploth).gap(1)\n            .margins({ top: 10, right: 50, bottom: 30, left: 40 })\n            .x(d3.scaleLinear().domain([0, sizeKeys.length]))\n            .round(Math.floor)\n            .brushOn(true)\n            .elasticX(true)\n            .dimension(sizeDimension)\n            .group(index_group(nonEmptySizeGroup))\n            .elasticY(true)\n            .yAxisLabel('Count')\n            .filterPrinter(function (filters) {\n                var filter = filters[0]\n                return sizeKeys[filter[0]] + ' - ' + sizeKeys[filter[1]]\n            })\n            // adds left padding to plots inside filtering panel\n            .on('renderlet', function () {\n                d3.selectAll(\"svg\")\n                    .classed(\"pl-3\", true)\n            })\n        // limit the number of labels along x-axis\n        sizeChart.xAxis().ticks(10)\n        sizeChart.yAxis().ticks(5)\n        // update labels from keys\n        sizeChart.xAxis().tickFormat(function (v) {\n            return sizeKeys[v]\n        })\n        // update the status format for this chart\n        sizeChart.filterHandler(function (dimension, filters) {\n            if (filters.length === 0) {\n                // the empty case (no filtering)\n                dimension.filter(null)\n            } else {\n                dimension.filterRange([sizeKeys[filters[0][0]], sizeKeys[filters[0][1]]])\n            }\n            return filters\n        })\n\n        // sv type\n        typeChart\n            .width(plotw).height(ploth).gap(1)\n            .margins({ top: 10, right: 50, 
bottom: 30, left: 40 })\n            .x(d3.scaleBand())\n            .xUnits(dc.units.ordinal)\n            .elasticX(true)\n            .elasticY(true)\n            .dimension(typeDimension)\n            .group(nonEmptyTypeGroup)\n            .yAxisLabel('Count')\n        typeChart.yAxis().ticks(5)\n\n        // chromosome\n        chromChart\n            .width(plotw).height(ploth).gap(1)\n            .margins({ top: 10, right: 50, bottom: 30, left: 40 })\n            .x(d3.scaleBand())\n            .xUnits(dc.units.ordinal)\n            .yAxisLabel('Count')\n            .elasticX(true)\n            .elasticY(true)\n            .dimension(chromDimension)\n            .group(nonEmptyChromGroup)\n            .ordering((d) => {\n                v = parseInt(d.key)\n                if (v) {\n                    return v\n                } else {\n                    return d.key\n                }\n            })\n        chromChart.yAxis().ticks(5)\n\n        // overlaps\n        if (annotation) {\n            var overlapsDimension = ndx.dimension((d) => { return d.overlaps })\n            var overlapsGroup = overlapsDimension.group().reduceCount()\n            var nonEmptyOverlapsGroup = remove_empty_bins(overlapsGroup)\n            overlapsChart = dc.barChart(\"#overlaps-chart\")\n            overlapsChart\n                .width(plotw).height(ploth).gap(1)\n                .margins({ top: 10, right: 50, bottom: 30, left: 40 })\n                .x(d3.scaleBand())\n                .xUnits(dc.units.ordinal)\n                .elasticX(true)\n                .elasticY(true)\n                .dimension(overlapsDimension)\n                .group(nonEmptyOverlapsGroup)\n                .yAxisLabel('Count')\n            overlapsChart.yAxis().ticks(5)\n        }\n\n        variantCount\n            .crossfilter(ndx)\n            .groupAll(all)\n            // (_optional_) `.html` sets different html when some records or all records are selected.\n            // `.html` 
replaces everything in the anchor with the html given using the following function.\n            // `%filter-count` and `%total-count` are replaced with the values obtained.\n            .html({\n                some: '<strong>%filter-count</strong> selected out of <strong>%total-count</strong> records' +\n                    ' | <a href=\\'javascript:dc.filterAll(); dc.renderAll();\\'>Reset All</a>',\n                all: '<strong>%total-count</strong> records'\n            });\n\n        dc.renderAll()\n    })\n\n</script>\n\n</html>\n"
  },
  {
    "path": "setup.py",
    "content": "import re\n\nfrom setuptools import find_packages, setup\n\n\nwith open(\"README.md\", \"r\") as fh:\n    long_description = fh.read()\n\nwith open(\"samplot/__init__.py\", \"r\") as fd:\n    version = re.search(\n        r'^__version__\\s*=\\s*[\\'\"]([^\\'\"]*)[\\'\"]', fd.read(), re.MULTILINE\n    ).group(1)\n\nwith open(\"requirements.txt\", \"r\") as f:\n    requires = f.read().splitlines()\n\n\nsetup(\n    name=\"samplot\",\n    version=version,\n    description=\"plotting package for genomic structural variation\",\n    long_description=long_description,\n    long_description_content_type='text/markdown',\n    author=\"Jonathan Belyeu\",\n    author_email=\"jrbelyeu@gmail.com\",\n    url=\"https://github.com/ryanlayer/samplot.git\",\n    packages=find_packages(exclude=[\"tests\", \"*.tests\", \"*.tests.*\", \"tests.*\"]),\n    package_data={\"\": [\"LICENSE\", \"README.md\"]},\n    data_files=[(\"samplot\", [\"samplot/templates/samplot_vcf.html\"])],\n    include_package_data=True,\n    install_requires=requires,\n    license=\"MIT\",\n    zip_safe=False,\n    entry_points={\"console_scripts\": [\"samplot = samplot.__main__:main\"]},\n    classifiers=[\n        \"Development Status :: 4 - Beta\",\n        \"Intended Audience :: Science/Research\",\n        \"Topic :: Scientific/Engineering :: Bio-Informatics\",\n    ],\n)\n"
  },
  {
    "path": "ssshtest",
    "content": "#!/bin/bash\n\n############################################################\n#  Program: ssshtest\n#  Authors : Ryan M Layer ryan.layer@gmail.com\n#            Brent S Pedersen bpederse@gmail.com\n\n# (c) 2015 - Ryan Layer, Brent Pedersen\n############################################################\n\nPROGRAM_NAME=sshtest\nVERSION=0.1.5\n\nRED='\\033[0;31m'\nBRED='\\033[1;31m' # bold\n\nGREEN='\\033[0;32m'\nBGREEN='\\033[1;32m' # bold\n\nBLUE='\\033[0;33m'\nBOLD='\\033[0;1m'\nNC='\\033[0m' # No Color\n\nPASS=\" ${BGREEN}PASS${NC}\"\nFAIL=\" ${BRED}FAIL${NC}\"\n\nCOLS=`tput cols`\n\nSTDOUT_FILE=${TMPDIR:-/tmp}/o.$$\nSTDERR_FILE=${TMPDIR:-/tmp}/e.$$\nOUTVAL=\nERRVAL=\nRETVAL=\nCMD=\nVERBOSE=\n\nTOTAL=0\nSUCCESSES=0\nFAILS=0\n\nFLAG=0\n\nSTOP_ON_FAIL=0\n\ntrap report EXIT\n\nTESTS_TO_RUN=($@)\n\nRUN_NAME=\n\n#{{{ Command line parsing\nusage()\n{\n    cat << EOF\n\nusage: $0 OPTIONS\n\nOPTIONS can be:\n    -h      Show this message\n    -v      Print success messages\nEOF\n}\n\n# Check options passed in.\nwhile getopts \"h v\" OPTION\ndo\n    case $OPTION in\n        h)\n            usage\n            exit 1\n            ;;\n        v)\n            VERBOSE=1\n            ;;\n        ?)\n            usage\n            exit\n            ;;\n    esac\ndone\n#}}}\n\n#{{{ exit codes\nEX_OK=0\n\n#The command was used incorrectly, e.g., with the wrong number of arguments, a\n#bad flag, a bad syntax in a parameter, or whatever.\nEX_USAGE=64\n\n#The input data was incorrect in some way.  This should only be used for user's\n#data and not system files.\nEX_DATAERR=65\n\n#An input file (not a system file) did not exist or was not readable.  This\n#could also include errors like ``No message'' to a mailer (if it cared to\n#catch it).\nEX_NOINPUT=66\n\n#The user specified did not exist.  This might be used for mail addresses or\n#remote logins.\nEX_NOUSER=67\n\n#The host specified did not exist.  
This is used in mail addresses or network\n#requests.\nEX_NOHOST=68\n\n#A service is unavailable.  This can occur if a support program or file does\n#not exist.  This can also be used as a catchall message when something you\n#wanted to do doesn't work, but you don't know why.\nEX_UNAVAILABLE=69\n\n#An internal software error has been detected.  This should be limited to\n#non-operating system related errors as possible.\nEX_SOFTWARE=70\n\n#An operating system error has been detected.  This is intended to be used for\n#such things as ``cannot fork'', ``cannot create pipe'', or the like.  It\n#includes things like getuid returning a user that does not exist in the passwd\n#file.\nEX_OSERR=71\n\n#Some system file (e.g., /etc/passwd, /var/run/utmp, etc.) does not exist,\n#cannot be opened, or has some sort of error (e.g., syntax error).\nEX_OSFILE=72\n\n#A (user specified) output file cannot be created.\nEX_CANTCREAT=73\n\n#An error occurred while doing I/O on some file.\nEX_IOERR=74\n\n#Temporary failure, indicating something that is not really an error.  In\n#sendmail, this means that a mailer (e.g.) could not create a connection, and\n#the request should be reattempted later.\nEX_TEMPFAIL=75\n\n#The remote system returned something that was ``not possible'' during a\n#protocol exchange.\nEX_PROTOCOL=76\n\n#You did not have sufficient permission to perform the operation.  
This is not\n#intended for file system problems, which should use EX_NOINPUT or\n#EX_CANTCREAT, but rather for higher level permissions.\nEX_NOPERM=77\n\n#Something was found in an unconfigured or misconfigured state.\nEX_CONFIG=78\n#}}}\n\n#{{{ function report {\nfunction report {\n    rm -f $STDOUT_FILE $STDERR_FILE\n\n    echo -e \"\\n$PROGRAM_NAME v$VERSION\\n\"\n\n    if [ \"$STOP_ON_FAIL\" -ne \"0\" ]\n    then\n        if [ \"$FAILS\" -ne \"0\" ]\n        then\n            printf \"${BOLD}TESTING STOPPED ON FIRST FAIL${NC}\\n\\n\"\n        fi\n    fi\n\n    printf \"${NC}%-10s${NC}Tests\\n\" $TOTAL\n\n    if [ \"$FAILS\" -ne \"0\" ]\n    then\n        printf \"${BRED}%-10s${NC}${BOLD}Failures${NC}\\n\" $FAILS\n        printf \"${BGREEN}%-10s${NC}Successes\\n\" $SUCCESSES\n    else\n        printf \"${BRED}%-10s${NC}Failures\\n\" $FAILS\n        printf \"${BGREEN}%-10s${NC}${BOLD}Successes${NC}\\n\" $SUCCESSES\n    fi\n\n    tear_down\n\n    exit $FAILS\n}\n#}}}\n\n#{{{ function run {\nfunction run {\n    RUN_NAME=$1\n    shift\n\n    FLAG=0\n    if [ \"${#TESTS_TO_RUN[*]}\" -eq 0 ]\n    then\n        FLAG=1\n\t\n\telse \n\t\n\t\tfor i in \"${TESTS_TO_RUN[@]}\"\n\t\tdo\n\t\t\tif [ \"$RUN_NAME\" == \"$i\" ]\n\t\t\tthen\n\t\t\t\tFLAG=1\n\t\t\t\tbreak\n\t\t\tfi\n\t\tdone\n    fi\n\n    if [ \"$FLAG\" -eq 0 ]\n    then\n        return\n    else\n        export $RUN_NAME=1\n    fi\n\n    CMD=\"$@\"\n\n    START=$(date +%s);\n    O=\"$(\"$@\" >$STDOUT_FILE 2>$STDERR_FILE)\"\n    RETVAL=$?\n    END=$(date +%s);\n    TOTAL_TIME=$((END-START))\n\n    RUN_TIME=\"$TOTAL_TIME sec\"\n\n\n    OUTVAL=`cat $STDOUT_FILE`\n    ERRVAL=`cat $STDERR_FILE`\n\n    #make it pretty\n    RUN_NAME=${BOLD}$RUN_NAME${NC}\n\tELINES=$(wc -l $STDERR_FILE | awk '{print $1 }' &)\n\tOLINES=$(wc -l $STDOUT_FILE | awk '{print $1 }' &)\n\twait\n    echo -e \"\\n$RUN_NAME ran in $RUN_TIME with $ELINES/$OLINES lines to STDERR/OUT\"\n}\n#}}}\n\n#{{{ function print_exit_code {\nfunction 
print_exit_code {\n    case $1 in\n        $EX_OK)\n            echo \"EX_OK\"\n            ;;\n        $EX_USAGE)\n            echo \"EX_USAGE\"\n            ;;\n        $EX_DATAERR)\n            echo \"EX_DATAERR\"\n            ;;\n        $EX_NOINPUT)\n            echo \"EX_NOINPUT\"\n            ;;\n        $EX_NOUSER)\n            echo \"EX_NOUSER\"\n            ;;\n        $EX_NOHOST)\n            echo \"EX_NOHOST\"\n            ;;\n        $EX_UNAVAILABLE)\n            echo \"EX_UNAVAILABLE\"\n            ;;\n        $EX_SOFTWARE)\n            echo \"EX_SOFTWARE\"\n            ;;\n        $EX_OSERR)\n            echo \"EX_OSERR\"\n            ;;\n        $EX_OSFILE)\n            echo \"EX_OSFILE\"\n            ;;\n        $EX_CANTCREAT)\n            echo \"EX_CANTCREAT\"\n            ;;\n        $EX_IOERR)\n            echo \"EX_IOERR\"\n            ;;\n        $EX_TEMPFAIL)\n            echo \"EX_TEMPFAIL\"\n            ;;\n        $EX_PROTOCOL)\n            echo \"EX_PROTOCOL\"\n            ;;\n        $EX_NOPERM)\n            echo \"EX_NOPERM\"\n            ;;\n        $EX_CONFIG)\n            echo \"EX_CONFIG\"\n            ;;\n        *)\n            echo \"Unknown code: $1\"\n    esac\n}\n#}}}\n\n#{{{function assert_exit_code {\nfunction assert_exit_code {\n    \n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    E=$(print_exit_code $1)\n    O=$(print_exit_code $RETVAL)\n    if [ $RETVAL -ne $1 ]\n    then\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL EXIT CODE (LINE $LINE)\"\n        echo -e \"-->\\texpected $E, observed $O\"\n        tail $STDERR_FILE\n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    else\n        SUCCESSES=$((SUCCESSES + 1))\n        echo -e \"$PASS EXIT CODE (LINE $LINE)\"\n        if [ $VERBOSE ] \n        then\n            echo -e \"-->\\texpected $E, observed $O\"\n        fi\n    fi\n}\n#}}}\n\n#{{{ function assert_no_stdout {\nfunction 
assert_no_stdout {\n\n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    if [ -n \"$OUTVAL\" ]\n    then\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL NON-EMPTY STDOUT (LINE $LINE)\"\n        echo -e \"-->\\t$OUTVAL\"\n        tail $STDERR_FILE\n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    else\n        SUCCESSES=$((SUCCESSES + 1))\n        echo -e \"$PASS EMPTY STDOUT (LINE $LINE)\"\n    fi\n}\n#}}}\n\n#{{{ function assert_no_stderr {\nfunction assert_no_stderr {\n\n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    if [ -n \"$ERRVAL\" ]\n    then\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL NON-EMPTY STDERR(LINE $LINE)\"\n        echo -e \"-->\\t$ERRVAL\"\n        tail $STDERR_FILE \n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    else\n        SUCCESSES=$((SUCCESSES + 1))\n        echo -e \"$PASS EMPTY STDERR(LINE $LINE)\"\n    fi\n}\n#}}}\n\n#{{{function assert_stderr {\nfunction assert_stderr {\n\n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    if [ -z \"$ERRVAL\" ]\n    then\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL EMPTY STDERR(LINE $LINE)\"\n        tail $STDERR_FILE\n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    else\n        SUCCESSES=$((SUCCESSES + 1))\n        echo -e \"$PASS EMPTY STDERR(LINE $LINE)\"\n        if [ $VERBOSE ] \n        then\n            echo -e \"-->\\t$ERRVAL\"\n        fi\n    fi\n}\n#}}}\n\n#{{{function assert_stdout {\nfunction assert_stdout {\n\n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    if [ -z \"$OUTVAL\" ]\n    then\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL EMPTY STDOUT (LINE $LINE)\"\n        tail $STDERR_FILE \n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    else\n    
    SUCCESSES=$((SUCCESSES + 1))\n        echo -e \"$PASS NON-EMPTY STDOUT (LINE $LINE)\"\n        if [ $VERBOSE ] \n        then\n            echo -e \"-->\\t$ERRVAL\"\n        fi\n    fi\n}\n#}}}\n\n#{{{function assert_in_stderr {\nfunction assert_in_stderr {\n\n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    if [ -z \"$ERRVAL\" ]\n    then\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL EMPTY STDERR (LINE $LINE)\"\n        tail $STDERR_FILE\n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    else\n        if [[ $ERRVAL == *\"$1\"* ]]\n        then\n            SUCCESSES=$((SUCCESSES + 1))\n            echo -e \"$PASS STDERR CONTAINS \\\"$1\\\" (LINE $LINE)\"\n            if [ $VERBOSE ] \n            then\n                echo -e \"-->\\t$ERRVAL\"\n            fi\n        else\n            FAILS=$((FAILS + 1))\n            echo -e \"$FAIL STDERR DOES NOT CONTAIN \\\"$1\\\" (LINE $LINE)\"\n            echo -e \"-->\\t$ERRVAL\"\n            tail $STDERR_FILE\n            if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n        fi\n    fi\n}\n#}}}\n\n#{{{function assert_in_stdout {\nfunction assert_in_stdout {\n\n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    if [ -z \"$OUTVAL\" ]\n    then\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL EMPTY STDOUT (LINE $LINE)\"\n        tail $STDERR_FILE\n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    else\n        if [[ $OUTVAL == *\"$1\"* ]]\n        then\n            SUCCESSES=$((SUCCESSES + 1))\n            echo -e \"$PASS STDOUT CONTAINS \\\"$1\\\" (LINE $LINE)\"\n            if [ $VERBOSE ] \n            then\n                echo -e \"-->\\t$OUTVAL\"\n            fi\n        else\n            FAILS=$((FAILS + 1))\n            echo -e \"$FAIL STDOUT DOES NOT CONTAIN \\\"$1\\\" (LINE $LINE)\"\n            echo -e \"-->\\t$OUTVAL\"\n            tail 
$STDERR_FILE\n            if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n        fi\n    fi\n}\n#}}}\n\n#{{{ function assert_equal {\nfunction assert_equal {\n\n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    if [ \"$1\" == \"$2\" ]\n    then\n        SUCCESSES=$((SUCCESSES + 1))\n        echo -e \"$PASS \\\"$1\\\" == \\\"$2\\\" (LINE $LINE)\"\n    else\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL \\\"$1\\\" != \\\"$2\\\" (LINE $LINE)\"\n        tail $STDERR_FILE\n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    fi\n}\n#}}}\n\n#{{{ function assert_true {\nfunction assert_true {\n\n    COMMAND=(\"$@\")\n    RES=`${COMMAND[@]}`\n    echo $RES || \"AAAAAAAAAAA\"\n\n    if [ \"$FLAG\" -eq 0 ];then return; fi\n\n    LINE=$(caller | cut -d \" \" -f1)\n\n    TOTAL=$((TOTAL + 1))\n    if [ \"${COMMAND[@]}\" == true ]\n    then\n        SUCCESSES=$((SUCCESSES + 1))\n        echo -e \"$PASS $* (LINE $LINE)\"\n    else\n        FAILS=$((FAILS + 1))\n        echo -e \"$FAIL $* (LINE $LINE)\"\n        tail $STDERR_FILE\n        if [ $STOP_ON_FAIL -ne \"0\" ];then exit; fi\n    fi\n}\n#}}}\n\n#{{{function tear_down \nfunction tear_down \n{\n    :\n    #define this function in your test to clean things up in the end\n}\n#}}}\n"
  },
  {
    "path": "test/README.md",
    "content": "These BAM files contain reads that align to the small set of SV-containing regions below.\n\nRegions included in BAM files\n```\n2\t89160083\t89186670\tINV\n4\t113984874\t113987369\tDEL\n5\t1021803\t\t1026877\t\tDEL\n12\t12543868\t12547613\tINV\n12\t47289448\t47310758\tINV\n19\t12693867\t12699924\tDEL\n```\n\nVariants:\n```\n2\t89161083\t89185670\tINV\n4\t113985874\t113986369\tDEL\n5\t1022803\t\t1025877\t\tDEL\n12\t12544868\t12546613\tINV\n12\t47290448\t47309758\tINV\n19\t12694867\t12698924\tDEL\n```\n"
  },
  {
    "path": "test/data/README.md",
    "content": "This directory contains data and the scripts used to download that data. Alignments are from Genome in a Bottle public resources.\nRunning the `subset_alignments.sh` script will download the data available in these alignment files. \nThese alignments only include reads from the regions listed in the `examples_padded.bed` file.\nThe regions of interest (SVs and one normal region) are indicated in the `examples.bed` file.\n"
  },
  {
    "path": "test/data/commands.sh",
    "content": "set -e\n\n#download hg19 reference for cram\nFILE=\"hg19.fa.gz\"\nif [ ! -f $FILE ]; then\n    wget http://hgdownload.cse.ucsc.edu/goldenPath/hg19/bigZips/hg19.fa.gz\n    gunzip hg19.fa.gz\n    bgzip hg19.fa\nfi\n\n#images of each type with all technologies\nmkdir -p test_imgs\nsamplot plot -n Illumina PacBio ONT 10X -t DEL -c 1 -s 24804397 -e 24807302 -o test_imgs/DEL_1_24804397_24807302.png -b HG002_Illumina.bam HG002_PacBio.bam HG002_ONT.cram HG002_10X.bam  -r hg19.fa.gz\nsamplot plot -n Illumina PacBio ONT 10X -t DUP -c 4 -s 99813786 -e 99817098 -o test_imgs/DUP_4_99813786_99817098.png -b HG002_Illumina.bam HG002_PacBio.bam HG002_ONT.cram HG002_10X.bam -r hg19.fa.gz\nsamplot plot -n Illumina PacBio ONT 10X -t DUP -c 11 -s 67974431 -e 67975639 -o test_imgs/DUP_11_67974431_67975639.png -b HG002_Illumina.bam HG002_PacBio.bam HG002_ONT.cram HG002_10X.bam -r hg19.fa.gz\nsamplot plot -n Illumina PacBio ONT 10X -t INV -c 12 -s 12544867 -e 12546613 -o test_imgs/INV_12_12544867_12546613.png -b HG002_Illumina.bam HG002_PacBio.bam HG002_ONT.cram HG002_10X.bam -r hg19.fa.gz\n\n#zoom example\nsamplot plot -n Illumina PacBio ONT 10X -t DUP -c 4 -s 99813786 -e 99817098 -o test_imgs/DUP_4_99813786_99817098_zoom.png -b HG002_Illumina.bam HG002_PacBio.bam HG002_ONT.cram HG002_10X.bam -r hg19.fa.gz --zoom 1000\n\n#trios with no variant\nsamplot plot -n HG002 HG003 HG004 -c 1 -s 43059290 -e 43059950 -o test_imgs/1_43059290_43059950.png -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\n\n#trios of each type\nsamplot plot -n HG002 HG003 HG004 -t DEL -c 1 -s 24804397 -e 24807302 -o test_imgs/trio_DEL_1_24804397_24807302.png -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\nsamplot plot -n HG002 HG003 HG004 -t DUP -c 4 -s 99813786 -e 99817098 -o test_imgs/trio_DUP_4_99813786_99817098.png -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\nsamplot plot -n HG002 HG003 HG004 -t DUP -c 11 -s 67974431 -e 67975639 -o 
test_imgs/trio_DUP_11_67974431_67975639.png -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\nsamplot plot -n HG002 HG003 HG004 -t INV -c 12 -s 12544867 -e 12546613 -o test_imgs/trio_INV_12_12544867_12546613.png -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\n\n#create a temporary example website\nmkdir -p test_site\nsamplot vcf -d test_site/ --vcf test.vcf --sample_ids HG002 HG003 HG004 -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam > test_site_cmds.sh\n"
  },
  {
    "path": "test/data/examples.bed",
    "content": "1\t24804398\t24807302\tDEL\tHET\n1\t43059290\t43059950\tNA\tHOM\n4\t99813787\t99817098\tDUP\tHOM\n11\t67974432\t67975639\tDUP\tHET\n12\t12544868\t12546613\tINV\tHOM\n19\t12694867\t12698924\tDEL\tHOM\n"
  },
  {
    "path": "test/data/examples_padded.bed",
    "content": "1\t24802398\t24809302\tDEL\tHET\n1\t43057290\t43061950\tNA\tHOM\n4\t99811787\t99819098\tDUP\tHOM\n11\t67972432\t67977639\tDUP\tHET\n12\t12542868\t12548613\tINV\tHOM\n19\t12692867\t12700924\tDEL\tHOM\n"
  },
  {
    "path": "test/data/subset_alignments.sh",
    "content": "#download example regions from GIAB 300X Illumina Ashkenazi Trio\nsamtools view -h -b -L examples_padded.bed ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG002_NA24385_son/NIST_HiSeq_HG002_Homogeneity-10953946/NHGRI_Illumina300X_AJtrio_novoalign_bams/HG002.hs37d5.300x.bam > HG002_Illumina.bam\nsamtools view -h -b -L examples_padded.bed ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG003_NA24149_father/NIST_HiSeq_HG003_Homogeneity-12389378/NHGRI_Illumina300X_AJtrio_novoalign_bams/HG003.hs37d5.300x.bam > HG003_Illumina.bam\nsamtools view -h -b -L examples_padded.bed ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG004_NA24143_mother/NIST_HiSeq_HG004_Homogeneity-14572558/NHGRI_Illumina300X_AJtrio_novoalign_bams/HG004.hs37d5.300x.bam > HG004_Illumina.bam\n\n#download example regions from GIAB PacBio, 10X, and ONT alignments for the Ashkenazi Trio son (HG002)\nsamtools view -h -b -L examples_padded.bed ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG002_NA24385_son/PacBio_MtSinai_NIST/Baylor_NGMLR_bam_GRCh37/HG002_PB_70x_RG_HP10XtrioRTG.bam > HG002_PacBio.bam\nsamtools view -h -b -L examples_padded.bed ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/analysis/10XGenomics_ChromiumGenome_LongRanger2.2_Supernova2.0.1_04122018/GRCh37/NA24385_300G/HG002_10x_84x_RG_HP10xtrioRTG.bam > HG002_10X.bam\nsamtools view -h -b -C -L examples_padded.bed ftp://ftp-trace.ncbi.nlm.nih.gov/giab/ftp/data/AshkenazimTrio/HG002_NA24385_son/Ultralong_OxfordNanopore/combined_2018-08-10/HG002_ONTrel2_16x_RG_HP10xtrioRTG.cram > HG002_ONT.cram\n\n#index new alignment files\nsamtools index HG002_10X.bam\nsamtools index HG002_Illumina.bam\nsamtools index HG002_ONT.cram\nsamtools index HG002_PacBio.bam\nsamtools index HG003_Illumina.bam\nsamtools index HG004_Illumina.bam\n"
  },
  {
    "path": "test/data/test.ped",
    "content": "0001    HG004   0   0   0\n0001    HG003   0   0   1\n0001    HG002   HG003   HG004   0\n"
  },
  {
    "path": "test/data/test.vcf",
    "content": "##fileformat=VCFv4.1\n##fileDate=20170929\n##reference=ftp://ftp.1000genomes.ebi.ac.uk//vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa.gz\n##INFO=<ID=CHR2,Number=1,Type=String,Description=\"Chromosome for END coordinate\">\n##INFO=<ID=TSD,Number=1,Type=String,Description=\"Precise Target Site Duplication for bases, if unknown, value will be NULL\">\n##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">\n##INFO=<ID=MSTART,Number=1,Type=Integer,Description=\"Mitochondrial start coordinate of inserted sequence\">\n##INFO=<ID=MLEN,Number=1,Type=Integer,Description=\"Estimated length of mitochondrial insert\">\n##INFO=<ID=MEND,Number=1,Type=Integer,Description=\"Mitochondrial end coordinate of inserted sequence\">\n##INFO=<ID=MEINFO,Number=4,Type=String,Description=\"Mobile element info of the form NAME,START,END<POLARITY; If there is only 5' OR 3' support for this call, will be NULL NULL for START and END\">\n##INFO=<ID=SVLEN,Number=.,Type=Integer,Description=\"Difference in length between REF and ALT alleles\">\n##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description=\"Imprecise structural variation\">\n##INFO=<ID=CIEND,Number=2,Type=Integer,Description=\"Confidence interval around END for imprecise variants\">\n##INFO=<ID=CIPOS,Number=2,Type=Integer,Description=\"Confidence interval around POS for imprecise variants\">\n##INFO=<ID=END,Number=1,Type=Integer,Description=\"End coordinate of this variant\">\n##ALT=<ID=INV,Description=\"Inversion\">\n##ALT=<ID=INS:ME:SVA,Description=\"Insertion of SVA element\">\n##ALT=<ID=INS:ME:LINE1,Description=\"Insertion of LINE1 element\">\n##ALT=<ID=INS:ME:ALU,Description=\"Insertion of ALU element\">\n##ALT=<ID=DUP,Description=\"Duplication\">\n##ALT=<ID=DEL,Description=\"Deletion\">\n##ALT=<ID=CNV,Description=\"Copy Number Polymorphism\">\n##ALT=<ID=CN9,Description=\"Copy number allele: 9 copies\">\n##ALT=<ID=CN8,Description=\"Copy number allele: 8 
copies\">\n##ALT=<ID=CN7,Description=\"Copy number allele: 7 copies\">\n##ALT=<ID=CN6,Description=\"Copy number allele: 6 copies\">\n##ALT=<ID=CN5,Description=\"Copy number allele: 5 copies\">\n##ALT=<ID=CN4,Description=\"Copy number allele: 4 copies\">\n##ALT=<ID=CN3,Description=\"Copy number allele: 3 copies\">\n##ALT=<ID=CN2,Description=\"Copy number allele: 2 copies\">\n##ALT=<ID=CN1,Description=\"Copy number allele: 1 copy\">\n##ALT=<ID=CN0,Description=\"Copy number allele: 0 copies\">\n##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype quality\">\n##FORMAT=<ID=SQ,Number=1,Type=Float,Description=\"Phred-scaled probability that this site is variant (non-reference in this sample\">\n##FORMAT=<ID=GL,Number=G,Type=Float,Description=\"Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy\">\n##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read depth\">\n##FORMAT=<ID=RO,Number=1,Type=Integer,Description=\"Reference allele observation count, with partial observations recorded fractionally\">\n##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observations, with partial observations recorded fractionally\">\n##FORMAT=<ID=QR,Number=1,Type=Integer,Description=\"Sum of quality of reference observations\">\n##FORMAT=<ID=QA,Number=A,Type=Integer,Description=\"Sum of quality of alternate observations\">\n##FORMAT=<ID=RS,Number=1,Type=Integer,Description=\"Reference allele split-read observation count, with partial observations recorded fractionally\">\n##FORMAT=<ID=AS,Number=A,Type=Integer,Description=\"Alternate allele split-read observation count, with partial observations recorded fractionally\">\n##FORMAT=<ID=ASC,Number=A,Type=Integer,Description=\"Alternate allele clipped-read observation count, with partial observations recorded 
fractionally\">\n##FORMAT=<ID=RP,Number=1,Type=Integer,Description=\"Reference allele paired-end observation count, with partial observations recorded fractionally\">\n##FORMAT=<ID=AP,Number=A,Type=Integer,Description=\"Alternate allele paired-end observation count, with partial observations recorded fractionally\">\n##FORMAT=<ID=AB,Number=A,Type=Float,Description=\"Allele balance, fraction of observations from alternate allele, QA/(QR+QA)\">\n##FILTER=<ID=PASS,Description=\"All filters passed\">\n##contig=<ID=1,length=249250621>\n##contig=<ID=2,length=243199373>\n##contig=<ID=3,length=198022430>\n##contig=<ID=4,length=191154276>\n##contig=<ID=5,length=180915260>\n##contig=<ID=6,length=171115067>\n##contig=<ID=7,length=159138663>\n##contig=<ID=8,length=146364022>\n##contig=<ID=9,length=141213431>\n##contig=<ID=10,length=135534747>\n##contig=<ID=11,length=135006516>\n##contig=<ID=12,length=133851895>\n##contig=<ID=13,length=115169878>\n##contig=<ID=14,length=107349540>\n##contig=<ID=15,length=102531392>\n##contig=<ID=16,length=90354753>\n##contig=<ID=17,length=81195210>\n##contig=<ID=18,length=78077248>\n##contig=<ID=19,length=59128983>\n##contig=<ID=20,length=63025520>\n##contig=<ID=21,length=48129895>\n##contig=<ID=22,length=51304566>\n##contig=<ID=X,length=155270560>\n##contig=<ID=Y,length=59373566>\n##bcftools_viewVersion=1.3.1-1-g4d44e83-dirty+htslib-1.3.1-12-g0454d47\n##bcftools_viewCommand=view -c 1 -s NA12878 ALL.wgs.mergedSV.v8.20130502.svs.genotypes.vcf.gz\n##bcftools_viewCommand=view -i '(SVTYPE=\"DEL\" || SVTYPE=\"DUP\" || SVTYPE=\"INV\" || SVTYPE=\"INS\")' 
test.vcf\n#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tHG002\tHG003\tHG004\n1\t24804398\t1\tT\t<DEL>\t2087.90\tPASS\tSVTYPE=DEL;CIEND=100,100;CIPOS=1000,1000;END=24807302;\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t0/1:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t0/0:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t1/0:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n1\t24804399\t1\tT\t<DEL>\t2087.90\tPASS\tSVTYPE=TRA;CIEND=0,0;CIPOS=0,0;END=43059290;CHR2=1\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t0/1:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t0/0:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t1/0:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n1\t24804400\t1\tT\t<DEL>\t2087.90\tPASS\tSVTYPE=TRA;CIEND=0,0;CIPOS=0,0;END=99813787;CHR2=4\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t0/1:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t0/0:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t1/0:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n1\t43059290\t2\tT\t<DEL>\t2087.90\tPASS\tSVTYPE=DEL;CIEND=0,0;CIPOS=0,0;END=43059950;\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t0/0:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t0/0:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t0/0:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n4\t99813787\t3\tT\t<DUP>\t2087.90\tPASS\tSVTYPE=DUP;CIEND=0,0;CIPOS=0,0;END=99817098;\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t1/1:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t1/1:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t1/1:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n11\t67974432\t4\tT\t<DUP>\t2087.90\tPASS\tSVTYPE=DUP;CIEND=0,0;CIPOS=0,0;END=67975639;\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t0/1:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t0/1:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t0/0:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n12\t12544868\t5\tT\t<INV>\t2087.90\tPASS\tS
VTYPE=INV;CIEND=0,0;CIPOS=0,0;END=12546613;\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t0/1:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t0/1:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t1/0:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n19\t12694867\t6\tT\t<DEL>\t2087.90\tPASS\tSVTYPE=DEL;CIEND=0,0;CIPOS=0,0;END=12698924;\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t1/1:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t1/1:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t1/1:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n19\t12694868\t6\tT\t<DEL>\t2087.90\tPASS\tSVTYPE=DEL;CIEND=0,0;CIPOS=0,0;END=12698924;\tGT:GQ:SQ:GL:DP:RO:AO:QR:QA:RS:AS:ASC:RP:AP:AB\t1/0:146:581.24:-64,-6,-51:109:77:31:76:30:52:9:2:24:18:0.28\t0/0:130:1506.66:-153,-15,-2:52:0:52:0:51:0:23:3:0:24:1\t0/0:200:0.00:-0,-54,-181:182:182:0:181:0:120:0:0:61:0:0\n"
  },
  {
    "path": "test/data/test_site/README.md",
    "content": "Site generated using:\n\n```\nsamplot vcf --vcf test/data/test.vcf -b test/data/HG002_Illumina.bam test/data/HG003_Illumina.bam test/data/HG004_Illumina.bam --format GT,GQ --sample_ids HG002 HG003 HG004\n```\n"
  },
  {
    "path": "test/data/test_site/index.html",
    "content": "<!DOCTYPE html>\n<html lang='en'>\n\n<head>\n    <meta charset='utf-8'>\n    <title>samplot</title>\n\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/d3/5.9.2/d3.min.js\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/crossfilter2/1.4.7/crossfilter.min.js\"\n        type=\"text/javascript\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/dc/3.0.12/dc.min.js\" type=\"text/javascript\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js\" type=\"text/javascript\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/js/bootstrap.bundle.min.js\"\n        type=\"text/javascript\"></script>\n    <script src=\"https://cdnjs.cloudflare.com/ajax/libs/viewerjs/1.5.0/viewer.min.js\" type=\"text/javascript\"></script>\n    <script src=\"https://cdn.datatables.net/v/bs4/dt-1.10.20/b-1.6.1/b-html5-1.6.1/sc-2.0.1/sl-1.3.1/datatables.min.js\"\n        type=\"text/javascript\"></script>\n\n    <link href=\"https://cdnjs.cloudflare.com/ajax/libs/viewerjs/1.5.0/viewer.min.css\" rel=\"stylesheet\"\n        type=\"text/css\" />\n    <link href=\"https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.8.2/css/all.min.css\" rel=\"stylesheet\"\n        type=\"text/css\" />\n    <link href=\"https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/css/bootstrap.min.css\" rel=\"stylesheet\"\n        type=\"text/css\" />\n    <link href=\"https://cdnjs.cloudflare.com/ajax/libs/dc/3.0.12/dc.min.css\" rel=\"stylesheet\" type=\"text/css\" />\n    <link href=\"https://cdn.datatables.net/v/bs4/dt-1.10.20/b-1.6.1/b-html5-1.6.1/sc-2.0.1/sl-1.3.1/datatables.min.css\"\n        rel=\"stylesheet\" type=\"text/css\" />\n\n    <style type=\"text/css\">\n        #filter-menu .dropdown-menu {\n            min-height: 100px;\n            max-height: 100vh;\n            overflow-y: auto;\n            overflow-x: hidden;\n            background-color: 
#edf0f2;\n        }\n\n        span.no-show {\n            display: none;\n        }\n\n        span.show-ellipsis:after {\n            content: \"...\";\n        }\n\n        .datatable-info {\n            font-size: .9em;\n        }\n\n        #variant-table_info {\n            padding-top: 8px;\n        }\n\n        table.dataTable thead th.sorting:after,\n        table.dataTable thead th.sorting_asc:after,\n        table.dataTable thead th.sorting_desc:after,\n        table.dataTable thead th.sorting:before,\n        table.dataTable thead th.sorting_asc:before,\n        table.dataTable thead th.sorting_desc:before {\n            font-family: FontAwesome !important;\n        }\n\n        .modal-content {\n            width: 610px;\n        }\n\n        h7 {\n            font-size: .95rem;\n        }\n\n        body {\n            height: 100vh\n        }\n\n        div.dts div.dataTables_scrollBody {\n            background: white;\n        }\n    </style>\n</head>\n\n<body>\n    <nav class=\"navbar navbar-dark bg-dark p-0 pl-2\">\n        <a class=\"navbar-brand text-light p-0\" href=\"https://github.com/ryanlayer/samplot\">samplot</a>\n    </nav>\n\n    <div class=\"modal fade\" id=\"filter-modal\" tabindex=\"-1\" role=\"dialog\" aria-labelledby=\"filter-modal\"\n        aria-hidden=\"true\">\n        <div class=\"modal-dialog\" role=\"document\">\n            <div class=\"modal-content\">\n                <div class=\"modal-header\">\n                    <div class=\"flex-column\">\n                        <h5 class=\"modal-title\" id=\"filter-modal\">Filters</h5>\n                        <h7 class=\"pl-2 text-secondary\" id=\"variant-count\">\n                            <a href=\"javascript:dc.filterAll(); dc.renderAll();\">Reset All</a>\n                        </h7>\n                    </div>\n                </div>\n                <div class=\"modal-body\">\n                    <div class=\"container\">\n                        <div class=\"row 
pt-2\">\n                            <div class=\"col\">\n                                <h5>Sample</h5>\n                            </div>\n                        </div>\n                        <div class=\"row pb-3\">\n                            <div class=\"col-12\">\n                                <div id=\"sample-search\"></div>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"nsamples-chart\">\n                            <div class=\"col-4\">\n                                <h5># of Samples</h5>\n                            </div>\n                            <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:nsamplesChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"size-chart\">\n                            <div class=\"col-4\">\n                                <h5>Size</h5>\n                            </div>\n                            <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:sizeChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"type-chart\">\n                            <div class=\"col-4\">\n                                <h5>SV Type</h5>\n                            </div>\n           
                 <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:typeChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"chrom-chart\">\n                            <div class=\"col-4\">\n                                <h5>Chromosome</h5>\n                            </div>\n                            <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:chromChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                        <div class=\"row\" id=\"overlaps-chart\" hidden>\n                            <div class=\"col-4\">\n                                <h5>SV Overlaps</h5>\n                            </div>\n                            <div class=\"col-8 text-right\">\n                                <span class=\"reset text-muted\" style=\"display: none;\">[<span\n                                        class=\"filter\"></span>]</span>\n                                <a class=\"reset\" href=\"javascript:overlapsChart.filterAll();dc.redrawAll();\"\n                                    style=\"display: none;\">Reset</a>\n                            </div>\n                        </div>\n                    </div>\n                </div>\n                <div class=\"modal-footer\">\n                    <button 
type=\"button\" class=\"btn btn-outline-secondary\" data-dismiss=\"modal\"\n                        onclick=\"javascript:dc.filterAll(); dc.renderAll();\"\n                        title=\"Clear selection and close\">Cancel</button>\n                    <button type=\"button\" class=\"btn btn-primary\" data-dismiss=\"modal\"\n                        title=\"Apply filters\">Apply</button>\n                </div>\n            </div>\n        </div>\n    </div>\n\n    <div class=\"container-fluid h-90\">\n        <div class=\"row\" id=\"variant-table-placeholder\">\n            <div class=\"col-12\">\n                <div style=\"height:415px\">\n                    <div class=\"d-flex justify-content-center align-items-center text-muted h-100\">\n                        <div class=\"d-flex flex-column\">\n                            <i class=\"fas fa-10x fa-table\"></i>\n                        </div>\n                    </div>\n                </div>\n            </div>\n        </div>\n\n        <div class=\"row pb-1\" id=\"variant-table-div\" hidden>\n            <div class=\"col-12\">\n                <div class=\"table-responsive\">\n                    <table id=\"variant-table\" class=\"table table-hover display nowrap\" width=\"100%\"></table>\n                </div>\n            </div>\n        </div>\n    </div>\n</body>\n\n<script>\n    const data = [{\"chrom\": \"1\", \"end\": 24807302, \"nsamples\": 2, \"samples\": \"HG002;HG004\", \"start\": 24804397, \"svlength\": 2905, \"svtype\": \"DEL\"}, {\"chrom\": \"4\", \"end\": 99817098, \"nsamples\": 3, \"samples\": \"HG002;HG003;HG004\", \"start\": 99813786, \"svlength\": 3312, \"svtype\": \"DUP\"}, {\"chrom\": \"11\", \"end\": 67975639, \"nsamples\": 2, \"samples\": \"HG002;HG003\", \"start\": 67974431, \"svlength\": 1208, \"svtype\": \"DUP\"}, {\"chrom\": \"12\", \"end\": 12546613, \"nsamples\": 3, \"samples\": \"HG002;HG003;HG004\", \"start\": 12544867, \"svlength\": 1746, \"svtype\": \"INV\"}, {\"chrom\": 
\"19\", \"end\": 12698924, \"nsamples\": 3, \"samples\": \"HG002;HG003;HG004\", \"start\": 12694866, \"svlength\": 4058, \"svtype\": \"DEL\"}]\n    const plot_type = \"png\"\n    const annotation = false\n    const denovo = false\n\n    dc.config.defaultColors(d3.schemeSet1)\n\n    // plot constraints\n    const plotw = 585\n    const ploth = 150\n\n    // table filters\n    var searchInput = dc.textFilterWidget(\"#sample-search\")\n    var nsamplesChart = dc.barChart(\"#nsamples-chart\")\n    var sizeChart = dc.barChart(\"#size-chart\")\n    var typeChart = dc.barChart(\"#type-chart\")\n    var chromChart = dc.barChart(\"#chrom-chart\")\n    var overlapsChart\n    // shows filter impact in modal header\n    var variantCount = dc.dataCount(\"#variant-count\")\n\n    // used to access filtered table data\n    var chromDimension\n    // datatables obj\n    var variant_table\n    // crossfilter obj\n    var ndx\n\n    $('#filter-modal').on('hidden.bs.modal', function () {\n        update_table()\n    })\n\n    const table_click = (selection, table) => {\n        table.$('tr.selected').removeClass('selected')\n        $(selection).addClass('selected')\n        let current = $('tr.selected')\n        let next = current.next()\n        let prev = current.prev()\n\n        let row = table.rows('.selected').data()[0]\n        let img = new Image()\n        img.src = `${row.svtype}_${row.chrom}_${row.start}_${row.end}.${plot_type}`\n        let viewer = new Viewer(img, {\n            hidden: function () {\n                viewer.destroy()\n            },\n            title: function () {\n                return `${row.svtype} on chromosome ${row.chrom} at ${row.start}-${row.end}`\n            },\n            toolbar: {\n                zoomIn: 4,\n                zoomOut: 4,\n                oneToOne: 4,\n                reset: 4,\n                prev: {\n                    show: prev.length > 0 ? 
true : false,\n                    size: \"large\",\n                    click: function () {\n                        viewer.destroy()\n                        table_click(prev, table)\n                    }\n                },\n                play: { show: false },\n                next: {\n                    show: next.length > 0 ? true : false,\n                    size: \"large\",\n                    click: function () {\n                        viewer.destroy()\n                        table_click(next, table)\n                    }\n                },\n                rotateLeft: { show: false },\n                rotateRight: { show: false },\n                flipHorizontal: { show: false },\n                flipVertical: { show: false },\n            },\n            transition: false,\n            navbar: false,\n        })\n        viewer.show()\n    }\n\n    function build_table(data) {\n        // hide the placeholder and show the datatable\n        d3.select('#variant-table-placeholder').property(\"hidden\", true)\n        d3.select('#variant-table-div').property(\"hidden\", false)\n\n        let cols = [\n            { data: 'chrom', title: 'Chrom' },\n            { data: 'start', title: 'Start' },\n            { data: 'end', title: 'End' },\n            { data: 'svlength', title: 'Size' },\n            { data: 'svtype', title: 'SV Type' },\n            { data: 'nsamples', title: '# of Samples' },\n            { data: 'samples', title: 'Samples' },\n        ]\n        if (annotation) {\n            d3.select('#overlaps-chart').property(\"hidden\", false)\n            cols.push({ data: 'overlaps', title: 'Overlaps' })\n        }\n        if (denovo) {\n            cols.push({ data: 'dn', title: 'De novo' })\n        }\n\n        variant_table = $(\"#variant-table\").DataTable({\n            data: data,\n            columns: cols,\n            deferRender: true,\n            scrollY: '80vh',\n            scrollCollapse: true,\n            scroller: 
true,\n            info: true,\n            buttons: [\n                'copyHtml5', 'csvHtml5'\n            ],\n            infoCallback: (oSettings, iStart, iEnd, iMax, iTotal, sPre) => {\n                return `\n            <span class=\"datatable-info\">\n                <span class=\"pr-2\">Showing <b>${iStart}</b> - <b>${iEnd}</b> of <b>${iTotal}</b> records</span>\n                <button type=\"button\" class=\"btn btn-primary btn-sm\" data-toggle=\"modal\" data-target=\"#filter-modal\" title=\"Show filters\">\n                    <span class=\"fas fa-filter\"></span>\n                </button>\n                <span class=\"dropup\">\n                    <button type=\"button\" class=\"btn btn-sm btn-primary dropdown-toggle\" id=\"download-menu\" title=\"Save table\" data-toggle=\"dropdown\" aria-haspopup=\"true\" aria-expanded=\"false\">\n                        <span class=\"fas fa-save\"></span>\n                    </button>\n                    <span class=\"dropdown-menu\" aria-labelledby=\"download-menu\">\n                        <h6 class=\"dropdown-header\">Save ${iTotal} rows as:</h6>\n                        <button class=\"dropdown-item\" type=\"button\" id=\"csv-button-download\" onclick=\"csv_button_click()\">\n                            CSV\n                        </button>\n                        <button class=\"dropdown-item\" type=\"button\" id=\"copy-button-download\" onclick=\"copy_button_click()\">\n                            Copy\n                        </button>\n                    </span>\n                </span>\n            </span>\n            `\n            },\n            columnDefs: [\n                {\n                    targets: (annotation ? 
[0, 1, 2, 3, 4, 5, 7] : [0, 1, 2, 3, 4, 5]),\n                    width: '15%'\n                },\n                // https://datatables.net/blog/2016-02-26\n                {\n                    targets: 6,\n                    render: function (data, type, row) {\n                        if (type === 'display' && data != null) {\n                            data = data.replace(/<(?:.|\\n)*?>/gm, '');\n                            if (data.length > 40) {\n                                return '<span class=\"show-ellipsis\">' + data.substr(0, 40) + '</span><span class=\"no-show\">' + data.substr(40) + '</span>';\n                            } else {\n                                return data;\n                            }\n                        } else {\n                            return data;\n                        }\n                    }\n                }\n            ],\n            // search is applied using crossfilter\n            searching: false,\n            lengthChange: false,\n            order: [[0, 'asc'], [1, 'asc']],\n        })\n\n        // register table clicks on sample_column\n        variant_table.on('click', 'tr', function () {\n            table_click(this, variant_table)\n        })\n    }\n\n    function csv_button_click() {\n        variant_table.button('.buttons-csv').trigger()\n    }\n\n    function copy_button_click() {\n        variant_table.button('.buttons-copy').trigger()\n    }\n\n    function update_table() {\n        variant_table.clear()\n        variant_table.rows.add(chromDimension.top(Infinity))\n        variant_table.draw()\n    }\n\n    function remove_empty_bins(source_group) {\n        return {\n            all: function () {\n                return source_group.all().filter(function (d) {\n                    return d.value != 0\n                })\n            }\n        }\n    }\n\n    // https://jsfiddle.net/gordonwoodhull/g34Ldwaz/8/\n    // https://github.com/dc-js/dc.js/issues/348\n    function 
index_group(group) {\n        return {\n            all: function () {\n                return group.all().map(function (kv, i) {\n                    return { key: i, value: kv.value }\n                })\n            }\n        }\n    }\n\n    $(document).ready(function () {\n\n        ndx = crossfilter(data)\n        var all = ndx.groupAll()\n\n        chromDimension = ndx.dimension((d) => { return d.chrom })\n        build_table(chromDimension.top(Infinity))\n        var chromGroup = chromDimension.group().reduceCount()\n        var nonEmptyChromGroup = remove_empty_bins(chromGroup)\n\n        var searchDimension = ndx.dimension(function (d) {\n            return d.samples\n        })\n        searchInput\n            .dimension(searchDimension)\n            .on('renderlet', function () {\n                d3.selectAll(\".dc-text-filter-input\")\n                    .classed(\"form-control\", true)\n                d3.selectAll(\"#sample-search.dc-chart\")\n                    .classed(\"col-12\", true)\n            })\n\n        var sizeDimension = ndx.dimension(function (d) {\n            var round\n            if (d.svlength < 100) {\n                round = 100\n            } else if (d.svlength < 1000) {\n                round = 100\n            } else if (d.svlength < 10000) {\n                round = 1000\n            } else if (d.svlength < 100000) {\n                round = 10000\n            } else if (d.svlength < 1000000) {\n                round = 100000\n            } else if (d.svlength < 10000000) {\n                round = 1000000\n            } else {\n                round = 10000000\n            }\n            return Math.round(d.svlength / round) * round\n        })\n        var sizeGroup = sizeDimension.group().reduceCount()\n        var nonEmptySizeGroup = remove_empty_bins(sizeGroup)\n        // for brushing, need to track keys at numeric indexes\n        var sizeKeys = nonEmptySizeGroup.all().map(dc.pluck('key')).slice()\n\n        var 
typeDimension = ndx.dimension((d) => { return d.svtype })\n        var typeGroup = typeDimension.group().reduceCount()\n        var nonEmptyTypeGroup = remove_empty_bins(typeGroup)\n\n        var nsamplesDimension = ndx.dimension((d) => { return d.nsamples })\n        var nsamplesDimension = ndx.dimension(function (d) {\n            var round\n            if (d.nsamples < 10) {\n                round = 1\n            } else if (d.nsamples < 100) {\n                round = 10\n            } else if (d.nsamples < 1000) {\n                round = 100\n            } else if (d.nsamples < 10000) {\n                round = 1000\n            } else {\n                round = 10000\n            }\n            return Math.round(d.nsamples / round) * round\n        })\n        var nsamplesGroup = nsamplesDimension.group().reduceCount()\n        var nonEmptyNsamplesGroup = remove_empty_bins(nsamplesGroup)\n        var nsamplesKeys = nonEmptyNsamplesGroup.all().map(dc.pluck('key')).slice()\n\n        // number of samples\n        nsamplesChart\n            .width(plotw).height(ploth).gap(1)\n            .margins({ top: 10, right: 50, bottom: 30, left: 40 })\n            .x(d3.scaleLinear().domain([0, nsamplesKeys.length]))\n            .round(Math.floor)\n            .brushOn(true)\n            .elasticX(true)\n            .dimension(nsamplesDimension)\n            .group(index_group(nonEmptyNsamplesGroup))\n            .elasticY(true)\n            .yAxisLabel('Count')\n            .filterPrinter(function (filters) {\n                var filter = filters[0]\n                return nsamplesKeys[filter[0]] + ' - ' + nsamplesKeys[filter[1]]\n            })\n        // limit the number of labels along x-axis\n        nsamplesChart.xAxis().ticks(20)\n        nsamplesChart.yAxis().ticks(5)\n        // update labels from keys\n        nsamplesChart.xAxis().tickFormat(function (v) {\n            return nsamplesKeys[v]\n        })\n        nsamplesChart.filterHandler(function 
(dimension, filters) {\n            if (filters.length === 0) {\n                // the empty case (no filtering)\n                dimension.filter(null)\n            } else {\n                dimension.filterRange([nsamplesKeys[filters[0][0]], nsamplesKeys[filters[0][1]]])\n            }\n            return filters\n        })\n\n        // SV length\n        sizeChart\n            .width(plotw).height(ploth).gap(1)\n            .margins({ top: 10, right: 50, bottom: 30, left: 40 })\n            .x(d3.scaleLinear().domain([0, sizeKeys.length]))\n            .round(Math.floor)\n            .brushOn(true)\n            .elasticX(true)\n            .dimension(sizeDimension)\n            .group(index_group(nonEmptySizeGroup))\n            .elasticY(true)\n            .yAxisLabel('Count')\n            .filterPrinter(function (filters) {\n                var filter = filters[0]\n                return sizeKeys[filter[0]] + ' - ' + sizeKeys[filter[1]]\n            })\n            // adds left padding to plots inside filtering panel\n            .on('renderlet', function () {\n                d3.selectAll(\"svg\")\n                    .classed(\"pl-3\", true)\n            })\n        // limit the number of labels along x-axis\n        sizeChart.xAxis().ticks(10)\n        sizeChart.yAxis().ticks(5)\n        // update labels from keys\n        sizeChart.xAxis().tickFormat(function (v) {\n            return sizeKeys[v]\n        })\n        // update the status format for this chart\n        sizeChart.filterHandler(function (dimension, filters) {\n            if (filters.length === 0) {\n                // the empty case (no filtering)\n                dimension.filter(null)\n            } else {\n                dimension.filterRange([sizeKeys[filters[0][0]], sizeKeys[filters[0][1]]])\n            }\n            return filters\n        })\n\n        // sv type\n        typeChart\n            .width(plotw).height(ploth).gap(1)\n            .margins({ top: 10, right: 50, 
bottom: 30, left: 40 })\n            .x(d3.scaleBand())\n            .xUnits(dc.units.ordinal)\n            .elasticX(true)\n            .elasticY(true)\n            .dimension(typeDimension)\n            .group(nonEmptyTypeGroup)\n            .yAxisLabel('Count')\n        typeChart.yAxis().ticks(5)\n\n        // chromosome\n        chromChart\n            .width(plotw).height(ploth).gap(1)\n            .margins({ top: 10, right: 50, bottom: 30, left: 40 })\n            .x(d3.scaleBand())\n            .xUnits(dc.units.ordinal)\n            .yAxisLabel('Count')\n            .elasticX(true)\n            .elasticY(true)\n            .dimension(chromDimension)\n            .group(nonEmptyChromGroup)\n            .ordering((d) => {\n                v = parseInt(d.key)\n                if (v) {\n                    return v\n                } else {\n                    return d.key\n                }\n            })\n        chromChart.yAxis().ticks(5)\n\n        // overlaps\n        if (annotation) {\n            var overlapsDimension = ndx.dimension((d) => { return d.overlaps })\n            var overlapsGroup = overlapsDimension.group().reduceCount()\n            var nonEmptyOverlapsGroup = remove_empty_bins(overlapsGroup)\n            overlapsChart = dc.barChart(\"#overlaps-chart\")\n            overlapsChart\n                .width(plotw).height(ploth).gap(1)\n                .margins({ top: 10, right: 50, bottom: 30, left: 40 })\n                .x(d3.scaleBand())\n                .xUnits(dc.units.ordinal)\n                .elasticX(true)\n                .elasticY(true)\n                .dimension(overlapsDimension)\n                .group(nonEmptyOverlapsGroup)\n                .yAxisLabel('Count')\n            overlapsChart.yAxis().ticks(5)\n        }\n\n        variantCount\n            .crossfilter(ndx)\n            .groupAll(all)\n            // (_optional_) `.html` sets different html when some records or all records are selected.\n            // `.html` 
replaces everything in the anchor with the html given using the following function.\n            // `%filter-count` and `%total-count` are replaced with the values obtained.\n            .html({\n                some: '<strong>%filter-count</strong> selected out of <strong>%total-count</strong> records' +\n                    ' | <a href=\\'javascript:dc.filterAll(); dc.renderAll();\\'>Reset All</a>',\n                all: '<strong>%total-count</strong> records'\n            });\n\n        dc.renderAll()\n    })\n\n</script>\n\n</html>\n"
  },
  {
    "path": "test/data/test_site_cmds.sh",
    "content": "samplot -z 4 --minq 0 -n HG002 HG004 control-sample:HG003 --start_ci '0,0' --end_ci '0,0' -t DEL -c 1 -s 24804397 -e 24807302 -o test_site/DEL_1_24804397_24807302.png -d 1 -b HG002_Illumina.bam HG004_Illumina.bam HG003_Illumina.bam\nsamplot -z 4 --minq 0 -n HG002 HG003 HG004 --start_ci '0,0' --end_ci '0,0' -t DUP -c 4 -s 99813786 -e 99817098 -o test_site/DUP_4_99813786_99817098.png -d 1 -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\nsamplot -z 3 --minq 0 -n HG002 HG003 control-sample:HG004 --start_ci '0,0' --end_ci '0,0' -t DUP -c 11 -s 67974431 -e 67975639 -o test_site/DUP_11_67974431_67975639.png -d 1 -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\nsamplot -z 3 --minq 0 -n HG002 HG003 HG004 --start_ci '0,0' --end_ci '0,0' -t INV -c 12 -s 12544867 -e 12546613 -o test_site/INV_12_12544867_12546613.png -d 1 -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\nsamplot -z 4 --minq 0 -n HG002 HG003 HG004 --start_ci '0,0' --end_ci '0,0' -t DEL -c 19 -s 12694866 -e 12698924 -o test_site/DEL_19_12694866_12698924.png -d 1 -b HG002_Illumina.bam HG003_Illumina.bam HG004_Illumina.bam\n"
  },
  {
    "path": "test/func/samplot_test.sh",
    "content": "#!/bin/bash\n\ntest -e ssshtest || wget -q https://raw.githubusercontent.com/ryanlayer/ssshtest/master/ssshtest\n. ssshtest\n\nSTOP_ON_FAIL=0\ndata_path=\"test/data/\"\nfunc_path=\"test/func/\"\n\nbam_1=$data_path\"NA12878_restricted.bam\"\nbam_2=$data_path\"NA12889_restricted.bam\"\nbam_3=$data_path\"NA12890_restricted.bam\"\n\nsv_chrm=chr4\nsv_start=115928730\nsv_end=115931875\nsv_type=DEL\nout_file_name=$func_path\"test_del.png\"\n\nrm -f $out_file_name\nrun basic_operation \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3 \\\n        -o $out_file_name \\\n        -t $sv_type\nif [ $basic_operation ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nsv_chrm=chr4\nsv_start=115928730\nsv_end=115931875\nsv_type=DEL\nout_file_name=$func_path\"test_max_coverage.png\"\n\nrm -f $out_file_name\nrun max_coverage \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3 \\\n        -o $out_file_name \\\n        --max_coverage 50\\\n        -t $sv_type\nif [ $max_coverage ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\n\nrm $out_file_name\n\nsv_chrm=chr4\nsv_start=115928730\nsv_end=115931875\nsv_type=DEL\nout_file_name=$func_path\"test_coverage_only.png\"\n\nrm -f $out_file_name\nrun coverage_only \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 \\\n        -o $out_file_name \\\n        --coverage_only \\\n        -t $sv_type\nif [ $coverage_only ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"test_same_yaxis.png\"\n\nsv_chrm=chrX\nsv_start=101055330\nsv_end=101067156\nsv_type=DUP\nrm -f 
$out_file_name\nrun same_yaxis \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3\\\n        -o $out_file_name \\\n        -t $sv_type \\\n        --same_yaxis_scales\nif [ $same_yaxis ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nsv_chrm=chr4\nsv_start=115928730\nsv_end=115931875\nsv_type=DEL\nout_file_name=$func_path\"test_zoom.png\"\nrm -f $out_file_name\nrun basic_operation_zoom \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3 \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        --zoom 500\nif [ $basic_operation_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"sample.png\"\nrun sampling_normal \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3 \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10\nif [ $sampling_normal ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"sample_zoom.png\"\nrun sampling_normal_zoom \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3 \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \\\n        --zoom 500\nif [ $sampling_normal_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nsv_chrm=chrX\nsv_start=101055330\nsv_end=101067156\nsv_type=DUP\nout_file_name=$func_path\"dup.png\"\nrm -f $out_file_name\n\nrun common_insert_size_scale \\\n    samplot plot\\\n        -c $sv_chrm -s 
$sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3 \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \\\n        --common_insert_size\nif [ $common_insert_size_scale ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"dup_zoom.png\"\nrm -f $out_file_name\nrun common_insert_size_scale_zoom \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3 \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \\\n        --zoom 500 \\\n        --common_insert_size\nif [ $common_insert_size_scale_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\n\nout_file_name=$func_path\"no_sv_type.png\"\nrm -f $out_file_name\n\nrun no_sv_type \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam_1 $bam_2 $bam_3 \\\n        -o $out_file_name \\\n        -d 10 \\\n        --common_insert_size\nif [ $no_sv_type ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nsv_chrm=X\nsv_start=101055330\nsv_end=101067156\nsv_type=DUP\nout_file_name=$func_path\"longread_nanopore_dup.png\"\nbam=$data_path\"nanopore-NA12878.bam\"\nrm -f $out_file_name\n\nrun nanopore_dup \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \nif [ $nanopore_dup ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"longread_nanopore_dup_zoom.png\"\nrm -f 
$out_file_name\nrun nanopore_dup_zoom \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10  \\\n        --zoom 1000\nif [ $nanopore_dup_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\nfi\nrm $out_file_name\n\nsv_chrm=4\nsv_start=115928730\nsv_end=115931875\nsv_type=DEL\nout_file_name=$func_path\"longread_nanopore_del.png\"\nbam=$data_path\"nanopore-NA12878.bam\"\nrm -f $out_file_name\nrun nanopore_del \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \nif [ $nanopore_del ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"longread_nanopore_del_zoom.png\"\nrm -f $out_file_name\nrun nanopore_del_zoom \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10  \\\n        --zoom 500\nif [ $nanopore_del_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\nfi\nrm $out_file_name\n\nsv_chrm=chr1\nsv_start=58343117\nsv_end=58343622\nsv_type=DEL\nout_file_name=$func_path\"longread_del.png\"\nbam=$data_path\"hg19_chr1_58343117_58343622_deletion.bam\"\nrm -f $out_file_name\nrun longread_del \\\n    samplot plot \\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \nif [ $longread_del ]; then\n    assert_exit_code 0\n    
assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"longread_del_zoom_big_zoom.png\"\nrm -f $out_file_name\nrun longread_del_zoom_big_zoom \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10  \\\n        --zoom 500\nif [ $longread_del_zoom_big_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\n    assert_no_stdout\nfi\nrm $out_file_name\n\n\nout_file_name=$func_path\"longread_del_zoom_zoom.png\"\nrm -f $out_file_name\nrun longread_del_zoom_zoom \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10  \\\n        --zoom 200\nif [ $longread_del_zoom_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\nfi\nrm $out_file_name\n\nsv_chrm=chr21\nsv_start=27373431\nsv_end=27375410\nsv_type=INV\nout_file_name=$func_path\"longread_inv.png\"\nbam=$data_path\"hg19_chr21_27373431_27375410_inversion.bam\"\nrm -f $out_file_name\nrun longread_inv \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \nif [ $longread_inv ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"longread_inv_zoom.png\"\nrm -f $out_file_name\nrun longread_inv_zoom \\\n    samplot plot\\\n        -c $sv_chrm -s 
$sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10  \\\n        --zoom 750\nif [ $longread_inv_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_in_stderr \"Insufficient reads for fragment length estimate.\"\nfi\nrm $out_file_name\n\nsv_chrm=1\nsv_start=89475845\nsv_end=89478561\nsv_type=DEL\nout_file_name=$func_path\"linkedread_del.png\"\nbam=$data_path\"HG002_1_89475845-89478561_DEL.tenx.bam\"\nrm -f $out_file_name\nrun linkedread_del \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \nif [ $linkedread_del ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\nout_file_name=$func_path\"linkedread_del_zoom.png\"\nrm -f $out_file_name\nrun linkedread_del_zoom \\\n    samplot plot\\\n        -c $sv_chrm -s $sv_start -e $sv_end \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -d 10 \\\n        --zoom 500\nif [ $linkedread_del_zoom ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\n\nsv_chrm_1=2\nsv_start_1=59405943\nsv_end_1=59405943\nsv_chrm_2=X\nsv_start_2=151118533\nsv_end_2=151118533\nsv_type=BND\nout_file_name=$func_path\"translocation.png\"\nbam=$data_path\"2_59305747-59505747_X_151018513-151218513.BND.bam\"\nrun translocation \\\n    samplot plot\\\n        -c $sv_chrm_1 -s $sv_start_1 -e $sv_end_1 \\\n        -c $sv_chrm_2 -s $sv_start_2 -e $sv_end_2 \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -A $data_path\"Alu.2_X.bed.gz\" \\\n        -T $data_path\"Homo_sapiens.GRCh37.82.sort.2_X.gff3.gz\" \\\n        --zoom 10000\nif [ 
$translocation ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\n\nout_file_name=$func_path\"csi-annotation.png\"\nbam=$data_path\"2_59305747-59505747_X_151018513-151218513.BND.bam\"\nrun translocation \\\n    samplot plot\\\n        -c $sv_chrm_1 -s $sv_start_1 -e $sv_end_1 \\\n        -c $sv_chrm_2 -s $sv_start_2 -e $sv_end_2 \\\n        -b $bam \\\n        -o $out_file_name \\\n        -t $sv_type \\\n        -A $data_path\"Alu.2_X.csionly.bed.gz\" \\\n        -T $data_path\"Homo_sapiens.GRCh37.csionly.2_X.gff3.gz\" \\\n        --zoom 10000\nif [ $translocation ]; then\n    assert_exit_code 0\n    assert_equal $out_file_name $( ls $out_file_name )\n    assert_no_stdout\n    assert_no_stderr\nfi\nrm $out_file_name\n\n\nrm -rf $func_path\"img/\" ssshtest\n"
  },
  {
    "path": "test/func/samplot_vcf_test.sh",
    "content": "#!/bin/bash\n\ntest -e ssshtest || wget -q https://raw.githubusercontent.com/ryanlayer/ssshtest/master/ssshtest\n. ssshtest\n\nSTOP_ON_FAIL=0\ndata_path=\"test/data/\"\nfunc_path=\"test/func/\"\n\nbam_1=$data_path\"NA12878_restricted.bam\"\nbam_2=$data_path\"NA12889_restricted.bam\"\nbam_3=$data_path\"NA12890_restricted.bam\"\n\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_vcf_dir\"\nrm -f $cmd_file\nrm -rf $test_dir\nrun from_vcf \\\n    samplot vcf \\\n        -d $test_dir \\\n        --vcf $vcf_file \\\n        --sample_ids HG002 HG003 HG004 \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" \\\n        $data_path\"HG004_Illumina.bam\" \\\n        --manual_run\\\n        --command_file $cmd_file\nif [ $from_vcf ]; then\n    assert_no_stderr\n    assert_exit_code 0\n    assert_equal $test_dir/index.html $( ls $test_dir/index.html )\n    assert_equal $cmd_file $( ls $cmd_file )\nfi\nrm -f $cmd_file\nrm -rf $test_dir\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_vcf_gff3_dir\"\nrm -f $cmd_file\nrm -rf $test_dir\nrun from_vcf_gff3 \\\n    samplot vcf \\\n        -d $test_dir \\\n        --vcf $vcf_file \\\n        --sample_ids HG002 HG003 HG004 \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" \\\n        $data_path\"HG004_Illumina.bam\" \\\n        --gff3 $data_path\"Homo_sapiens.GRCh37.82.sort.2_X.gff3.gz\"\\\n        --manual_run\\\n        --command_file $cmd_file\nif [ $from_vcf_gff3 ]; then\n    assert_no_stderr\n    assert_exit_code 0\n    assert_equal $test_dir/index.html $( ls $test_dir/index.html )\n    assert_equal $cmd_file $( ls $cmd_file )\nfi\nrm -f $cmd_file\nrm -rf $test_dir\n\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_vcf_gff3_dir\"\nrm -f $cmd_file\nrm -rf $test_dir\nrun from_vcf_annotated \\\n    
samplot vcf \\\n        -d $test_dir \\\n        --vcf $vcf_file \\\n        --sample_ids HG002 HG003 HG004 \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" \\\n        $data_path\"HG004_Illumina.bam\" \\\n        -T $data_path\"Homo_sapiens.GRCh37.82.sort.2_X.gff3.gz\"\\\n        -A $data_path\"Alu.2_X.bed.gz\" \\\n        --manual_run\\\n        --command_file $cmd_file\nif [ $from_vcf_annotated ]; then\n    assert_no_stderr\n    assert_exit_code 0\n    assert_equal $test_dir/index.html $( ls $test_dir/index.html )\n    assert_equal $cmd_file $( ls $cmd_file )\nfi\nrm -f $cmd_file\nrm -rf $test_dir\n\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_vcf_auto_dir\"\nrm -rf $test_dir\nrun from_vcf_auto \\\n    samplot vcf \\\n        -d $test_dir \\\n        --vcf $vcf_file \\\n        --sample_ids HG002 HG003 HG004 \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" \\\n        $data_path\"HG004_Illumina.bam\" \nif [ $from_vcf_auto ]; then\n    assert_in_stderr \"Window size is under 1.5x the estimated fragment length and will be resized to 847. 
Rerun with -w 604 to override\"\n    assert_exit_code 0\n    assert_equal $test_dir/index.html $( ls $test_dir/index.html )\n    assert_equal $test_dir/DEL_1_24804397_24807302.png $( ls $test_dir/DEL_1_24804397_24807302.png )\n    assert_equal $test_dir/DUP_4_99813786_99817098.png $( ls $test_dir/DUP_4_99813786_99817098.png )\n    assert_equal $test_dir/DUP_11_67974431_67975639.png $( ls $test_dir/DUP_11_67974431_67975639.png )\n    assert_equal $test_dir/INV_12_12544867_12546613.png $( ls $test_dir/INV_12_12544867_12546613.png )\n    assert_equal $test_dir/DEL_19_12694866_12698924.png $( ls $test_dir/DEL_19_12694866_12698924.png )\n    assert_equal $test_dir/TRA_1_24804398_43059290.png $( ls $test_dir/TRA_1_24804398_43059290.png )\n    assert_equal $test_dir/TRA_1_24804399_99813787.png $( ls $test_dir/TRA_1_24804399_99813787.png )\nfi\nrm -f $cmd_file\nrm -rf $test_dir\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_vcf_auto_multithread_dir\"\nrm -rf $test_dir\nrun from_vcf_auto_multithread \\\n    samplot vcf \\\n        -d $test_dir \\\n        --vcf $vcf_file \\\n        --sample_ids HG002 HG003 HG004 \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" \\\n        $data_path\"HG004_Illumina.bam\" \\\n        -t 2\nif [ $from_vcf_auto_multithread ]; then\n    assert_in_stderr \"Window size is under 1.5x the estimated fragment length and will be resized to 847. 
Rerun with -w 604 to override\"\n    assert_exit_code 0\n    assert_equal $test_dir/index.html $( ls $test_dir/index.html )\n    assert_equal $test_dir/DEL_1_24804397_24807302.png $( ls $test_dir/DEL_1_24804397_24807302.png )\n    assert_equal $test_dir/DUP_4_99813786_99817098.png $( ls $test_dir/DUP_4_99813786_99817098.png )\n    assert_equal $test_dir/DUP_11_67974431_67975639.png $( ls $test_dir/DUP_11_67974431_67975639.png )\n    assert_equal $test_dir/INV_12_12544867_12546613.png $( ls $test_dir/INV_12_12544867_12546613.png )\n    assert_equal $test_dir/DEL_19_12694866_12698924.png $( ls $test_dir/DEL_19_12694866_12698924.png )\n    assert_equal $test_dir/TRA_1_24804398_43059290.png $( ls $test_dir/TRA_1_24804398_43059290.png )\n    assert_equal $test_dir/TRA_1_24804399_99813787.png $( ls $test_dir/TRA_1_24804399_99813787.png )\nfi\nrm -f $cmd_file\nrm -rf $test_dir\n\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_plotall_dir\"\nrm -f $cmd_file\nrm -rf $test_dir\nrun plot_all \\\n    samplot vcf \\\n        -d $test_dir \\\n        --vcf $vcf_file \\\n        --sample_ids HG002 HG003 HG004 \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" \\\n        $data_path\"HG004_Illumina.bam\" \\\n        --plot_all\nif [ $plot_all ]; then\n    assert_in_stderr \"Window size is under 1.5x the estimated fragment length and will be resized to 847. 
Rerun with -w 604 to override\"\n    assert_exit_code 0\n    assert_equal \"$test_dir/index.html\" $( ls $test_dir/index.html )\n    assert_equal \"$test_dir/DEL_19_12694866_12698924.png\" $( ls \"$test_dir/DEL_19_12694866_12698924.png\" )\n    assert_equal \"$test_dir/DUP_4_99813786_99817098.png\" $( ls \"$test_dir/DUP_4_99813786_99817098.png\" )\n    assert_equal \"$test_dir/DUP_4_99813786_99817098.png\" $( ls \"$test_dir/DUP_4_99813786_99817098.png\" )\n    assert_equal \"$test_dir/TRA_1_24804398_43059290.png\" $( ls $test_dir/TRA_1_24804398_43059290.png )\n    assert_equal \"$test_dir/TRA_1_24804399_99813787.png\" $( ls $test_dir/TRA_1_24804399_99813787.png )\n    assert_equal \"$test_dir/DEL_1_24804397_24807302.png\" $( ls \"$test_dir/DEL_1_24804397_24807302.png\" )\n    assert_equal \"$test_dir/DUP_11_67974431_67975639.png\" $( ls \"$test_dir/DUP_11_67974431_67975639.png\" )\n    assert_equal \"$test_dir/INV_12_12544867_12546613.png\" $( ls \"$test_dir/INV_12_12544867_12546613.png\" )\n\nfi\nrm -f $cmd_file\nrm -rf $test_dir\n\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_plotall_multithread_dir\"\nrm -f $cmd_file\nrm -rf $test_dir\nrun plot_all_multithread \\\n    samplot vcf \\\n        -d $test_dir \\\n        --vcf $vcf_file \\\n        --sample_ids HG002 HG003 HG004 \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" \\\n        $data_path\"HG004_Illumina.bam\" \\\n        --plot_all \\\n        -t 2\nif [ $plot_all_multithread ]; then\n    assert_in_stderr \"Window size is under 1.5x the estimated fragment length and will be resized to 847. 
Rerun with -w 604 to override\"\n    assert_exit_code 0\n    assert_equal \"$test_dir/index.html\" $( ls $test_dir/index.html )\n    assert_equal \"$test_dir/DEL_19_12694866_12698924.png\" $( ls \"$test_dir/DEL_19_12694866_12698924.png\" )\n    assert_equal \"$test_dir/DUP_4_99813786_99817098.png\" $( ls \"$test_dir/DUP_4_99813786_99817098.png\" )\n    assert_equal \"$test_dir/DUP_4_99813786_99817098.png\" $( ls \"$test_dir/DUP_4_99813786_99817098.png\" )\n    assert_equal \"$test_dir/TRA_1_24804398_43059290.png\" $( ls $test_dir/TRA_1_24804398_43059290.png )\n    assert_equal \"$test_dir/TRA_1_24804399_99813787.png\" $( ls $test_dir/TRA_1_24804399_99813787.png )\n    assert_equal \"$test_dir/DEL_1_24804397_24807302.png\" $( ls \"$test_dir/DEL_1_24804397_24807302.png\" )\n    assert_equal \"$test_dir/DUP_11_67974431_67975639.png\" $( ls \"$test_dir/DUP_11_67974431_67975639.png\" )\n    assert_equal \"$test_dir/INV_12_12544867_12546613.png\" $( ls \"$test_dir/INV_12_12544867_12546613.png\" )\n\nfi\nrm -f $cmd_file\nrm -rf $test_dir\n\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_vcf_dir\"\nped_file=$data_path\"test.ped\"\n\nrun denovo_only_noped \\\n    samplot vcf \\\n        -d $test_dir \\\n        --vcf $vcf_file \\\n        --sample_ids HG002 HG003 HG004 \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" \\\n        $data_path\"HG004_Illumina.bam\" \\\n        --dn_only\nif [ $denovo_only_noped ]; then\n    assert_in_stderr \"Missing --ped, required when using --dn_only\"\nfi\n\nvcf_file=$data_path\"test.vcf\"\ncmd_file=$func_path\"test.cmd\"\ntest_dir=$func_path\"test_vcf_dir\"\nped_file=$data_path\"test.ped\"\nrm -f $cmd_file\nrm -rf $test_dir\n\nrun denovo_only \\\n    samplot vcf \\\n        -d $test_dir \\\n        --sample_ids HG002 HG003 HG004 \\\n        --vcf $vcf_file \\\n        -b $data_path\"HG002_Illumina.bam\" \\\n        $data_path\"HG003_Illumina.bam\" 
\\\n        $data_path\"HG004_Illumina.bam\" \\\n        --dn_only \\\n        --ped $data_path\"test.ped\"\nif [ $denovo_only ]; then\n    assert_no_stderr\n    assert_exit_code 0\n    echo \"====================================================================\"\n    ls \"$test_dir/DUP_4_99813786_99817098.png\"\n    echo \"====================================================================\"\n    assert_equal \"$test_dir/DEL_19_12694867_12698924.png\" $( ls \"$test_dir/DEL_19_12694867_12698924.png\" )\n    assert_equal \"\" $( ls \"$test_dir/DUP_4_99813786_99817098.png\" )\n    assert_equal \"\" $( ls \"$test_dir/TRA_1_24804399_43059290.png\" )\n    assert_equal \"\" $( ls \"$test_dir/TRA_1_24804398_99813787.png\" )\n    assert_equal \"\" $( ls \"$test_dir/DEL_1_24804397_24807302.png\" )\nfi\n\n# rm -rf ssshtest\n"
  },
  {
    "path": "test/unit/samplot_test.py",
    "content": "import unittest\nimport sys\n\nfrom samplot import samplot\n\n\nbam_1 = 'test/data/NA12878_restricted.bam'\nbam_2 = 'test/data/NA12889_restricted.bam'\nbam_3 = 'test/data/NA12890_restricted.bam'\nbams=[bam_1, bam_2, bam_3]\n\nsv_chrm = 'chr4'\nsv_start = 115928730\nsv_end = 115931875\nsv_type = 'DEL'\n\n\n#{{{ class Test_set_plot_dimensions(unittest.TestCase):\nclass Test_set_plot_dimensions(unittest.TestCase):\n    #{{{ def test_set_plot_dimensions(self):\n    def test_set_plot_dimensions(self):\n\n        '''\n        def set_plot_dimensions(sv,\n                            sv_type,\n                            arg_plot_height,\n                            arg_plot_width,\n                            bams,\n                            reference,\n                            annotation_files,\n                            transcript_file,\n                            arg_window,\n                            zoom):\n        '''\n        plot_height = None\n        plot_width = None\n\n        annotation_files = None\n        transcript_file = None\n\n        zoom = 500000\n\n        window = None\n\n        sv = [samplot.genome_interval(sv_chrm,sv_start,sv_end)]\n\n        # Test basic function where window is set to be proportional to SV size\n        r_plot_height, r_plot_width, r_window, r_ranges = \\\n            samplot.set_plot_dimensions(sv,\n                                        sv_type,\n                                        plot_height,\n                                        plot_width,\n                                        bams,\n                                        None,\n                                        annotation_files,\n                                        transcript_file,\n                                        window,\n                                        zoom)\n\n        self.assertEqual(r_plot_height, 5)\n        self.assertEqual(r_plot_width, 8)\n        this_window = int((sv_end - sv_start)/2)\n        
self.assertEqual( r_window, this_window)\n        self.assertEqual( r_ranges[0], \n                          samplot.genome_interval(sv_chrm,\n                                                  sv_start - this_window,\n                                                  sv_end + this_window))\n\n        # Test to see if zoom is ignored when it is larger than window\n        zoom = 10000\n        r_plot_height, r_plot_width, r_window, r_ranges = \\\n            samplot.set_plot_dimensions(sv,\n                                        sv_type,\n                                        plot_height,\n                                        plot_width,\n                                        bams,\n                                        None,\n                                        annotation_files,\n                                        transcript_file,\n                                        window,\n                                        zoom)\n\n        self.assertEqual( r_ranges[0], \n                          samplot.genome_interval(sv_chrm,\n                                                  sv_start - this_window,\n                                                  sv_end + this_window))\n\n\n        # Test to see if zoom creates two ranges\n        zoom = 100\n        r_plot_height, r_plot_width, r_window, r_ranges = \\\n            samplot.set_plot_dimensions(sv,\n                                        sv_type,\n                                        plot_height,\n                                        plot_width,\n                                        bams,\n                                        None,\n                                        annotation_files,\n                                        transcript_file,\n                                        window,\n                                        zoom)\n\n        self.assertEqual( r_window, zoom)\n        self.assertEqual( len(r_ranges), 2)\n        self.assertEqual( r_ranges[0], \n             
             samplot.genome_interval(sv_chrm,\n                                                  sv_start - zoom,\n                                                  sv_start + zoom,))\n        self.assertEqual( r_ranges[1], \n                          samplot.genome_interval(sv_chrm,\n                                                  sv_end - zoom,\n                                                  sv_end + zoom) )\n\n\n        # Test multiple SV regions\n        window = None\n        zoom = None\n        sv = [samplot.genome_interval(sv_chrm,sv_start,sv_start),\n              samplot.genome_interval(sv_chrm,sv_end,sv_end)]\n        r_plot_height, r_plot_width, r_window, r_ranges = \\\n            samplot.set_plot_dimensions(sv,\n                                        sv_type,\n                                        plot_height,\n                                        plot_width,\n                                        bams,\n                                        None,\n                                        annotation_files,\n                                        transcript_file,\n                                        window,\n                                        zoom)\n\n        self.assertEqual( len(r_ranges), 2)\n        self.assertEqual( r_ranges[0], \n                          samplot.genome_interval(sv_chrm,\n                                                  sv_start-1000,\n                                                  sv_start+1000) )\n        self.assertEqual( r_ranges[1], \n                          samplot.genome_interval(sv_chrm,\n                                                  sv_end-1000,\n                                                  sv_end+1000) )\n    #}}}\n\n    #{{{def test_get_read_data(self):\n    def test_get_read_data(self):\n        '''\n        read_data,max_coverage = get_read_data(ranges,\n                                               options.bams,\n                                               
options.reference,\n                                               options.separate_mqual,\n                                               options.include_mqual,\n                                               options.coverage_only,\n                                               options.long_read,\n                                               options.same_yaxis_scales,\n                                               options.max_depth,\n                                               options.z,\n                                               options.ignore_hp)\n        '''\n\n        plot_height = None\n        plot_width = None\n\n        annotation_files = None\n        transcript_file = None\n\n        zoom = 500000\n\n        window = None\n\n        sv = [samplot.genome_interval(sv_chrm,sv_start,sv_end)]\n\n        # Test basic function where window is set to be proportional to SV size\n        r_plot_height, r_plot_width, r_window, r_ranges = \\\n            samplot.set_plot_dimensions(sv,\n                                        sv_type,\n                                        plot_height,\n                                        plot_width,\n                                        bams,\n                                        None,\n                                        annotation_files,\n                                        transcript_file,\n                                        window,\n                                        zoom)\n\n        reference = None\n        separate_mqual = 0\n        include_mqual = 1\n        coverage_only = None\n        long_read = 1000\n        long_event_size = 100\n        same_yaxis_scales = None\n        max_depth = 100\n        z = 4\n        ignore_hp = False\n\n        read_data,max_coverage = samplot.get_read_data(r_ranges,\n                                                       bams,\n                                                       reference,\n                                                       
separate_mqual,\n                                                       include_mqual,\n                                                       coverage_only,\n                                                       long_read,\n                                                       long_event_size,\n                                                       same_yaxis_scales,\n                                                       max_depth,\n                                                       z,\n                                                       ignore_hp)\n    #}}}\n#}}}\n\n#{{{ class Test_genome_interval(unittest.TestCase):\nclass Test_genome_interval(unittest.TestCase):\n    #{{{ def test_init(self):\n    def test_init(self):\n        gi = samplot.genome_interval('chr1', 1, 1000)\n        self.assertEqual(gi.chrm, 'chr1')\n        self.assertEqual(gi.start, 1)\n        self.assertEqual(gi.end, 1000)\n    #}}}\n\n    #{{{ def test_intersect(self):\n    def test_intersect(self):\n        gi = samplot.genome_interval('chr8', 500, 1000)\n\n        self.assertEqual(-1, \n                         gi.intersect(samplot.genome_interval('chr7',\n                                                               500,\n                                                               1000)))\n\n        self.assertEqual(1,\n                         gi.intersect(samplot.genome_interval('chr9',\n                                                              500,\n                                                              1000)))\n\n        self.assertEqual(-1,\n                         gi.intersect(samplot.genome_interval('chr8',\n                                                              100,\n                                                              499)))\n\n        self.assertEqual(1,\n                         gi.intersect(samplot.genome_interval('chr8',\n                                                              1001,\n                                                   
           2000)))\n\n        self.assertEqual(0,\n                         gi.intersect(samplot.genome_interval('chr8',\n                                                              1,\n                                                              500)))\n        self.assertEqual(0,\n                         gi.intersect(samplot.genome_interval('chr8',\n                                                              500,\n                                                              501)))\n        self.assertEqual(0,\n                         gi.intersect(samplot.genome_interval('chr8',\n                                                              1000,\n                                                              2000)))\n\n    #}}}\n\n    #{{{ def test_get_range_hit(self):\n    def test_get_range_hit(self):\n        gi_0 = samplot.genome_interval('chr8', 500, 1000)\n        ranges = [gi_0]\n\n        self.assertEqual(0, samplot.get_range_hit(ranges, 'chr8', 500))\n\n\n        gi_1 = samplot.genome_interval('chr8', 2000, 3000)\n        ranges = [gi_0, gi_1]\n        self.assertEqual(0, samplot.get_range_hit(ranges, 'chr8', 500))\n        self.assertEqual(1, samplot.get_range_hit(ranges, 'chr8', 2500))\n\n        self.assertEqual(None, samplot.get_range_hit(ranges, 'chr7', 2500))\n        self.assertEqual(None, samplot.get_range_hit(ranges, 'chr8', 100))\n        self.assertEqual(None, samplot.get_range_hit(ranges, 'chr8', 10000))\n    #}}}\n\n    #{{{ def test_map_genome_point_to_range_points(self):\n    def test_map_genome_point_to_range_points(self):\n        gi_0 = samplot.genome_interval('chr8', 100, 200)\n        ranges = [gi_0]\n\n        self.assertEqual(None,\n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  10))\n        self.assertEqual(0.0, \n                         
samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  100))\n        self.assertEqual(0.25, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  125))\n        self.assertEqual(0.5, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  150))\n        self.assertEqual(0.75, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  175))\n        self.assertEqual(1.0, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  200))\n        self.assertEqual(None,\n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  201))\n\n        gi_1 = samplot.genome_interval('chr8', 300, 400)\n        ranges = [gi_0, gi_1]\n\n        self.assertEqual(None,\n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  10))\n        self.assertEqual(0.0, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  
'chr8',\n                                                                  100))\n        self.assertEqual(0.25/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  125))\n        self.assertEqual(0.5/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  150))\n        self.assertEqual(0.75/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  175))\n        self.assertEqual(1.0/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  200))\n        self.assertEqual(None,\n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  201))\n        self.assertEqual(0.5, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  300))\n        self.assertEqual(0.5+0.25/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  325))\n        self.assertEqual(0.5+0.5/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n   
                                                               'chr8',\n                                                                  350))\n        self.assertEqual(0.5+0.75/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  375))\n        self.assertEqual(1.0, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  400))\n        gi_0 = samplot.genome_interval('chr8', 100, 200)\n        gi_1 = samplot.genome_interval('chr9', 300, 400)\n        ranges = [gi_0, gi_1]\n\n        self.assertEqual(None,\n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  10))\n        self.assertEqual(0.0, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  100))\n        self.assertEqual(0.25/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  125))\n        self.assertEqual(0.5/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  150))\n        self.assertEqual(0.75/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                       
           'chr8',\n                                                                  175))\n        self.assertEqual(1.0/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  200))\n        self.assertEqual(None,\n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr8',\n                                                                  201))\n        self.assertEqual(0.5, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr9',\n                                                                  300))\n        self.assertEqual(0.5+0.25/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr9',\n                                                                  325))\n        self.assertEqual(0.5+0.5/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr9',\n                                                                  350))\n        self.assertEqual(0.5+0.75/2, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr9',\n                                                                  375))\n        self.assertEqual(1.0, \n                         samplot.map_genome_point_to_range_points(ranges,\n                                                                  'chr9',\n                                                                  400))\n    #}}}\n\n#}}}\n\n#{{{ class Test_long_read_plan(unittest.TestCase):\nclass 
Test_long_read_plan(unittest.TestCase):\n    #{{{ def test_add_align_step(self):\n    def test_add_align_step(self):\n        alignment = samplot.Alignment('chr8', 100, 500, True, 0)\n\n        # both are in the same range\n        gi_0 = samplot.genome_interval('chr8', 100, 1000)\n        ranges = [gi_0]\n        steps = []\n\n        samplot.add_align_step(alignment, steps, ranges)\n\n        self.assertEqual(1, len(steps))\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(100, steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(500, steps[0].end_pos.start)\n        self.assertEqual(500, steps[0].end_pos.end)\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n        # in different ranges\n        gi_0 = samplot.genome_interval('chr8', 100, 200)\n        gi_1 = samplot.genome_interval('chr8', 300, 1000)\n        ranges = [gi_0, gi_1]\n        steps = []\n\n        samplot.add_align_step(alignment, steps, ranges)\n\n        self.assertEqual(2, len(steps))\n        #start\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(100, steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        #end\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(200, steps[0].end_pos.start)\n        self.assertEqual(200, steps[0].end_pos.end)\n        #event\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n        #start\n        self.assertEqual('chr8', steps[1].start_pos.chrm)\n        self.assertEqual(300, steps[1].start_pos.start)\n        self.assertEqual(300, steps[1].start_pos.end)\n        #end\n        self.assertEqual('chr8', steps[1].end_pos.chrm)\n        self.assertEqual(500, steps[1].end_pos.start)\n        self.assertEqual(500, steps[1].end_pos.end)\n        #event\n        self.assertEqual('Align', 
steps[1].info['TYPE'])\n\n        # start is not in range, use end hit\n        gi_0 = samplot.genome_interval('chr8', 10, 20)\n        gi_1 = samplot.genome_interval('chr8', 300, 1000)\n        ranges = [gi_0, gi_1]\n        steps = []\n\n        samplot.add_align_step(alignment, steps, ranges)\n\n        self.assertEqual(1, len(steps))\n        #start\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(300, steps[0].start_pos.start)\n        self.assertEqual(300, steps[0].start_pos.end)\n        #end\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(500, steps[0].end_pos.start)\n        self.assertEqual(500, steps[0].end_pos.end)\n        #event\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n        # end is not in range, use start hit\n        gi_0 = samplot.genome_interval('chr8', 100, 200)\n        gi_1 = samplot.genome_interval('chr8', 3000, 4000)\n        ranges = [gi_0, gi_1]\n        steps = []\n\n        samplot.add_align_step(alignment, steps, ranges)\n\n        #start\n        self.assertEqual(1, len(steps))\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(100, steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        #end\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(200, steps[0].end_pos.end)\n        self.assertEqual(200, steps[0].end_pos.start)\n        #event\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n        # neither end is in range, add nothing\n        gi_0 = samplot.genome_interval('chr8', 10, 20)\n        gi_1 = samplot.genome_interval('chr8', 3000, 4000)\n        ranges = [gi_0, gi_1]\n        steps = []\n\n        samplot.add_align_step(alignment, steps, ranges)\n\n        self.assertEqual(0, len(steps))\n    #}}}\n     \n    #{{{def test_get_alignments_from_cigar(self):\n    def test_get_alignments_from_cigar(self):\n        '''\n        
alignments = get_alignments_from_cigar(\n                bam_file.get_reference_name(read.reference_id),\n                read.pos,\n                not read.is_reverse,\n                read.cigartuples)\n        '''\n        CIGAR_MAP = { 'M' : 0,\n                      'I' : 1,\n                      'D' : 2,\n                      'N' : 3,\n                      'S' : 4,\n                      'H' : 5,\n                      'P' : 6,\n                      '=' : 7,\n                      'X' : 8,\n                      'B' : 9 }\n\n        cigar = [(CIGAR_MAP['M'], 100),\n                 (CIGAR_MAP['D'], 100),\n                 (CIGAR_MAP['M'], 100)]\n        alignments = samplot.get_alignments_from_cigar('chr8',\n                                                       100,\n                                                       True,\n                                                       cigar)\n        self.assertEqual(2,len(alignments))\n\n        self.assertEqual('chr8', alignments[0].pos.chrm)\n        self.assertEqual(100, alignments[0].pos.start)\n        self.assertEqual(200, alignments[0].pos.end)\n        self.assertEqual(True, alignments[0].strand)\n        self.assertEqual(0, alignments[0].query_position)\n\n        self.assertEqual('chr8', alignments[1].pos.chrm)\n        self.assertEqual(300, alignments[1].pos.start)\n        self.assertEqual(400, alignments[1].pos.end)\n        self.assertEqual(True, alignments[1].strand)\n        self.assertEqual(100, alignments[1].query_position)\n    #}}}\n\n    #{{{def test_get_long_read_plan(self):\n    def test_get_long_read_plan(self):\n        gi_0 = samplot.genome_interval('chr8', 100, 250)\n        gi_1 = samplot.genome_interval('chr8', 300, 400)\n        ranges = [gi_0, gi_1]\n        long_reads = {}\n        read_name = 'Test'\n        alignments = [samplot.Alignment('chr8', 100, 200, True, 0)]\n        long_reads[read_name] = [ samplot.LongRead(alignments) ]\n\n\n        max_gap, steps = 
samplot.get_long_read_plan(read_name,\n                                                    long_reads,\n                                                    ranges)\n\n        self.assertEqual(0, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(100, steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(200, steps[0].end_pos.start)\n        self.assertEqual(200, steps[0].end_pos.end)\n        self.assertEqual('LONGREAD', steps[0].event)\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n        alignments = [samplot.Alignment('chr8', 100, 299, True, 0)]\n        long_reads[read_name] = [ samplot.LongRead(alignments) ]\n        max_gap, steps = samplot.get_long_read_plan(read_name,\n                                                    long_reads,\n                                                    ranges)\n\n\n        self.assertEqual(0, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(100, steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(250, steps[0].end_pos.start)\n        self.assertEqual(250, steps[0].end_pos.end)\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n\n        alignments = [samplot.Alignment('chr8', 100, 350, True, 0)]\n        long_reads[read_name] = [ samplot.LongRead(alignments) ]\n        max_gap, steps = samplot.get_long_read_plan(read_name,\n                                                    long_reads,\n                                                    ranges)\n\n        self.assertEqual(0, max_gap)\n        self.assertEqual(2, len(steps))\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(100, 
steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(250, steps[0].end_pos.start)\n        self.assertEqual(250, steps[0].end_pos.end)\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n        self.assertEqual('chr8', steps[1].start_pos.chrm)\n        self.assertEqual(300, steps[1].start_pos.start)\n        self.assertEqual(300, steps[1].start_pos.end)\n        self.assertEqual('chr8', steps[1].end_pos.chrm)\n        self.assertEqual(350, steps[1].end_pos.start)\n        self.assertEqual(350, steps[1].end_pos.end)\n        self.assertEqual('Align', steps[1].info['TYPE'])\n\n        alignments = [samplot.Alignment('chr8', 100, 250, True, 0),\n                      samplot.Alignment('chr8', 300, 350, True, 150)]\n        long_reads[read_name] = [ samplot.LongRead(alignments) ]\n        max_gap, steps = samplot.get_long_read_plan(read_name,\n                                                    long_reads,\n                                                    ranges)\n\n        self.assertEqual(50, max_gap)\n        self.assertEqual(3, len(steps))\n\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(100, steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(250, steps[0].end_pos.start)\n        self.assertEqual(250, steps[0].end_pos.end)\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n        self.assertEqual('chr8', steps[1].start_pos.chrm)\n        self.assertEqual(250, steps[1].start_pos.start)\n        self.assertEqual(250, steps[1].start_pos.end)\n        self.assertEqual('chr8', steps[1].end_pos.chrm)\n        self.assertEqual(300, steps[1].end_pos.start)\n        self.assertEqual(300, steps[1].end_pos.end)\n        self.assertEqual('Deletion', steps[1].info['TYPE'])\n\n        
self.assertEqual('chr8', steps[2].start_pos.chrm)\n        self.assertEqual(300, steps[2].start_pos.start)\n        self.assertEqual(300, steps[2].start_pos.end)\n        self.assertEqual('chr8', steps[2].end_pos.chrm)\n        self.assertEqual(350, steps[2].end_pos.start)\n        self.assertEqual(350, steps[2].end_pos.end)\n        self.assertEqual('Align', steps[2].info['TYPE'])\n\n        gi_0 = samplot.genome_interval('chr8', 100, 250)\n        gi_1 = samplot.genome_interval('chr9', 300, 400)\n        ranges = [gi_0, gi_1]\n \n        alignments = [samplot.Alignment('chr8', 100, 250, True, 0),\n                      samplot.Alignment('chr9', 300, 350, True, 150)]\n        long_reads[read_name] = [ samplot.LongRead(alignments) ]\n        max_gap, steps = samplot.get_long_read_plan(read_name,\n                                                    long_reads,\n                                                    ranges)\n\n        self.assertEqual(5000, max_gap)\n        self.assertEqual(3, len(steps))\n\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(100, steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(250, steps[0].end_pos.start)\n        self.assertEqual(250, steps[0].end_pos.end)\n        self.assertEqual('Align', steps[0].info['TYPE'])\n\n        self.assertEqual('chr8', steps[1].start_pos.chrm)\n        self.assertEqual(250, steps[1].start_pos.start)\n        self.assertEqual(250, steps[1].start_pos.end)\n        self.assertEqual('chr9', steps[1].end_pos.chrm)\n        self.assertEqual(300, steps[1].end_pos.start)\n        self.assertEqual(300, steps[1].end_pos.end)\n        self.assertEqual('InterChrm', steps[1].info['TYPE'])\n\n        self.assertEqual('chr9', steps[2].start_pos.chrm)\n        self.assertEqual(300, steps[2].start_pos.start)\n        self.assertEqual(300, steps[2].start_pos.end)\n        
self.assertEqual('chr9', steps[2].end_pos.chrm)\n        self.assertEqual(350, steps[2].end_pos.start)\n        self.assertEqual(350, steps[2].end_pos.end)\n        self.assertEqual('Align', steps[2].info['TYPE'])\n    #}}}\n#}}}\n\n#{{{class Test_annotation_plan(unittest.TestCase):\nclass Test_annotation_plan(unittest.TestCase):\n    #{{{def test_get_alignments_from_cigar(self):\n    def test_get_alignments_from_cigar(self):\n\n        gi_1 = samplot.genome_interval('chr8', 100, 200)\n        gi_2 = samplot.genome_interval('chr8', 300, 400)\n        ranges = [gi_1, gi_2]\n\n        i = samplot.genome_interval('chr8', 110, 120)\n        s, e = samplot.get_interval_range_plan_start_end(ranges, i)\n\n        self.assertEqual('chr8',s.chrm)\n        self.assertEqual(110,s.start)\n        self.assertEqual(110,s.end)\n        self.assertEqual('chr8',e.chrm)\n        self.assertEqual(120,e.start)\n        self.assertEqual(120,e.end)\n\n\n        i = samplot.genome_interval('chr8', 110, 220)\n        s, e = samplot.get_interval_range_plan_start_end(ranges, i)\n\n        self.assertEqual('chr8',s.chrm)\n        self.assertEqual(110,s.start)\n        self.assertEqual(110,s.end)\n        self.assertEqual('chr8',e.chrm)\n        self.assertEqual(200,e.start)\n        self.assertEqual(200,e.end)\n\n\n        i = samplot.genome_interval('chr8', 220, 320)\n        s, e = samplot.get_interval_range_plan_start_end(ranges, i)\n\n        self.assertEqual('chr8',s.chrm)\n        self.assertEqual(300,s.start)\n        self.assertEqual(300,s.end)\n        self.assertEqual('chr8',e.chrm)\n        self.assertEqual(320,e.start)\n        self.assertEqual(320,e.end)\n\n\n        i = samplot.genome_interval('chr8', 120, 320)\n        s, e = samplot.get_interval_range_plan_start_end(ranges, i)\n\n        self.assertEqual('chr8',s.chrm)\n        self.assertEqual(120,s.start)\n        self.assertEqual(120,s.end)\n        self.assertEqual('chr8',e.chrm)\n        self.assertEqual(320,e.start)\n   
     self.assertEqual(320,e.end)\n\n        i = samplot.genome_interval('chr8', 320, 520)\n        s, e = samplot.get_interval_range_plan_start_end(ranges, i)\n\n        self.assertEqual('chr8',s.chrm)\n        self.assertEqual(320,s.start)\n        self.assertEqual(320,s.end)\n        self.assertEqual('chr8',e.chrm)\n        self.assertEqual(400,e.start)\n        self.assertEqual(400,e.end)\n\n\n        i = samplot.genome_interval('chr8', 30, 50)\n        s, e = samplot.get_interval_range_plan_start_end(ranges, i)\n\n        self.assertEqual(None, s)\n        self.assertEqual(None, e)\n\n        i = samplot.genome_interval('chr8', 3000, 5000)\n        s, e = samplot.get_interval_range_plan_start_end(ranges, i)\n\n        self.assertEqual(None, s)\n        self.assertEqual(None, e)\n    #}}}\n#}}}\n\n#{{{class Test_splits(unittest.TestCase):\nclass Test_splits(unittest.TestCase):\n    #{{{def test_get_split_plan(self):\n    def test_get_split_plan(self):\n\n        splits = {}\n        hp = 0\n        splits[hp] = {}\n        read_name_1 = 'Test1'\n\n        ranges = [samplot.genome_interval('chr8', 100, 200),\n                  samplot.genome_interval('chr8', 600, 800) ]\n\n        #both in same range\n        #Deletion\n        splits[hp][read_name_1] = [\\\n                samplot.SplitRead('chr8', 100, 150, True, 0, False, False),\n                samplot.SplitRead('chr8', 170, 180, True, 50, False, False)]\n\n        plan = samplot.get_split_plan(ranges, splits[hp][read_name_1])\n\n        max_gap, steps = plan\n\n        self.assertEqual(20, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('SPLITREAD', steps[0].event)\n        self.assertEqual('Deletion', steps[0].info['TYPE'])\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(150, steps[0].start_pos.start)\n        self.assertEqual(150, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(170, 
steps[0].end_pos.start)\n        self.assertEqual(170, steps[0].end_pos.end)\n\n        #Duplication\n        splits[hp][read_name_1] = [\\\n                samplot.SplitRead('chr8', 100, 150, True, 0, False, False),\n                samplot.SplitRead('chr8', 130, 180, True, 50, False, False)]\n\n        plan = samplot.get_split_plan(ranges, splits[hp][read_name_1])\n\n        max_gap, steps = plan\n\n        self.assertEqual(20, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('SPLITREAD', steps[0].event)\n        self.assertEqual('Duplication', steps[0].info['TYPE'])\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(150, steps[0].start_pos.start)\n        self.assertEqual(150, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(130, steps[0].end_pos.start)\n        self.assertEqual(130, steps[0].end_pos.end)\n\n        #Inversion\n        splits[hp][read_name_1] = [\\\n                samplot.SplitRead('chr8', 100, 150, True, 0, False, False),\n                samplot.SplitRead('chr8', 151, 180, False, 50, False, False)]\n\n        plan = samplot.get_split_plan(ranges, splits[hp][read_name_1])\n\n        max_gap, steps = plan\n\n        self.assertEqual(30, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('SPLITREAD', steps[0].event)\n        self.assertEqual('Inversion', steps[0].info['TYPE'])\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(150, steps[0].start_pos.start)\n        self.assertEqual(150, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(151, steps[0].end_pos.start)\n        self.assertEqual(151, steps[0].end_pos.end)\n\n\n        #only one split alignment, so no plan\n        splits[hp][read_name_1] = [\\\n                samplot.SplitRead('chr8', 100, 150, True, 0, False, False)]\n\n        plan = samplot.get_split_plan(ranges, 
splits[hp][read_name_1])\n        self.assertEqual(None, plan)\n\n        #both in same range\n        splits[hp][read_name_1] = [\\\n                samplot.SplitRead('chr8', 550, 650, True, 0, False, False),\n                samplot.SplitRead('chr8', 700, 750, True, 50, False, False)]\n\n        plan = samplot.get_split_plan(ranges, splits[hp][read_name_1])\n\n        max_gap, steps = plan\n\n        self.assertEqual(50, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('SPLITREAD', steps[0].event)\n        self.assertEqual('Deletion', steps[0].info['TYPE'])\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(650, steps[0].start_pos.start)\n        self.assertEqual(650, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(700, steps[0].end_pos.start)\n        self.assertEqual(700, steps[0].end_pos.end)\n\n\n\n        #in different ranges\n        splits[hp][read_name_1] = [\\\n                samplot.SplitRead('chr8', 150, 175, True, 0, False, False),\n                samplot.SplitRead('chr8', 650, 675, True, 50, False, False)]\n\n        plan = samplot.get_split_plan(ranges, splits[hp][read_name_1])\n\n        max_gap, steps = plan\n\n        self.assertEqual(475, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('SPLITREAD', steps[0].event)\n        self.assertEqual('Deletion', steps[0].info['TYPE'])\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(175, steps[0].start_pos.start)\n        self.assertEqual(175, steps[0].start_pos.end)\n        self.assertEqual('chr8', steps[0].end_pos.chrm)\n        self.assertEqual(650, steps[0].end_pos.start)\n        self.assertEqual(650, steps[0].end_pos.end)\n\n\n        #inter chrom\n        ranges = [samplot.genome_interval('chr8', 100, 200),\n                  samplot.genome_interval('chr9', 600, 800) ]\n\n        splits[hp][read_name_1] = [\\\n       
         samplot.SplitRead('chr8', 150, 175, True, 0, False, False),\n                samplot.SplitRead('chr9', 650, 675, True, 50, False, False)]\n\n        plan = samplot.get_split_plan(ranges, splits[hp][read_name_1])\n\n        max_gap, steps = plan\n\n        self.assertEqual(samplot.INTERCHROM_YAXIS, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('SPLITREAD', steps[0].event)\n        self.assertEqual('InterChrm', steps[0].info['TYPE'])\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(175, steps[0].start_pos.start)\n        self.assertEqual(175, steps[0].start_pos.end)\n        self.assertEqual('chr9', steps[0].end_pos.chrm)\n        self.assertEqual(650, steps[0].end_pos.start)\n        self.assertEqual(650, steps[0].end_pos.end)\n\n\n        splits[hp][read_name_1] = [\\\n                samplot.SplitRead('chr8', 150, 175, True, 0, False, False),\n                samplot.SplitRead('chr9', 650, 675, False, 50, False, False)]\n\n        plan = samplot.get_split_plan(ranges, splits[hp][read_name_1])\n\n        max_gap, steps = plan\n\n        self.assertEqual(samplot.INTERCHROM_YAXIS, max_gap)\n        self.assertEqual(1, len(steps))\n        self.assertEqual('SPLITREAD', steps[0].event)\n        self.assertEqual('InterChrmInversion', steps[0].info['TYPE'])\n        self.assertEqual('chr8', steps[0].start_pos.chrm)\n        self.assertEqual(175, steps[0].start_pos.start)\n        self.assertEqual(175, steps[0].start_pos.end)\n        self.assertEqual('chr9', steps[0].end_pos.chrm)\n        self.assertEqual(650, steps[0].end_pos.start)\n        self.assertEqual(650, steps[0].end_pos.end)\n    #}}}\n\n    #{{{def test_get_splits_plan(self):\n    def test_get_splits_plan(self):\n\n        splits = {}\n        hp = 0\n        splits[hp] = {}\n\n        ranges = [samplot.genome_interval('chr8', 100, 200),\n                  samplot.genome_interval('chr9', 600, 800) ]\n\n        #Deletion\n        
splits[hp]['del'] = [\\\n                samplot.SplitRead('chr8', 100, 150, True, 0, False, False),\n                samplot.SplitRead('chr8', 170, 180, True, 50, False, False)]\n\n        #Duplication\n        splits[hp]['dup'] = [\\\n                samplot.SplitRead('chr8', 100, 150, True, 0, False, False),\n                samplot.SplitRead('chr8', 130, 180, True, 50, False, False)]\n\n        #Inversion\n        splits[hp]['inv'] = [\\\n                samplot.SplitRead('chr8', 100, 150, True, 0, False, False),\n                samplot.SplitRead('chr8', 151, 180, False, 50, False, False)]\n\n        #Bad split\n        splits[hp]['bad'] = [\\\n                samplot.SplitRead('chr8', 100, 150, True, 0, False, False)]\n\n\n        #Interchm\n        splits[hp]['interchm'] = [\\\n                samplot.SplitRead('chr8', 150, 175, True, 0, False, False),\n                samplot.SplitRead('chr9', 650, 675, True, 50, False, False)]\n\n        #InterchmInv\n        splits[hp]['interchminv'] = [\\\n                samplot.SplitRead('chr8', 150, 175, True, 0, False, False),\n                samplot.SplitRead('chr9', 650, 675, False, 50, False, False)]\n\n        plan = samplot.get_splits_plan(ranges, splits[hp])\n\n        max_gap, steps = plan\n\n        self.assertEqual(samplot.INTERCHROM_YAXIS, max_gap)\n        self.assertEqual(5, len(steps))\n    #}}}\n#}}}\n\n#{{{ class Test_pairs(unittest.TestCase):\nclass Test_pairs(unittest.TestCase):\n    #{{{ def test_get_pair_insert_size(self):\n    def test_get_pair_insert_size(self):\n\n        ranges = [samplot.genome_interval('chr8', 100, 200),\n                  samplot.genome_interval('chr8', 600, 800) ]\n\n        pairs = {}\n        hp = 0\n        pairs[hp] = {}\n        read_name_1 = 'Test1'\n\n        #both in same ragne\n        pairs[hp][read_name_1] = [\\\n                samplot.PairedEnd('chr8', 100, 150, True, False, False),\n                samplot.PairedEnd('chr8', 170, 180, False, False, False)]\n\n 
       read_name_2 = 'Test2'\n\n        pairs[hp][read_name_2] = [\\\n                samplot.PairedEnd('chr8', 100, 150, True, False, False),\n                samplot.PairedEnd('chr8', 170, 180, False, False, False)]\n        pair_insert_sizes = samplot.get_pairs_insert_sizes(ranges, pairs)\n\n        self.assertEqual(2, len(pair_insert_sizes))\n        self.assertEqual(80, pair_insert_sizes[0])\n        self.assertEqual(80, pair_insert_sizes[1])\n\n        #one starting in range ends out of range\n        pairs[hp][read_name_1] = [\\\n                samplot.PairedEnd('chr8', 100, 150, True, False, False),\n                samplot.PairedEnd('chr8', 190, 240, False, False, False)]\n\n        pair_insert_sizes = samplot.get_pairs_insert_sizes(ranges, pairs)\n\n        self.assertEqual(2, len(pair_insert_sizes))\n        self.assertEqual(140, pair_insert_sizes[0])\n        self.assertEqual(80, pair_insert_sizes[1])\n\n\n        #one out of range\n        pairs[hp][read_name_1] = [\\\n                samplot.PairedEnd('chr9', 100, 150, True, False, False),\n                samplot.PairedEnd('chr8', 190, 240, False, False, False)]\n\n        pair_insert_sizes = samplot.get_pairs_insert_sizes(ranges, pairs)\n\n        self.assertEqual(1, len(pair_insert_sizes))\n        self.assertEqual(80, pair_insert_sizes[0])\n\n\n        #DUP\n        pairs[hp][read_name_1] = [\\\n                samplot.PairedEnd('chr8', 125, 150, True, False, False),\n                samplot.PairedEnd('chr8', 175, 200, False, False, False)]\n\n        pair_insert_sizes = samplot.get_pairs_insert_sizes(ranges, pairs)\n\n        self.assertEqual(2, len(pair_insert_sizes))\n        self.assertEqual(75, pair_insert_sizes[0])\n        self.assertEqual(80, pair_insert_sizes[1])\n\n\n        #INV\n        pairs[hp][read_name_1] = [\\\n                samplot.PairedEnd('chr8', 125, 150, True, False, False),\n                samplot.PairedEnd('chr8', 175, 200, True, False, False)]\n\n        
pair_insert_sizes = samplot.get_pairs_insert_sizes(ranges, pairs)\n\n        self.assertEqual(2, len(pair_insert_sizes))\n        self.assertEqual(75, pair_insert_sizes[0])\n        self.assertEqual(80, pair_insert_sizes[1])\n\n        #interchrm\n        ranges = [samplot.genome_interval('chr8', 100, 200),\n                  samplot.genome_interval('chr9', 600, 800) ]\n\n        pairs[hp][read_name_1] = [\\\n                samplot.PairedEnd('chr8', 125, 150, True, False, False),\n                samplot.PairedEnd('chr9', 675, 700, True, False, False)]\n\n        pair_insert_sizes = samplot.get_pairs_insert_sizes(ranges, pairs)\n\n        self.assertEqual(2, len(pair_insert_sizes))\n        self.assertEqual(samplot.INTERCHROM_YAXIS, pair_insert_sizes[0])\n        self.assertEqual(80, pair_insert_sizes[1])\n\n    #}}}\n\n    #{{{ def test_get_pair_plan(self):\n    def test_get_pair_plan(self):\n\n        ranges = [samplot.genome_interval('chr8', 100, 200),\n                  samplot.genome_interval('chr8', 600, 800) ]\n\n        pairs = {}\n        hp = 0\n        pairs[hp] = {}\n        read_name_1 = 'Test1'\n\n        #both in same range\n        pairs[hp][read_name_1] = [\\\n                samplot.PairedEnd('chr8', 100, 150, False, False, False),\n                samplot.PairedEnd('chr8', 170, 180, True, False, False)]\n\n        read_name_2 = 'Test2'\n\n        pairs[hp][read_name_2] = [\\\n                samplot.PairedEnd('chr8', 100, 150, False, False, False),\n                samplot.PairedEnd('chr8', 170, 180, True, False, False)]\n\n        max_event, steps = samplot.get_pairs_plan(ranges, pairs[hp])\n\n        self.assertEqual(80, max_event)\n        self.assertEqual(2, len(steps))\n    #}}}\n#}}}\n\n#{{{ class Test_linked(unittest.TestCase):\nclass Test_linked(unittest.TestCase):\n    #{{{def test_get_linked_plan(self):\n    def test_get_linked_plan(self):\n\n        ranges = [samplot.genome_interval('chr8', 100, 200),\n                  
samplot.genome_interval('chr8', 600, 800) ]\n\n        pairs = {}\n        hp = 0\n        pairs[hp] = {}\n\n        pairs[hp]['PE_1'] = [\\\n                samplot.PairedEnd('chr8', 100, 150, False, False, False),\n                samplot.PairedEnd('chr8', 170, 180, True, False, False)]\n\n        pairs[hp]['PE_2'] = [\\\n                samplot.PairedEnd('chr8', 110, 160, False, False, False),\n                samplot.PairedEnd('chr8', 680, 690, True, False, False)]\n\n        splits = {}\n        splits[hp] = {}\n\n        splits[hp]['SR_1'] = [\\\n                samplot.SplitRead('chr8', 155, 160, True, 0, False, False),\n                samplot.SplitRead('chr8', 670, 675, True, 50, False, False)]\n\n        linked_reads = {}\n        linked_reads[hp] = {}\n\n        MI = 5\n        linked_reads[hp][MI] = [[],[]]\n\n        linked_reads[hp][MI][0].append('PE_1')\n        linked_reads[hp][MI][0].append('PE_2')\n        linked_reads[hp][MI][1].append('SR_1')\n\n\n        max_event, steps = samplot.get_linked_plan(ranges,\n                                                   pairs[hp],\n                                                   splits[hp],\n                                                   linked_reads[hp],\n                                                   MI)\n        self.assertEqual(580, max_event)\n        self.assertEqual(2, len(steps))\n        self.assertEqual(2, len(steps[0].info['PAIR_STEPS']))\n        self.assertEqual(1, len(steps[0].info['SPLIT_STEPS']))\n\n        self.assertEqual(100, steps[0].start_pos.start)\n        self.assertEqual(100, steps[0].start_pos.end)\n        self.assertEqual(ranges[0].end, steps[0].end_pos.start)\n        self.assertEqual(ranges[0].end, steps[0].end_pos.end)\n\n        self.assertEqual(ranges[1].start, steps[1].start_pos.start)\n        self.assertEqual(ranges[1].start, steps[1].start_pos.end)\n        self.assertEqual(690, steps[1].end_pos.start)\n        self.assertEqual(690, steps[1].end_pos.end)\n\n\n   
     self.assertEqual(100,steps[0].info['PAIR_STEPS'][0].start_pos.start)\n        self.assertEqual(100,steps[0].info['PAIR_STEPS'][0].start_pos.end)\n        self.assertEqual(180,steps[0].info['PAIR_STEPS'][0].end_pos.start)\n        self.assertEqual(180,steps[0].info['PAIR_STEPS'][0].end_pos.end)\n\n        self.assertEqual(110,steps[0].info['PAIR_STEPS'][1].start_pos.start)\n        self.assertEqual(110,steps[0].info['PAIR_STEPS'][1].start_pos.end)\n        self.assertEqual(690,steps[0].info['PAIR_STEPS'][1].end_pos.start)\n        self.assertEqual(690,steps[0].info['PAIR_STEPS'][1].end_pos.end)\n\n\n        self.assertEqual(160,steps[0].info['SPLIT_STEPS'][0].start_pos.start)\n        self.assertEqual(160,steps[0].info['SPLIT_STEPS'][0].start_pos.end)\n        self.assertEqual(670,steps[0].info['SPLIT_STEPS'][0].end_pos.start)\n        self.assertEqual(670,steps[0].info['SPLIT_STEPS'][0].end_pos.end)\n\n    #}}}\n#}}}\n\nif __name__ == '__main__':\n    unittest.main()\n"
  }
]