Copy disabled (too large)
Download .txt
Showing preview only (10,987K chars total). Download the full file to get everything.
Repository: broadinstitute/viral-ngs
Branch: main
Commit: 9e464d4c1e70
Files: 719
Total size: 10.3 MB
Directory structure:
gitextract_xof_5l96/
├── .agents/
│ └── skills/
│ ├── claude-on-vertex-ci/
│ │ └── SKILL.md
│ ├── container-vulns/
│ │ └── SKILL.md
│ ├── dsub-batch-jobs/
│ │ └── SKILL.md
│ └── regression-testing/
│ ├── SKILL.md
│ ├── compare_sample_pair.py
│ ├── discover_pairs.py
│ ├── generate_report.py
│ └── run_vadr.sh
├── .claude/
│ └── rules/
│ └── container-vulns.md
├── .codecov.yml
├── .dockerignore
├── .gitattributes
├── .github/
│ ├── actions/
│ │ ├── create-manifest/
│ │ │ └── action.yml
│ │ ├── pull-with-retry/
│ │ │ └── action.yml
│ │ └── setup-docker-build/
│ │ └── action.yml
│ ├── copilot-instructions.md
│ └── workflows/
│ ├── audit-quay-tags.yml
│ ├── cleanup-images.yml
│ ├── container-scan.yml
│ ├── docker.yml
│ └── docs.yml
├── .gitignore
├── .readthedocs.yml
├── .trivy-ignore-policy.rego
├── .trivyignore
├── AGENTS.md
├── CLAUDE.md
├── LICENSE
├── README.md
├── docker/
│ ├── Dockerfile.assemble
│ ├── Dockerfile.baseimage
│ ├── Dockerfile.classify
│ ├── Dockerfile.core
│ ├── Dockerfile.mega
│ ├── Dockerfile.phylo
│ ├── install-conda-deps.sh
│ ├── requirements/
│ │ ├── assemble-x86.txt
│ │ ├── assemble.txt
│ │ ├── baseimage.txt
│ │ ├── classify-x86.txt
│ │ ├── classify.txt
│ │ ├── core-x86.txt
│ │ ├── core.txt
│ │ ├── phylo-x86.txt
│ │ └── phylo.txt
│ └── scripts/
│ ├── calc_mem.py
│ └── fasta-trim-terminal-ambigs.pl
├── docs/
│ ├── Makefile
│ ├── assembly.rst
│ ├── broad_utils.rst
│ ├── cmdline.rst
│ ├── conf.py
│ ├── description.rst
│ ├── file_utils.rst
│ ├── illumina.rst
│ ├── index.rst
│ ├── interhost.rst
│ ├── intrahost.rst
│ ├── kmer_utils.rst
│ ├── metagenomics.rst
│ ├── ncbi.rst
│ ├── read_utils.rst
│ ├── reports.rst
│ ├── requirements.txt
│ └── taxon_filter.rst
├── pyproject.toml
├── src/
│ └── viral_ngs/
│ ├── __init__.py
│ ├── assemble/
│ │ ├── __init__.py
│ │ ├── freebayes.py
│ │ ├── gap2seq.py
│ │ ├── mafft.py
│ │ ├── mummer.py
│ │ ├── muscle.py
│ │ ├── rasusa.py
│ │ ├── skani.py
│ │ ├── spades.py
│ │ ├── vcf.py
│ │ └── wgsim.py
│ ├── assembly.py
│ ├── broad_utils.py
│ ├── classify/
│ │ ├── __init__.py
│ │ ├── blast.py
│ │ ├── bmtagger.py
│ │ ├── kb.py
│ │ ├── kma.py
│ │ ├── kmc.py
│ │ ├── kraken2.py
│ │ ├── krona.py
│ │ ├── last.py
│ │ └── taxonomy.py
│ ├── core/
│ │ ├── __init__.py
│ │ ├── bbmap.py
│ │ ├── bwa.py
│ │ ├── cdhit.py
│ │ ├── cmd.py
│ │ ├── errors.py
│ │ ├── fastqc.py
│ │ ├── file.py
│ │ ├── illumina_indices.py
│ │ ├── illumina_utils.py
│ │ ├── minimap2.py
│ │ ├── misc.py
│ │ ├── mvicuna.py
│ │ ├── novoalign.py
│ │ ├── picard.py
│ │ ├── prinseq.py
│ │ ├── priorities.py
│ │ ├── sambamba.py
│ │ ├── samtools.py
│ │ ├── splitcode.py
│ │ ├── stats.py
│ │ ├── trimmomatic.py
│ │ └── version.py
│ ├── file_utils.py
│ ├── illumina.py
│ ├── interhost.py
│ ├── intrahost.py
│ ├── kmer_utils.py
│ ├── metagenomics.py
│ ├── ncbi.py
│ ├── phylo/
│ │ ├── __init__.py
│ │ ├── feature_table.py
│ │ ├── feature_table_types.py
│ │ ├── genbank.py
│ │ ├── mafft.py
│ │ ├── mummer.py
│ │ ├── muscle.py
│ │ ├── snpeff.py
│ │ ├── vcf.py
│ │ └── vphaser2.py
│ ├── py.typed
│ ├── read_utils.py
│ ├── reports.py
│ └── taxon_filter.py
└── tests/
├── __init__.py
├── conftest.py
├── input/
│ ├── 5kb_human_from_chr6.fasta
│ ├── G5012.3.fasta
│ ├── G5012.3.mini.bam
│ ├── G5012.3.subset.bam
│ ├── G5012.3.testreads.bam
│ ├── README.md
│ ├── TestAssembleSpades/
│ │ ├── clipDb.fasta
│ │ └── trinity_contigs.fasta
│ ├── TestBamFilter/
│ │ ├── expected.bam
│ │ └── input.bam
│ ├── TestBlastnDbBuild/
│ │ └── expected/
│ │ ├── TestBlastnDbBuild.nhr
│ │ ├── TestBlastnDbBuild.nin
│ │ └── TestBlastnDbBuild.nsq
│ ├── TestBmtagger/
│ │ ├── expected.Match.1.fastq
│ │ ├── expected.Match.2.fastq
│ │ ├── expected.NoMatch.1.fastq
│ │ ├── expected.NoMatch.2.fastq
│ │ ├── humanChr1Subset.bitmask
│ │ ├── humanChr1Subset.fa
│ │ ├── humanChr9Subset.bitmask
│ │ ├── humanChr9Subset.fa
│ │ ├── in1.fastq
│ │ └── in2.fastq
│ ├── TestBmtaggerDbBuild/
│ │ └── expected/
│ │ ├── TestBmtaggerDbBuild.bitmask
│ │ ├── TestBmtaggerDbBuild.srprism.amp
│ │ ├── TestBmtaggerDbBuild.srprism.idx.md5
│ │ ├── TestBmtaggerDbBuild.srprism.imp
│ │ ├── TestBmtaggerDbBuild.srprism.map.md5
│ │ ├── TestBmtaggerDbBuild.srprism.pmp
│ │ ├── TestBmtaggerDbBuild.srprism.rmp
│ │ ├── TestBmtaggerDbBuild.srprism.ss.md5
│ │ ├── TestBmtaggerDbBuild.srprism.ssa
│ │ └── TestBmtaggerDbBuild.srprism.ssd
│ ├── TestDepleteBlastnBam/
│ │ ├── expected.sam
│ │ ├── humanChr1Subset.fa
│ │ ├── humanChr9Subset.fa
│ │ └── in.bam
│ ├── TestDepleteHuman/
│ │ ├── aligned-expected/
│ │ │ ├── test-reads.blastn.bam
│ │ │ ├── test-reads.bmtagger.bam
│ │ │ ├── test-reads.bwa.bam
│ │ │ ├── test-reads.revert.bam
│ │ │ ├── test-reads.rmdup.bam
│ │ │ └── test-reads.taxfilt.imperfect.bam
│ │ ├── expected/
│ │ │ ├── test-reads.blastn.bam
│ │ │ ├── test-reads.bmtagger.bam
│ │ │ ├── test-reads.bwa.bam
│ │ │ ├── test-reads.revert.bam
│ │ │ ├── test-reads.rmdup.bam
│ │ │ ├── test-reads.taxfilt.bam
│ │ │ ├── test-reads.taxfilt.imperfect-2.bam
│ │ │ └── test-reads.taxfilt.imperfect.bam
│ │ ├── partial_pan-viral-9seqs-with-human-random-subset.fasta
│ │ ├── test-reads-aligned.bam
│ │ ├── test-reads-human.bam
│ │ └── test-reads.bam
│ ├── TestDifficultSampleNames/
│ │ ├── RunInfo.xml
│ │ ├── SampleSheet-inline-commas-strings.csv
│ │ └── SampleSheet.csv
│ ├── TestFastaFetch/
│ │ ├── JQ610675.1.fa
│ │ ├── JQ610675.1.fasta
│ │ ├── JQ610676.1.fa
│ │ ├── JQ610676.1.fasta
│ │ ├── JQ610677.1.fa
│ │ ├── JQ610677.1.fasta
│ │ ├── JQ610678.1.fa
│ │ ├── JQ610678.1.fasta
│ │ ├── JQ610679.1.fa
│ │ ├── JQ610679.1.fasta
│ │ ├── JQ610680.1.fa
│ │ ├── JQ610680.1.fasta
│ │ ├── JQ610681.1.fa
│ │ ├── JQ610681.1.fasta
│ │ ├── JQ610682.1.fa
│ │ ├── JQ610682.1.fasta
│ │ ├── JQ610683.1.fa
│ │ ├── JQ610683.1.fasta
│ │ ├── JQ610684.1.fa
│ │ ├── JQ610684.1.fasta
│ │ ├── orungo.fa
│ │ └── orungo.fasta
│ ├── TestFastqBam/
│ │ ├── expected.fastq1
│ │ ├── expected.java1_7.sam
│ │ ├── expected.java1_8.sam
│ │ ├── expected.java1_8_v1.5.sam
│ │ ├── in1.fastq
│ │ ├── in2.fastq
│ │ └── inHeader.txt
│ ├── TestFeatureReader/
│ │ ├── GU481072.1.tbl
│ │ ├── GU481073.1.tbl
│ │ ├── KM821772.1.tbl
│ │ ├── KM821773.1.tbl
│ │ ├── LC889323.1.tbl
│ │ ├── NC_026438.1.tbl
│ │ ├── test1-S.tbl
│ │ └── test2-L.tbl
│ ├── TestFeatureTableFetch/
│ │ ├── JQ610675.1.table
│ │ ├── JQ610675.1.tbl
│ │ ├── JQ610676.1.table
│ │ ├── JQ610676.1.tbl
│ │ ├── JQ610677.1.table
│ │ ├── JQ610677.1.tbl
│ │ ├── JQ610678.1.table
│ │ ├── JQ610678.1.tbl
│ │ ├── JQ610679.1.table
│ │ ├── JQ610679.1.tbl
│ │ ├── JQ610680.1.table
│ │ ├── JQ610680.1.tbl
│ │ ├── JQ610681.1.table
│ │ ├── JQ610681.1.tbl
│ │ ├── JQ610682.1.table
│ │ ├── JQ610682.1.tbl
│ │ ├── JQ610683.1.table
│ │ ├── JQ610683.1.tbl
│ │ ├── JQ610684.1.table
│ │ ├── JQ610684.1.tbl
│ │ ├── orungo.table
│ │ └── orungo.tbl
│ ├── TestFeatureTransfer/
│ │ ├── adenovirus_truncated/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ ├── internal_partials/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ ├── lasv/
│ │ │ ├── expected/
│ │ │ │ ├── LASV_NGA_2018_0026-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0026-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0097-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0097-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0541-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0541-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0611-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0611-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0664-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0664-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0959-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0959-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0998-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0998-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1024-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1024-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1079-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1079-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1177-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1177-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1375-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1375-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1381-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1381-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1392-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1392-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1643-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1643-2.tbl
│ │ │ │ └── test.tbl
│ │ │ └── input/
│ │ │ ├── KM821997.1.tbl
│ │ │ ├── KM821998.1.tbl
│ │ │ ├── align_mafft-ref-lasv-ISTH2376_1.fasta
│ │ │ ├── align_mafft-ref-lasv-ISTH2376_2.fasta
│ │ │ └── ref-lasv-ISTH2376.fasta
│ │ ├── negative_strand_partial/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ ├── synthetic/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ ├── synthetic_ignore_ambig_edges/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ └── synthetic_oob_clip/
│ │ ├── expected/
│ │ │ └── mapped.tbl
│ │ └── input/
│ │ ├── aligned_1.fasta
│ │ ├── ref.fasta
│ │ └── ref.tbl
│ ├── TestFilterLastal/
│ │ ├── expected.fastq
│ │ └── in.fastq
│ ├── TestGap2Seq/
│ │ └── expected.ebov.doublehit.gapfill.fasta
│ ├── TestGenbankRecordFetch/
│ │ ├── JQ610675.1.gb
│ │ ├── JQ610675.1.gbk
│ │ ├── JQ610676.1.gb
│ │ ├── JQ610676.1.gbk
│ │ ├── JQ610677.1.gb
│ │ ├── JQ610677.1.gbk
│ │ ├── JQ610678.1.gb
│ │ ├── JQ610678.1.gbk
│ │ ├── JQ610679.1.gb
│ │ ├── JQ610679.1.gbk
│ │ ├── JQ610680.1.gb
│ │ ├── JQ610680.1.gbk
│ │ ├── JQ610681.1.gb
│ │ ├── JQ610681.1.gbk
│ │ ├── JQ610682.1.gb
│ │ ├── JQ610682.1.gbk
│ │ ├── JQ610683.1.gb
│ │ ├── JQ610683.1.gbk
│ │ ├── JQ610684.1.gb
│ │ ├── JQ610684.1.gbk
│ │ ├── orungo.gb
│ │ └── orungo.gbk
│ ├── TestIlluminaBarcodeHelper/
│ │ ├── ambiguous/
│ │ │ ├── barcodes.txt
│ │ │ ├── expected.txt
│ │ │ └── metrics.txt
│ │ ├── few_assigned/
│ │ │ ├── barcodes.txt
│ │ │ └── metrics.txt
│ │ ├── one_correction/
│ │ │ ├── barcodes.txt
│ │ │ ├── expected.txt
│ │ │ └── metrics.txt
│ │ ├── single_index/
│ │ │ ├── barcodes.txt
│ │ │ ├── expected.txt
│ │ │ └── metrics.txt
│ │ └── single_index_i5_only/
│ │ ├── barcodes.txt
│ │ ├── expected.txt
│ │ └── metrics.txt
│ ├── TestIlluminaDir/
│ │ ├── bcl-indented.tgz
│ │ ├── bcl-plain.tar.bz2
│ │ ├── bcl-plain.tar.lz4
│ │ ├── bcl-plain.tgz
│ │ └── empty_dir/
│ │ └── Data/
│ │ └── Intensities/
│ │ └── BaseCalls/
│ │ └── README
│ ├── TestImputeFromReference/
│ │ ├── contigs.sub.ebov.fasta
│ │ ├── expected.hhv3.mummer.fasta
│ │ ├── expected.hhv3.muscle.fasta
│ │ ├── expected.sub.ebov.impute.fasta
│ │ ├── ref.sub.ebov.fasta
│ │ └── test.pseudo.fasta
│ ├── TestKMA/
│ │ └── ref.fasta
│ ├── TestKbPython/
│ │ ├── palmdb.corona.idx
│ │ ├── palmdb_clustered_t2g.txt
│ │ └── palmdb_rdrp_seqs.corona.fa
│ ├── TestKmers/
│ │ ├── ambig_bases.fasta
│ │ ├── filt.fasta
│ │ ├── palindromic_kmers.fasta
│ │ ├── simple.fasta
│ │ ├── simple.fasta.kmers.k4.txt
│ │ └── tcgaattt.fasta
│ ├── TestLastalDbBuild/
│ │ └── expected/
│ │ ├── TestLastalDbBuild.bck
│ │ ├── TestLastalDbBuild.des
│ │ ├── TestLastalDbBuild.prj
│ │ ├── TestLastalDbBuild.sds
│ │ ├── TestLastalDbBuild.ssp
│ │ ├── TestLastalDbBuild.suf.md5
│ │ └── TestLastalDbBuild.tis
│ ├── TestManualSnpCaller/
│ │ ├── indel.vcf.gz.tbi
│ │ └── output.fasta
│ ├── TestMetagenomicsSimple/
│ │ ├── db/
│ │ │ ├── library/
│ │ │ │ ├── Viruses/
│ │ │ │ │ ├── Bundibugyo_ebolavirus/
│ │ │ │ │ │ ├── GCF_000889155.1_ViralProj51245_genomic.fna
│ │ │ │ │ │ └── GCF_000889155.1_ViralProj51245_protein.faa
│ │ │ │ │ ├── Reston_ebolavirus/
│ │ │ │ │ │ ├── GCF_000854085.1_ViralProj15006_genomic.fna
│ │ │ │ │ │ └── GCF_000854085.1_ViralProj15006_protein.faa
│ │ │ │ │ ├── Sudan_ebolavirus/
│ │ │ │ │ │ ├── GCF_000855585.1_ViralProj15012_genomic.fna
│ │ │ │ │ │ └── GCF_000855585.1_ViralProj15012_protein.faa
│ │ │ │ │ ├── Tai_Forest_ebolavirus/
│ │ │ │ │ │ ├── GCF_000888475.1_ViralProj51257_genomic.fna
│ │ │ │ │ │ └── GCF_000888475.1_ViralProj51257_protein.faa
│ │ │ │ │ └── Zaire_ebolavirus/
│ │ │ │ │ ├── GCF_000848505.1_ViralProj14703_genomic.fna
│ │ │ │ │ └── GCF_000848505.1_ViralProj14703_protein.faa
│ │ │ │ └── prelim_map.txt
│ │ │ └── taxonomy/
│ │ │ ├── accession2taxid/
│ │ │ │ ├── nucl_est.accession2taxid
│ │ │ │ ├── nucl_gb.accession2taxid
│ │ │ │ ├── nucl_gss.accession2taxid
│ │ │ │ ├── nucl_wgs.accession2taxid
│ │ │ │ ├── pdb.accession2taxid
│ │ │ │ └── prot.accession2taxid
│ │ │ ├── delnodes.dmp
│ │ │ ├── gi_taxid_nucl.dmp
│ │ │ ├── gi_taxid_prot.dmp
│ │ │ ├── merged.dmp
│ │ │ ├── names.dmp
│ │ │ └── nodes.dmp
│ │ ├── test-reads.bam
│ │ ├── zaire_ebola.1.fastq
│ │ ├── zaire_ebola.2.fastq
│ │ └── zaire_ebola.bam
│ ├── TestMetagenomicsViralMix/
│ │ ├── db/
│ │ │ ├── library/
│ │ │ │ ├── Viruses/
│ │ │ │ │ ├── Enterovirus_C/
│ │ │ │ │ │ ├── GCF_000861165.1_ViralProj15288_genomic.fna
│ │ │ │ │ │ └── GCF_000861165.1_ViralProj15288_protein.faa
│ │ │ │ │ ├── Hepatitis_C_virus/
│ │ │ │ │ │ ├── GCF_000861845.1_ViralProj15432_genomic.fna
│ │ │ │ │ │ └── GCF_000861845.1_ViralProj15432_protein.faa
│ │ │ │ │ ├── Tomato_mosaic_virus/
│ │ │ │ │ │ ├── GCF_000853705.1_ViralProj14926_genomic.fna
│ │ │ │ │ │ └── GCF_000853705.1_ViralProj14926_protein.faa
│ │ │ │ │ └── partial_pan-viral-9seqs-with-human-random-subset.fna
│ │ │ │ └── prelim_map.txt
│ │ │ └── taxonomy/
│ │ │ ├── accession2taxid/
│ │ │ │ ├── nucl_est.accession2taxid
│ │ │ │ ├── nucl_gb.accession2taxid
│ │ │ │ ├── nucl_gss.accession2taxid
│ │ │ │ ├── nucl_wgs.accession2taxid
│ │ │ │ ├── pdb.accession2taxid
│ │ │ │ └── prot.accession2taxid
│ │ │ ├── delnodes.dmp
│ │ │ ├── gi_taxid_nucl.dmp
│ │ │ ├── gi_taxid_prot.dmp
│ │ │ ├── merged.dmp
│ │ │ ├── names.dmp
│ │ │ └── nodes.dmp
│ │ └── test-reads.bam
│ ├── TestMinimap2Idxstats/
│ │ ├── multi-viral-reads.bam
│ │ └── multi-viral-refs.fasta
│ ├── TestMiseqToBam/
│ │ ├── RunInfo.xml
│ │ └── SampleSheet.csv
│ ├── TestMvicuna/
│ │ ├── expected_pairedOut.1.fastq
│ │ ├── expected_pairedOut.2.fastq
│ │ ├── expected_unpairedOut.fastq
│ │ ├── in.1.fastq
│ │ └── in.2.fastq
│ ├── TestOrderAndOrient/
│ │ ├── contig.mummer3_fail_lasv.fasta
│ │ ├── contigs.ebov.doublehit.fasta
│ │ ├── contigs.ebov.fasta
│ │ ├── contigs.hhv3.fasta
│ │ ├── contigs.hiv.big_indel.fasta
│ │ ├── contigs.hiv.wrapped.fasta
│ │ ├── contigs.influenza.fasta
│ │ ├── contigs.lasv.fasta
│ │ ├── contigs.lasv.one_small.fasta
│ │ ├── expected.ebov.ambig.fasta
│ │ ├── expected.ebov.doublehit.fasta
│ │ ├── expected.ebov.small.fasta
│ │ ├── expected.hhv3.fasta
│ │ ├── expected.hiv.big_indel.alternates.fasta
│ │ ├── expected.hiv.big_indel.fasta
│ │ ├── expected.hiv.wrapped.fasta
│ │ ├── expected.influenza.fasta
│ │ ├── expected.lasv.ambig.fasta
│ │ ├── expected.lasv.fasta
│ │ ├── expected.lasv.promer.fasta
│ │ ├── expected.refsel.ebov.stats.tsv
│ │ ├── expected.refsel.lasv.stats.tsv
│ │ ├── ref.ebov.gin.fasta
│ │ ├── ref.ebov.lbr.fasta
│ │ ├── ref.ebov.makona_C15.fasta
│ │ ├── ref.ebov.sle.fasta
│ │ ├── ref.ebov.small.fasta
│ │ ├── ref.hhv3.fasta
│ │ ├── ref.hiv.fasta
│ │ ├── ref.influenza.fasta
│ │ ├── ref.lasv.BNI_Nig08_A19.fasta
│ │ ├── ref.lasv.ISTH2376.fasta
│ │ ├── ref.lasv.KGH_G502.fasta
│ │ ├── ref.lasv.nomatch.fasta
│ │ ├── ref.lasv.pinneo.fasta
│ │ └── refs.ebov.fasta
│ ├── TestOrderOrientAndImputeFromReference/
│ │ ├── contigs.influenza.fasta
│ │ ├── expected.influenza.impute.mafft.fasta
│ │ ├── expected.influenza.impute.mummer.fasta
│ │ ├── expected.influenza.impute.muscle.fasta
│ │ └── ref.influenza_partial.fasta
│ ├── TestPerSample/
│ │ ├── in.2libs.bam
│ │ ├── in.2libs3rgs.bam
│ │ ├── in.3libs.bam
│ │ ├── in.bam
│ │ ├── in.indels.bam
│ │ ├── in.oneunmapped.bam
│ │ ├── ref.fasta
│ │ ├── ref.fasta.fai
│ │ ├── ref.indels.fasta
│ │ ├── ref.indels.fasta.fai
│ │ ├── vphaser_one_sample_2libs_expected.txt
│ │ ├── vphaser_one_sample_3libs_expected.txt
│ │ ├── vphaser_one_sample_expected.txt
│ │ └── vphaser_one_sample_indels_expected.txt
│ ├── TestPurgeUnmated/
│ │ ├── expected1.fastq
│ │ ├── expected2.fastq
│ │ ├── in1.fastq
│ │ ├── in2.fastq
│ │ ├── in_sra1.fastq
│ │ └── in_sra2.fastq
│ ├── TestRefineAssembly/
│ │ ├── expected.ebov.refine1.fasta
│ │ ├── expected.ebov.refine1.freebayes.fasta
│ │ ├── expected.ebov.refine1.new.fasta
│ │ ├── expected.ebov.refine2.fasta
│ │ ├── expected.ebov.refine2.freebayes.fasta
│ │ └── impute.ebov.fasta
│ ├── TestRmdupUnaligned/
│ │ ├── expected.bam
│ │ └── input.bam
│ ├── TestRunInfo/
│ │ ├── RunInfo-hiseq.xml
│ │ ├── RunInfo-miseq.xml
│ │ ├── RunInfo-nextseq-1000-2000-p1.xml
│ │ ├── RunInfo-nextseq550.xml
│ │ ├── RunInfo-novaseq-x-plus.xml
│ │ ├── RunInfo-novaseq.xml
│ │ ├── RunInfo-novel-fcid-and-tilecount.xml
│ │ ├── RunInfo-novel-fcid.xml
│ │ └── RunInfo-novel-tile-count.xml
│ ├── TestSampleSheet/
│ │ ├── SampleSheet-AEHWY-subset.csv
│ │ ├── SampleSheet-custom-1.txt
│ │ ├── SampleSheet-custom-1_macos9-endings.txt
│ │ ├── SampleSheet-custom-1_win-endings.txt
│ │ ├── SampleSheet-custom-2.txt
│ │ ├── SampleSheet-custom-2_win-endings.tsv
│ │ ├── SampleSheet-custom-inner-barcodes-outer-collapse.tsv
│ │ ├── SampleSheet-hiseq-1.csv
│ │ ├── SampleSheet-in-Broad-MiSeq-Format_with_Picard_Block.csv
│ │ ├── SampleSheet-miseq-1.csv
│ │ ├── SampleSheet-submit-1.csv
│ │ ├── SampleSheet-submit-2.csv
│ │ ├── SampleSheet-submit-3.csv
│ │ └── SampleSheet-with-blanklines.csv
│ ├── TestSkaniReferenceSelection/
│ │ ├── RVA_DQ473496.1_Rhinovirus_A49.fa
│ │ ├── RVA_DQ473498.1_Rhinovirus_A10.fa
│ │ ├── RVA_DQ473499.1_Human_rhinovirus_A44.fa
│ │ ├── RVA_DQ473501.1_Rhinovirus_A34.fa
│ │ ├── RVA_DQ473507.1_Rhinovirus_A53.fa
│ │ ├── RVA_FJ445116.1_Human_rhinovirus_13_strain_ATCC_VR-1123.fa
│ │ ├── RVA_FJ445140.1_Human_rhinovirus_56_strain_ATCC_VR-1166.fa
│ │ ├── RVA_FJ445177.1_Human_rhinovirus_9_strain_ATCC_VR-489.fa
│ │ ├── RVA_GQ223229.1_Human_rhinovirus_A_isolate_N13.fa
│ │ ├── RVA_L24917.1_Human_rhinovirus_type_16_polyprotein_gene.fa
│ │ └── USA-MA-Broad_BWH-19947-2023.l000013249603_C5.HTKJ7DRX3.1.acellular.dedup.assembly1-spades.fasta
│ ├── TestSnpEff/
│ │ ├── RBV16.fasta
│ │ ├── ann_eff.vcf.gz.tbi
│ │ ├── merged.vcf.gz.tbi
│ │ ├── msa.fasta
│ │ └── ref-rabies-JQ685920.fasta
│ ├── TestSplitReads/
│ │ ├── expected.fasta.01
│ │ ├── expected.fasta.02
│ │ ├── expected.fastq.01
│ │ ├── expected.fastq.02
│ │ ├── expected.fastq.03
│ │ ├── expected.fastq.1
│ │ ├── expected.fastq.2
│ │ ├── in.fasta
│ │ └── in.fastq
│ ├── TestSplitcodeDemuxFastqs/
│ │ ├── RunInfo.xml
│ │ ├── SampleSheet.csv
│ │ ├── samples_3bc.tsv
│ │ ├── samples_3bc_i5_rc_with_n.tsv
│ │ └── samples_3bc_i5_revcomp.tsv
│ ├── TestSplitcodeDemuxIntegration/
│ │ ├── RunInfo.xml
│ │ └── SampleSheet.tsv
│ ├── TestSplitcodeLookupTable/
│ │ ├── AAAAAAAA-TTTTTTTT.lLibA_summary.json
│ │ ├── ATCGATCG-GCTAGCTA.lB1_summary.json
│ │ ├── ATCGATCG-GCTAGCTA.lL1_summary.json
│ │ ├── GGGGGGGG-CCCCCCCC.lLibB_summary.json
│ │ ├── TTTTAAAA-CCCCGGGG.lB2_summary.json
│ │ ├── sample_sheet_basic.tsv
│ │ ├── sample_sheet_multi_pool.tsv
│ │ ├── sample_sheet_unique_lib_ids.tsv
│ │ └── sample_sheet_zero_reads.tsv
│ ├── TestTarballMerger/
│ │ ├── mixed-compressed-input/
│ │ │ ├── file2.tar.lz4
│ │ │ ├── file3.tar.zst
│ │ │ └── file4.tar.bz2
│ │ └── raw-input/
│ │ ├── file1
│ │ ├── file2
│ │ ├── file3
│ │ └── file4
│ ├── TestTaxonomy/
│ │ └── simple.m8
│ ├── TestToolKrakenExecute/
│ │ ├── empty-report.txt
│ │ ├── expected-kraken-mix.reads.txt
│ │ └── expected-kraken-mix.report.txt
│ ├── TestToolNovoalign/
│ │ └── ebov_reads.bam
│ ├── TestToolPicard/
│ │ ├── in.dict
│ │ ├── in.fasta
│ │ ├── messy-headers.fasta
│ │ └── simple.sam
│ ├── TestToolSamtools/
│ │ ├── in.fasta
│ │ ├── in.fasta.fai
│ │ ├── indel_cigar.sam
│ │ └── simple.sam
│ ├── TestTrimRmdupSubsamp/
│ │ └── clipDb.fasta
│ ├── TestTrimmomatic/
│ │ ├── clip.fasta
│ │ ├── empty.fastq
│ │ ├── expected1.fastq
│ │ ├── expected1.maxinfo.fastq
│ │ ├── expected2.fastq
│ │ ├── expected2.maxinfo.fastq
│ │ ├── in1.fastq
│ │ └── in2.fastq
│ ├── TestTsvJoin/
│ │ ├── expected-out.txt
│ │ ├── tab-1.txt
│ │ └── tab-2.txt
│ ├── TestUtilMisc/
│ │ ├── cfg1.yaml
│ │ ├── cfg2.yaml
│ │ ├── cfg_std.yaml
│ │ └── empty.yaml
│ ├── TestVPhaser2/
│ │ ├── expected.cpickle
│ │ ├── in.bam
│ │ └── in.bam.bai
│ ├── almost-empty-2.bam
│ ├── almost-empty.bam
│ ├── broken.bam
│ ├── ebola.fasta
│ ├── ebola.fasta.bz2
│ ├── ebola.fasta.lz4
│ ├── ebola.fasta.zst
│ ├── ebov-makona.fasta
│ ├── empty.bam
│ ├── empty.fasta
│ ├── one_gene.vcf.gz.tbi
│ ├── ref.lasv.fasta
│ └── s3/
│ └── sabeti-public-dbs/
│ ├── blast/
│ │ ├── hybsel_probe_adapters.fasta
│ │ ├── hybsel_probe_adapters.nhr
│ │ ├── hybsel_probe_adapters.nin
│ │ ├── hybsel_probe_adapters.nsq
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.dict
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.fasta.fai
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.nhr
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.nin
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.nix
│ │ └── metag_v3.ncRNA.mRNA.mitRNA.consensus.nsq
│ ├── bmtagger/
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.bitmask
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.amp
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.idx
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.imp
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.pmp
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.rmp
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.ss
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.ssa
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.ssd
│ │ ├── hg19.bitmask
│ │ ├── hg19.srprism.amp
│ │ ├── hg19.srprism.idx
│ │ ├── hg19.srprism.imp
│ │ ├── hg19.srprism.pmp
│ │ ├── hg19.srprism.rmp
│ │ ├── hg19.srprism.ss
│ │ ├── hg19.srprism.ssa
│ │ ├── hg19.srprism.ssd
│ │ ├── metagenomics_contaminants_v3.bitmask
│ │ ├── metagenomics_contaminants_v3.readme.txt
│ │ ├── metagenomics_contaminants_v3.srprism.amp
│ │ ├── metagenomics_contaminants_v3.srprism.idx
│ │ ├── metagenomics_contaminants_v3.srprism.imp
│ │ ├── metagenomics_contaminants_v3.srprism.pmp
│ │ ├── metagenomics_contaminants_v3.srprism.rmp
│ │ ├── metagenomics_contaminants_v3.srprism.ss
│ │ ├── metagenomics_contaminants_v3.srprism.ssa
│ │ └── metagenomics_contaminants_v3.srprism.ssd
│ ├── bwa/
│ │ ├── hg19.amb
│ │ ├── hg19.ann
│ │ ├── hg19.bwt
│ │ ├── hg19.pac
│ │ └── hg19.sa
│ ├── kaiju/
│ │ └── nr/
│ │ └── nr.fmi
│ ├── krakenuniq/
│ │ ├── database.idx
│ │ ├── database.jdb
│ │ ├── database.kdb
│ │ └── lca.complete
│ ├── krona/
│ │ └── taxonomy.tab
│ ├── rna_bwa/
│ │ ├── human_viral_rrna.amb
│ │ ├── human_viral_rrna.ann
│ │ ├── human_viral_rrna.bwt
│ │ ├── human_viral_rrna.pac
│ │ └── human_viral_rrna.sa
│ ├── spikeins/
│ │ └── ercc_spike-ins.fasta
│ ├── taxonomy/
│ │ ├── merged.dmp
│ │ ├── names.dmp
│ │ └── nodes.dmp
│ └── trim_clip/
│ └── contaminants.fasta
└── unit/
├── assemble/
│ ├── test_assembly.py
│ ├── test_assembly_integration.py
│ └── test_util_vcf.py
├── classify/
│ ├── __init__.py
│ ├── fixtures.py
│ ├── test_integration_kb.py
│ ├── test_integration_kraken2.py
│ ├── test_integration_taxon_filter.py
│ ├── test_kmer_utils.py
│ ├── test_metagenomics.py
│ ├── test_taxon_filter.py
│ ├── test_taxonomy.py
│ ├── test_tools_kb_python.py
│ ├── test_tools_kma.py
│ └── test_tools_krona.py
├── core/
│ ├── test_conftest.py
│ ├── test_file_utils.py
│ ├── test_illumina.py
│ ├── test_read_utils.py
│ ├── test_tools.py
│ ├── test_tools_bbmap.py
│ ├── test_tools_bwa.py
│ ├── test_tools_fastqc.py
│ ├── test_tools_minimap2.py
│ ├── test_tools_novoalign.py
│ ├── test_tools_picard.py
│ ├── test_tools_sambamba.py
│ ├── test_tools_samtools.py
│ ├── test_tools_splitcode.py
│ ├── test_tools_trimmomatic.py
│ ├── test_util_file.py
│ └── test_util_misc.py
└── phylo/
├── __init__.py
├── test_interhost.py
├── test_intrahost.py
├── test_ncbi.py
├── test_tools.py
├── test_tools_vphaser2.py
└── test_util_vcf.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .agents/skills/claude-on-vertex-ci/SKILL.md
================================================
# Claude on Vertex AI from GitHub Actions
Reusable infrastructure for invoking Claude (via [`anthropics/claude-code-action`](https://github.com/anthropics/claude-code-action))
on Google Vertex AI from GitHub Actions workflows in this repo. Authentication
is via Workload Identity Federation — no long-lived secrets.
## When to Use
Add a new Claude-in-CI use case (PR review, CVE triage, dependency-update
analysis, automated docs generation, etc.) when:
- The task is well-suited to an LLM with tool use (reading files, running CLI
commands, calling `gh`)
- The task fires on a GitHub event (schedule, workflow_dispatch, push, PR)
- You want an issue, PR comment, or artifact as the output
The first use case (and template) is the **CVE triage step in
`container-scan.yml`** — see `.agents/skills/container-vulns/SKILL.md`.
## What's Already Provisioned
**GCP project: `viral-seq-ai`**
| Resource | Identifier |
|---|---|
| Workload Identity Pool | `github-actions-pool` (global) |
| OIDC provider | `broadinstitute-github` |
| Provider attribute condition | `assertion.repository_owner == 'broadinstitute'` |
| Required APIs | `aiplatform.googleapis.com`, `iamcredentials.googleapis.com` |
**Service accounts (one per use case):**
| SA email | Use case | Workflow |
|---|---|---|
| `viral-ngs-cve-triage@viral-seq-ai.iam.gserviceaccount.com` | Weekly CVE triage | `.github/workflows/container-scan.yml` |
The pool + provider are reusable across use cases. Add a new SA per use case
(principle of least privilege; easier to audit and disable individually).
## Adding a New Claude-in-CI Use Case
You need GCP IAM Admin on `viral-seq-ai` for steps 1–3.
### 1. Create a service account scoped to the use case
```bash
gcloud iam service-accounts create <use-case-name> \
--project=viral-seq-ai \
--display-name="<Human-readable name>" \
--description="Used by <workflow-file>.yml to invoke Claude on Vertex AI for <purpose>"
```
### 2. Grant minimum roles
Vertex invocation requires both:
```bash
gcloud projects add-iam-policy-binding viral-seq-ai \
--member="serviceAccount:<use-case-name>@viral-seq-ai.iam.gserviceaccount.com" \
--role="roles/aiplatform.user" \
--condition=None
gcloud projects add-iam-policy-binding viral-seq-ai \
--member="serviceAccount:<use-case-name>@viral-seq-ai.iam.gserviceaccount.com" \
--role="roles/serviceusage.serviceUsageConsumer" \
--condition=None
```
### 3. Bind the GitHub repo to the SA via WIF
```bash
PROJECT_NUMBER=$(gcloud projects describe viral-seq-ai --format='value(projectNumber)')
gcloud iam service-accounts add-iam-policy-binding \
<use-case-name>@viral-seq-ai.iam.gserviceaccount.com \
--project=viral-seq-ai \
--role="roles/iam.workloadIdentityUser" \
--member="principalSet://iam.googleapis.com/projects/${PROJECT_NUMBER}/locations/global/workloadIdentityPools/github-actions-pool/attribute.repository/broadinstitute/viral-ngs"
```
For tighter scope, use `attribute.ref` (branch) or `attribute.workflow` (specific
workflow file) in place of (or in addition to) `attribute.repository`.
### 4. Set GitHub repo variables
If the new use case lives alongside the CVE triage one, you can reuse
`GCP_PROJECT_ID` and `GCP_WIP_PROVIDER`. The SA email differs per use case —
either create a use-case-specific variable (e.g., `GCP_PR_REVIEW_SA_EMAIL`) or
hard-code it in the workflow file.
```bash
gh variable set GCP_<USECASE>_SA_EMAIL \
--body "<use-case-name>@viral-seq-ai.iam.gserviceaccount.com" \
--repo broadinstitute/viral-ngs
```
### 5. Add the workflow steps
See the canonical pattern below.
## Canonical Workflow Pattern
```yaml
permissions:
id-token: write # for OIDC token to GCP via WIF
# plus whatever else your workflow needs (contents: read, issues: write, etc.)
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
# If Claude needs git history (`git log --grep`, `git show`), use 0.
# Otherwise the default shallow clone is fine.
fetch-depth: 0
- name: Authenticate to GCP via Workload Identity Federation
uses: google-github-actions/auth@v2
with:
workload_identity_provider: ${{ vars.GCP_WIP_PROVIDER }}
service_account: ${{ vars.GCP_<USECASE>_SA_EMAIL }}
- name: Claude on Vertex AI
# Pin to commit SHA for supply-chain safety; bump when picking up new releases.
uses: anthropics/claude-code-action@<FULL_40_CHAR_SHA> # v1
env:
CLAUDE_CODE_USE_VERTEX: '1'
CLOUD_ML_REGION: global # Sonnet 4.6 supports the global endpoint
ANTHROPIC_VERTEX_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }}
with:
use_vertex: 'true'
github_token: ${{ secrets.GITHUB_TOKEN }} # avoids needing the Claude Code GitHub App
claude_args: '--model claude-sonnet-4-6 --max-turns 30'
settings: |
{
"permissions": {
"allow": [
"Read",
"Write",
"Bash(git log:*)",
"Bash(grep:*)",
"Bash(jq:*)"
# ... add only what the prompt actually needs
]
}
}
prompt: |
<Your prompt here. Claude sees the GH workspace as cwd.>
```
## Gotchas (Things We Learned the Hard Way)
- **`iamcredentials.googleapis.com` must be enabled.** WIF impersonation calls
this API; without it you get `IAM Service Account Credentials API has not
been used in project ... before or it is disabled` from the Claude action,
AFTER auth appears to succeed. Confusing.
- **Use `@v1`, not `@beta`.** `claude-code-action@beta` is the older API shape
with separate `direct_prompt`, `max_turns`, `allowed_tools` inputs. `@v1`
consolidates everything into `prompt` + `claude_args` + `settings`. Pin to a
commit SHA, not the floating tag.
- **Both `use_vertex: 'true'` AND `CLAUDE_CODE_USE_VERTEX=1` are needed** —
the input goes to the action wrapper, the env var goes to the underlying
`claude-code` CLI.
- **Pass `github_token: ${{ secrets.GITHUB_TOKEN }}`** to skip the Claude
Code GitHub App requirement. Without this you get
`Error: Claude Code is not installed on this repository`.
- **The permission DSL doesn't accept path globs on `Write`, nor paths embedded
in `Bash` command names.** `Write(/tmp/issues/**)` and `Bash(mkdir:/tmp/issues*)` are
silently rejected and Claude hits `permission_denials_count > 0`. The
`Bash(<cmd>:*)` pattern is for the *args after the command*, not paths
embedded in the command name. For now, use unrestricted `Write` and rely
on prompt instructions to constrain output paths.
- **Region `global` (recommended) gives dynamic routing across regions for
Sonnet 4.6.** Pin to a specific region (e.g., `us-east5`, `europe-west1`)
only if you need data-residency control.
- **`fetch-depth: 0` if Claude needs git history.** Default `actions/checkout`
is shallow (depth=1); `git log --all --grep` and `git show <sha>` will
produce empty results without unshallowing.
- **Always `jq`-query authoritative data sources in the prompt.** When Claude
has multiple ways to learn a fact (training data vs reading a workspace
file), tell it explicitly which is canonical and require the tool call.
Otherwise it sometimes infers from training data.
## Cost / Safety
- **Cost gate:** invoke Claude only when the workflow has real work to do
(e.g., new CVEs detected, PR opened by non-bot). Don't invoke on every
schedule tick unconditionally.
- **Turn cap:** `--max-turns 30` is generous for triage-style tasks; tighten
if you can. Observed: 6–10 turns for one-CVE analyses.
- **Cost order of magnitude:** Sonnet 4.6 ≈ $0.10–1 per non-trivial task
(one-CVE analysis with full repo reading). Opus 4.7 is ~5× more expensive
for marginal quality gain on most CI tasks.
- **Provider gate:** the OIDC provider attribute condition limits token
minting to repos owned by `broadinstitute`. Other GitHub orgs cannot use
this pool.
- **SA gate:** each SA's `workloadIdentityUser` binding limits which repos
can impersonate it. Default scope: `attribute.repository/broadinstitute/viral-ngs`.
Tighten with `attribute.ref` or `attribute.workflow` if needed.
- **Tool allowlist:** only allow tools the prompt actually uses. Avoid wildcard
`Bash(*:*)` — name specific commands like `Bash(git log:*)`.
## Key Files
| File | Purpose |
|------|---------|
| `.github/workflows/container-scan.yml` | First use case (CVE triage); reference for the workflow pattern |
| `.agents/skills/container-vulns/SKILL.md` | The CVE triage playbook this infra serves |
## References
- [`anthropics/claude-code-action`](https://github.com/anthropics/claude-code-action)
- [Claude Code on Google Vertex AI](https://code.claude.com/docs/en/google-vertex-ai)
- [`google-github-actions/auth`](https://github.com/google-github-actions/auth) (WIF action)
================================================
FILE: .agents/skills/container-vulns/SKILL.md
================================================
# Container Vulnerability Management
Guidance for scanning, triaging, and mitigating container image vulnerabilities
in the viral-ngs Docker image hierarchy.
## Scanning
Container images are scanned for vulnerabilities using [Trivy](https://aquasecurity.github.io/trivy/):
- **On every PR/push**: `docker.yml` scans each image flavor after build (SARIF -> GitHub Security tab, JSON -> artifact)
- **Weekly schedule**: `container-scan.yml` scans the latest published images. When new
fixable HIGH/CRITICAL CVEs are detected (i.e. CVE IDs not already present in any open
or closed GH issue title), the workflow invokes Claude Sonnet 4.6 on Vertex AI to
triage each one and files a GitHub issue per CVE (labels: `security`, `cve`). The
Vertex/WIF infra used here is documented in `.agents/skills/claude-on-vertex-ci/SKILL.md`.
- Scans filter to **CRITICAL/HIGH** severity, **ignore-unfixed**, and apply a Rego policy (`.trivy-ignore-policy.rego`)
- Per-CVE exceptions go in `.trivyignore` with mandatory justification comments
## Rego Policy (`.trivy-ignore-policy.rego`)
The Rego policy filters CVEs that are architecturally inapplicable to ephemeral batch containers:
- **AV:P** (Physical access required) -- containers are cloud-hosted
- **AV:A** (Adjacent network required) -- no attacker on same network segment
- **AV:L + UI:R** (Local + user interaction) -- no interactive sessions
- **AV:L + PR:H** (Local + high privileges) -- containers run non-root
- **AV:L + S:U** (Local + scope unchanged) -- attacker already has code execution and impact stays within the ephemeral container
Changes to this policy should be reviewed carefully. The comments in the file explain the rationale and risk for each rule.
## Common Vulnerability Sources
**Python transitive deps**: Pin minimum versions in `docker/requirements/*.txt`. Prefer conda packages over pip. Check conda-forge availability before assuming a version exists -- conda-forge often lags PyPI by days/weeks.
**Java fat JARs** (picard, gatk, snpeff, fgbio): Bioinformatics Java tools are distributed as uber JARs with all dependencies bundled inside. Trivy detects vulnerable libraries (log4j, commons-compress, etc.) baked into these JARs. Version bumps can cause ARM64 conda solver conflicts because Java tools pull in openjdk -> harfbuzz -> icu version chains that clash with other packages (r-base, boost-cpp, pyicu). Always check:
1. Whether the tool is actually flagged by Trivy (don't bump versions unnecessarily)
2. Whether the CVE applies (e.g., log4j 1.x is NOT vulnerable to Log4Shell)
3. Whether the desired version resolves on ARM64 before pushing
**Go binaries**: Some conda packages bundle compiled Go binaries (e.g., mafft's `dash_client`, google-cloud-sdk's `gcloud-crc32c`). If the binary is unused, delete it in the Dockerfile. Delete from **both** the installed location and `/opt/conda/pkgs/*/` (conda package cache) -- Trivy scans the full filesystem.
**Vendored copies**: Packages like google-cloud-sdk and setuptools bundle their own copies of Python libraries that may be older than what's in the conda environment. Trivy flags these vendored copies separately. Options: delete the vendored directory (if not needed at runtime), or accept the risk in `.trivyignore` with justification.
## ARM64 Solver Conflicts
The conda solver on ARM64 (linux-aarch64) is more constrained than amd64 because fewer package builds exist. Common conflict patterns:
- **icu version conflicts**: Many packages (openjdk, r-base, boost-cpp, pyicu) pin specific icu version ranges. Bumping one package can make the entire environment unsolvable.
- **libdeflate/htslib conflicts**: lofreq 2.1.5 pins old htslib/libdeflate versions that conflict with newer pillow/libtiff.
- **openjdk version escalation**: snpeff 5.2+ requires openjdk>=11, 5.3+ requires openjdk>=21. Higher openjdk versions pull in harfbuzz->icu chains that conflict with everything.
When a solver conflict occurs: revert the change, check what version the solver was picking before, and pin to that exact version if it already addresses the CVE.
## Mitigation Decision Process
When triaging a CVE:
1. **Check the CVSS vector** -- does the Rego policy already filter it?
2. **Identify the source package** -- use Trivy JSON output (`PkgName`, `PkgPath`, `InstalledVersion`)
3. **Check if a fix version exists on conda-forge/bioconda** -- not just on PyPI
4. **Test on ARM64** -- solver conflicts are the most common failure mode
5. **If the fix version conflicts**: consider whether the CVE is exploitable in your deployment model. Document the risk assessment in `.trivyignore` or `vulnerability-mitigation-status.md`.
6. **If the vulnerable code is unused**: delete the binary/file inline in the Dockerfile (same RUN layer as install to avoid bloating images)
## Key Files
| File | Purpose |
|------|---------|
| `.trivy-ignore-policy.rego` | Rego policy for class-level CVE filtering |
| `.trivyignore` | Per-CVE exceptions with justifications |
| `.github/workflows/docker.yml` | Build-time scanning (SARIF + JSON) |
| `.github/workflows/container-scan.yml` | Weekly scheduled scanning |
| `vulnerability-mitigation-status.md` | Local-only tracking doc (not committed) |
================================================
FILE: .agents/skills/dsub-batch-jobs/SKILL.md
================================================
# Running Batch Jobs on GCP via dsub
Use dsub to run one-off compute jobs on Google Cloud when your analysis requires
more compute/memory than the local environment, or needs specific Docker images
that are impractical to run locally.
## When to Use
- Analysis tools need >8GB RAM (e.g., VADR, BLAST, assembly)
- Need to run many independent jobs in parallel (batch processing)
- Need a specific Docker image with pre-installed tools
- Data lives in GCS and is most efficiently processed in-cloud
## Prerequisites
- **dsub** installed (ask the user where their dsub installation or venv is located)
- **gcloud CLI** authenticated with a GCP project that has Batch API enabled
- **GCS bucket** accessible by the project's default service account
## Generic Invocation
```bash
dsub --provider google-cls-v2 \
--project <gcp-project> \
--regions <region> \
--image <docker-image> \
--machine-type <machine-type> \
--script <script.sh> \
--tasks <tasks.tsv> \
--logging gs://<bucket>/logs/<job-name>/
```
### Key Parameters
| Parameter | Description |
|-----------|-------------|
| `--provider google-cls-v2` | dsub provider backed by the Cloud Life Sciences v2 API (use `google-batch` for the newer Batch API) |
| `--project` | GCP project with Batch API enabled |
| `--regions` | Compute region (e.g., `us-central1`) |
| `--image` | Docker image to run (e.g., `staphb/vadr:1.6.4`) |
| `--machine-type` | VM type (e.g., `n1-highmem-4` for 26GB RAM) |
| `--script` | Local shell script to execute inside the container |
| `--tasks` | TSV file defining one row per job (batch mode) |
| `--logging` | GCS path for stdout/stderr logs |
## Task TSV Format
The tasks TSV defines inputs, outputs, and environment variables for each job.
Header row uses column prefixes to declare types:
```
--env VAR1 --env VAR2 --input FASTA --output RESULT --output LOG
value1 value2 gs://bucket/input.fasta gs://bucket/output.txt gs://bucket/log.txt
```
- `--env NAME` -- environment variable passed to the script
- `--input NAME` -- GCS file downloaded to a local path; the env var is set to the local path
- `--output NAME` -- local path; after the script finishes, the file is uploaded to GCS
Each non-header row is one job. All jobs run independently in parallel.
## GCP Project and Bucket Scoping
The service account running dsub jobs must have read/write access to all GCS paths
referenced in the tasks TSV. The simplest approach:
1. Use a GCP project whose default service account already has access to your data
2. Use a bucket within that same project for staging intermediate/output files
3. For ephemeral results, use a temp bucket with a lifecycle policy (e.g., 30-day auto-delete)
### Broad Viral Genomics Defaults
Most developers on the viral-ngs codebase use:
- **GCP project**: `gcid-viral-seq`
- **Staging bucket**: `gs://viral-temp-30d` (30-day auto-delete lifecycle)
These are not universal -- always confirm with the user before using them.
## Monitoring Jobs
```bash
# Check job status
dsub --provider google-cls-v2 --project <project> --jobs <job-id> --status
# Or use dstat
dstat --provider google-cls-v2 --project <project> --jobs <job-id> --status '*'
# View logs
gcloud storage cat gs://<bucket>/logs/<job-name>/<task-id>.log
```
## Tips
- **Batch over single jobs**: Always prefer `--tasks` with a TSV over individual
`dsub` invocations. One TSV row per job is cleaner and easier to track.
- **Machine sizing**: Check your tool's memory requirements. VADR needs ~16GB;
use `n1-highmem-4` (26GB). Most tools work fine with `n1-standard-4` (15GB).
- **Script portability**: Write the `--script` to be self-contained. It receives
inputs/outputs as environment variables. Don't assume any local state.
- **Logging**: Always set `--logging` to a GCS path so you can debug failures.
- **Idempotency**: If re-running, dsub will create new jobs. Check for existing
outputs before re-submitting to avoid redundant computation.
## Example: VADR Batch Analysis
From the GATK-to-FreeBayes regression testing (PR #1053), we ran VADR on 30 FASTAs
(15 assemblies x old/new) using dsub:
```bash
source ~/venvs/dsub/bin/activate # or wherever dsub is installed
dsub --provider google-cls-v2 \
--project gcid-viral-seq \
--regions us-central1 \
--image staphb/vadr:1.6.4 \
--machine-type n1-highmem-4 \
--script run_vadr.sh \
--tasks vadr_tasks.tsv \
--logging gs://viral-temp-30d/vadr_regression/logs/
```
The tasks TSV had columns for VADR options (`--env VADR_OPTS`), model URL
(`--env MODEL_URL`), input FASTA (`--input FASTA`), and outputs
(`--output NUM_ALERTS`, `--output ALERTS_TSV`, `--output VADR_TGZ`).
All 30 jobs completed in ~15 minutes total (running in parallel on GCP).
================================================
FILE: .agents/skills/regression-testing/SKILL.md
================================================
# Assembly Regression Testing
End-to-end regression testing for assembly pipeline changes against Terra submissions.
## When to Use
Use this playbook when a PR makes functional changes to the assembly or variant-calling
pipeline (e.g., swapping variant callers, changing alignment parameters, modifying
consensus logic). It compares assembly outputs from old vs new code across hundreds
of real samples to validate equivalence or improvement.
## Prerequisites
- **gcloud CLI** -- authenticated with access to Terra workspace GCS buckets
- **mafft** -- for pairwise sequence alignment
- **Python** with pandas and matplotlib (e.g., a dataviz venv)
- **dsub** -- for running VADR batch jobs on GCP (see the `dsub-batch-jobs` skill)
## Workflow
### Step 1: Set Up Terra Submissions (Manual)
The user must manually launch Terra submissions with old and new code:
1. Run the pipeline on a representative dataset using the **main branch** Docker image
2. Run the same pipeline on the same dataset using the **feature branch** Docker image
3. Note the submission IDs and workspace bucket for both runs
### Step 2: Discover Paired Samples
Use `discover_pairs.py` to find all comparable old/new sample pairs by crawling
GCS Cromwell output directories.
```bash
python discover_pairs.py \
--bucket <workspace-bucket-id> \
--old-sub <old-submission-id> \
--new-sub <new-submission-id> \
--output pairs.json
```
This produces a JSON mapping sample_name -> {old_tsv, new_tsv} for all samples
present in both submissions.
### Step 3: Compare Assembly Outputs
Use `compare_sample_pair.py` to compare each sample pair. This script:
- Downloads assembly_stats TSVs from GCS
- Compares metrics (coverage, % reference covered, length, etc.)
- Downloads FASTA assemblies and aligns them with mafft
- Reports SNPs, indels (events and bp), ambiguity diffs, and terminal extensions
```bash
python compare_sample_pair.py \
--old-tsv <gcs_uri> --new-tsv <gcs_uri> \
--work-dir ./results/<sample> \
--output-json ./results/<sample>.json
```
For batch processing, iterate over all entries in `pairs.json` and invoke
`compare_sample_pair.py` for each sample pair (e.g., via a small wrapper
script using `concurrent.futures` or `xargs`/GNU `parallel`).
### Step 4: Generate Report
Use `generate_report.py` to aggregate all per-sample JSONs into plots and a markdown report.
```bash
python generate_report.py \
--results-dir ./results/ \
--report-dir ./report/ \
--workspace-name <name>
```
Outputs:
- Summary TSV with per-assembly metrics
- 8 plots (scatter plots, histograms, identity distribution)
- Markdown report with summary tables and divergent assembly details
### Step 5: (Optional) VADR Annotation Quality
For assemblies with internal indel differences, run VADR to assess whether indels
cause frameshifts or other annotation problems. See the `dsub-batch-jobs` skill for
details on running batch jobs via dsub.
Use `run_vadr.sh` with dsub to run VADR on each FASTA:
```bash
dsub --provider google-cls-v2 \
--project <gcp-project> --regions us-central1 \
--image staphb/vadr:1.6.4 \
--machine-type n1-highmem-4 \
--script run_vadr.sh \
--tasks vadr_tasks.tsv \
--logging gs://<bucket>/vadr_logs/
```
VADR model parameters come from the viral-references repo:
https://github.com/broadinstitute/viral-references/blob/main/annotation/vadr/vadr-by-taxid.tsv
Use the taxid from the assembly_id (format: `sample_id-taxid`) to look up the
correct `vadr_opts`, `min_seq_len`, `max_seq_len`, `vadr_model_tar_url`, and
`vadr_model_tar_subdir`.
### Step 6: Post Results
Post the report as a PR comment. Before posting:
- **Self-review the proposed comment for confidential information** (sample names,
internal paths, credentials, etc.). Ask the user if in doubt about what is safe
to share publicly.
- Include plots as image attachments if the PR is on GitHub
- Attribute the analysis appropriately
## Key Patterns
### Per-Segment Alignment
Multi-segment genomes (e.g., influenza with 8 segments) must be aligned
**per-segment**, not as a single concatenated sequence. Otherwise, terminal
effects at segment boundaries get misclassified as internal indels.
The `compare_sample_pair.py` script handles this automatically: it pairs
segments by FASTA header, aligns each pair independently, analyzes each
alignment separately (so terminal effects stay terminal), and aggregates
the statistics.
### Event Counting vs BP Counting
For indels, both counts matter:
- **BP count**: Total gap positions (e.g., "49 bp of indels")
- **Event count**: Contiguous gap runs (e.g., "13 indel events")
A single 26bp insertion is 1 event but 26 bp. Event counts better reflect
the number of variant-calling decisions that differ between old and new code.
### VADR Frameshift Cascade Detection
A single spurious 1bp indel in a coding region causes a cascade of VADR alerts:
1. `FRAMESHIFT` -- the indel shifts the reading frame
2. `STOP_CODON` -- premature stop codon in the shifted frame
3. `UNEXPECTED_LENGTH` -- protein length doesn't match model
4. `PEPTIDE_TRANSLATION_PROBLEM` -- for each downstream mature peptide
When comparing VADR alert counts, a large delta (e.g., 32 -> 1) usually means
one version has frameshift-causing indels that the other avoids. Check the
`.alt.list` files to confirm which genes are affected.
## Interpreting Results
### What to Look For
1. **Identity distribution**: Most assemblies should be 100% identical. Any
below 99.9% warrant investigation.
2. **SNP count = 0 for all assemblies**: Pipeline changes that only affect
indel calling (e.g., swapping variant callers) should produce zero SNPs.
3. **Indel events**: The number and nature of indel differences. Are they in
coding regions? Do they cause frameshifts?
4. **Coverage correlation**: Low-coverage samples (<10x) are most likely to
show differences between variant callers.
5. **VADR alert deltas**: Fewer alerts = more biologically plausible assembly.
Large improvements (e.g., -31 alerts) strongly favor the new code.
### Red Flags
- Assemblies present in old but missing in new (or vice versa)
- SNPs introduced where none existed before
- VADR alerts increasing significantly for the new code
- Differences concentrated in specific organisms/taxids
================================================
FILE: .agents/skills/regression-testing/compare_sample_pair.py
================================================
#!/usr/bin/env python3
"""Compare assembly outputs between old and new code for a single sample pair.
Takes two GCS URIs pointing at assembly_metadata TSV files (old and new),
downloads them, compares metrics, aligns FASTAs with mafft, and outputs a JSON result.
"""
import argparse
import csv
import io
import json
import logging
import os
import subprocess
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
log = logging.getLogger(__name__)
# Metric columns copied verbatim from each assembly_metadata TSV row into the
# per-assembly comparison output (see compare_assembly / METRICS extraction).
METRICS_COLS = [
    'assembly_length', 'assembly_length_unambiguous', 'reads_aligned',
    'mean_coverage', 'percent_reference_covered', 'reference_length',
    'scaffolding_num_segments_recovered', 'reference_num_segments_required',
]
# Columns coerced to float/int by parse_tsv; non-empty values that fail to
# parse are replaced with None (empty strings are left untouched).
FLOAT_COLS = {'mean_coverage', 'percent_reference_covered'}
INT_COLS = {'assembly_length', 'assembly_length_unambiguous', 'reads_aligned',
            'reference_length', 'scaffolding_num_segments_recovered',
            'reference_num_segments_required'}
def gcloud_cat(gcs_uri):
    """Read a GCS file's contents as a string.

    Shells out to `gcloud storage cat`; raises RuntimeError (with gcloud's
    stderr) on a non-zero exit status.
    """
    proc = subprocess.run(
        ['gcloud', 'storage', 'cat', gcs_uri],
        capture_output=True, text=True, timeout=120
    )
    if proc.returncode != 0:
        raise RuntimeError(f"gcloud storage cat failed for {gcs_uri}: {proc.stderr.strip()}")
    return proc.stdout
def gcloud_cp(gcs_uri, local_path):
    """Download a GCS file to local path.

    Shells out to `gcloud storage cp`; raises RuntimeError (with gcloud's
    stderr) on a non-zero exit status. Returns nothing.
    """
    proc = subprocess.run(
        ['gcloud', 'storage', 'cp', gcs_uri, local_path],
        capture_output=True, text=True, timeout=120
    )
    if proc.returncode != 0:
        raise RuntimeError(f"gcloud storage cp failed for {gcs_uri}: {proc.stderr.strip()}")
def parse_tsv(content):
    """Parse assembly_metadata TSV content into a dict keyed by assembly_id.

    Rows without an 'entity:assembly_id' value are skipped. Columns listed in
    FLOAT_COLS / INT_COLS are coerced to their numeric type; non-empty values
    that fail to parse become None, while empty values are left as-is.

    Returns dict: assembly_id -> {col: value, ...}
    """
    table = {}
    for record in csv.DictReader(io.StringIO(content), delimiter='\t'):
        # The first column of a Terra entity TSV is 'entity:assembly_id'.
        key = record.get('entity:assembly_id', '').strip()
        if not key:
            continue
        converted = dict(record)
        # Coerce numeric columns (sets are disjoint, so order is irrelevant).
        for col, caster in [(c, float) for c in FLOAT_COLS] + [(c, int) for c in INT_COLS]:
            value = converted.get(col)
            if value:
                try:
                    converted[col] = caster(value)
                except (ValueError, TypeError):
                    converted[col] = None
        table[key] = converted
    return table
def parse_fasta(content):
    """Parse a FASTA string into list of (header, sequence) tuples.

    Headers are taken verbatim after '>' (whitespace-stripped); sequence lines
    are concatenated. Blank lines are ignored, and any sequence text before
    the first header is silently dropped.
    """
    records = []
    header = None
    chunks = []
    for raw in content.strip().split('\n'):
        stripped = raw.strip()
        if not stripped:
            continue
        if stripped.startswith('>'):
            # Flush the previous record before starting a new one.
            if header is not None:
                records.append((header, ''.join(chunks)))
            header = stripped[1:].strip()
            chunks = []
        else:
            chunks.append(stripped)
    if header is not None:
        records.append((header, ''.join(chunks)))
    return records
def run_mafft_pair(old_seq, new_seq, work_dir, pair_id='0'):
    """Run mafft on a single pair of sequences. Returns aligned (old_seq, new_seq) strings.

    Writes a temporary two-record FASTA into work_dir, aligns it with
    `mafft --auto --preservecase`, and deletes the temp file even on failure.
    Raises RuntimeError if mafft fails or emits anything other than exactly
    two sequences.
    """
    input_path = os.path.join(work_dir, f'combined_{pair_id}.fasta')
    with open(input_path, 'w') as handle:
        handle.write(f'>old_{pair_id}\n{old_seq}\n>new_{pair_id}\n{new_seq}\n')
    try:
        proc = subprocess.run(
            ['mafft', '--auto', '--preservecase', '--quiet', '--thread', '1', input_path],
            capture_output=True, text=True, timeout=300
        )
    finally:
        # Always remove the temp input, even if mafft times out or errors.
        os.unlink(input_path)
    if proc.returncode != 0:
        raise RuntimeError(f"mafft failed: {proc.stderr.strip()}")
    aligned = parse_fasta(proc.stdout)
    if len(aligned) != 2:
        raise RuntimeError(f"Expected 2 sequences from mafft, got {len(aligned)}")
    # mafft preserves input order: first record is 'old', second is 'new'.
    return aligned[0][1], aligned[1][1]
def align_and_analyze_fastas(old_fasta_path, new_fasta_path, work_dir):
    """Align old vs new FASTAs, handling multi-segment genomes.
    For single-segment: runs one mafft alignment and analyzes it.
    For multi-segment: pairs segments by header, aligns each pair separately,
    analyzes each independently (so terminal effects stay terminal), and
    aggregates the stats.
    Returns (aligned_fasta_path, stats_dict).
    """
    with open(old_fasta_path) as f:
        old_fasta_text = f.read()
    with open(new_fasta_path) as f:
        new_fasta_text = f.read()
    old_seqs = parse_fasta(old_fasta_text)
    new_seqs = parse_fasta(new_fasta_text)
    if len(old_seqs) == 1 and len(new_seqs) == 1:
        # Simple case: single segment
        aln_old, aln_new = run_mafft_pair(old_seqs[0][1], new_seqs[0][1], work_dir)
        aligned = os.path.join(work_dir, 'aligned.fasta')
        with open(aligned, 'w') as f:
            f.write(f'>old_{old_seqs[0][0]}\n{aln_old}\n>new_{new_seqs[0][0]}\n{aln_new}\n')
        stats = analyze_alignment_seqs(aln_old, aln_new)
        return aligned, stats
    # Multi-segment: pair by header name
    old_dict = {h: s for h, s in old_seqs}
    new_dict = {h: s for h, s in new_seqs}
    common_headers = [h for h in old_dict if h in new_dict]
    if not common_headers:
        if len(old_seqs) == len(new_seqs):
            # No shared headers but equal counts: fall back to positional
            # pairing. common_headers=None acts as the sentinel for that
            # mode in the branch below.
            log.info(f" Multi-segment: headers don't match, pairing by position ({len(old_seqs)} segments)")
            common_headers = None
        else:
            raise RuntimeError(
                f"Cannot pair segments: {len(old_seqs)} old vs {len(new_seqs)} new, no matching headers")
    # Align each pair and analyze independently
    segment_pairs = []  # list of (header, aligned_old, aligned_new)
    if common_headers is not None:
        # Header-matched mode. NOTE(review): segments present in only one of
        # the two assemblies are silently skipped here — confirm intended.
        for i, h in enumerate(common_headers):
            aln_old, aln_new = run_mafft_pair(old_dict[h], new_dict[h], work_dir, pair_id=str(i))
            segment_pairs.append((h, aln_old, aln_new))
    else:
        # Positional mode: i-th old segment aligned against i-th new segment;
        # the old header labels the pair.
        for i in range(len(old_seqs)):
            aln_old, aln_new = run_mafft_pair(old_seqs[i][1], new_seqs[i][1], work_dir, pair_id=str(i))
            segment_pairs.append((old_seqs[i][0], aln_old, aln_new))
    n_segments = len(segment_pairs)
    log.info(f" Multi-segment alignment: {n_segments} segments aligned")
    # Analyze each segment independently, then aggregate
    agg = {
        'alignment_length': 0, 'internal_length': 0,
        'matches': 0, 'snps': 0,
        'internal_insertions': 0, 'internal_deletions': 0,
        'internal_insertion_events': 0, 'internal_deletion_events': 0,
        'ambiguity_diffs': 0,
        'terminal_old_left': 0, 'terminal_new_left': 0,
        'terminal_old_right': 0, 'terminal_new_right': 0,
        'terminal_extensions_old': 0, 'terminal_extensions_new': 0,
        'terminal_extension_events_old': 0, 'terminal_extension_events_new': 0,
    }
    per_segment = []
    for header, aln_old, aln_new in segment_pairs:
        seg_stats = analyze_alignment_seqs(aln_old, aln_new)
        per_segment.append({'segment': header, **seg_stats})
        # Sum only keys pre-declared in agg; per-segment 'identity' is not
        # summed (a ratio) and is recomputed for the aggregate below.
        for key in agg:
            agg[key] += seg_stats.get(key, 0)
    # Compute aggregate identity
    total_bases_compared = agg['matches'] + agg['snps'] + agg['ambiguity_diffs']
    agg['identity'] = agg['matches'] / total_bases_compared if total_bases_compared > 0 else 1.0
    agg['n_segments'] = n_segments
    agg['per_segment'] = per_segment
    # Write per-segment alignment file (for review)
    aligned = os.path.join(work_dir, 'aligned.fasta')
    with open(aligned, 'w') as f:
        for header, aln_old, aln_new in segment_pairs:
            f.write(f'>old_{header}\n{aln_old}\n>new_{header}\n{aln_new}\n')
    return aligned, agg
def _count_one_sided_gaps(old_seq, new_seq, positions):
    """Count columns where exactly one sequence has a gap, in bp and events.

    positions is an iterable of column indices (in order). Returns a tuple
    (old_bp, old_events, new_bp, new_events) where the "old" counts cover
    columns with a base in old and a gap in new, and the "new" counts the
    reverse. A contiguous run of same-sided gap columns counts as one event;
    any other column (both bases, or both gaps) breaks the run.
    """
    old_bp = old_events = new_bp = new_events = 0
    prev_old_gap = prev_new_gap = False
    for i in positions:
        if old_seq[i] != '-' and new_seq[i] == '-':
            old_bp += 1
            if not prev_old_gap:
                old_events += 1
            prev_old_gap, prev_new_gap = True, False
        elif new_seq[i] != '-' and old_seq[i] == '-':
            new_bp += 1
            if not prev_new_gap:
                new_events += 1
            prev_new_gap, prev_old_gap = True, False
        else:
            prev_old_gap = prev_new_gap = False
    return old_bp, old_events, new_bp, new_events
def analyze_alignment_seqs(old_seq_str, new_seq_str):
    """Analyze a pairwise alignment given two aligned sequence strings.

    Inputs are equal-length aligned sequences using '-' for gaps (case is
    normalized to upper). Columns split into a terminal region (leading and
    trailing columns up to the first/last column where BOTH sequences have a
    base) and the internal region in between. Terminal one-sided gaps are
    reported as terminal extensions; internal one-sided gaps as insertions
    (base only in new) / deletions (base only in old); internal both-base
    columns as matches, SNPs (both bases in ACGT), or ambiguity diffs.

    Returns dict with alignment statistics (bp counts, event counts, and
    identity = matches / (matches + snps + ambiguity_diffs), defaulting to
    1.0 when no bases were compared).
    Raises RuntimeError if the two sequences differ in length.
    """
    old_seq = old_seq_str.upper()
    new_seq = new_seq_str.upper()
    aln_len = len(old_seq)
    if len(new_seq) != aln_len:
        raise RuntimeError(f"Alignment sequences differ in length: {aln_len} vs {len(new_seq)}")
    ACGT = set('ACGT')
    # Find the internal region (where both sequences have bases)
    left_bound = 0
    while left_bound < aln_len and (old_seq[left_bound] == '-' or new_seq[left_bound] == '-'):
        left_bound += 1
    right_bound = aln_len - 1
    while right_bound >= 0 and (old_seq[right_bound] == '-' or new_seq[right_bound] == '-'):
        right_bound -= 1
    # Terminal extensions: bases one assembly has past the other at each end.
    (terminal_old_left, terminal_old_left_events,
     terminal_new_left, terminal_new_left_events) = _count_one_sided_gaps(
        old_seq, new_seq, range(left_bound))
    (terminal_old_right, terminal_old_right_events,
     terminal_new_right, terminal_new_right_events) = _count_one_sided_gaps(
        old_seq, new_seq, range(right_bound + 1, aln_len))
    # Internal indels: a base only in old is a deletion (relative to new);
    # a base only in new is an insertion.
    internal = range(left_bound, right_bound + 1)
    (internal_deletions, internal_deletion_events,
     internal_insertions, internal_insertion_events) = _count_one_sided_gaps(
        old_seq, new_seq, internal)
    # Substitution analysis over internal columns where both have a base.
    matches = 0
    snps = 0
    ambiguity_diffs = 0
    for i in internal:
        o = old_seq[i]
        n = new_seq[i]
        if o == '-' or n == '-':
            continue  # gap columns already counted as indels above
        if o == n:
            matches += 1
        elif o in ACGT and n in ACGT:
            snps += 1
        else:
            # Mismatch involving an ambiguity code (N, R, Y, ...)
            ambiguity_diffs += 1
    total_internal = right_bound - left_bound + 1 if right_bound >= left_bound else 0
    total_bases_compared = matches + snps + ambiguity_diffs
    identity = matches / total_bases_compared if total_bases_compared > 0 else 1.0
    return {
        'alignment_length': aln_len,
        'internal_length': total_internal,
        'matches': matches,
        'snps': snps,
        'internal_insertions': internal_insertions,
        'internal_deletions': internal_deletions,
        'internal_insertion_events': internal_insertion_events,
        'internal_deletion_events': internal_deletion_events,
        'ambiguity_diffs': ambiguity_diffs,
        'terminal_old_left': terminal_old_left,
        'terminal_new_left': terminal_new_left,
        'terminal_old_right': terminal_old_right,
        'terminal_new_right': terminal_new_right,
        'terminal_extensions_old': terminal_old_left + terminal_old_right,
        'terminal_extensions_new': terminal_new_left + terminal_new_right,
        'terminal_extension_events_old': terminal_old_left_events + terminal_old_right_events,
        'terminal_extension_events_new': terminal_new_left_events + terminal_new_right_events,
        'identity': identity,
    }
def analyze_alignment(aligned_fasta_path):
    """Analyze pairwise alignment from mafft output file. Thin wrapper around analyze_alignment_seqs."""
    with open(aligned_fasta_path) as handle:
        records = parse_fasta(handle.read())
    if len(records) != 2:
        raise RuntimeError(f"Expected 2 sequences in alignment, got {len(records)}")
    (_, first_seq), (_, second_seq) = records
    return analyze_alignment_seqs(first_seq, second_seq)
def compare_assembly(assembly_id, old_row, new_row, work_dir):
    """Compare one assembly (old vs new). Downloads FASTAs, runs mafft, returns result dict.

    old_row/new_row are parse_tsv rows for the same assembly_id. The result
    dict always contains the metric values and deltas; 'alignment' stays None
    (and 'error' is set) when FASTAs are missing, empty, or alignment fails.
    """
    result = {
        'assembly_id': assembly_id,
        'taxid': old_row.get('taxid', ''),
        'tax_name': old_row.get('tax_name', ''),
        'old_metrics': {},
        'new_metrics': {},
        'deltas': {},
        'alignment': None,  # filled with analyze stats dict on success
        'error': None,      # filled with the exception message on failure
    }
    # Extract metrics
    for col in METRICS_COLS:
        old_val = old_row.get(col)
        new_val = new_row.get(col)
        result['old_metrics'][col] = old_val
        result['new_metrics'][col] = new_val
        # Delta is new - old; None when either side is missing/non-numeric.
        if old_val is not None and new_val is not None:
            try:
                result['deltas'][col] = float(new_val) - float(old_val)
            except (ValueError, TypeError):
                result['deltas'][col] = None
    # Get FASTA paths
    old_fasta_uri = old_row.get('assembly_fasta', '').strip()
    new_fasta_uri = new_row.get('assembly_fasta', '').strip()
    if not old_fasta_uri or not new_fasta_uri:
        log.info(f" {assembly_id}: skipping alignment (missing FASTA URI)")
        return result
    # Download FASTAs
    aln_dir = os.path.join(work_dir, f'aln_{assembly_id}')
    os.makedirs(aln_dir, exist_ok=True)
    old_fasta_local = os.path.join(aln_dir, 'old.fasta')
    new_fasta_local = os.path.join(aln_dir, 'new.fasta')
    try:
        log.debug(f" Downloading old FASTA: {old_fasta_uri}")
        gcloud_cp(old_fasta_uri, old_fasta_local)
        log.debug(f" Downloading new FASTA: {new_fasta_uri}")
        gcloud_cp(new_fasta_uri, new_fasta_local)
        # Check if FASTAs are non-empty
        if os.path.getsize(old_fasta_local) == 0 or os.path.getsize(new_fasta_local) == 0:
            log.info(f" {assembly_id}: skipping alignment (empty FASTA)")
            return result
        # Align and analyze (handles multi-segment genomes independently)
        log.debug(f" Running alignment for {assembly_id}")
        aligned_path, alignment_stats = align_and_analyze_fastas(old_fasta_local, new_fasta_local, aln_dir)
        result['alignment'] = alignment_stats
        identity = alignment_stats['identity']
        snps = alignment_stats['snps']
        indels = alignment_stats['internal_insertions'] + alignment_stats['internal_deletions']
        log.info(f" {assembly_id}: identity={identity:.6f}, snps={snps}, indels={indels}, "
                 f"terminal_ext_old={alignment_stats['terminal_extensions_old']}, "
                 f"terminal_ext_new={alignment_stats['terminal_extensions_new']}")
    except Exception as e:
        # Record the failure but still return the metrics comparison.
        log.error(f" {assembly_id}: alignment failed: {e}")
        result['error'] = str(e)
    finally:
        # Cleanup downloaded files (runs on success, early return, and error)
        for f in [old_fasta_local, new_fasta_local]:
            if os.path.exists(f):
                os.unlink(f)
        # Keep aligned file only if identity < 99.9%
        # (also kept when alignment errored out, for debugging)
        aligned_file = os.path.join(aln_dir, 'aligned.fasta')
        if os.path.exists(aligned_file):
            if result['alignment'] and result['alignment']['identity'] >= 0.999:
                os.unlink(aligned_file)
            else:
                log.info(f" Keeping alignment file for review: {aligned_file}")
        # Remove empty dir
        try:
            os.rmdir(aln_dir)
        except OSError:
            pass  # dir not empty (kept alignment file)
    return result
def main():
    """CLI entry point: compare old vs new assembly metadata TSVs for one sample.

    Downloads both TSVs from GCS, compares every assembly_id present in both,
    and writes the aggregate comparison results as JSON.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--old-tsv', required=True, help='GCS URI of old assembly_metadata TSV')
    parser.add_argument('--new-tsv', required=True, help='GCS URI of new assembly_metadata TSV')
    parser.add_argument('--work-dir', required=True, help='Working directory for temp files')
    parser.add_argument('--output-json', required=True, help='Output JSON file path')
    parser.add_argument('--verbose', '-v', action='store_true', help='Enable debug logging')
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)
    os.makedirs(args.work_dir, exist_ok=True)

    # Fetch and parse both TSVs from GCS.
    log.info(f"Downloading old TSV: {args.old_tsv}")
    old_rows = parse_tsv(gcloud_cat(args.old_tsv))
    log.info(f"Downloading new TSV: {args.new_tsv}")
    new_rows = parse_tsv(gcloud_cat(args.new_tsv))

    # Derive the sample id from the first row of whichever side has rows.
    sample_id = 'unknown'
    for rows in (old_rows, new_rows):
        if rows:
            sample_id = next(iter(rows.values())).get('sample_id', 'unknown')
            break
    log.info(f"Sample: {sample_id}")
    log.info(f"Old assemblies: {len(old_rows)}, New assemblies: {len(new_rows)}")

    # Partition assembly ids into shared / old-only / new-only sets.
    old_ids, new_ids = set(old_rows), set(new_rows)
    common_ids = sorted(old_ids & new_ids)
    old_only_ids = sorted(old_ids - new_ids)
    new_only_ids = sorted(new_ids - old_ids)
    if old_only_ids:
        log.info(f" Assemblies only in old: {old_only_ids}")
    if new_only_ids:
        log.info(f" Assemblies only in new: {new_only_ids}")
    log.info(f" Assemblies in common: {len(common_ids)}")

    # Compare each assembly present on both sides.
    comparisons = [
        compare_assembly(aid, old_rows[aid], new_rows[aid], args.work_dir)
        for aid in common_ids
    ]

    output = {
        'sample_id': sample_id,
        'old_tsv_uri': args.old_tsv,
        'new_tsv_uri': args.new_tsv,
        'old_assembly_count': len(old_rows),
        'new_assembly_count': len(new_rows),
        'assembly_count_match': len(old_rows) == len(new_rows),
        'assemblies_only_in_old': old_only_ids,
        'assemblies_only_in_new': new_only_ids,
        'comparisons': comparisons,
    }
    with open(args.output_json, 'w') as fh:
        json.dump(output, fh, indent=2)
    log.info(f"Wrote results to {args.output_json}")


if __name__ == '__main__':
    main()
================================================
FILE: .agents/skills/regression-testing/discover_pairs.py
================================================
#!/usr/bin/env python3
"""Discover comparable old/new sample pairs by crawling GCS Cromwell output directories.
For each submission, finds assembly_metadata TSV files named
``assembly_metadata-<sample>.tsv``, extracts sample names from filenames,
and outputs the intersection as a JSON mapping.
Usage:
python discover_pairs.py \
--bucket fc-XXXXXXXX-... \
--old-sub <old-submission-id> \
--new-sub <new-submission-id> \
-o pairs.json
"""
import argparse
import json
import logging
import re
import subprocess
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
log = logging.getLogger(__name__)
def gcloud_ls(path):
    """List GCS path, return list of URIs. Returns [] on error."""
    try:
        proc = subprocess.run(
            ['gcloud', 'storage', 'ls', path],
            capture_output=True, text=True, timeout=60
        )
        if proc.returncode == 0:
            # One URI per non-empty output line.
            return [entry.strip() for entry in proc.stdout.strip().split('\n') if entry.strip()]
        log.warning(f"gcloud ls non-zero exit for {path}: {proc.stderr.strip()}")
        return []
    except Exception as e:
        # Covers missing gcloud binary, timeouts, etc. — listing is best-effort.
        log.warning(f"gcloud ls failed for {path}: {e}")
        return []
def find_tsv_in_call_dir(call_dir_uri):
    """Find assembly_metadata TSV in a call directory, handling attempt-N subdirs.
    Returns (sample_name, tsv_uri) or (None, None).
    """
    entries = gcloud_ls(call_dir_uri)

    def attempt_number(uri):
        # Sort key: the N in ".../attempt-N", or 0 when absent.
        m = re.search(r'/attempt-(\d+)', uri)
        return int(m.group(1)) if m else 0

    # Retried tasks leave attempt-N subdirectories; prefer the highest attempt.
    retries = sorted((e for e in entries if '/attempt-' in e),
                     key=attempt_number, reverse=True)
    candidates = [e for e in entries if e.endswith('.tsv')]
    for retry_dir in retries:
        nested = [e for e in gcloud_ls(retry_dir) if e.endswith('.tsv')]
        if nested:
            candidates = nested
            break
    # Extract the sample name embedded in the TSV filename.
    for uri in candidates:
        m = re.search(r'assembly_metadata-(.+)\.tsv$', uri)
        if m:
            return m.group(1), uri
    return None, None
def discover_submission_tsvs(bucket, submission_id):
    """Find all assembly_metadata TSVs for a submission.
    Returns dict: sample_name -> tsv_gcs_uri
    """
    base = f"gs://{bucket}/submissions/{submission_id}/assemble_denovo_metagenomic/"
    log.info(f"Listing workflow directories in {base}")
    wf_dirs = gcloud_ls(base)
    log.info(f"Found {len(wf_dirs)} workflow directories")

    results = {}
    # Both possible call names; first one with a TSV wins per workflow.
    call_names = ('call-assembly_stats_non_empty', 'call-assembly_stats_empty')
    for idx, wf_dir in enumerate(wf_dirs):
        if idx % 20 == 0:
            # Periodic progress log for long crawls.
            log.info(f" Scanning workflow {idx+1}/{len(wf_dirs)}...")
        for call_name in call_names:
            sample, uri = find_tsv_in_call_dir(f"{wf_dir}{call_name}/")
            if sample:
                if sample in results:
                    log.warning(f"Duplicate sample {sample} -- keeping first occurrence")
                else:
                    results[sample] = uri
                break
    log.info(f"Found TSVs for {len(results)} samples in submission {submission_id[:8]}")
    return results
def main():
    """CLI entry point: pair old/new submission TSVs by sample name and write JSON."""
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--bucket', required=True,
                        help='Terra workspace GCS bucket ID (e.g., fc-XXXXXXXX-...)')
    parser.add_argument('--old-sub', required=True,
                        help='Old submission ID (main branch)')
    parser.add_argument('--new-sub', required=True,
                        help='New submission ID (feature branch)')
    parser.add_argument('--output', '-o', required=True,
                        help='Output JSON file path')
    args = parser.parse_args()

    log.info(f"Old submission: {args.old_sub[:8]}")
    log.info(f"New submission: {args.new_sub[:8]}")
    old_tsvs = discover_submission_tsvs(args.bucket, args.old_sub)
    new_tsvs = discover_submission_tsvs(args.bucket, args.new_sub)

    # Partition sample names into shared and one-sided sets.
    old_names, new_names = set(old_tsvs), set(new_tsvs)
    common_samples = sorted(old_names & new_names)
    old_only = sorted(old_names - new_names)
    new_only = sorted(new_names - old_names)
    log.info(f"Old-only samples: {len(old_only)}")
    log.info(f"New-only samples: {len(new_only)}")
    log.info(f"Intersecting samples: {len(common_samples)}")
    if old_only:
        log.info(f" Old-only: {old_only[:5]}{'...' if len(old_only) > 5 else ''}")
    if new_only:
        log.info(f" New-only: {new_only[:5]}{'...' if len(new_only) > 5 else ''}")

    pairs = {
        sample: {'old_tsv': old_tsvs[sample], 'new_tsv': new_tsvs[sample]}
        for sample in common_samples
    }
    output = {
        'bucket': args.bucket,
        'old_submission': args.old_sub,
        'new_submission': args.new_sub,
        'old_sample_count': len(old_tsvs),
        'new_sample_count': len(new_tsvs),
        'paired_count': len(pairs),
        'old_only': old_only,
        'new_only': new_only,
        'pairs': pairs,
    }
    with open(args.output, 'w') as fh:
        json.dump(output, fh, indent=2)
    log.info(f"Wrote {len(pairs)} pairs to {args.output}")


if __name__ == '__main__':
    main()
================================================
FILE: .agents/skills/regression-testing/generate_report.py
================================================
#!/usr/bin/env python3
"""Generate regression testing report with plots from per-sample JSON results.
Aggregates all comparison results, produces summary TSV, plots, and markdown report.
"""
import argparse
import glob
import json
import logging
import os
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s')
log = logging.getLogger(__name__)
# Delay imports so script can show usage without these deps
def get_deps():
    """Lazily import pandas and matplotlib, returning (pd, plt).

    Deferred imports let the script print --help on hosts where these
    packages are not installed (see module comment above).
    """
    import pandas as pd
    import matplotlib
    matplotlib.use('Agg')  # headless backend: render to files, no display needed
    import matplotlib.pyplot as plt
    return pd, plt
def load_results(results_dir):
    """Load all per-sample JSON results.

    Reads every ``*.json`` in results_dir (sorted by path for determinism);
    unreadable/unparseable files are logged and skipped.
    """
    loaded = []
    for path in sorted(glob.glob(os.path.join(results_dir, '*.json'))):
        try:
            with open(path) as fh:
                loaded.append(json.load(fh))
        except Exception as e:
            log.warning(f"Failed to load {path}: {e}")
    return loaded
def build_comparison_table(results, workspace_name):
    """Build a flat table of per-assembly comparisons.

    Args:
        results: list of per-sample result dicts (as produced by
            compare_sample_pair.py), each with a 'comparisons' list.
        workspace_name: label stamped into every row's 'workspace' column.

    Returns:
        List of flat dicts, one per assembly comparison: identity columns,
        old_/new_/delta_ metric columns, and alignment statistics (all
        alignment columns are None when no alignment was performed).
    """
    rows = []
    for r in results:
        sample_id = r.get('sample_id', 'unknown')
        for comp in r.get('comparisons', []):
            row = {
                'workspace': workspace_name,
                'sample_id': sample_id,
                'assembly_id': comp.get('assembly_id', ''),
                'taxid': comp.get('taxid', ''),
                'tax_name': comp.get('tax_name', ''),
            }
            # Metrics: one old/new/delta column trio per metric.
            for col in ['percent_reference_covered', 'mean_coverage',
                        'assembly_length_unambiguous', 'assembly_length',
                        'reads_aligned', 'reference_length']:
                row[f'old_{col}'] = comp.get('old_metrics', {}).get(col)
                row[f'new_{col}'] = comp.get('new_metrics', {}).get(col)
                row[f'delta_{col}'] = comp.get('deltas', {}).get(col)
            # Alignment stats
            aln = comp.get('alignment')
            if aln:
                row['alignment_identity'] = aln.get('identity')
                row['snp_count'] = aln.get('snps', 0)
                row['internal_insertions'] = aln.get('internal_insertions', 0)
                row['internal_deletions'] = aln.get('internal_deletions', 0)
                row['indel_count_bp'] = aln.get('internal_insertions', 0) + aln.get('internal_deletions', 0)
                row['indel_count_events'] = aln.get('internal_insertion_events', 0) + aln.get('internal_deletion_events', 0)
                row['terminal_extensions_old'] = aln.get('terminal_extensions_old', 0)
                row['terminal_extensions_new'] = aln.get('terminal_extensions_new', 0)
                row['terminal_extension_events_old'] = aln.get('terminal_extension_events_old', 0)
                row['terminal_extension_events_new'] = aln.get('terminal_extension_events_new', 0)
                row['ambiguity_diffs'] = aln.get('ambiguity_diffs', 0)
            else:
                # Fix: the no-alignment branch previously omitted the
                # internal_insertions/internal_deletions keys set by the
                # alignment branch, giving rows an inconsistent schema.
                # Set every alignment column to None explicitly.
                for aln_col in ('alignment_identity', 'snp_count',
                                'internal_insertions', 'internal_deletions',
                                'indel_count_bp', 'indel_count_events',
                                'terminal_extensions_old', 'terminal_extensions_new',
                                'terminal_extension_events_old', 'terminal_extension_events_new',
                                'ambiguity_diffs'):
                    row[aln_col] = None
            row['error'] = comp.get('error')
            rows.append(row)
    return rows
def build_sample_summary(results):
    """Build sample-level summary table (one flat dict per sample result)."""
    return [
        {
            'sample_id': r.get('sample_id', 'unknown'),
            'old_assembly_count': r.get('old_assembly_count', 0),
            'new_assembly_count': r.get('new_assembly_count', 0),
            'assembly_count_match': r.get('assembly_count_match', False),
            # Store counts (not the lists) of one-sided assemblies.
            'assemblies_only_in_old': len(r.get('assemblies_only_in_old', [])),
            'assemblies_only_in_new': len(r.get('assemblies_only_in_new', [])),
            'num_comparisons': len(r.get('comparisons', [])),
        }
        for r in results
    ]
def generate_plots(df, plot_dir):
    """Generate all plots from the comparison dataframe.

    Args:
        df: flat per-assembly comparison dataframe, one row per assembly
            (columns as produced by build_comparison_table).
        plot_dir: output directory for PNGs (created if missing).

    Writes up to eight PNG files at 150 dpi. Returns None. Skips all
    plotting when no row has both old and new percent_reference_covered.
    """
    _, plt = get_deps()
    os.makedirs(plot_dir, exist_ok=True)
    # Filter to rows with actual assemblies
    df_asm = df[df['old_percent_reference_covered'].notna() &
                df['new_percent_reference_covered'].notna()].copy()
    if len(df_asm) == 0:
        log.warning("No assembly comparisons with metrics — skipping plots")
        return
    # 1. Percent reference covered scatter
    fig, ax = plt.subplots(figsize=(8, 8))
    ax.scatter(df_asm['old_percent_reference_covered'] * 100,
               df_asm['new_percent_reference_covered'] * 100,
               alpha=0.5, s=20)
    lims = [0, 105]  # percent axes with a little headroom above 100
    ax.plot(lims, lims, 'r--', alpha=0.5, label='y=x')
    ax.set_xlabel('Old % Reference Covered')
    ax.set_ylabel('New % Reference Covered')
    ax.set_title('Percent Reference Covered: Old vs New')
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(plot_dir, 'pct_ref_covered_scatter.png'), dpi=150)
    plt.close(fig)
    # 2. Mean coverage scatter (log scale)
    # Positive-coverage rows only, since both axes are log-scaled.
    df_cov = df_asm[df_asm['old_mean_coverage'].notna() &
                    df_asm['new_mean_coverage'].notna() &
                    (df_asm['old_mean_coverage'] > 0) &
                    (df_asm['new_mean_coverage'] > 0)].copy()
    if len(df_cov) > 0:
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.scatter(df_cov['old_mean_coverage'], df_cov['new_mean_coverage'],
                   alpha=0.5, s=20)
        # y=x line spanning the data range with 20% margin on each end.
        min_v = min(df_cov['old_mean_coverage'].min(), df_cov['new_mean_coverage'].min()) * 0.8
        max_v = max(df_cov['old_mean_coverage'].max(), df_cov['new_mean_coverage'].max()) * 1.2
        ax.plot([min_v, max_v], [min_v, max_v], 'r--', alpha=0.5, label='y=x')
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_xlabel('Old Mean Coverage')
        ax.set_ylabel('New Mean Coverage')
        ax.set_title('Mean Coverage: Old vs New')
        ax.legend()
        fig.tight_layout()
        fig.savefig(os.path.join(plot_dir, 'mean_coverage_scatter.png'), dpi=150)
        plt.close(fig)
    # 3. Assembly length scatter
    df_len = df_asm[df_asm['old_assembly_length_unambiguous'].notna() &
                    df_asm['new_assembly_length_unambiguous'].notna()].copy()
    if len(df_len) > 0:
        fig, ax = plt.subplots(figsize=(8, 8))
        ax.scatter(df_len['old_assembly_length_unambiguous'],
                   df_len['new_assembly_length_unambiguous'],
                   alpha=0.5, s=20)
        min_v = min(df_len['old_assembly_length_unambiguous'].min(),
                    df_len['new_assembly_length_unambiguous'].min()) * 0.95
        max_v = max(df_len['old_assembly_length_unambiguous'].max(),
                    df_len['new_assembly_length_unambiguous'].max()) * 1.05
        ax.plot([min_v, max_v], [min_v, max_v], 'r--', alpha=0.5, label='y=x')
        ax.set_xlabel('Old Unambiguous Length')
        ax.set_ylabel('New Unambiguous Length')
        ax.set_title('Assembly Length (Unambiguous): Old vs New')
        ax.legend()
        fig.tight_layout()
        fig.savefig(os.path.join(plot_dir, 'assembly_length_scatter.png'), dpi=150)
        plt.close(fig)
    # 4. Delta pct_ref_covered histogram
    deltas = df_asm['delta_percent_reference_covered'].dropna() * 100
    if len(deltas) > 0:
        fig, ax = plt.subplots(figsize=(8, 5))
        ax.hist(deltas, bins=50, edgecolor='black', alpha=0.7)
        ax.axvline(0, color='r', linestyle='--', alpha=0.5)  # no-change reference
        ax.set_xlabel('Delta % Reference Covered (New - Old)')
        ax.set_ylabel('Count')
        ax.set_title(f'Distribution of % Reference Covered Changes (n={len(deltas)})')
        fig.tight_layout()
        fig.savefig(os.path.join(plot_dir, 'delta_pct_ref_covered_hist.png'), dpi=150)
        plt.close(fig)
    # 5. Alignment identity histogram
    df_aln = df_asm[df_asm['alignment_identity'].notna()].copy()
    if len(df_aln) > 0:
        fig, ax = plt.subplots(figsize=(8, 5))
        identities = df_aln['alignment_identity'] * 100
        ax.hist(identities, bins=50, edgecolor='black', alpha=0.7)
        ax.set_xlabel('Pairwise Identity (%)')
        ax.set_ylabel('Count')
        ax.set_title(f'Assembly Identity Distribution (n={len(identities)})')
        ax.axvline(100, color='r', linestyle='--', alpha=0.3)  # perfect-identity reference
        fig.tight_layout()
        fig.savefig(os.path.join(plot_dir, 'alignment_identity_hist.png'), dpi=150)
        plt.close(fig)
    # 6. SNP and indel count histograms
    if len(df_aln) > 0:
        fig, axes = plt.subplots(1, 2, figsize=(14, 5))
        snps = df_aln['snp_count'].dropna()
        if len(snps) > 0:
            max_snp = int(snps.max())
            # Integer-aligned bins; at least bins [0,1,2] even when max is 0.
            bins_snp = range(0, max(max_snp + 2, 3))
            axes[0].hist(snps, bins=bins_snp, edgecolor='black', alpha=0.7)
            axes[0].set_xlabel('SNP Count')
            axes[0].set_ylabel('Number of Assemblies')
            axes[0].set_title(f'SNPs per Assembly (n={len(snps)})')
        indels = df_aln['indel_count_events'].dropna()
        if len(indels) > 0:
            max_indel = int(indels.max())
            bins_indel = range(0, max(max_indel + 2, 3))
            axes[1].hist(indels, bins=bins_indel, edgecolor='black', alpha=0.7)
            axes[1].set_xlabel('Indel Count (internal)')
            axes[1].set_ylabel('Number of Assemblies')
            axes[1].set_title(f'Internal Indels per Assembly (n={len(indels)})')
        fig.tight_layout()
        fig.savefig(os.path.join(plot_dir, 'snp_indel_counts.png'), dpi=150)
        plt.close(fig)
    # 7. Terminal extensions histogram
    if len(df_aln) > 0:
        ext_old = df_aln['terminal_extensions_old'].dropna()
        ext_new = df_aln['terminal_extensions_new'].dropna()
        has_ext = (ext_old > 0) | (ext_new > 0)
        if has_ext.any():
            fig, ax = plt.subplots(figsize=(8, 5))
            # Overlayed, semi-transparent histograms for the two directions.
            ax.hist(ext_new[has_ext], bins=30, alpha=0.6, label='New extends beyond Old', edgecolor='black')
            ax.hist(ext_old[has_ext], bins=30, alpha=0.6, label='Old extends beyond New', edgecolor='black')
            ax.set_xlabel('Terminal Extension (bp)')
            ax.set_ylabel('Count')
            ax.set_title('Terminal Extensions')
            ax.legend()
            fig.tight_layout()
            fig.savefig(os.path.join(plot_dir, 'terminal_extensions_hist.png'), dpi=150)
            plt.close(fig)
    # 8. Coverage vs identity
    if len(df_aln) > 0:
        df_ci = df_aln[df_aln['old_mean_coverage'].notna() & (df_aln['old_mean_coverage'] > 0)].copy()
        if len(df_ci) > 0:
            fig, ax = plt.subplots(figsize=(8, 5))
            ax.scatter(df_ci['old_mean_coverage'], df_ci['alignment_identity'] * 100,
                       alpha=0.5, s=20)
            ax.set_xscale('log')
            ax.set_xlabel('Mean Coverage')
            ax.set_ylabel('Pairwise Identity (%)')
            ax.set_title('Coverage vs Assembly Identity')
            fig.tight_layout()
            fig.savefig(os.path.join(plot_dir, 'coverage_vs_identity.png'), dpi=150)
            plt.close(fig)
    log.info(f"Generated plots in {plot_dir}")
def generate_markdown_report(df, sample_df, workspace_name, report_dir, plot_dir):
    """Generate markdown report.

    Args:
        df: per-assembly comparison dataframe (build_comparison_table rows).
        sample_df: per-sample summary dataframe (build_sample_summary rows).
        workspace_name: used in the report title and output filename.
        report_dir: directory to write ``report_<workspace>.md`` into.
        plot_dir: directory of PNG plots to reference from the report.

    Returns:
        Path of the written markdown report.
    """
    pd, _ = get_deps()
    total_samples = len(sample_df)
    # Guard against an empty / column-less dataframe (no results loaded).
    if sample_df.empty or 'old_assembly_count' not in sample_df.columns:
        samples_with_assemblies = 0
        samples_count_match = 0
    else:
        samples_with_assemblies = len(sample_df[sample_df['old_assembly_count'] > 0])
        samples_count_match = len(sample_df[sample_df['assembly_count_match']])
    samples_count_mismatch = total_samples - samples_count_match
    total_assemblies = len(df)
    if df.empty or 'alignment_identity' not in df.columns:
        df_aln = pd.DataFrame()
    else:
        df_aln = df[df['alignment_identity'].notna()]
    # Bucket aligned assemblies by identity band.
    identical = len(df_aln[df_aln['alignment_identity'] >= 1.0]) if len(df_aln) > 0 else 0
    near_identical = len(df_aln[(df_aln['alignment_identity'] >= 0.999) & (df_aln['alignment_identity'] < 1.0)]) if len(df_aln) > 0 else 0
    minor_diff = len(df_aln[(df_aln['alignment_identity'] >= 0.99) & (df_aln['alignment_identity'] < 0.999)]) if len(df_aln) > 0 else 0
    significant_diff = len(df_aln[df_aln['alignment_identity'] < 0.99]) if len(df_aln) > 0 else 0
    # Aggregate variant counts across all aligned assemblies.
    if len(df_aln) > 0 and 'snp_count' in df_aln.columns:
        with_snps = len(df_aln[df_aln['snp_count'] > 0])
        with_indels = len(df_aln[df_aln['indel_count_events'] > 0])
        with_ambig = len(df_aln[df_aln['ambiguity_diffs'] > 0])
        with_terminal = len(df_aln[(df_aln['terminal_extensions_old'] > 0) | (df_aln['terminal_extensions_new'] > 0)])
        total_snps = int(df_aln['snp_count'].sum())
        total_indel_bp = int(df_aln['indel_count_bp'].sum())
        total_indel_events = int(df_aln['indel_count_events'].sum())
        total_ambig = int(df_aln['ambiguity_diffs'].sum())
        total_terminal_bp_old = int(df_aln['terminal_extensions_old'].sum())
        total_terminal_bp_new = int(df_aln['terminal_extensions_new'].sum())
        total_terminal_events_old = int(df_aln['terminal_extension_events_old'].sum())
        total_terminal_events_new = int(df_aln['terminal_extension_events_new'].sum())
    else:
        with_snps = with_indels = with_ambig = with_terminal = 0
        total_snps = total_indel_bp = total_indel_events = total_ambig = 0
        total_terminal_bp_old = total_terminal_bp_new = 0
        total_terminal_events_old = total_terminal_events_new = 0
    report_path = os.path.join(report_dir, f'report_{workspace_name}.md')
    with open(report_path, 'w') as f:
        f.write(f"# Regression Report: {workspace_name}\n\n")
        f.write(f"## Summary\n\n")
        f.write(f"| Metric | Value |\n")
        f.write(f"|--------|-------|\n")
        f.write(f"| Total samples compared | {total_samples} |\n")
        f.write(f"| Samples with assemblies (old) | {samples_with_assemblies} |\n")
        f.write(f"| Samples with matching assembly count | {samples_count_match} |\n")
        f.write(f"| Samples with mismatched assembly count | {samples_count_mismatch} |\n")
        f.write(f"| Total assembly comparisons | {total_assemblies} |\n")
        f.write(f"| Assemblies aligned | {len(df_aln)} |\n\n")
        f.write(f"## Assembly Identity\n\n")
        f.write(f"| Category | Count | % |\n")
        f.write(f"|----------|-------|---|\n")
        if len(df_aln) > 0:
            f.write(f"| Identical (100%) | {identical} | {100*identical/len(df_aln):.1f}% |\n")
            f.write(f"| Near-identical (99.9-100%) | {near_identical} | {100*near_identical/len(df_aln):.1f}% |\n")
            f.write(f"| Minor differences (99-99.9%) | {minor_diff} | {100*minor_diff/len(df_aln):.1f}% |\n")
            f.write(f"| Significant differences (<99%) | {significant_diff} | {100*significant_diff/len(df_aln):.1f}% |\n")
        f.write(f"\n")
        f.write(f"## Variant Counts\n\n")
        f.write(f"| Metric | Assemblies affected | Events | Bases |\n")
        f.write(f"|--------|--------------------:|-------:|------:|\n")
        f.write(f"| SNPs (A/C/G/T ↔ A/C/G/T) | {with_snps} | {total_snps} | {total_snps} |\n")
        f.write(f"| Internal indels | {with_indels} | {total_indel_events} | {total_indel_bp} |\n")
        f.write(f"| Ambiguity diffs (N ↔ A/C/G/T) | {with_ambig} | {total_ambig} | {total_ambig} |\n")
        f.write(f"| Terminal extensions (old only) | {with_terminal} | {total_terminal_events_old} | {total_terminal_bp_old} |\n")
        f.write(f"| Terminal extensions (new only) | {with_terminal} | {total_terminal_events_new} | {total_terminal_bp_new} |\n\n")
        # Metrics summary
        if len(df_aln) > 0:
            f.write(f"## Metrics Summary\n\n")
            f.write(f"| Metric | Median delta | Mean delta | Min | Max |\n")
            f.write(f"|--------|-------------|------------|-----|-----|\n")
            for col, label in [
                ('delta_percent_reference_covered', '% Ref Covered'),
                ('delta_mean_coverage', 'Mean Coverage'),
                ('delta_assembly_length_unambiguous', 'Unambig Length'),
            ]:
                vals = df[col].dropna()
                if len(vals) > 0:
                    f.write(f"| {label} | {vals.median():.4f} | {vals.mean():.4f} | {vals.min():.4f} | {vals.max():.4f} |\n")
            f.write(f"\n")
        # Divergent assemblies
        if len(df_aln) > 0 and 'alignment_identity' in df_aln.columns:
            divergent = df_aln[df_aln['alignment_identity'] < 0.999].sort_values('alignment_identity')
        else:
            divergent = pd.DataFrame()
        if len(divergent) > 0:
            f.write(f"## Divergent Assemblies (identity < 99.9%)\n\n")
            f.write(f"| Assembly ID | Tax Name | Identity | SNPs | Indel events (bp) | Ambig Diffs | Term Ext Old events (bp) | Term Ext New events (bp) |\n")
            f.write(f"|-------------|----------|----------|------|-------------------|-------------|--------------------------|---------------------------|\n")
            for _, row in divergent.iterrows():
                indel_ev = int(row.get('indel_count_events', 0))
                indel_bp = int(row.get('indel_count_bp', 0))
                term_old_ev = int(row.get('terminal_extension_events_old', 0))
                term_old_bp = int(row.get('terminal_extensions_old', 0))
                term_new_ev = int(row.get('terminal_extension_events_new', 0))
                term_new_bp = int(row.get('terminal_extensions_new', 0))
                f.write(f"| {row['assembly_id']} | {row['tax_name']} | "
                        f"{row['alignment_identity']*100:.3f}% | {row['snp_count']:.0f} | "
                        f"{indel_ev} ({indel_bp}) | {row['ambiguity_diffs']:.0f} | "
                        f"{term_old_ev} ({term_old_bp}) | "
                        f"{term_new_ev} ({term_new_bp}) |\n")
            f.write(f"\n")
        # Assembly count mismatches
        if 'assembly_count_match' not in sample_df.columns:
            mismatches = pd.DataFrame()
        else:
            mismatches = sample_df[~sample_df['assembly_count_match']]
        if len(mismatches) > 0:
            f.write(f"## Assembly Count Mismatches\n\n")
            f.write(f"| Sample | Old Count | New Count | Only Old | Only New |\n")
            f.write(f"|--------|-----------|-----------|----------|----------|\n")
            for _, row in mismatches.iterrows():
                f.write(f"| {row['sample_id']} | {row['old_assembly_count']} | "
                        f"{row['new_assembly_count']} | {row['assemblies_only_in_old']} | "
                        f"{row['assemblies_only_in_new']} |\n")
            f.write(f"\n")
        # Plot references
        f.write(f"## Plots\n\n")
        plot_files = sorted(os.listdir(plot_dir)) if os.path.isdir(plot_dir) else []
        for pf in plot_files:
            if pf.endswith('.png'):
                rel_plot_dir = os.path.basename(plot_dir)
                # Fix: emit an actual markdown image reference. Previously this
                # wrote only a blank line ('\n\n') and rel_plot_dir was unused,
                # so the Plots section never linked any image.
                f.write(f"![{pf}]({rel_plot_dir}/{pf})\n\n")
    log.info(f"Report written to {report_path}")
    return report_path
def main():
    """CLI entry point: aggregate per-sample JSONs into TSV, plots, and a report."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('--results-dir', required=True, help='Directory with per-sample JSON results')
    parser.add_argument('--report-dir', required=True, help='Output directory for report')
    parser.add_argument('--workspace-name', required=True, help='Workspace name for report title')
    args = parser.parse_args()

    pd, _ = get_deps()
    results = load_results(args.results_dir)
    log.info(f"Loaded {len(results)} sample results")

    # Flatten per-assembly and per-sample views into dataframes.
    df = pd.DataFrame(build_comparison_table(results, args.workspace_name))
    log.info(f"Total assembly comparisons: {len(df)}")
    sample_df = pd.DataFrame(build_sample_summary(results))

    # Save summary TSV (only when there is at least one comparison row).
    os.makedirs(args.report_dir, exist_ok=True)
    tsv_path = os.path.join(args.report_dir, f'summary_{args.workspace_name}.tsv')
    if len(df) > 0:
        df.to_csv(tsv_path, sep='\t', index=False)
        log.info(f"Summary TSV written to {tsv_path}")

    # Generate plots, then the markdown report that references them.
    plot_dir = os.path.join(args.report_dir, f'plots_{args.workspace_name}')
    if len(df) > 0:
        generate_plots(df, plot_dir)
    generate_markdown_report(df, sample_df, args.workspace_name, args.report_dir, plot_dir)


if __name__ == '__main__':
    main()
================================================
FILE: .agents/skills/regression-testing/run_vadr.sh
================================================
#!/bin/bash
# Run VADR on a single FASTA file.
# Inputs (env vars set by dsub):
#   FASTA     - input FASTA file (localized by dsub)
#   VADR_OPTS - vadr options string
#   MIN_LEN   - minimum sequence length (optional)
#   MAX_LEN   - maximum sequence length (optional)
#   MODEL_URL - URL to vadr model tarball
#   MODEL_SUB - subdirectory within model tarball (optional)
# Outputs (env vars set by dsub):
#   NUM_ALERTS - file to write num_alerts integer
#   ALERTS_TSV - file to write alerts TSV
#   VADR_TGZ   - file to write full vadr output tarball
set -euo pipefail

# Default optional env vars to empty so `set -u` does not abort when unset
MODEL_URL="${MODEL_URL:-}"
MODEL_SUB="${MODEL_SUB:-}"
MIN_LEN="${MIN_LEN:-}"
MAX_LEN="${MAX_LEN:-}"

BASENAME=$(basename "${FASTA}" .fasta)

# Download and unpack VADR models; otherwise fall back to the models
# shipped in the image at /opt/vadr/vadr-models
if [ -n "${MODEL_URL}" ]; then
    mkdir -p vadr-untar
    curl -fsSL "${MODEL_URL}" | tar -C vadr-untar -xzf -
    # Takes the first top-level directory from the unpacked tarball
    MODEL_DIR=$(find vadr-untar -mindepth 1 -maxdepth 1 -type d | head -1)
    ln -s "${MODEL_DIR}" vadr-models
else
    ln -s /opt/vadr/vadr-models vadr-models
fi
if [ -n "${MODEL_SUB}" ]; then
    VADR_MODEL_DIR="vadr-models/${MODEL_SUB}"
else
    VADR_MODEL_DIR="vadr-models"
fi

# Build trim args (expanded unquoted below so flags split into words)
TRIM_ARGS=""
if [ -n "${MIN_LEN}" ]; then
    TRIM_ARGS="${TRIM_ARGS} --minlen ${MIN_LEN}"
fi
if [ -n "${MAX_LEN}" ]; then
    TRIM_ARGS="${TRIM_ARGS} --maxlen ${MAX_LEN}"
fi

# Remove terminal ambiguous nucleotides
/opt/vadr/vadr/miniscripts/fasta-trim-terminal-ambigs.pl \
    "${FASTA}" ${TRIM_ARGS} > "${BASENAME}.trimmed.fasta"

# Run VADR
v-annotate.pl \
    ${VADR_OPTS} \
    --split --cpu $(nproc) \
    --mdir "${VADR_MODEL_DIR}" \
    "${BASENAME}.trimmed.fasta" \
    "${BASENAME}"

# Package outputs
tar -C "${BASENAME}" -czf "${VADR_TGZ}" .

# Extract alerts: field 5 of the alert list, dropping the header row
cut -f 5 "${BASENAME}/${BASENAME}.vadr.alt.list" | tail -n +2 > alerts.tsv
cp alerts.tsv "${ALERTS_TSV}"
wc -l < alerts.tsv > "${NUM_ALERTS}"
echo "VADR complete. Alerts: $(cat "${NUM_ALERTS}")"
================================================
FILE: .claude/rules/container-vulns.md
================================================
---
paths:
- "docker/**"
- ".trivyignore"
- ".trivy-ignore-policy.rego"
- "vulnerability-mitigation-status.md"
- ".github/workflows/container-scan.yml"
- ".github/workflows/docker.yml"
---
For container vulnerability management guidance, see
.agents/skills/container-vulns/SKILL.md
================================================
FILE: .codecov.yml
================================================
# Codecov configuration
# https://docs.codecov.com/docs/codecov-yaml
coverage:
status:
project:
default:
informational: true # Never fails, just shows coverage info
patch:
default:
informational: true # Never fails, just shows patch coverage
# Flag configuration for multi-component coverage
flags:
core:
paths:
- src/viral_ngs/core/
- src/viral_ngs/util/
- src/viral_ngs/*.py
carryforward: true
assemble:
paths:
- src/viral_ngs/assemble/
- src/viral_ngs/assembly.py
carryforward: true
classify:
paths:
- src/viral_ngs/classify/
- src/viral_ngs/metagenomics.py
- src/viral_ngs/taxon_filter.py
- src/viral_ngs/kmer_utils.py
carryforward: true
phylo:
paths:
- src/viral_ngs/phylo/
- src/viral_ngs/interhost.py
- src/viral_ngs/intrahost.py
- src/viral_ngs/ncbi.py
carryforward: true
comment:
layout: "reach,diff,flags,files"
behavior: default
require_changes: true # Only comment if coverage changes
================================================
FILE: .dockerignore
================================================
*.pyc
__pycache__
================================================
FILE: .gitattributes
================================================
*.rules linguist-language=Python
================================================
FILE: .github/actions/create-manifest/action.yml
================================================
name: 'Create Multi-Arch Manifest'
description: 'Create multi-arch Docker manifest'
inputs:
ghcr-repo:
description: 'GHCR repository base (e.g., ghcr.io/broadinstitute/viral-ngs)'
required: true
target-tag:
description: 'Target manifest tag (just the tag, not full URL)'
required: true
source-amd64:
description: 'Source AMD64 image tag'
required: true
source-arm64:
description: 'Source ARM64 image tag'
required: true
description:
description: 'Image description (reserved for future use)'
required: false
runs:
using: 'composite'
steps:
- name: Create manifest
shell: bash
run: |
docker buildx imagetools create \
--tag "${{ inputs.ghcr-repo }}:${{ inputs.target-tag }}" \
"${{ inputs.ghcr-repo }}:${{ inputs.source-amd64 }}" \
"${{ inputs.ghcr-repo }}:${{ inputs.source-arm64 }}"
- name: Verify Docker Manifest List v2 format
shell: bash
run: |
set -euo pipefail
REPO_PATH="${REPO#ghcr.io/}"
# Fetch registry access token
TOKEN=$(curl -fsS "https://ghcr.io/token?service=ghcr.io&scope=repository:${REPO_PATH}:pull" | jq -er .token) || {
echo "FAIL: Unable to retrieve token from ghcr.io"
exit 1
}
# Fetch manifest and capture HTTP status code + content type
HTTP_STATUS=$(curl -sS -o /dev/null -w '%{http_code}' \
-H "Authorization: Bearer $TOKEN" \
-H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \
"https://ghcr.io/v2/${REPO_PATH}/manifests/${{ inputs.target-tag }}") || {
echo "FAIL: Unable to retrieve manifest from ghcr.io for tag '${{ inputs.target-tag }}'"
exit 1
}
if [ "$HTTP_STATUS" != "200" ]; then
echo "FAIL: Expected HTTP 200 when fetching manifest, got: $HTTP_STATUS"
exit 1
fi
CONTENT_TYPE=$(curl -sS -D- -o /dev/null \
-H "Authorization: Bearer $TOKEN" \
-H "Accept: application/vnd.docker.distribution.manifest.list.v2+json" \
"https://ghcr.io/v2/${REPO_PATH}/manifests/${{ inputs.target-tag }}" | grep -i content-type)
if echo "$CONTENT_TYPE" | grep -qi "manifest.list.v2"; then
echo "OK: Docker Manifest List v2 format confirmed"
else
echo "FAIL: Expected Docker Manifest List v2, got: $CONTENT_TYPE"
exit 1
fi
env:
REPO: ${{ inputs.ghcr-repo }}
================================================
FILE: .github/actions/pull-with-retry/action.yml
================================================
name: 'Pull Docker Image with Retry'
description: 'Pull a Docker image with retry logic'
inputs:
image:
description: 'Full image reference to pull'
required: true
max-retries:
description: 'Maximum number of retry attempts'
default: '5'
retry-delay:
description: 'Delay between retries in seconds'
default: '10'
runs:
using: 'composite'
steps:
- name: Pull image with retries
shell: bash
run: |
for i in $(seq 1 ${{ inputs.max-retries }}); do
echo "Attempt $i: Pulling ${{ inputs.image }}..."
if docker pull "${{ inputs.image }}"; then
echo "Successfully pulled image"
exit 0
fi
echo "Pull failed, waiting ${{ inputs.retry-delay }} seconds before retry..."
sleep ${{ inputs.retry-delay }}
done
echo "Failed to pull image after ${{ inputs.max-retries }} attempts"
exit 1
================================================
FILE: .github/actions/setup-docker-build/action.yml
================================================
# Composite action that prepares a runner for Docker image builds:
# optional repository checkout, Buildx setup, and GHCR login.
# Used by the per-flavor build jobs in .github/workflows/docker.yml.
name: 'Setup Docker Build'
description: 'Checkout, setup buildx, and login to GHCR'

inputs:
  github-token:
    description: 'GitHub token for GHCR login'
    required: true
  checkout:
    description: 'Whether to checkout repository'
    default: 'true'
  fetch-depth:
    description: 'Git fetch depth'
    default: '1'
  fetch-tags:
    description: 'Whether to fetch tags'
    default: 'false'

runs:
  using: 'composite'
  steps:
    # Composite-action inputs are always strings, so the skip-checkout flag
    # is compared against the literal string 'true'.
    - name: Checkout repository
      if: ${{ inputs.checkout == 'true' }}
      uses: actions/checkout@v4
      with:
        fetch-depth: ${{ inputs.fetch-depth }}
        fetch-tags: ${{ inputs.fetch-tags }}
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    # github.actor is the user who triggered the workflow; the token is
    # supplied by the caller so this action needs no secrets of its own.
    - name: Log in to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: ghcr.io
        username: ${{ github.actor }}
        password: ${{ inputs.github-token }}
================================================
FILE: .github/copilot-instructions.md
================================================
# Copilot Instructions
This file provides guidance to GitHub Copilot when working with code in this repository.
**IMPORTANT**: Always read [AGENTS.md](../AGENTS.md) at the start of every session before doing any work. It contains comprehensive project context and development guidelines that are essential for working in this codebase.
## Quick Reference
- **Docker-centric development**: Run tests inside containers, not on host
- **Import pattern**: `from viral_ngs import core` then `core.samtools.SamtoolsTool()`
- **Test location**: `tests/unit/<module>/`
- **Dependencies**: ALL via conda, not pip (see `docker/requirements/*.txt`)
## Running Tests
```bash
docker run --rm \
-v $(pwd):/opt/viral-ngs/source \
quay.io/broadinstitute/viral-ngs:main-core \
pytest -rsxX -n auto /opt/viral-ngs/source/tests/unit
```
## Key Files
| File | Purpose |
|------|---------|
| [AGENTS.md](../AGENTS.md) | Full AI assistant guidance |
| [pyproject.toml](../pyproject.toml) | Package configuration |
| [docker/](../docker/) | Dockerfiles and requirements |
| [src/viral_ngs/](../src/viral_ngs/) | Source code |
| [tests/](../tests/) | Test files |
| [.agents/skills/](../.agents/skills/) | Reusable agent playbooks and scripts |
================================================
FILE: .github/workflows/audit-quay-tags.yml
================================================
# Weekly audit that every expected Quay.io tag (mega + each flavor, at both
# X.Y.Z and X.Y granularity) exists for the 5 most recent release versions.
name: Audit Quay.io Tags

on:
  schedule:
    - cron: '0 8 * * 1' # Monday 8:00 UTC
  workflow_dispatch:

permissions: {}

jobs:
  audit-tags:
    runs-on: ubuntu-latest
    permissions: {}
    steps:
      - name: Install crane
        uses: imjasonh/setup-crane@v0.4
      - name: Log in to Quay.io
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_TOKEN }}
      - name: Audit version tags
        run: |
          set -euo pipefail
          REPO="quay.io/broadinstitute/viral-ngs"
          FLAVORS="baseimage core assemble classify phylo"
          FAILED=0
          # Check the 5 most recent version tags.
          # NOTE: grep exits 1 when nothing matches; under `set -euo pipefail`
          # that would abort the script inside this command substitution BEFORE
          # the empty-check below could emit its ::error:: message. The
          # `|| true` keeps a no-match result from killing the script so the
          # explicit guard can report the failure properly.
          VERSIONS=$(crane ls "$REPO" | { grep -E '^[0-9]+\.[0-9]+\.[0-9]+$' || true; } | sort -V | tail -5)
          if [[ -z "$VERSIONS" ]]; then
            echo "::error::No version tags found on ${REPO}"
            exit 1
          fi
          # Verify a single tag exists. Records the failure but keeps going so
          # a single run lists ALL missing tags, not just the first.
          check_tag() {
            local TAG="$1"
            if ! crane manifest "${REPO}:${TAG}" > /dev/null 2>&1; then
              echo "::error::MISSING: ${REPO}:${TAG}"
              FAILED=1
            else
              echo "OK: ${REPO}:${TAG}"
            fi
          }
          for VERSION in $VERSIONS; do
            MAJOR_MINOR=$(echo "$VERSION" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
            # Check mega tag (no suffix) — both X.Y.Z and X.Y
            check_tag "${VERSION}"
            check_tag "${MAJOR_MINOR}"
            # Check each flavor — both X.Y.Z-flavor and X.Y-flavor
            for FLAVOR in $FLAVORS; do
              check_tag "${VERSION}-${FLAVOR}"
              check_tag "${MAJOR_MINOR}-${FLAVOR}"
            done
          done
          if [[ $FAILED -ne 0 ]]; then
            echo "::error::Some version tags are missing from Quay.io!"
            exit 1
          fi
          echo "All version tags verified."
================================================
FILE: .github/workflows/cleanup-images.yml
================================================
# On branch deletion, remove the corresponding feature-branch image tags
# from Quay.io. Version-like branch names are refused as a safety net.
name: Cleanup Feature Branch Images

on:
  delete:

jobs:
  cleanup-quay-images:
    # Only run for branch deletions (not tags), and skip main
    if: github.event.ref_type == 'branch' && github.event.ref != 'main'
    runs-on: ubuntu-latest
    permissions: {}
    steps:
      - name: Compute tag prefix
        id: tag-prefix
        env:
          # Branch names are attacker-influenced text. Pass them via an env
          # var rather than interpolating ${{ }} straight into the script, so
          # a crafted branch name cannot inject shell commands.
          RAW: ${{ github.event.ref }}
        run: |
          # Apply the same sanitization as docker.yml's "Compute Docker image tag prefix":
          # replace "/" with "-" and strip leading "v"
          PREFIX=$(echo "$RAW" | sed 's|/|-|g')
          PREFIX=${PREFIX#v}
          echo "prefix=${PREFIX}" >> "$GITHUB_OUTPUT"
          echo "Tag prefix: ${PREFIX} (from branch: ${RAW})"
      - name: Safety check - refuse version-like branch names
        env:
          # Derived from the untrusted branch name above — same env-var
          # indirection for injection safety.
          PREFIX: ${{ steps.tag-prefix.outputs.prefix }}
        run: |
          if [[ "$PREFIX" =~ ^[0-9]+\.[0-9]+ ]]; then
            echo "::error::Refusing to delete tags for version-like branch: $PREFIX"
            exit 1
          fi
      - name: Install crane
        uses: imjasonh/setup-crane@v0.4
      - name: Log in to Quay.io
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_TOKEN }}
      - name: Delete feature branch tags from Quay
        env:
          TAG_PREFIX: ${{ steps.tag-prefix.outputs.prefix }}
        run: |
          QUAY_REPO="quay.io/broadinstitute/viral-ngs"
          # Image tag suffixes - must be kept in sync with deploy-to-quay in docker.yml
          # See: .github/workflows/docker.yml deploy-to-quay job matrix
          SUFFIXES=("-baseimage" "-core" "-assemble" "-classify" "-phylo" "")
          for SUFFIX in "${SUFFIXES[@]}"; do
            TAG="${TAG_PREFIX}${SUFFIX}"
            echo "Deleting ${QUAY_REPO}:${TAG}..."
            # Use crane delete instead of skopeo delete. crane removes only the tag
            # reference, not the underlying manifest. This prevents cascade deletion
            # of other tags sharing the same digest (which caused the 3.0.10-baseimage
            # incident: skopeo deleted the manifest by digest, expiring all tags
            # pointing to it).
            crane delete "${QUAY_REPO}:${TAG}" 2>&1 || echo " Tag ${TAG} not found or already deleted"
          done
================================================
FILE: .github/workflows/container-scan.yml
================================================
# Weekly Trivy scan of the published "mega" image, plus an LLM-assisted triage
# pipeline: new fixable HIGH/CRITICAL CVEs are analyzed by Claude (on Vertex AI)
# and filed as labeled GitHub issues. Dedup state lives in the issues themselves.
name: Scheduled Container Vulnerability Scan

on:
  schedule:
    # Weekly scan of main branch mega image every Monday at 06:00 UTC
    - cron: '0 6 * * 1'
  workflow_dispatch:
    inputs:
      test_cve_id:
        description: 'Optional: bypass new-CVE detection and force-analyze this specific CVE ID (for testing the Claude pipeline)'
        required: false
        type: string
        default: ''
      dry_run:
        description: 'Run Claude analysis but do NOT file GitHub issues (artifact still uploaded)'
        required: false
        type: boolean
        default: false

# Default-deny at workflow level; the scan job grants itself only what it needs.
permissions: {}

env:
  GHCR_REPO: ghcr.io/broadinstitute/viral-ngs

jobs:
  scan:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: read
      security-events: write
      issues: write # for filing CVE issues
      id-token: write # for OIDC token to GCP via WIF
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Full history so the Claude analysis step can `git log --grep` and `git show`
          # precedent CVE-fix commits (e.g., to mirror past mitigation patterns exactly).
          fetch-depth: 0
      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Two Trivy passes over the same image: SARIF (policy-filtered) for the
      # GitHub Security tab, and JSON for the triage/Claude pipeline below.
      - name: Run Trivy vulnerability scanner
        uses: aquasecurity/trivy-action@master
        with:
          image-ref: '${{ env.GHCR_REPO }}:main-mega-amd64'
          format: 'sarif'
          output: 'trivy-results.sarif'
          severity: 'CRITICAL,HIGH'
          limit-severities-for-sarif: true
          exit-code: '0'
          ignore-unfixed: true
          trivyignores: '.trivyignore'
          ignore-policy: '.trivy-ignore-policy.rego'
      - name: Run Trivy vulnerability scanner (JSON)
        uses: aquasecurity/trivy-action@master
        with:
          image-ref: '${{ env.GHCR_REPO }}:main-mega-amd64'
          format: 'json'
          output: 'trivy-results.json'
          severity: 'CRITICAL,HIGH'
          exit-code: '0' # don't fail here — Claude pipeline + final-step gate handles signaling
          ignore-unfixed: true
          trivyignores: '.trivyignore'
          ignore-policy: '.trivy-ignore-policy.rego'
      - name: Log scan result count
        if: always()
        run: |
          if [ -f trivy-results.sarif ]; then
            COUNT=$(jq '[.runs[].results[]] | length' trivy-results.sarif)
            echo "::notice::Trivy found $COUNT findings for mega-scheduled (after policy filtering)"
          fi
      - name: Upload Trivy scan results to GitHub Security tab
        if: always()
        uses: github/codeql-action/upload-sarif@v3
        with:
          sarif_file: 'trivy-results.sarif'
          category: 'container-mega-scheduled'
      - name: Upload Trivy JSON results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: trivy-mega-scheduled
          path: trivy-results.json
      # === Claude triage pipeline ===
      # If new fixable HIGH/CRITICAL CVEs are found, hand them to Claude (Sonnet 4.6
      # on Vertex AI) for analysis, then file GitHub issues. Source of truth for
      # "new" is GH issues themselves: a CVE is new if no existing issue (open OR
      # closed) has the CVE ID in its title.
      - name: Identify new fixable CVEs
        id: triage
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Use github.event.inputs (not inputs) so this is well-defined on schedule too.
          TEST_CVE_ID: ${{ github.event.inputs.test_cve_id || '' }}
        run: |
          set -euo pipefail
          # Test mode: bypass scan-diff and use the provided CVE ID directly.
          if [ -n "${TEST_CVE_ID:-}" ]; then
            echo "::notice::Test mode active — analyzing TEST_CVE_ID=$TEST_CVE_ID"
            echo "cve_ids=$TEST_CVE_ID" >> "$GITHUB_OUTPUT"
            echo "test_mode=true" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          # Production mode: parse trivy JSON for fixable HIGH/CRITICAL CVEs.
          all_cves=$(jq -r '
            [.Results[]?.Vulnerabilities[]?
              | select((.Severity == "HIGH" or .Severity == "CRITICAL")
                       and (.FixedVersion // "") != "")
              | .VulnerabilityID]
            | unique[]
          ' trivy-results.json)
          if [ -z "$all_cves" ]; then
            echo "::notice::No fixable HIGH/CRITICAL CVEs in scan."
            echo "cve_ids=" >> "$GITHUB_OUTPUT"
            echo "test_mode=false" >> "$GITHUB_OUTPUT"
            exit 0
          fi
          # Dedup against existing GH issues (open + closed) by title-substring search.
          new_cves=()
          for cve in $all_cves; do
            count=$(gh search issues \
              --repo "$GITHUB_REPOSITORY" \
              --state=all \
              "\"$cve\" in:title" \
              --json url --jq 'length')
            if [ "$count" = "0" ]; then
              new_cves+=("$cve")
              echo "  NEW: $cve"
            else
              echo "  existing issue for $cve, skipping"
            fi
          done
          # Space-separated list; ":-" guards against an empty array under set -u.
          new_list="${new_cves[*]:-}"
          echo "::notice::Found ${#new_cves[@]} new fixable CVE(s)"
          echo "cve_ids=$new_list" >> "$GITHUB_OUTPUT"
          echo "test_mode=false" >> "$GITHUB_OUTPUT"
      # GCP auth is only needed for the Vertex-backed Claude step, so it is
      # gated on having CVEs to analyze.
      - name: Authenticate to GCP via Workload Identity Federation
        if: steps.triage.outputs.cve_ids != ''
        uses: google-github-actions/auth@v2
        with:
          workload_identity_provider: ${{ vars.GCP_WIP_PROVIDER }}
          service_account: ${{ vars.GCP_SA_EMAIL }}
      - name: Ensure issue labels exist
        if: steps.triage.outputs.cve_ids != '' && (github.event.inputs.dry_run || 'false') != 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Idempotent: gh label create exits non-zero if label exists; ignore that.
          gh label create security --color B60205 --description "Security-related issue" --repo "$GITHUB_REPOSITORY" 2>/dev/null || true
          gh label create cve --color B60205 --description "CVE tracked in container scans" --repo "$GITHUB_REPOSITORY" 2>/dev/null || true
          gh label create test --color FBCA04 --description "Test issue (filed by workflow_dispatch test_cve_id)" --repo "$GITHUB_REPOSITORY" 2>/dev/null || true
      - name: Claude analysis on Vertex AI
        if: steps.triage.outputs.cve_ids != ''
        # Pinned to commit SHA (== v1 as of 2026-04-27) for supply-chain safety;
        # bump this SHA when picking up new claude-code-action releases.
        uses: anthropics/claude-code-action@567fe954a4527e81f132d87d1bdbcc94f7737434 # v1
        env:
          CLAUDE_CODE_USE_VERTEX: '1'
          CLOUD_ML_REGION: global
          ANTHROPIC_VERTEX_PROJECT_ID: ${{ vars.GCP_PROJECT_ID }}
        with:
          use_vertex: 'true'
          github_token: ${{ secrets.GITHUB_TOKEN }}
          claude_args: '--model claude-sonnet-4-6 --max-turns 30'
          # Read-only tool allowlist (plus Write for the /tmp/issues reports):
          # Claude may inspect the repo and scan output but not mutate the tree.
          settings: |
            {
              "permissions": {
                "allow": [
                  "Read",
                  "Write",
                  "Bash(mkdir:*)",
                  "Bash(git log:*)",
                  "Bash(git show:*)",
                  "Bash(git rev-parse:*)",
                  "Bash(git grep:*)",
                  "Bash(grep:*)",
                  "Bash(find:*)",
                  "Bash(jq:*)",
                  "Bash(ls:*)",
                  "Bash(cat:*)",
                  "Bash(head:*)",
                  "Bash(tail:*)"
                ]
              }
            }
          prompt: |
            You are triaging container vulnerabilities for the broadinstitute/viral-ngs repo.

            ## Your task

            For each CVE ID listed below, write a triage report to `/tmp/issues/<CVE-ID>.md`.
            The reports will be filed verbatim as GitHub issues by the next workflow step.

            **CVE IDs to analyze:** ${{ steps.triage.outputs.cve_ids }}

            **Test mode:** ${{ steps.triage.outputs.test_mode }}
            (If `true`, the CVE was supplied manually via `test_cve_id` and may not appear in
            the current scan's `trivy-results.json`. Use your training knowledge in that case
            and add a `> _Test analysis_` blockquote at the top of the report so reviewers
            know it was generated for pipeline validation, not from a real scan finding.)

            ## Required reading (do this BEFORE writing reports)

            1. `trivy-results.json` (in the workspace root) — **the authoritative source for
               CVSS score, CVSS vector, package path, fix version, and references.** Query
               it with `jq` before writing any report. Example:
               ```
               jq '.Results[]?.Vulnerabilities[]? | select(.VulnerabilityID == "<CVE-ID>")' trivy-results.json
               ```
               You MUST cite the exact CVSS score and vector from this file in the
               "Vulnerability details" section — do NOT infer or estimate them from your
               own knowledge if the CVE is present in the JSON. If the CVE is NOT in the
               JSON (test mode, or scan-target divergence), explicitly say so in the report
               and use your training knowledge as a clearly-labeled fallback.
            2. `.agents/skills/container-vulns/SKILL.md` — read fully. This is the repo's
               container-vulnerability playbook and tells you what the maintainers consider
               actionable vs. accepted risk.
            3. `.trivyignore` — existing per-CVE exceptions with their justifications. Mirror
               the writing style and depth of justification when you recommend `.trivyignore`
               additions.
            4. `.trivy-ignore-policy.rego` — Rego policy for class-level CVE filtering.
               Understand what it filters and why.
            5. `docker/Dockerfile.*` — container build files showing dep installs and inline
               mitigations. Look for prior fixups (`find ... -exec rm`, `gem install`, etc.)
               applied to similar packages.
            6. `docker/requirements/*.txt` — conda dependency lists. Use `grep` to find
               which file pulls in the affected package.
            7. Recent git history — full history is available (the workflow checks out
               with `fetch-depth: 0`). Use:
               - `git log --all --oneline --grep <package>` to find prior commits
                 touching the affected package
               - `git log --all --oneline --grep CVE-` to find prior CVE-fix commits
               - **`git show <sha>` to inspect the FULL DIFF of any precedent fix you
                 plan to cite. Read the diff, not just the commit message.** Many fixes
                 combine multiple elements (e.g., file removal + reinstall) — your
                 recommendation must mirror ALL elements of the precedent, not just the
                 headline change.
               - ALWAYS verify a commit SHA exists with `git show <sha>` before citing it
                 in the report.

            ## Required structure for each report

            File path: `/tmp/issues/<CVE-ID>.md` (filename MUST match the CVE ID exactly).

            **First line MUST be a single H1 used as the issue title:**
            `# [CVE-YYYY-NNNN] <package>: <one-line description>`

            Then sections (use H2 `##` headers):

            1. **Summary** — 2–3 sentences: what it is, severity, where it came from.
            2. **Vulnerability details** — CVSS score + vector + plain-English meaning;
               2–4 sentences explaining the bug technically.
            3. **Dependency chain** — name the direct conda package or Docker layer that
               pulls this in. Trace transitive deps where you can. If you can't determine
               this confidently, say so explicitly — do NOT guess.
            4. **Why the Rego policy didn't suppress it** — explain in terms of the AV/PR/UI/S
               vector classes the policy filters and why this CVE's vector doesn't match.
            5. **Recommended fix** — concrete and actionable. Options:
               - Version bump (which file, which floor)
               - Inline Dockerfile mitigation (which Dockerfile, what RUN-block addition)
               - `.trivyignore` entry (with justification matching the existing style)
               Cite historical precedent when applicable: `(mirror the fix in commit <sha>)`.
            6. **Practical exploitability** — in this deployment model (ephemeral batch
               containers, no network-facing services, no untrusted user input at runtime),
               is this actually reachable? Be honest and specific.
            7. **References** — GHSA URL, NVD URL, vendor advisory.
            8. **Attribution footer** — at the very end of the report, add a horizontal
               rule (`---`) on its own line, then this exact paragraph (italicized):
               `*This analysis and report were authored entirely by Claude Sonnet 4.6 (running on Google Vertex AI via the `container-scan.yml` triage pipeline). Independently verify the CVSS data, dependency chain, recommended fix, and commit SHAs before acting on this report.*`

            ## Constraints

            - `mkdir -p /tmp/issues` first.
            - One file per CVE.
            - Be concise. Each report should be readable in 1–2 minutes (target: 300–600 words).
            - Do NOT hallucinate package versions, file paths, or commit SHAs. Verify with
              tools when in doubt.
            - If you finish all reports with budget remaining, do NOT pad — stop.
      # always() keeps this upload running even if the Claude step above failed,
      # so partial analyses are still recoverable from the artifact.
      - name: Upload Claude analysis as artifact
        if: steps.triage.outputs.cve_ids != '' && always()
        uses: actions/upload-artifact@v4
        with:
          name: claude-cve-analysis
          path: /tmp/issues/
          if-no-files-found: warn
      - name: File GitHub issues
        if: steps.triage.outputs.cve_ids != '' && (github.event.inputs.dry_run || 'false') != 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          TEST_MODE: ${{ steps.triage.outputs.test_mode }}
        run: |
          # Deliberately no `-e`: a single failed `gh issue create` must not stop
          # the loop — failures are counted and reported at the end instead.
          set -uo pipefail
          shopt -s nullglob
          if [ ! -d /tmp/issues ]; then
            echo "::error::/tmp/issues does not exist — Claude analysis step likely failed"
            exit 1
          fi
          md_files=(/tmp/issues/*.md)
          if [ ${#md_files[@]} -eq 0 ]; then
            echo "::error::No .md analysis files in /tmp/issues — Claude may have failed silently"
            exit 1
          fi
          failed=0
          for f in "${md_files[@]}"; do
            cve=$(basename "$f" .md)
            # First line of the report is the H1 issue title; the rest is the body.
            title=$(head -1 "$f" | sed 's/^# *//')
            body=$(tail -n +2 "$f")
            # Dedup-integrity guard: title MUST contain the CVE ID, otherwise the next
            # scheduled run won't recognize it as already-triaged via title-substring search.
            if ! echo "$title" | grep -qF "$cve"; then
              echo "::error::Issue title for $cve does not contain the CVE ID — refusing to file (would break dedup)"
              echo "  Title was: $title"
              failed=$((failed+1))
              continue
            fi
            label_args=(--label security --label cve)
            if [ "$TEST_MODE" = "true" ]; then
              label_args+=(--label test)
            fi
            echo "Creating issue for $cve: $title"
            if ! gh issue create \
              --repo "$GITHUB_REPOSITORY" \
              --title "$title" \
              --body "$body" \
              "${label_args[@]}"; then
              echo "::error::Failed to create issue for $cve"
              failed=$((failed+1))
            fi
          done
          if [ $failed -gt 0 ]; then
            echo "::error::$failed issue(s) failed to create"
            exit 1
          fi
      # Final gate: surface a red X on the workflow run when real (non-test)
      # new CVEs were found, after the issues have already been filed.
      - name: Fail job if new CVEs were found (production mode only)
        if: steps.triage.outputs.cve_ids != '' && steps.triage.outputs.test_mode != 'true'
        run: |
          echo "::error::Scan found new fixable HIGH/CRITICAL CVEs. See filed issues for details."
          exit 1
================================================
FILE: .github/workflows/docker.yml
================================================
name: Build and Test
on:
push:
branches:
- main
- '**'
tags:
- '**'
pull_request:
branches:
- main
env:
QUAY_REPO: quay.io/broadinstitute/viral-ngs
GHCR_REPO: ghcr.io/broadinstitute/viral-ngs
jobs:
# Determine which paths changed to enable smart test filtering
paths-filter:
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
core: ${{ steps.filter.outputs.core }}
assemble: ${{ steps.filter.outputs.assemble }}
classify: ${{ steps.filter.outputs.classify }}
phylo: ${{ steps.filter.outputs.phylo }}
docker: ${{ steps.filter.outputs.docker }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Check changed paths
uses: dorny/paths-filter@v3
id: filter
with:
filters: |
core:
- 'src/viral_ngs/*.py'
- 'src/viral_ngs/core/**'
- 'src/viral_ngs/util/**'
- 'tests/unit/core/**'
- 'tests/conftest.py'
- 'docker/requirements/core.txt'
- 'docker/requirements/core-x86.txt'
- 'docker/requirements/baseimage.txt'
- 'docker/Dockerfile.core'
- 'docker/Dockerfile.baseimage'
- 'pyproject.toml'
assemble:
- 'src/viral_ngs/assemble/**'
- 'src/viral_ngs/assembly.py'
- 'tests/unit/assemble/**'
- 'docker/requirements/assemble.txt'
- 'docker/Dockerfile.assemble'
classify:
- 'src/viral_ngs/classify/**'
- 'src/viral_ngs/metagenomics.py'
- 'src/viral_ngs/taxon_filter.py'
- 'src/viral_ngs/kmer_utils.py'
- 'tests/unit/classify/**'
- 'docker/requirements/classify.txt'
- 'docker/Dockerfile.classify'
phylo:
- 'src/viral_ngs/phylo/**'
- 'src/viral_ngs/interhost.py'
- 'src/viral_ngs/intrahost.py'
- 'src/viral_ngs/ncbi.py'
- 'tests/unit/phylo/**'
- 'docker/requirements/phylo.txt'
- 'docker/requirements/phylo-x86.txt'
- 'docker/Dockerfile.phylo'
docker:
- 'docker/**'
- '.github/workflows/docker.yml'
# Calculate version from git describe (once, shared by all jobs)
get-version:
runs-on: ubuntu-latest
permissions:
contents: read
outputs:
version: ${{ steps.version.outputs.version }}
image-tag-prefix: ${{ steps.image-tag.outputs.prefix }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0 # Full history for accurate git describe
fetch-tags: true
- name: Get version from git describe
id: version
run: |
# Get version like: v1.25.0, 3.0.0, or v1.25.0-42-gabcdef
# Match version tags with or without 'v' prefix, excluding archive/* tags
RAW_VERSION=$(git describe --tags --always --match 'v*' --match '[0-9]*')
# Strip leading 'v' if present
RAW_VERSION=${RAW_VERSION#v}
# Convert to PEP 440 format for setuptools_scm
# git describe: 1.25.0-42-gabcdef -> PEP 440: 1.25.0.dev42+gabcdef
if [[ "$RAW_VERSION" =~ ^([0-9]+\.[0-9]+\.[0-9]+)-([0-9]+)-g([a-f0-9]+)$ ]]; then
VERSION="${BASH_REMATCH[1]}.dev${BASH_REMATCH[2]}+g${BASH_REMATCH[3]}"
else
# Clean version tag (e.g., 1.25.0)
VERSION="$RAW_VERSION"
fi
echo "version=${VERSION}" >> $GITHUB_OUTPUT
echo "Detected version: ${VERSION}"
- name: Compute Docker image tag prefix
id: image-tag
run: |
# For PRs, github.ref_name is "1020/merge" which contains "/" - invalid in Docker tags
# Convert to a valid tag prefix: "pr-1020" for PRs, sanitized branch name for branches
if [[ "${{ github.event_name }}" == "pull_request" ]]; then
# Extract PR number from github.ref (refs/pull/1020/merge -> 1020)
PR_NUMBER="${{ github.event.pull_request.number }}"
PREFIX="pr-${PR_NUMBER}"
else
# For branches/tags, replace "/" with "-" to create valid Docker tags
PREFIX=$(echo "${{ github.ref_name }}" | sed 's|/|-|g')
# Strip leading 'v' for version tags (e.g., v3.0.1 -> 3.0.1)
PREFIX=${PREFIX#v}
fi
echo "prefix=${PREFIX}" >> $GITHUB_OUTPUT
echo "Image tag prefix: ${PREFIX}"
# ============================================================================
# BASEIMAGE BUILD JOBS - Native multi-arch builds (no QEMU)
# AMD64 and ARM64 build in parallel on native runners, then combined
# ============================================================================
# Build baseimage for AMD64
build-baseimage-amd64:
needs: get-version
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
outputs:
digest: ${{ steps.build.outputs.digest }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Docker build environment
uses: ./.github/actions/setup-docker-build
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
checkout: 'false'
- name: Build and push (amd64)
id: build
uses: docker/build-push-action@v6
with:
context: .
file: docker/Dockerfile.baseimage
platforms: linux/amd64
provenance: false
sbom: false
outputs: type=image,push=true,oci-mediatypes=false
tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-amd64
cache-from: |
type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-amd64-main
cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-amd64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Build baseimage for ARM64 (native runner)
build-baseimage-arm64:
needs: get-version
runs-on: ubuntu-24.04-arm
permissions:
contents: read
packages: write
outputs:
digest: ${{ steps.build.outputs.digest }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Docker build environment
uses: ./.github/actions/setup-docker-build
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
checkout: 'false'
- name: Build and push (arm64)
id: build
uses: docker/build-push-action@v6
with:
context: .
file: docker/Dockerfile.baseimage
platforms: linux/arm64
provenance: false
sbom: false
outputs: type=image,push=true,oci-mediatypes=false
tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-arm64
cache-from: |
type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-arm64-main
cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-arm64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Create multi-arch manifest for baseimage
create-manifest-baseimage:
needs: [get-version, build-baseimage-amd64, build-baseimage-arm64]
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
outputs:
baseimage-tag: ${{ steps.get-tag.outputs.tag }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Docker build environment
uses: ./.github/actions/setup-docker-build
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
checkout: 'false'
- name: Get baseimage tag
id: get-tag
run: echo "tag=${{ needs.get-version.outputs.image-tag-prefix }}-baseimage" >> $GITHUB_OUTPUT
- name: Create multi-arch manifest
uses: ./.github/actions/create-manifest
with:
ghcr-repo: ${{ env.GHCR_REPO }}
target-tag: ${{ steps.get-tag.outputs.tag }}
source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-amd64
source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-arm64
description: 'Viral genomics analysis tools - base image'
- name: Create additional tags for main branch
if: github.ref == 'refs/heads/main'
uses: ./.github/actions/create-manifest
with:
ghcr-repo: ${{ env.GHCR_REPO }}
target-tag: baseimage
source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-amd64
source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-arm64
description: 'Viral genomics analysis tools - base image'
- name: Create version tags
if: github.ref_type == 'tag'
run: |
VERSION="${{ needs.get-version.outputs.image-tag-prefix }}"
MAJOR_MINOR=$(echo "$VERSION" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
# Create version tag
docker buildx imagetools create \
--tag ${{ env.GHCR_REPO }}:${VERSION}-baseimage \
${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-amd64 \
${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-arm64
# Create major.minor tag
docker buildx imagetools create \
--tag ${{ env.GHCR_REPO }}:${MAJOR_MINOR}-baseimage \
${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-amd64 \
${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-arm64
# ============================================================================
# CORE BUILD JOBS - Native multi-arch builds
# ============================================================================
# Build core for AMD64 (depends on baseimage-amd64, not manifest)
build-core-amd64:
needs: [get-version, build-baseimage-amd64]
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
outputs:
digest: ${{ steps.build.outputs.digest }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Docker build environment
uses: ./.github/actions/setup-docker-build
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
checkout: 'false'
- name: Build and push (amd64)
id: build
uses: docker/build-push-action@v6
with:
context: .
file: docker/Dockerfile.core
platforms: linux/amd64
provenance: false
sbom: false
outputs: type=image,push=true,oci-mediatypes=false
tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64
build-args: |
BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-amd64
VERSION=${{ needs.get-version.outputs.version }}
cache-from: |
type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-main
type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-amd64-main
cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Build core for ARM64 (depends on baseimage-arm64, not manifest)
build-core-arm64:
needs: [get-version, build-baseimage-arm64]
runs-on: ubuntu-24.04-arm
permissions:
contents: read
packages: write
outputs:
digest: ${{ steps.build.outputs.digest }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Docker build environment
uses: ./.github/actions/setup-docker-build
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
checkout: 'false'
- name: Build and push (arm64)
id: build
uses: docker/build-push-action@v6
with:
context: .
file: docker/Dockerfile.core
platforms: linux/arm64
provenance: false
sbom: false
outputs: type=image,push=true,oci-mediatypes=false
tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
build-args: |
BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-baseimage-arm64
VERSION=${{ needs.get-version.outputs.version }}
cache-from: |
type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-main
type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
type=registry,ref=${{ env.GHCR_REPO }}:cache-baseimage-arm64-main
cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Create multi-arch manifest for core
create-manifest-core:
needs: [get-version, build-core-amd64, build-core-arm64]
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
outputs:
core-tag: ${{ steps.get-tag.outputs.tag }}
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Setup Docker build environment
uses: ./.github/actions/setup-docker-build
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
checkout: 'false'
- name: Get core tag
id: get-tag
run: echo "tag=${{ needs.get-version.outputs.image-tag-prefix }}-core" >> $GITHUB_OUTPUT
- name: Create multi-arch manifest
uses: ./.github/actions/create-manifest
with:
ghcr-repo: ${{ env.GHCR_REPO }}
target-tag: ${{ steps.get-tag.outputs.tag }}
source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64
source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
description: 'Viral genomics analysis tools - core utilities'
- name: Create additional tags for main branch
if: github.ref == 'refs/heads/main'
uses: ./.github/actions/create-manifest
with:
ghcr-repo: ${{ env.GHCR_REPO }}
target-tag: core
source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64
source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
description: 'Viral genomics analysis tools - core utilities'
- name: Create version tags
if: github.ref_type == 'tag'
run: |
VERSION="${{ needs.get-version.outputs.image-tag-prefix }}"
MAJOR_MINOR=$(echo "$VERSION" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
docker buildx imagetools create \
--tag ${{ env.GHCR_REPO }}:${VERSION}-core \
${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64 \
${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
docker buildx imagetools create \
--tag ${{ env.GHCR_REPO }}:${MAJOR_MINOR}-core \
${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64 \
${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
# ============================================================================
# ASSEMBLE BUILD JOBS - Native multi-arch builds
# ============================================================================
# Build assemble for AMD64
  # Build the "assemble" flavor natively on an amd64 runner.
  # The amd64 core image built upstream is used as the Docker BASEIMAGE.
  build-assemble-amd64:
    needs: [get-version, build-core-amd64]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      # Digest of the pushed single-arch image, for downstream consumers.
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Build and push (amd64)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.assemble
          platforms: linux/amd64
          # Disable attestations and force Docker (non-OCI) media types so the
          # push is a plain single-arch image, which the later manifest-merge
          # job combines into a multi-arch tag.
          provenance: false
          sbom: false
          outputs: type=image,push=true,oci-mediatypes=false
          tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-assemble-amd64
          build-args: |
            BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64
            VERSION=${{ needs.get-version.outputs.version }}
          # Prefer this ref's cache, then fall back to the main-branch cache
          # (for both this flavor and the underlying core layers).
          cache-from: |
            type=registry,ref=${{ env.GHCR_REPO }}:cache-assemble-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-assemble-amd64-main
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-main
          cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-assemble-amd64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Build assemble for ARM64
  # Build the "assemble" flavor natively on an arm64 runner (no QEMU emulation).
  build-assemble-arm64:
    needs: [get-version, build-core-arm64]
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
      packages: write
    outputs:
      # Digest of the pushed single-arch image.
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Build and push (arm64)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.assemble
          platforms: linux/arm64
          # Plain single-arch push (no attestations, Docker media types) so the
          # manifest-merge job can combine it with the amd64 image.
          provenance: false
          sbom: false
          outputs: type=image,push=true,oci-mediatypes=false
          tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-assemble-arm64
          build-args: |
            BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
            VERSION=${{ needs.get-version.outputs.version }}
          # Branch-specific cache first, then main-branch fallback.
          cache-from: |
            type=registry,ref=${{ env.GHCR_REPO }}:cache-assemble-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-assemble-arm64-main
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-main
          cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-assemble-arm64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Create multi-arch manifest for assemble
  # Merge the amd64 + arm64 assemble images into one multi-arch manifest tag,
  # and push convenience tags ("assemble" on main; version tags on releases).
  create-manifest-assemble:
    needs: [get-version, build-assemble-amd64, build-assemble-arm64]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      # Multi-arch tag name (e.g. "<prefix>-assemble"), consumed by test jobs.
      assemble-tag: ${{ steps.get-tag.outputs.tag }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Get assemble tag
        id: get-tag
        run: echo "tag=${{ needs.get-version.outputs.image-tag-prefix }}-assemble" >> $GITHUB_OUTPUT
      - name: Create multi-arch manifest
        uses: ./.github/actions/create-manifest
        with:
          ghcr-repo: ${{ env.GHCR_REPO }}
          target-tag: ${{ steps.get-tag.outputs.tag }}
          source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-assemble-amd64
          source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-assemble-arm64
          description: 'Viral genomics analysis tools - assembly'
      - name: Create additional tags for main branch
        if: github.ref == 'refs/heads/main'
        uses: ./.github/actions/create-manifest
        with:
          ghcr-repo: ${{ env.GHCR_REPO }}
          target-tag: assemble
          source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-assemble-amd64
          source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-assemble-arm64
          description: 'Viral genomics analysis tools - assembly'
      # On release tags, also push <version>-assemble and <major.minor>-assemble.
      - name: Create version tags
        if: github.ref_type == 'tag'
        run: |
          VERSION="${{ needs.get-version.outputs.image-tag-prefix }}"
          # Extract "major.minor"; if VERSION does not match the pattern, sed
          # leaves it unchanged and the second create harmlessly repeats the first.
          MAJOR_MINOR=$(echo "$VERSION" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
          # NOTE(review): ${VERSION}-assemble duplicates the tag already created
          # by the "Create multi-arch manifest" step above — confirm intended.
          docker buildx imagetools create \
            --tag ${{ env.GHCR_REPO }}:${VERSION}-assemble \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-assemble-amd64 \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-assemble-arm64
          docker buildx imagetools create \
            --tag ${{ env.GHCR_REPO }}:${MAJOR_MINOR}-assemble \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-assemble-amd64 \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-assemble-arm64
# ============================================================================
# CLASSIFY BUILD JOBS - Native multi-arch builds
# ============================================================================
# Build classify for AMD64
  # Build the "classify" flavor natively on an amd64 runner,
  # layered on top of the amd64 core image.
  build-classify-amd64:
    needs: [get-version, build-core-amd64]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      # Digest of the pushed single-arch image.
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Build and push (amd64)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.classify
          platforms: linux/amd64
          # Plain single-arch push (no attestations, Docker media types) for
          # the later manifest-merge job.
          provenance: false
          sbom: false
          outputs: type=image,push=true,oci-mediatypes=false
          tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-classify-amd64
          build-args: |
            BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64
            VERSION=${{ needs.get-version.outputs.version }}
          # Branch-specific cache first, then main-branch fallback.
          cache-from: |
            type=registry,ref=${{ env.GHCR_REPO }}:cache-classify-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-classify-amd64-main
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-main
          cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-classify-amd64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Build classify for ARM64
  # Build the "classify" flavor natively on an arm64 runner (no QEMU emulation).
  build-classify-arm64:
    needs: [get-version, build-core-arm64]
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
      packages: write
    outputs:
      # Digest of the pushed single-arch image.
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Build and push (arm64)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.classify
          platforms: linux/arm64
          # Plain single-arch push for the later manifest-merge job.
          provenance: false
          sbom: false
          outputs: type=image,push=true,oci-mediatypes=false
          tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-classify-arm64
          build-args: |
            BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
            VERSION=${{ needs.get-version.outputs.version }}
          # Branch-specific cache first, then main-branch fallback.
          cache-from: |
            type=registry,ref=${{ env.GHCR_REPO }}:cache-classify-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-classify-arm64-main
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-main
          cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-classify-arm64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Create multi-arch manifest for classify
  # Merge the amd64 + arm64 classify images into one multi-arch manifest tag,
  # and push convenience tags ("classify" on main; version tags on releases).
  create-manifest-classify:
    needs: [get-version, build-classify-amd64, build-classify-arm64]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      # Multi-arch tag name (e.g. "<prefix>-classify"), consumed by test jobs.
      classify-tag: ${{ steps.get-tag.outputs.tag }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Get classify tag
        id: get-tag
        run: echo "tag=${{ needs.get-version.outputs.image-tag-prefix }}-classify" >> $GITHUB_OUTPUT
      - name: Create multi-arch manifest
        uses: ./.github/actions/create-manifest
        with:
          ghcr-repo: ${{ env.GHCR_REPO }}
          target-tag: ${{ steps.get-tag.outputs.tag }}
          source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-classify-amd64
          source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-classify-arm64
          description: 'Viral genomics analysis tools - metagenomic classification'
      - name: Create additional tags for main branch
        if: github.ref == 'refs/heads/main'
        uses: ./.github/actions/create-manifest
        with:
          ghcr-repo: ${{ env.GHCR_REPO }}
          target-tag: classify
          source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-classify-amd64
          source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-classify-arm64
          description: 'Viral genomics analysis tools - metagenomic classification'
      # On release tags, also push <version>-classify and <major.minor>-classify.
      - name: Create version tags
        if: github.ref_type == 'tag'
        run: |
          VERSION="${{ needs.get-version.outputs.image-tag-prefix }}"
          # Extract "major.minor"; a non-matching VERSION passes through sed
          # unchanged, making the second create a harmless repeat of the first.
          MAJOR_MINOR=$(echo "$VERSION" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
          # NOTE(review): ${VERSION}-classify duplicates the tag already created
          # by the "Create multi-arch manifest" step above — confirm intended.
          docker buildx imagetools create \
            --tag ${{ env.GHCR_REPO }}:${VERSION}-classify \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-classify-amd64 \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-classify-arm64
          docker buildx imagetools create \
            --tag ${{ env.GHCR_REPO }}:${MAJOR_MINOR}-classify \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-classify-amd64 \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-classify-arm64
# ============================================================================
# PHYLO BUILD JOBS - Native multi-arch builds
# ============================================================================
# Build phylo for AMD64
  # Build the "phylo" flavor natively on an amd64 runner,
  # layered on top of the amd64 core image.
  build-phylo-amd64:
    needs: [get-version, build-core-amd64]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      # Digest of the pushed single-arch image.
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Build and push (amd64)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.phylo
          platforms: linux/amd64
          # Plain single-arch push (no attestations, Docker media types) for
          # the later manifest-merge job.
          provenance: false
          sbom: false
          outputs: type=image,push=true,oci-mediatypes=false
          tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-phylo-amd64
          build-args: |
            BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64
            VERSION=${{ needs.get-version.outputs.version }}
          # Branch-specific cache first, then main-branch fallback.
          cache-from: |
            type=registry,ref=${{ env.GHCR_REPO }}:cache-phylo-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-phylo-amd64-main
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-main
          cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-phylo-amd64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Build phylo for ARM64
  # Build the "phylo" flavor natively on an arm64 runner (no QEMU emulation).
  build-phylo-arm64:
    needs: [get-version, build-core-arm64]
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
      packages: write
    outputs:
      # Digest of the pushed single-arch image.
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Build and push (arm64)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.phylo
          platforms: linux/arm64
          # Plain single-arch push for the later manifest-merge job.
          provenance: false
          sbom: false
          outputs: type=image,push=true,oci-mediatypes=false
          tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-phylo-arm64
          build-args: |
            BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
            VERSION=${{ needs.get-version.outputs.version }}
          # Branch-specific cache first, then main-branch fallback.
          cache-from: |
            type=registry,ref=${{ env.GHCR_REPO }}:cache-phylo-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-phylo-arm64-main
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-main
          cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-phylo-arm64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Create multi-arch manifest for phylo
  # Merge the amd64 + arm64 phylo images into one multi-arch manifest tag,
  # and push convenience tags ("phylo" on main; version tags on releases).
  create-manifest-phylo:
    needs: [get-version, build-phylo-amd64, build-phylo-arm64]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      # Multi-arch tag name (e.g. "<prefix>-phylo"), consumed by test jobs.
      phylo-tag: ${{ steps.get-tag.outputs.tag }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Get phylo tag
        id: get-tag
        run: echo "tag=${{ needs.get-version.outputs.image-tag-prefix }}-phylo" >> $GITHUB_OUTPUT
      - name: Create multi-arch manifest
        uses: ./.github/actions/create-manifest
        with:
          ghcr-repo: ${{ env.GHCR_REPO }}
          target-tag: ${{ steps.get-tag.outputs.tag }}
          source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-phylo-amd64
          source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-phylo-arm64
          description: 'Viral genomics analysis tools - phylogenetics'
      - name: Create additional tags for main branch
        if: github.ref == 'refs/heads/main'
        uses: ./.github/actions/create-manifest
        with:
          ghcr-repo: ${{ env.GHCR_REPO }}
          target-tag: phylo
          source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-phylo-amd64
          source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-phylo-arm64
          description: 'Viral genomics analysis tools - phylogenetics'
      # On release tags, also push <version>-phylo and <major.minor>-phylo.
      - name: Create version tags
        if: github.ref_type == 'tag'
        run: |
          VERSION="${{ needs.get-version.outputs.image-tag-prefix }}"
          # Extract "major.minor"; a non-matching VERSION passes through sed
          # unchanged, making the second create a harmless repeat of the first.
          MAJOR_MINOR=$(echo "$VERSION" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
          # NOTE(review): ${VERSION}-phylo duplicates the tag already created
          # by the "Create multi-arch manifest" step above — confirm intended.
          docker buildx imagetools create \
            --tag ${{ env.GHCR_REPO }}:${VERSION}-phylo \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-phylo-amd64 \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-phylo-arm64
          docker buildx imagetools create \
            --tag ${{ env.GHCR_REPO }}:${MAJOR_MINOR}-phylo \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-phylo-amd64 \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-phylo-arm64
# ============================================================================
# MEGA BUILD JOBS - Native multi-arch builds (all tools combined)
# ============================================================================
# Build mega for AMD64
  # Build the "mega" flavor (all tools combined) natively on an amd64 runner,
  # layered on top of the amd64 core image.
  build-mega-amd64:
    needs: [get-version, build-core-amd64]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    outputs:
      # Digest of the pushed single-arch image.
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Build and push (amd64)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.mega
          platforms: linux/amd64
          # Plain single-arch push (no attestations, Docker media types) for
          # the later manifest-merge job.
          provenance: false
          sbom: false
          outputs: type=image,push=true,oci-mediatypes=false
          tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-mega-amd64
          build-args: |
            BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-amd64
            VERSION=${{ needs.get-version.outputs.version }}
          # Branch-specific cache first, then main-branch fallback.
          cache-from: |
            type=registry,ref=${{ env.GHCR_REPO }}:cache-mega-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-mega-amd64-main
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-amd64-main
          cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-mega-amd64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Build mega for ARM64
  # Build the "mega" flavor natively on an arm64 runner (no QEMU emulation).
  build-mega-arm64:
    needs: [get-version, build-core-arm64]
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
      packages: write
    outputs:
      # Digest of the pushed single-arch image.
      digest: ${{ steps.build.outputs.digest }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Build and push (arm64)
        id: build
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Dockerfile.mega
          platforms: linux/arm64
          # Plain single-arch push for the later manifest-merge job.
          provenance: false
          sbom: false
          outputs: type=image,push=true,oci-mediatypes=false
          tags: ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-mega-arm64
          build-args: |
            BASEIMAGE=${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-core-arm64
            VERSION=${{ needs.get-version.outputs.version }}
          # Branch-specific cache first, then main-branch fallback.
          cache-from: |
            type=registry,ref=${{ env.GHCR_REPO }}:cache-mega-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-mega-arm64-main
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-${{ needs.get-version.outputs.image-tag-prefix }}
            type=registry,ref=${{ env.GHCR_REPO }}:cache-core-arm64-main
          cache-to: type=registry,ref=${{ env.GHCR_REPO }}:cache-mega-arm64-${{ needs.get-version.outputs.image-tag-prefix }},mode=max
# Create multi-arch manifest for mega
  # Merge the amd64 + arm64 mega images into one multi-arch manifest.
  # Unlike the other flavors, mega gets the bare "<prefix>" tag (no suffix)
  # and "latest" on main — it is the default image of the repository.
  create-manifest-mega:
    needs: [get-version, build-mega-amd64, build-mega-arm64]
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Setup Docker build environment
        uses: ./.github/actions/setup-docker-build
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          checkout: 'false'
      - name: Create multi-arch manifest
        uses: ./.github/actions/create-manifest
        with:
          ghcr-repo: ${{ env.GHCR_REPO }}
          target-tag: ${{ needs.get-version.outputs.image-tag-prefix }}
          source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-mega-amd64
          source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-mega-arm64
          description: 'Viral genomics analysis tools - all tools combined'
      - name: Create additional tags for main branch
        if: github.ref == 'refs/heads/main'
        uses: ./.github/actions/create-manifest
        with:
          ghcr-repo: ${{ env.GHCR_REPO }}
          target-tag: latest
          source-amd64: ${{ needs.get-version.outputs.image-tag-prefix }}-mega-amd64
          source-arm64: ${{ needs.get-version.outputs.image-tag-prefix }}-mega-arm64
          description: 'Viral genomics analysis tools - all tools combined'
      # On release tags, also push the bare <version> and <major.minor> tags.
      - name: Create version tags
        if: github.ref_type == 'tag'
        run: |
          VERSION="${{ needs.get-version.outputs.image-tag-prefix }}"
          # Extract "major.minor"; a non-matching VERSION passes through sed
          # unchanged, making the second create a harmless repeat of the first.
          MAJOR_MINOR=$(echo "$VERSION" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
          # NOTE(review): the bare ${VERSION} tag duplicates the tag already
          # created by "Create multi-arch manifest" above — confirm intended.
          docker buildx imagetools create \
            --tag ${{ env.GHCR_REPO }}:${VERSION} \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-mega-amd64 \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-mega-arm64
          docker buildx imagetools create \
            --tag ${{ env.GHCR_REPO }}:${MAJOR_MINOR} \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-mega-amd64 \
            ${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-mega-arm64
# ============================================================================
# VULNERABILITY SCAN JOBS - Trivy scans of amd64 images after build
# Scans all 6 flavors in parallel; results uploaded as SARIF to Security tab
# ============================================================================
scan-containers:
needs: [get-version, build-baseimage-amd64, build-core-amd64, build-assemble-amd64, build-classify-amd64, build-phylo-amd64, build-mega-amd64]
runs-on: ubuntu-latest
permissions:
contents: read
packages: read
security-events: write
strategy:
fail-fast: false
matrix:
flavor: [baseimage, core, assemble, classify, phylo, mega]
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Log in to GHCR
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
image-ref: '${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-${{ matrix.flavor }}-amd64'
format: 'sarif'
output: 'trivy-results.sarif'
severity: 'CRITICAL,HIGH'
limit-severities-for-sarif: true
exit-code: '0'
ignore-unfixed: true
trivyignores: '.trivyignore'
ignore-policy: '.trivy-ignore-policy.rego'
- name: Run Trivy vulnerability scanner (JSON)
uses: aquasecurity/trivy-action@master
with:
image-ref: '${{ env.GHCR_REPO }}:${{ needs.get-version.outputs.image-tag-prefix }}-${{ matrix.flavor }}-amd64'
format: 'json'
output: 'trivy-results.json'
severity: 'CRITICAL,HIGH'
exit-code: '1'
ignore-unfixed: true
trivyignores: '.trivyignore'
ignore-policy: '.trivy-ignore-policy.rego'
- name: Log scan result count
if: always()
run: |
if [ -f trivy-results.sarif ]; then
COUNT=$(jq '[.runs[].results[]] | length' trivy-results.sarif)
echo "::notice::Trivy found $COUNT findings for ${{ matrix.flavor }} (after policy filtering)"
fi
- name: Upload Trivy scan results to GitHub Security tab
if: always()
uses: github/codeql-action/upload-sarif@v3
with:
sarif_file: 'trivy-results.sarif'
category: 'container-${{ matrix.flavor }}'
- name: Upload Trivy JSON results
if: always()
uses: actions/upload-artifact@v4
with:
name: trivy-${{ matrix.flavor }}
path: trivy-results.json
# ============================================================================
# TEST JOBS - Run after corresponding manifests are created
# Each test runs in its flavor's Docker container (x86 only)
# ============================================================================
# Test core (x86)
  # Run the core unit tests inside the multi-arch core image (x86 runner).
  test-core:
    needs: [paths-filter, get-version, create-manifest-core]
    # Always test on PRs, main, and release tags; otherwise only when the
    # paths-filter detected core or docker changes.
    if: |
      github.event_name == 'pull_request' ||
      github.ref == 'refs/heads/main' ||
      github.ref_type == 'tag' ||
      needs.paths-filter.outputs.core == 'true' ||
      needs.paths-filter.outputs.docker == 'true'
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Pull test image (with retries)
        uses: ./.github/actions/pull-with-retry
        with:
          image: ${{ env.GHCR_REPO }}:${{ needs.create-manifest-core.outputs.core-tag }}
      - name: Run core tests
        # Mount the checkout at /workspace so pytest sees the repo's tests and
        # the coverage XML lands back on the runner for the Codecov upload.
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            ${{ env.GHCR_REPO }}:${{ needs.create-manifest-core.outputs.core-tag }} \
            pytest tests/unit/core/ \
              -v --tb=short \
              --cov=viral_ngs \
              --cov-report=xml:/workspace/coverage-core.xml \
              -n auto
      - name: Upload coverage to Codecov
        if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || github.ref_type == 'tag'
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage-core.xml
          flags: core
          name: codecov-core
          fail_ci_if_error: false
# Test core (ARM64) - only on PRs with docker changes
  # Run the core unit tests on a native arm64 runner.
  # Only triggered on PRs that touch docker files; no coverage upload here.
  test-core-arm64:
    needs: [paths-filter, get-version, create-manifest-core]
    if: |
      github.event_name == 'pull_request' &&
      needs.paths-filter.outputs.docker == 'true'
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Pull test image (with retries)
        uses: ./.github/actions/pull-with-retry
        with:
          image: ${{ env.GHCR_REPO }}:${{ needs.create-manifest-core.outputs.core-tag }}
      - name: Run core tests (ARM64)
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            ${{ env.GHCR_REPO }}:${{ needs.create-manifest-core.outputs.core-tag }} \
            pytest tests/unit/core/ \
              -v --tb=short \
              -n auto
# Test assemble (x86)
  # Run the assemble unit tests inside the assemble image (x86 runner).
  test-assemble:
    needs: [paths-filter, get-version, create-manifest-assemble]
    # Always test on PRs, main, and release tags; otherwise only when the
    # paths-filter detected assemble, core, or docker changes (assemble is
    # layered on core, so core changes can affect it).
    if: |
      github.event_name == 'pull_request' ||
      github.ref == 'refs/heads/main' ||
      github.ref_type == 'tag' ||
      needs.paths-filter.outputs.assemble == 'true' ||
      needs.paths-filter.outputs.core == 'true' ||
      needs.paths-filter.outputs.docker == 'true'
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Pull test image (with retries)
        uses: ./.github/actions/pull-with-retry
        with:
          image: ${{ env.GHCR_REPO }}:${{ needs.create-manifest-assemble.outputs.assemble-tag }}
      - name: Run assemble tests
        # Mount the checkout so the coverage XML lands back on the runner.
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            ${{ env.GHCR_REPO }}:${{ needs.create-manifest-assemble.outputs.assemble-tag }} \
            pytest tests/unit/assemble/ \
              -v --tb=short \
              --cov=viral_ngs \
              --cov-report=xml:/workspace/coverage-assemble.xml \
              -n auto
      - name: Upload coverage to Codecov
        if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || github.ref_type == 'tag'
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage-assemble.xml
          flags: assemble
          name: codecov-assemble
          fail_ci_if_error: false
# Test assemble (ARM64) - only on PRs with docker changes
  # Run the assemble unit tests on a native arm64 runner.
  # Only triggered on PRs that touch docker files; no coverage upload here.
  test-assemble-arm64:
    needs: [paths-filter, get-version, create-manifest-assemble]
    if: |
      github.event_name == 'pull_request' &&
      needs.paths-filter.outputs.docker == 'true'
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Pull test image (with retries)
        uses: ./.github/actions/pull-with-retry
        with:
          image: ${{ env.GHCR_REPO }}:${{ needs.create-manifest-assemble.outputs.assemble-tag }}
      - name: Run assemble tests (ARM64)
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            ${{ env.GHCR_REPO }}:${{ needs.create-manifest-assemble.outputs.assemble-tag }} \
            pytest tests/unit/assemble/ \
              -v --tb=short \
              -n auto
# Test classify (x86)
  # Run the classify unit tests inside the classify image (x86 runner).
  test-classify:
    needs: [paths-filter, get-version, create-manifest-classify]
    # Always test on PRs, main, and release tags; otherwise only when the
    # paths-filter detected classify, core, or docker changes (classify is
    # layered on core, so core changes can affect it).
    if: |
      github.event_name == 'pull_request' ||
      github.ref == 'refs/heads/main' ||
      github.ref_type == 'tag' ||
      needs.paths-filter.outputs.classify == 'true' ||
      needs.paths-filter.outputs.core == 'true' ||
      needs.paths-filter.outputs.docker == 'true'
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Pull test image (with retries)
        uses: ./.github/actions/pull-with-retry
        with:
          image: ${{ env.GHCR_REPO }}:${{ needs.create-manifest-classify.outputs.classify-tag }}
      - name: Run classify tests
        # Mount the checkout so the coverage XML lands back on the runner.
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            ${{ env.GHCR_REPO }}:${{ needs.create-manifest-classify.outputs.classify-tag }} \
            pytest tests/unit/classify/ \
              -v --tb=short \
              --cov=viral_ngs \
              --cov-report=xml:/workspace/coverage-classify.xml \
              -n auto
      - name: Upload coverage to Codecov
        if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || github.ref_type == 'tag'
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage-classify.xml
          flags: classify
          name: codecov-classify
          fail_ci_if_error: false
# Test classify (ARM64) - only on PRs with docker changes
  # Run the classify unit tests on a native arm64 runner.
  # Only triggered on PRs that touch docker files; no coverage upload here.
  test-classify-arm64:
    needs: [paths-filter, get-version, create-manifest-classify]
    if: |
      github.event_name == 'pull_request' &&
      needs.paths-filter.outputs.docker == 'true'
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Pull test image (with retries)
        uses: ./.github/actions/pull-with-retry
        with:
          image: ${{ env.GHCR_REPO }}:${{ needs.create-manifest-classify.outputs.classify-tag }}
      - name: Run classify tests (ARM64)
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            ${{ env.GHCR_REPO }}:${{ needs.create-manifest-classify.outputs.classify-tag }} \
            pytest tests/unit/classify/ \
              -v --tb=short \
              -n auto
# Test phylo (x86)
  # Run the phylo unit tests inside the phylo image (x86 runner).
  test-phylo:
    needs: [paths-filter, get-version, create-manifest-phylo]
    # Always test on PRs, main, and release tags; otherwise only when the
    # paths-filter detected phylo, core, or docker changes (phylo is layered
    # on core, so core changes can affect it).
    if: |
      github.event_name == 'pull_request' ||
      github.ref == 'refs/heads/main' ||
      github.ref_type == 'tag' ||
      needs.paths-filter.outputs.phylo == 'true' ||
      needs.paths-filter.outputs.core == 'true' ||
      needs.paths-filter.outputs.docker == 'true'
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Pull test image (with retries)
        uses: ./.github/actions/pull-with-retry
        with:
          image: ${{ env.GHCR_REPO }}:${{ needs.create-manifest-phylo.outputs.phylo-tag }}
      - name: Run phylo tests
        # Mount the checkout so the coverage XML lands back on the runner.
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            ${{ env.GHCR_REPO }}:${{ needs.create-manifest-phylo.outputs.phylo-tag }} \
            pytest tests/unit/phylo/ \
              -v --tb=short \
              --cov=viral_ngs \
              --cov-report=xml:/workspace/coverage-phylo.xml \
              -n auto
      - name: Upload coverage to Codecov
        if: github.event_name == 'pull_request' || github.ref == 'refs/heads/main' || github.ref_type == 'tag'
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          files: ./coverage-phylo.xml
          flags: phylo
          name: codecov-phylo
          fail_ci_if_error: false
# Test phylo (ARM64) - only on PRs with docker changes
  # Run the phylo unit tests on a native arm64 runner.
  # Only triggered on PRs that touch docker files; no coverage upload here.
  test-phylo-arm64:
    needs: [paths-filter, get-version, create-manifest-phylo]
    if: |
      github.event_name == 'pull_request' &&
      needs.paths-filter.outputs.docker == 'true'
    runs-on: ubuntu-24.04-arm
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Pull test image (with retries)
        uses: ./.github/actions/pull-with-retry
        with:
          image: ${{ env.GHCR_REPO }}:${{ needs.create-manifest-phylo.outputs.phylo-tag }}
      - name: Run phylo tests (ARM64)
        run: |
          docker run --rm \
            -v ${{ github.workspace }}:/workspace \
            -w /workspace \
            ${{ env.GHCR_REPO }}:${{ needs.create-manifest-phylo.outputs.phylo-tag }} \
            pytest tests/unit/phylo/ \
              -v --tb=short \
              -n auto
# ============================================================================
# AGGREGATOR JOB - Single status check for branch protection
# This job always runs and reports combined test status, allowing conditional
# test jobs to be skipped without blocking PRs that don't need those tests.
# ============================================================================
all-tests-pass:
if: always()
needs:
- test-core
- test-core-arm64
- test-assemble
- test-assemble-arm64
- test-classify
- test-classify-arm64
- test-phylo
- test-phylo-arm64
runs-on: ubuntu-latest
permissions: {}
steps:
- name: Check test results
run: |
# Get all job results
results="${{ join(needs.*.result, ' ') }}"
echo "Job results: $results"
# Fail if any job failed or was cancelled
for result in $results; do
if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
echo "One or more test jobs failed or were cancelled"
exit 1
fi
done
echo "All test jobs passed or were skipped"
# ============================================================================
# DEPLOY JOBS - Copy images from GHCR to Quay after tests pass
# Uses crane to efficiently copy multi-arch manifests without re-pulling layers
# ============================================================================
  deploy-to-quay:
    needs: [
      get-version,
      create-manifest-baseimage,
      create-manifest-core,
      create-manifest-assemble,
      create-manifest-classify,
      create-manifest-phylo,
      create-manifest-mega,
      all-tests-pass
    ]
    # Use always() to evaluate condition even when upstream jobs were skipped.
    # Without this, GitHub skips this job if any job in the dependency chain was skipped,
    # even if all direct dependencies succeeded.
    # Each needed manifest job must have actually succeeded (not merely
    # "not failed"), and deploys never run for pull requests.
    if: |
      always() &&
      !cancelled() &&
      needs.all-tests-pass.result == 'success' &&
      needs.create-manifest-baseimage.result == 'success' &&
      needs.create-manifest-core.result == 'success' &&
      needs.create-manifest-assemble.result == 'success' &&
      needs.create-manifest-classify.result == 'success' &&
      needs.create-manifest-phylo.result == 'success' &&
      needs.create-manifest-mega.result == 'success' &&
      github.event_name != 'pull_request'
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: read
    strategy:
      # Copy each flavor independently; one failed copy must not cancel the rest.
      fail-fast: false
      matrix:
        include:
          - flavor: baseimage
          - flavor: core
          - flavor: assemble
          - flavor: classify
          - flavor: phylo
          - flavor: mega
    steps:
      - name: Install crane
        uses: imjasonh/setup-crane@v0.4
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Log in to Quay.io
        uses: docker/login-action@v3
        with:
          registry: quay.io
          username: ${{ secrets.QUAY_USERNAME }}
          password: ${{ secrets.QUAY_TOKEN }}
      # Computes the tag set for this flavor. Step outputs:
      #   src_tag        - tag to pull from GHCR
      #   dest_tags      - primary destination tag on Quay (same value as src_tag)
      #   extra_dest_tag - major.minor alias, only for version tags, else empty
      #   latest_tag     - "latest"/"<flavor>" alias, only on main branch, else empty
      - name: Determine tags to copy
        id: tags
        run: |
          FLAVOR="${{ matrix.flavor }}"
          # Determine the suffix (mega has no suffix)
          if [[ "$FLAVOR" == "mega" ]]; then
            SUFFIX=""
          else
            SUFFIX="-${FLAVOR}"
          fi
          # Use image-tag-prefix which handles both branches and version tags
          # (strips 'v' prefix from version tags, sanitizes '/' in branch names)
          TAG_PREFIX="${{ needs.get-version.outputs.image-tag-prefix }}"
          echo "src_tag=${TAG_PREFIX}${SUFFIX}" >> $GITHUB_OUTPUT
          echo "dest_tags=${TAG_PREFIX}${SUFFIX}" >> $GITHUB_OUTPUT
          # For version tags, also create major.minor tag
          if [[ "${{ github.ref_type }}" == "tag" ]]; then
            MAJOR_MINOR=$(echo "$TAG_PREFIX" | sed -E 's/^([0-9]+\.[0-9]+).*/\1/')
            echo "extra_dest_tag=${MAJOR_MINOR}${SUFFIX}" >> $GITHUB_OUTPUT
          else
            echo "extra_dest_tag=" >> $GITHUB_OUTPUT
          fi
          # For main branch, also create the "latest" style tag
          if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
            if [[ "$FLAVOR" == "mega" ]]; then
              echo "latest_tag=latest" >> $GITHUB_OUTPUT
            else
              echo "latest_tag=${FLAVOR}" >> $GITHUB_OUTPUT
            fi
          else
            echo "latest_tag=" >> $GITHUB_OUTPUT
          fi
      # NOTE(review): GHCR_REPO and QUAY_REPO appear to be workflow-level env
      # vars defined earlier in this file (not visible in this excerpt) —
      # confirm they are set before editing this step.
      - name: Copy image to Quay.io
        run: |
          SRC="${{ env.GHCR_REPO }}:${{ steps.tags.outputs.src_tag }}"
          DEST="${{ env.QUAY_REPO }}:${{ steps.tags.outputs.dest_tags }}"
          echo "Copying $SRC -> $DEST"
          crane copy "$SRC" "$DEST"
          # Copy extra version tag if present (e.g., v1.25 for v1.25.0)
          if [[ -n "${{ steps.tags.outputs.extra_dest_tag }}" ]]; then
            EXTRA_DEST="${{ env.QUAY_REPO }}:${{ steps.tags.outputs.extra_dest_tag }}"
            echo "Copying $SRC -> $EXTRA_DEST"
            crane copy "$SRC" "$EXTRA_DEST"
          fi
          # Copy "latest" style tag for main branch
          if [[ -n "${{ steps.tags.outputs.latest_tag }}" ]]; then
            LATEST_DEST="${{ env.QUAY_REPO }}:${{ steps.tags.outputs.latest_tag }}"
            echo "Copying $SRC -> $LATEST_DEST"
            crane copy "$SRC" "$LATEST_DEST"
          fi
================================================
FILE: .github/workflows/docs.yml
================================================
name: Documentation

on:
  push:
    # '**' matches every branch (including main), so listing 'main'
    # separately was redundant and has been removed.
    branches:
      - '**'
    paths:
      - 'docs/**'
      - 'src/viral_ngs/**'
      - '.readthedocs.yml'
      - '.github/workflows/docs.yml'
  pull_request:
    branches:
      - main
    paths:
      - 'docs/**'
      - 'src/viral_ngs/**'
      - '.readthedocs.yml'
      - '.github/workflows/docs.yml'

jobs:
  # Builds the Sphinx HTML docs and uploads them as a short-lived artifact.
  build-docs:
    runs-on: ubuntu-latest
    permissions:
      contents: read
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history for git describe version
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.10'
      - name: Install dependencies
        run: |
          pip install -r docs/requirements.txt
          pip install -e .
      - name: Build documentation
        run: |
          cd docs
          # -W promotes Sphinx warnings to errors so CI fails on doc problems
          sphinx-build -W -b html . _build/html
      - name: Upload documentation artifact
        uses: actions/upload-artifact@v4
        with:
          name: documentation
          path: docs/_build/html/
          retention-days: 7
================================================
FILE: .gitignore
================================================
# Generated version file and test outputs
VERSION
test/output/
# Sphinx documentation
docs/_build/
# Mac OSX
.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# WIP/dev temporary backup copies
*.bak
*.bak[0-9]
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Pyenv local
.python-version
# Unit test / coverage reports
htmlcov/
.tox/
# Note: .coverage is already matched by the .coverage* glob below;
# kept explicitly for clarity.
.coverage
.coverage*
.cache
.noseids
nosetests.xml
pytest.xml
coverage.xml
coverage/
# Generated test index and legacy easy-deploy artifacts
test/input/TestVPhaser2/in.bam.bti
easy-deploy/data/Snakefile
easy-deploy-virtualized/data/Snakefile
easy-deploy-virtualized/data/config.json
easy-deploy-virtualized/data/config.yaml
easy-deploy-virtualized/data/viral-ngs/
**/.vagrant/
# Local tool build/install caches
tools/build/
tools/conda-cache/
tools/conda-tools/
*.snakemake/
.pytest_cache/
.vscode/
vulnerability-mitigation-status.md
================================================
FILE: .readthedocs.yml
================================================
# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details

# Required
version: 2

# Build environment: OS image and Python version used by Read the Docs
build:
  os: ubuntu-22.04
  tools:
    python: "3.10"

# Build documentation in the docs/ directory with Sphinx
sphinx:
  builder: html
  configuration: docs/conf.py

# MkDocs alternative (unused; kept commented for reference)
#mkdocs:
#  configuration: mkdocs.yml

# Build docs in all additional formats (PDF, ePub) as well as HTML
formats: all

# Python requirements for the docs build, plus this package itself
python:
  install:
    - requirements: docs/requirements.txt
    # Install the repository so Sphinx autodoc can import viral_ngs
    - method: pip
      path: .
      extra_requirements: []
================================================
FILE: .trivy-ignore-policy.rego
================================================
#
# bioinformatics-platform.rego
#
# Conservative Trivy ignore policy for bioinformatics container images
# running on genomics PaaS platforms as batch pipeline tasks.
#
# ASSUMPTIONS (document and verify these match your platform):
# 1. Containers run as non-interactive batch jobs (no shell sessions,
# no web UIs, no Jupyter notebooks served from pipeline containers)
# 2. Containers have no inbound network listeners (no ports exposed)
# 3. Containers run with dropped capabilities (no CAP_SYS_ADMIN, etc.)
# 4. Containers do not run in privileged mode
# 5. Pipeline inputs are data files (FASTQ, BAM, VCF, reference genomes)
# which may be untrusted or malformed
#
# If any assumption does not hold for a given image or use case,
# DO NOT apply this policy to that image.
#
# USAGE:
# trivy image --ignore-policy bioinformatics-platform.rego \
# --severity CRITICAL,HIGH --ignore-unfixed <image>
#
# IMPORTANT: Before using this policy, run your image with:
# trivy image --format json <image> > scan.json
# and inspect the CVSS field structure. The field paths below
# (input.CVSS, input.CweIDs, etc.) reflect the Trivy JSON output
# structure. If your Trivy version uses different paths, adjust
# accordingly. The CVSS vector string location has changed across
# Trivy versions (see https://github.com/aquasecurity/trivy/issues/1627).
#
# CVSS VERSION SUPPORT:
# This policy supports both CVSS v3.1 and CVSS v4.0 vector strings.
# Trivy is transitioning to v4.0 for newer advisories. Some CVEs may
# have only a v4.0 vector (no v3.1). The helper functions at the bottom
# extract vectors from both versions and the rules are written to match
# either format.
#
# VERSION: 2.0
# LAST REVIEWED: 2026-03-20
# REVIEW CADENCE: Quarterly, or when platform architecture changes
#
package trivy

# Findings are reported by default; a finding is suppressed only when at
# least one of the `ignore` rules below evaluates to true for it.
default ignore = false
###############################################################################
# SECTION 1: PHYSICAL ACCESS REQUIRED (AV:P)
#
# Rationale: Cloud-hosted containers are never physically accessible.
# These CVEs require hands-on hardware interaction (USB, Firewire,
# JTAG, etc.) which is impossible in any cloud PaaS context.
#
# CVSS v3.1: AV:P
# CVSS v4.0: AV:P (same field name)
#
# Risk of false negative: Essentially zero. There is no scenario
# in which a pipeline container is physically accessible to an attacker.
# Confidence: Very High
###############################################################################
# CVSS v3.1 vector contains AV:P (physical access required).
ignore {
    has_v3_field(input, "AV:P")
}

# CVSS v4.0 vector contains AV:P.
ignore {
    has_v4_field(input, "AV:P")
}
###############################################################################
# SECTION 2: ADJACENT NETWORK REQUIRED (AV:A)
#
# Rationale: Adjacent-network attacks require the attacker to be on
# the same physical or logical network segment (e.g., same VLAN,
# Bluetooth, local WiFi). In a cloud PaaS, pipeline containers run
# on orchestrated infrastructure where the attacker cannot place
# themselves on an adjacent segment.
#
# CVSS v3.1: AV:A
# CVSS v4.0: AV:A (same field name)
#
# Risk of false negative: Very low. Cloud networking abstractions
# make adjacent-network attacks impractical against pipeline containers.
# Confidence: Very High
###############################################################################
# CVSS v3.1 vector contains AV:A (adjacent network required).
ignore {
    has_v3_field(input, "AV:A")
}

# CVSS v4.0 vector contains AV:A.
ignore {
    has_v4_field(input, "AV:A")
}
###############################################################################
# SECTION 3: USER INTERACTION REQUIRED (UI:R) + LOCAL VECTOR (AV:L)
#
# Rationale: Batch pipeline containers have no interactive user sessions.
# No human is clicking links, opening files in a GUI, or interacting
# with the container during execution. CVEs that require BOTH local
# access AND user interaction (e.g., tricking a user into opening a
# malicious file in a desktop app) are not exploitable in this context.
#
# NOTE: We require BOTH conditions (AV:L AND UI:R), not either alone.
# - AV:L alone is NOT safe to ignore (local privilege escalation
# could be triggered by pipeline code)
# - UI:R alone is NOT safe to ignore for AV:N vulns (some network
# vulns with UI:R involve clicking a link, which doesn't apply,
# but others are ambiguous)
# - AV:L + UI:R together means "must have local access AND a human
# must do something" - genuinely inapplicable in batch containers.
#
# CVSS v3.1: AV:L + UI:R
# CVSS v4.0: AV:L + UI:P (Passive) or UI:A (Active)
# v4.0 splits "Required" into Passive (viewing content) and Active
# (clicking/interacting). Both require a human, so both are safe to
# ignore in batch containers.
#
# Risk of false negative: Very low for true batch pipeline containers.
# Confidence: High
###############################################################################
# v3: AV:L + UI:R
# v3: AV:L + UI:R
# (both conditions must hold — statements in a rule body are ANDed)
ignore {
    has_v3_field(input, "AV:L")
    has_v3_field(input, "UI:R")
}

# v4: AV:L + UI:P (Passive user interaction)
ignore {
    has_v4_field(input, "AV:L")
    has_v4_field(input, "UI:P")
}

# v4: AV:L + UI:A (Active user interaction)
ignore {
    has_v4_field(input, "AV:L")
    has_v4_field(input, "UI:A")
}
###############################################################################
# SECTION 4: HIGH PRIVILEGES REQUIRED + LOCAL VECTOR
#
# Rationale: CVEs requiring both local access and high (administrative)
# privileges assume the attacker already has elevated access to the
# system. In a properly configured container (non-root user, dropped
# capabilities), the process inside the container does not have high
# privileges to begin with. Combined with the local access requirement,
# this class of CVE is not practically exploitable.
#
# NOTE: We only ignore AV:L + PR:H, not AV:N + PR:H. A network-
# accessible vulnerability requiring high privileges may still be
# relevant if the service runs as a privileged user.
#
# CVSS v3.1: AV:L + PR:H
# CVSS v4.0: AV:L + PR:H (same field names)
#
# Risk of false negative: Low, assuming containers run as non-root.
# If your containers run as root, REMOVE THIS RULE.
# Confidence: High (conditional on non-root execution)
###############################################################################
# v3: local attack vector AND high privileges required.
ignore {
    has_v3_field(input, "AV:L")
    has_v3_field(input, "PR:H")
}

# v4 equivalent (same field names as v3).
ignore {
    has_v4_field(input, "AV:L")
    has_v4_field(input, "PR:H")
}
###############################################################################
# SECTION 5: LOCAL ATTACK, SCOPE UNCHANGED (AV:L + S:U)
#
# Rationale: AV:L means the attacker already has local code execution
# inside the container. S:U (Scope Unchanged) means the impact does not
# cross a security boundary — it stays within the container.
#
# In an ephemeral batch container, this combination means: the attacker
# can already execute arbitrary code, and the vulnerability only lets
# them affect things inside a container that will be destroyed when the
# job completes. The vulnerability grants no capability the attacker
# does not already have.
#
# Contrast with S:C (Scope Changed): a local vulnerability that crosses
# the container-host boundary (e.g., container escape via kernel exploit)
# IS dangerous and is NOT ignored by this rule.
#
# CVSS v3.1: AV:L + S:U
# CVSS v4.0: AV:L + SC:N + SI:N + SA:N
# v4.0 replaced the binary S:U/S:C with three subsequent-component
# impact fields. SC:N + SI:N + SA:N means no impact on any component
# beyond the vulnerable one — equivalent to v3's S:U.
#
# Risk of false negative: Low. The theoretical concern is that AV:L+S:U
# could include reading mounted secrets, but an attacker with code
# execution can already read those secrets directly.
# Confidence: High
###############################################################################
# v3: AV:L + S:U
# v3: AV:L + S:U
ignore {
    has_v3_field(input, "AV:L")
    has_v3_field(input, "S:U")
}

# v4: AV:L + no subsequent-component impact
# (SC:N + SI:N + SA:N is the v4 equivalent of v3's S:U)
ignore {
    has_v4_field(input, "AV:L")
    has_v4_field(input, "SC:N")
    has_v4_field(input, "SI:N")
    has_v4_field(input, "SA:N")
}
###############################################################################
# SECTION 6: AVAILABILITY-ONLY IMPACT, SCOPE UNCHANGED
#
# Rationale: CVEs where the only impact is availability (DoS/resource
# exhaustion) and scope is unchanged mean: processing crafted input can
# crash or hang the affected process, but cannot read data (C:N),
# modify data (I:N), or affect other components (S:U).
#
# In ephemeral batch containers, a DoS means a single pipeline job
# fails or hangs until it hits its timeout or memory limit. This is
# operationally equivalent to a corrupted input file or OOM — the job
# fails, the container is destroyed, and the next job runs on a fresh
# container. There is no persistent state corruption, no data
# exfiltration, and no lateral movement.
#
# This rule applies regardless of attack vector (including AV:N),
# because the impact is strictly contained: even if triggered by
# network-delivered data, the worst outcome is one failed job.
#
# NOTE: This does NOT ignore:
# - DoS with S:C / SC≠N / SI≠N / SA≠N (scope changed — could
# affect host or other containers)
# - DoS combined with any confidentiality or integrity impact
# (C≠N or I≠N), which could indicate data leaks or corruption
# alongside the crash
#
# CVSS v3.1: C:N + I:N + S:U (with any A value)
# CVSS v4.0: VC:N + VI:N + SC:N + SI:N + SA:N (with any VA value)
#
# Risk of false negative: Low. The concern would be if a DoS could be
# weaponized into a resource exhaustion attack against the compute
# platform (e.g., repeatedly submitting jobs with crafted inputs to
# burn credits). This is a business logic concern mitigated by job
# submission controls and cost alerts, not by container hardening.
# Confidence: High
###############################################################################
# v3: C:N + I:N + S:U (availability-only, scope unchanged)
# v3: C:N + I:N + S:U (availability-only, scope unchanged)
# Applies regardless of attack vector — see Section 6 rationale above.
ignore {
    has_v3_field(input, "C:N")
    has_v3_field(input, "I:N")
    has_v3_field(input, "S:U")
}

# v4: VC:N + VI:N + no subsequent-component impact (availability-only)
ignore {
    has_v4_field(input, "VC:N")
    has_v4_field(input, "VI:N")
    has_v4_field(input, "SC:N")
    has_v4_field(input, "SI:N")
    has_v4_field(input, "SA:N")
}
###############################################################################
# HELPER FUNCTIONS: Extract and match CVSS vector strings
#
# Trivy's JSON structure nests CVSS data under input.CVSS with vendor
# keys. The vector string location varies by data source. We check
# multiple common paths and prefer NVD.
#
# CVSS v3.1 vectors look like: CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
# CVSS v4.0 vectors look like: CVSS:4.0/AV:N/AC:L/AT:N/PR:N/UI:N/VC:N/VI:N/VA:H/SC:N/SI:N/SA:N
#
# Fields are slash-delimited key:value pairs. The has_vX_field helpers
# check for a field anywhere in the vector, handling both mid-string
# (/field/) and end-of-string (/field) positions.
#
# IMPORTANT: Run `trivy image --format json <your-image>` and inspect
# the .Vulnerabilities[].CVSS structure to confirm these paths work
# for your Trivy version. If the structure differs, update these
# functions accordingly.
###############################################################################
# --- CVSS v3.1 vector extraction ---
# Return the CVSS v3 vector string for a finding, preferring NVD, then
# Red Hat, then GHSA, then any other vendor that supplies one; returns ""
# when no v3 vector exists. The else-chain order encodes the preference.
get_v3_vector(vuln) = vector {
    vector := vuln.CVSS.nvd.V3Vector
} else = vector {
    vector := vuln.CVSS.redhat.V3Vector
} else = vector {
    vector := vuln.CVSS.ghsa.V3Vector
} else = vector {
    # Fall back to any vendor key carrying a v3 vector.
    some vendor
    vector := vuln.CVSS[vendor].V3Vector
} else = "" {
    true
}
# --- CVSS v4.0 vector extraction ---
# Return the CVSS v4.0 vector string for a finding, with the same vendor
# preference order as get_v3_vector (NVD, Red Hat, GHSA, then any vendor);
# returns "" when no v4.0 vector exists.
get_v4_vector(vuln) = vector {
    vector := vuln.CVSS.nvd.V40Vector
} else = vector {
    vector := vuln.CVSS.redhat.V40Vector
} else = vector {
    vector := vuln.CVSS.ghsa.V40Vector
} else = vector {
    # Fall back to any vendor key carrying a v4.0 vector.
    some vendor
    vector := vuln.CVSS[vendor].V40Vector
} else = "" {
    true
}
# --- Field matching helpers ---
# Check if a CVSS vector contains a specific field value.
# Handles both mid-string (/AV:N/) and end-of-string (/AV:N) positions.
# True when `field` occurs as a complete slash-delimited component of the
# finding's CVSS v3 vector. Appending a trailing "/" to the vector lets a
# single substring test cover both the mid-string ("/AV:L/") and the
# end-of-string ("/A:H") positions the original two rules handled.
has_v3_field(vuln, field) {
    vector := get_v3_vector(vuln)
    vector != ""
    contains(concat("", [vector, "/"]), concat("", ["/", field, "/"]))
}
# True when `field` occurs as a complete slash-delimited component of the
# finding's CVSS v4.0 vector. Same trailing-"/" trick as has_v3_field:
# one substring check replaces the separate contains/endswith rules.
has_v4_field(vuln, field) {
    vector := get_v4_vector(vuln)
    vector != ""
    contains(concat("", [vector, "/"]), concat("", ["/", field, "/"]))
}
###############################################################################
# RULES INTENTIONALLY NOT INCLUDED (and why):
#
# 1. AV:N (Network attack vector) — NOT blanket-ignored.
# Even though batch pipeline containers typically have no inbound
# listeners, some AV:N CVEs involve outbound connections triggered
# by processing attacker-influenced data (e.g., Log4Shell). We
# cannot safely blanket-ignore network-vector CVEs. However,
# Section 6 does ignore AV:N CVEs that are availability-only with
# no scope change, since the worst outcome is a crashed job.
#
# 2. UI:R alone (without AV:L) — NOT ignored.
# Some AV:N + UI:R vulnerabilities involve scenarios like processing
# a crafted file that triggers a callback, which could be borderline
# relevant if pipeline inputs are not fully trusted.
#
# 3. CWE-based filters — NOT included by default.
# Filtering by CWE class (e.g., ignoring all CWE-79 XSS in a
# non-web context) is tempting but risky as a default policy.
# CWE classifications can be inaccurate, and a single misclassified
# CVE could slip through. If your platform has specific architectural
# mitigations (e.g., provably no web server in any image), you may
# add CWE rules, but this should be a deliberate per-platform choice.
#
# 4. Specific CVE IDs — NOT included.
# Use a .trivyignore.yaml or VEX document for individual CVE
# exceptions with per-CVE justifications. The Rego policy should
# capture architectural/class-level mitigations, not one-off
# exceptions.
#
# 5. AV:L + S:C — NOT ignored.
# Local vulnerabilities with Scope Changed (S:C) can cross security
# boundaries (e.g., container escape via kernel exploit). These are
# dangerous even in ephemeral containers. Only AV:L + S:U (Scope
# Unchanged) is ignored — see Section 5 above.
#
# 6. Inbound-listener-only server CVEs — NOT categorically ignored.
# Many AV:N CVEs in fat JARs (Jetty, ZooKeeper, Netty server-side)
# require an active network listener that we never start. However,
# CVSS does not distinguish inbound-listener vs. data-processing
# attack surfaces within AV:N. Adding package-name-based exceptions
# here would be fragile and is better handled in .trivyignore with
# per-CVE justification documenting that the server component is
# never instantiated.
###############################################################################
================================================
FILE: .trivyignore
================================================
# Trivy ignore file — per-CVE exceptions with justifications
#
# This file documents individual CVE exceptions that cannot be addressed by
# version bumps or by the Rego policy (.trivy-ignore-policy.rego). Each entry
# MUST include a justification explaining why the CVE is accepted.
#
# Format: CVE-YYYY-NNNNN (one per line, # for comments)
# See: https://aquasecurity.github.io/trivy/latest/docs/configuration/filtering/#by-finding-ids
#
# Review cadence: Re-evaluate all entries quarterly or when the affected
# package is upgraded. Remove entries once the underlying package ships a fix.
#
# =============================================================================
# -----------------------------------------------------------------------------
# CVE-2026-23949 — jaraco.context path traversal in tarball extraction
# Severity: HIGH (NVD scored AV:N/S:C — see scoring dispute below)
# Package: setuptools (vendored at setuptools/_vendor/jaraco/context.py)
# Installed: setuptools 79.x (vendored jaraco.context 5.3.0)
# Fix: jaraco.context >= 6.1.0 (installed in conda env, but setuptools
# vendors its own copy which we cannot delete — setuptools imports
# jaraco.text and jaraco.context at runtime for pkg_resources)
#
# JUSTIFICATION:
# 1. The vulnerability is in archive extraction (tarball() context manager).
# setuptools uses this only during "pip install" of source distributions.
# Our containers run pip install at BUILD TIME only, from trusted sources
# (PyPI, conda-forge). No pip installs happen at runtime.
#
# 2. Even if an attacker could trigger pip install at runtime (which would
# require code execution inside the container), the path traversal only
# writes files WITHIN the container. The attacker already has code
# execution — they can write files anywhere in the container without
# needing this vulnerability. It grants no new capability.
#
# 3. Containers are ephemeral batch jobs. Any files written (malicious or
# otherwise) are destroyed when the job completes. There is no
# persistence, no lateral movement, and no host boundary crossing.
#
# 4. NVD SCORING DISPUTE: NVD scored this AV:N/AC:L/PR:N/UI:N/S:C/C:H,
# which assumes a network-facing service extracting untrusted archives
# where the traversal crosses a trust boundary. In our deployment model,
# there is no network-facing service, the archives come from trusted
# sources, and the "boundary" is a container that will be destroyed.
# The Rego policy cannot filter this because NVD mis-scored the attack
# vector as Network (AV:N) rather than Local (AV:L).
#
# RESOLUTION: Will self-resolve when setuptools releases a version with
# updated vendored dependencies (jaraco.context >= 6.1.0).
# ADDED: 2026-03-19
# -----------------------------------------------------------------------------
CVE-2026-23949
# -----------------------------------------------------------------------------
# CVE-2020-25649 — jackson-databind XXE in DOMDeserializer
# Severity: HIGH (AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:H/A:N)
# Package: com.fasterxml.jackson.core:jackson-databind 2.10.5
# Location: opt/conda/share/snpeff-5.2-3/snpEff.jar
#
# JUSTIFICATION:
# The vulnerable code path is DOMDeserializer, which is invoked when
# jackson-databind deserializes XML input into DOM objects without
# disabling external entity resolution. This enables XXE attacks
# (SSRF, local file read) when processing attacker-controlled XML.
#
# snpEff uses jackson-databind for JSON parsing only (config metadata,
# database indices). snpEff's input formats are VCF (tab-delimited),
# GenBank (flat text), and Java properties files. It never parses XML
# through Jackson's DOMDeserializer. The vulnerable code is present in
# the fat JAR but the code path is never traversed.
#
# This exclusion applies only to snpEff's bundled copy. If jackson-
# databind 2.10.5 appears in other JARs, this entry will NOT suppress
# those findings — Trivy matches .trivyignore by CVE ID globally, but
# we accept this because no other JAR in our images bundles this old
# version. If that changes, re-evaluate this entry.
#
# RESOLUTION: Upstream snpEff must update its bundled jackson-databind.
# ADDED: 2026-03-20
# -----------------------------------------------------------------------------
CVE-2020-25649
================================================
FILE: AGENTS.md
================================================
# AGENTS.md
This document provides guidance for AI assistants (Claude Code, GitHub Copilot, etc.) working on this repository.
## Overview
viral-ngs is a consolidated monorepo for viral NGS (Next-Generation Sequencing) analysis tools. It provides:
- **Core utilities**: Read manipulation, Illumina demultiplexing, file handling, QC
- **Assembly**: Genome assembly, scaffolding, gap filling
- **Classification**: Metagenomic classification, taxonomy filtering, k-mer analysis
- **Phylogenetics**: Variant calling, consensus generation, annotation
**Related resources:**
- Command-line documentation: https://viral-ngs.readthedocs.org/
- Higher-level pipelines: https://github.com/broadinstitute/viral-pipelines
---
## Development Environment
### Docker-Centric Development
Development is **intentionally docker-centric**. Developers need:
- Docker
- Git
- Text/code editor
### Development Workflow
1. Clone the repository:
```bash
git clone https://github.com/broadinstitute/viral-ngs.git
```
2. Run the container with local checkout mounted:
```bash
docker run -it --rm \
-v $(pwd):/opt/viral-ngs/source \
quay.io/broadinstitute/viral-ngs:main-core
```
3. If modifying conda dependencies, install them inside the container:
```bash
micromamba install <packages>
```
4. Test code interactively:
```bash
cd /opt/viral-ngs/source
pytest -rsxX -n auto tests/unit
```
5. Push changes to GitHub for automated CI testing
### Running Tests
```bash
# Run all unit tests in the core image
docker run --rm \
-v $(pwd):/opt/viral-ngs/source \
quay.io/broadinstitute/viral-ngs:main-core \
pytest -rsxX -n auto /opt/viral-ngs/source/tests/unit
# Run specific module tests
docker run --rm \
-v $(pwd):/opt/viral-ngs/source \
quay.io/broadinstitute/viral-ngs:main-classify \
pytest -rsxX -n auto /opt/viral-ngs/source/tests/unit/classify
```
**Important: Testing source code changes requires re-installing the package.**
The `-v` mount makes your local files visible on disk, but `viral_ngs` is already installed as a package inside the container image. Python imports resolve to the *installed* copy, not your mounted source files. If you've modified files under `src/viral_ngs/`, you must re-install before running tests:
```bash
# Run tests with local source changes applied
docker run --rm \
-v $(pwd):/opt/viral-ngs/source \
quay.io/broadinstitute/viral-ngs:main-core \
bash -c "pip install -e /opt/viral-ngs/source --quiet && pytest -rsxX -n auto /opt/viral-ngs/source/tests/unit"
```
Changes to test files (`tests/`) and test inputs (`tests/input/`) are picked up automatically via the volume mount — the re-install is only needed when modifying the `src/viral_ngs/` package code.
Running pytest directly on the host will generally not work — most dependencies (bioinformatics tools, conda packages) are only available inside the Docker containers. Always test inside Docker.
**Test conventions:**
- Uses pytest (not nose or unittest)
- Test files in `tests/unit/<module>/`
- Test input files in `tests/input/<TestClassName>/`
- Access via `viral_ngs.core.file.get_test_input_path(self)`
- Custom marker: `@pytest.mark.slow` for slow tests
---
## Code Architecture
### Directory Structure
```
viral-ngs/
├── pyproject.toml # Package configuration
├── src/viral_ngs/
│ ├── __init__.py # Version detection
│ ├── py.typed # PEP 561 marker
│ │
│ ├── # Command modules (CLI entry points)
│ ├── illumina.py # Illumina demux commands
│ ├── read_utils.py # Read manipulation commands
│ ├── assembly.py # Assembly commands
│ ├── metagenomics.py # Classification commands
│ ├── interhost.py # Phylo commands
│ │
│ ├── core/ # Core library (shared utilities + tool wrappers)
│ │ ├── __init__.py # Tool/InstallMethod classes
│ │ ├── samtools.py # Tool wrapper
│ │ ├── picard.py # Tool wrapper
│ │ ├── file.py # File utilities
│ │ ├── misc.py # General utilities
│ │ └── ...
│ │
│ ├── assemble/ # Assembly tool wrappers
│ │ ├── __init__.py
│ │ ├── spades.py
│ │ └── ...
│ │
│ ├── classify/ # Classification tool wrappers
│ │ ├── __init__.py
│ │ ├── kraken2.py
│ │ └── ...
│ │
│ └── phylo/ # Phylogenetics tool wrappers
│ ├── __init__.py
│ ├── mafft.py
│ └── ...
│
├── docker/
│ ├── Dockerfile.baseimage # Base with conda/python
│ ├── Dockerfile.core # Core tools
│ ├── Dockerfile.assemble # + assembly tools
│ ├── Dockerfile.classify # + classification tools
│ ├── Dockerfile.phylo # + phylo tools
│ ├── Dockerfile.mega # All tools combined
│ ├── install-conda-deps.sh
│ └── requirements/
│ ├── baseimage.txt
│ ├── core.txt
│ ├── core-x86.txt # x86-only core packages
│ ├── assemble.txt
│ ├── assemble-x86.txt # x86-only assembly packages
│ ├── classify.txt
│ ├── classify-x86.txt # x86-only classify packages
│ ├── phylo.txt
│ └── phylo-x86.txt # x86-only phylo packages
│
├── tests/
│ ├── conftest.py
│ ├── unit/
│ │ ├── core/
│ │ ├── assemble/
│ │ ├── classify/
│ │ └── phylo/
│ └── input/
│
├── scripts/ # Utility scripts
├── .github/workflows/
│ └── docker.yml # CI/CD workflow
└── docs/
```
### Command Module Pattern
Command modules define CLI entry points:
```python
__commands__ = []
def parser_<command_name>(parser=argparse.ArgumentParser()):
# Define arguments
return parser
def main_<command_name>(args):
# Implementation
pass
__commands__.append(('command_name', parser_command_name))
```
### Tool Wrapper Pattern
Tool wrappers in `core/`, `assemble/`, `classify/`, `phylo/`:
```python
import viral_ngs.core as core
class SamtoolsTool(core.Tool):
def __init__(self, install_methods=None):
if install_methods is None:
install_methods = [core.PrexistingUnixCommand('samtools')]
super().__init__(install_methods=install_methods)
def execute(self, command, *args):
# Run samtools with arguments
pass
```
---
## Import Patterns
### Standard imports
```python
# Within command modules (illumina.py, assembly.py, etc.)
import viral_ngs.core as core
import viral_ngs.core.file as util_file
import viral_ngs.core.misc as util_misc
# Using tools
samtools = core.samtools.SamtoolsTool()
bwa = core.bwa.BwaTool()
# Using utilities
util_file.mkstempfname()
util_misc.available_cpu_count()
```
### Within core/ modules (use relative imports)
```python
from . import samtools, picard
from .file import mkstempfname
from .misc import available_cpu_count
```
### Within subpackages (assemble/, classify/, phylo/)
```python
import viral_ngs.core as core
import viral_ngs.core.file as util_file
# For other tools in same subpackage
from . import mummer, mafft
```
### Key rules
1. **Prefer full imports**: `import viral_ngs.core.samtools` over `from viral_ngs.core import samtools`
2. **Use relative imports within packages**: `from . import X` inside core/, assemble/, etc.
3. **No backward compat stubs**: `viral_ngs.tools` and `viral_ngs.util` don't exist
---
## Dependencies
### Conda-First Approach
ALL runtime dependencies are installed via conda for speed and binary compatibility.
The `pyproject.toml` has empty dependencies - conda handles everything.
### Adding Dependencies
1. Check conda availability:
```bash
micromamba search <package> # default channel
micromamba search -c bioconda <package> # bioconda channel
```
2. Add to appropriate requirements file:
- `docker/requirements/core.txt` - core dependencies
- `docker/requirements/assemble.txt` - assembly-specific
- `docker/requirements/classify.txt` - classification-specific
- `docker/requirements/phylo.txt` - phylo-specific
3. For x86-only packages (no ARM64 build), add to the appropriate `-x86.txt` file:
- `core-x86.txt` - novoalign, mvicuna
- `assemble-x86.txt` - x86-only assembly packages
- `classify-x86.txt` - bmtagger, kallisto, kb-python
- `phylo-x86.txt` - table2asn
### Dependency Resolution
When building derivative images, ALL dependencies (including x86-only) must be installed in a **single resolver call** using the `--x86-only:` prefix:
```bash
# Single resolver call - x86-only files skipped on ARM64
/tmp/install-conda-deps.sh \
/tmp/requirements/baseimage.txt \
/tmp/requirements/core.txt \
/tmp/requirements/classify.txt \
--x86-only:/tmp/requirements/classify-x86.txt
```
This prevents version regressions. **Never install incrementally.**
The `install-conda-deps.sh` script:
- On x86: Includes all files in one micromamba call
- On ARM64: Skips files tagged with `--x86-only:` but includes others
---
## Docker Images
### Image Hierarchy
```
baseimage (conda/python)
└── core (core tools)
├── assemble (+ assembly tools)
├── classify (+ classification tools)
├── phylo (+ phylo tools)
└── mega (all tools)
```
### Tag Format
```
quay.io/broadinstitute/viral-ngs:2.6.0-core
quay.io/broadinstitute/viral-ngs:2.6.0-classify
quay.io/broadinstitute/viral-ngs:2.6.0 # mega (no suffix)
quay.io/broadinstitute/viral-ngs:main-core # main branch
quay.io/broadinstitute/viral-ngs:latest # alias for main mega
```
### Building Locally
```bash
# Build baseimage
docker build -t viral-ngs:baseimage -f docker/Dockerfile.baseimage .
# Build core (needs baseimage)
docker build --build-arg BASEIMAGE=viral-ngs:baseimage \
-t viral-ngs:core -f docker/Dockerfile.core .
# Build derivatives (need core)
docker build --build-arg BASEIMAGE=viral-ngs:core \
-t viral-ngs:classify -f docker/Dockerfile.classify .
```
---
## CI/CD
### GitHub Actions Workflow
The `.github/workflows/docker.yml` workflow handles building and testing:
**Build Architecture:**
Each image flavor is built by 3 jobs (two parallel native-arch builds, then a manifest job) for native multi-arch support:
1. `build-{flavor}-amd64` - runs on `ubuntu-latest`
2. `build-{flavor}-arm64` - runs on `ubuntu-24.04-arm` (native ARM runner)
3. `create-manifest-{flavor}` - combines arch-specific images into multi-arch manifest
This approach is 3-5x faster than QEMU emulation for ARM builds.
**Build Job Flow:**
```
paths-filter + get-version (parallel)
↓
build-baseimage-amd64 ←→ build-baseimage-arm64 (parallel)
↓ ↓
create-manifest-baseimage
↓
build-core-amd64 ←→ build-core-arm64 (parallel)
↓ ↓
create-manifest-core
↓
build-{assemble,classify,phylo,mega}-amd64 ←→ build-{...}-arm64 (parallel)
↓
create-manifest-{flavor}
↓
test-{flavor} + test-{flavor}-arm64 (ARM64 tests only on PRs with docker changes)
↓
deploy-to-quay (push/tag events only)
```
**Test Jobs:**
- **test-core**: Runs on x86, tests `tests/unit/core/`
- **test-assemble**: Runs on x86, tests `tests/unit/assemble/`
- **test-classify**: Runs on x86, tests `tests/unit/classify/`
- **test-phylo**: Runs on x86, tests `tests/unit/phylo/`
- **test-{flavor}-arm64**: Runs on native ARM, only on PRs when docker files change
**Smart Test Scoping:**
Tests only run when relevant code changes:
- Core tests: `src/viral_ngs/*.py`, `core/**`, `util/**`, `tests/unit/core/**`
- Assemble tests: `assemble/**`, `assembly.py`, or core changes
- Classify tests: `classify/**`, `metagenomics.py`, `taxon_filter.py`, or core changes
- Phylo tests: `phylo/**`, `interhost.py`, `intrahost.py`, `ncbi.py`, or core changes
- Docker changes trigger all tests (including ARM64 tests on PRs)
**Coverage:**
Each x86 test job uploads coverage to Codecov with flavor-specific flags.
### Multi-Architecture Support
- Images built natively for `linux/amd64` and `linux/arm64` using parallel runners
- Multi-arch manifests created with OCI annotations using `docker buildx imagetools create`
- x86-only packages (novoalign, mvicuna, bmtagger, kallisto, kb-python, table2asn) handled via `--x86-only:` prefix in `install-conda-deps.sh`
- Python tool wrappers still importable on ARM64; only runtime execution fails for missing binaries
- Tests using x86-only tools have `@unittest.skipIf(IS_ARM, ...)` decorators
- Architecture-specific caches prevent cross-arch cache pollution
### ARM Test Skipping
Tests that use x86-only bioconda packages must be decorated to skip on ARM:
```python
from tests import IS_ARM
SKIP_X86_ONLY_REASON = "tool requires x86-only bioconda package (not available on ARM)"
@unittest.skipIf(IS_ARM, SKIP_X86_ONLY_REASON)
class TestSomeTool(TestCaseWithTmp):
...
# Or at method level:
@unittest.skipIf(IS_ARM, SKIP_X86_ONLY_REASON)
def test_specific_tool(self):
...
```
### Documentation Build
The `docs.yml` workflow builds Sphinx documentation. Key points:
- Uses `mock` to stub heavy dependencies (`Bio`, `pysam`, `scipy`, etc.) in `docs/conf.py`
- When adding new imports to source code, add corresponding mocks to `MOCK_MODULES` in `docs/conf.py`
- Runs `sphinx-build -W` (warnings as errors)
### Registry Strategy
- **GHCR (ghcr.io)**: Primary build registry, images pushed during CI for all events including PRs
- **Quay.io**: Production registry, images copied from GHCR after tests pass (push/tag events only)
- Feature branch images should be cleaned up periodically from Quay.io
---
## Coding Guidelines
### Agent Attribution
**Commit messages**: By default, do NOT include agent/model credits (e.g., "Co-Authored-By: Claude") in commit messages. This reduces noise in the git history.
**Code review comments**: DO include notes about agent/model involvement when writing code review comments (e.g., PR reviews, inline comments). This provides useful context about how the review was conducted.
**Explicit requests**: Include agent attribution in commits or elsewhere when explicitly requested by a human reviewer or contributor.
**Avoid amending pushed commits**: Do not use `git commit --amend` after a commit has been pushed to the remote. Amending pushed commits causes problems for collaboration. Instead, create a new commit with the fix. Amending is fine for local commits that haven't been pushed yet.
### Test-Driven Development
1. Write tests first
gitextract_xof_5l96/
├── .agents/
│ └── skills/
│ ├── claude-on-vertex-ci/
│ │ └── SKILL.md
│ ├── container-vulns/
│ │ └── SKILL.md
│ ├── dsub-batch-jobs/
│ │ └── SKILL.md
│ └── regression-testing/
│ ├── SKILL.md
│ ├── compare_sample_pair.py
│ ├── discover_pairs.py
│ ├── generate_report.py
│ └── run_vadr.sh
├── .claude/
│ └── rules/
│ └── container-vulns.md
├── .codecov.yml
├── .dockerignore
├── .gitattributes
├── .github/
│ ├── actions/
│ │ ├── create-manifest/
│ │ │ └── action.yml
│ │ ├── pull-with-retry/
│ │ │ └── action.yml
│ │ └── setup-docker-build/
│ │ └── action.yml
│ ├── copilot-instructions.md
│ └── workflows/
│ ├── audit-quay-tags.yml
│ ├── cleanup-images.yml
│ ├── container-scan.yml
│ ├── docker.yml
│ └── docs.yml
├── .gitignore
├── .readthedocs.yml
├── .trivy-ignore-policy.rego
├── .trivyignore
├── AGENTS.md
├── CLAUDE.md
├── LICENSE
├── README.md
├── docker/
│ ├── Dockerfile.assemble
│ ├── Dockerfile.baseimage
│ ├── Dockerfile.classify
│ ├── Dockerfile.core
│ ├── Dockerfile.mega
│ ├── Dockerfile.phylo
│ ├── install-conda-deps.sh
│ ├── requirements/
│ │ ├── assemble-x86.txt
│ │ ├── assemble.txt
│ │ ├── baseimage.txt
│ │ ├── classify-x86.txt
│ │ ├── classify.txt
│ │ ├── core-x86.txt
│ │ ├── core.txt
│ │ ├── phylo-x86.txt
│ │ └── phylo.txt
│ └── scripts/
│ ├── calc_mem.py
│ └── fasta-trim-terminal-ambigs.pl
├── docs/
│ ├── Makefile
│ ├── assembly.rst
│ ├── broad_utils.rst
│ ├── cmdline.rst
│ ├── conf.py
│ ├── description.rst
│ ├── file_utils.rst
│ ├── illumina.rst
│ ├── index.rst
│ ├── interhost.rst
│ ├── intrahost.rst
│ ├── kmer_utils.rst
│ ├── metagenomics.rst
│ ├── ncbi.rst
│ ├── read_utils.rst
│ ├── reports.rst
│ ├── requirements.txt
│ └── taxon_filter.rst
├── pyproject.toml
├── src/
│ └── viral_ngs/
│ ├── __init__.py
│ ├── assemble/
│ │ ├── __init__.py
│ │ ├── freebayes.py
│ │ ├── gap2seq.py
│ │ ├── mafft.py
│ │ ├── mummer.py
│ │ ├── muscle.py
│ │ ├── rasusa.py
│ │ ├── skani.py
│ │ ├── spades.py
│ │ ├── vcf.py
│ │ └── wgsim.py
│ ├── assembly.py
│ ├── broad_utils.py
│ ├── classify/
│ │ ├── __init__.py
│ │ ├── blast.py
│ │ ├── bmtagger.py
│ │ ├── kb.py
│ │ ├── kma.py
│ │ ├── kmc.py
│ │ ├── kraken2.py
│ │ ├── krona.py
│ │ ├── last.py
│ │ └── taxonomy.py
│ ├── core/
│ │ ├── __init__.py
│ │ ├── bbmap.py
│ │ ├── bwa.py
│ │ ├── cdhit.py
│ │ ├── cmd.py
│ │ ├── errors.py
│ │ ├── fastqc.py
│ │ ├── file.py
│ │ ├── illumina_indices.py
│ │ ├── illumina_utils.py
│ │ ├── minimap2.py
│ │ ├── misc.py
│ │ ├── mvicuna.py
│ │ ├── novoalign.py
│ │ ├── picard.py
│ │ ├── prinseq.py
│ │ ├── priorities.py
│ │ ├── sambamba.py
│ │ ├── samtools.py
│ │ ├── splitcode.py
│ │ ├── stats.py
│ │ ├── trimmomatic.py
│ │ └── version.py
│ ├── file_utils.py
│ ├── illumina.py
│ ├── interhost.py
│ ├── intrahost.py
│ ├── kmer_utils.py
│ ├── metagenomics.py
│ ├── ncbi.py
│ ├── phylo/
│ │ ├── __init__.py
│ │ ├── feature_table.py
│ │ ├── feature_table_types.py
│ │ ├── genbank.py
│ │ ├── mafft.py
│ │ ├── mummer.py
│ │ ├── muscle.py
│ │ ├── snpeff.py
│ │ ├── vcf.py
│ │ └── vphaser2.py
│ ├── py.typed
│ ├── read_utils.py
│ ├── reports.py
│ └── taxon_filter.py
└── tests/
├── __init__.py
├── conftest.py
├── input/
│ ├── 5kb_human_from_chr6.fasta
│ ├── G5012.3.fasta
│ ├── G5012.3.mini.bam
│ ├── G5012.3.subset.bam
│ ├── G5012.3.testreads.bam
│ ├── README.md
│ ├── TestAssembleSpades/
│ │ ├── clipDb.fasta
│ │ └── trinity_contigs.fasta
│ ├── TestBamFilter/
│ │ ├── expected.bam
│ │ └── input.bam
│ ├── TestBlastnDbBuild/
│ │ └── expected/
│ │ ├── TestBlastnDbBuild.nhr
│ │ ├── TestBlastnDbBuild.nin
│ │ └── TestBlastnDbBuild.nsq
│ ├── TestBmtagger/
│ │ ├── expected.Match.1.fastq
│ │ ├── expected.Match.2.fastq
│ │ ├── expected.NoMatch.1.fastq
│ │ ├── expected.NoMatch.2.fastq
│ │ ├── humanChr1Subset.bitmask
│ │ ├── humanChr1Subset.fa
│ │ ├── humanChr9Subset.bitmask
│ │ ├── humanChr9Subset.fa
│ │ ├── in1.fastq
│ │ └── in2.fastq
│ ├── TestBmtaggerDbBuild/
│ │ └── expected/
│ │ ├── TestBmtaggerDbBuild.bitmask
│ │ ├── TestBmtaggerDbBuild.srprism.amp
│ │ ├── TestBmtaggerDbBuild.srprism.idx.md5
│ │ ├── TestBmtaggerDbBuild.srprism.imp
│ │ ├── TestBmtaggerDbBuild.srprism.map.md5
│ │ ├── TestBmtaggerDbBuild.srprism.pmp
│ │ ├── TestBmtaggerDbBuild.srprism.rmp
│ │ ├── TestBmtaggerDbBuild.srprism.ss.md5
│ │ ├── TestBmtaggerDbBuild.srprism.ssa
│ │ └── TestBmtaggerDbBuild.srprism.ssd
│ ├── TestDepleteBlastnBam/
│ │ ├── expected.sam
│ │ ├── humanChr1Subset.fa
│ │ ├── humanChr9Subset.fa
│ │ └── in.bam
│ ├── TestDepleteHuman/
│ │ ├── aligned-expected/
│ │ │ ├── test-reads.blastn.bam
│ │ │ ├── test-reads.bmtagger.bam
│ │ │ ├── test-reads.bwa.bam
│ │ │ ├── test-reads.revert.bam
│ │ │ ├── test-reads.rmdup.bam
│ │ │ └── test-reads.taxfilt.imperfect.bam
│ │ ├── expected/
│ │ │ ├── test-reads.blastn.bam
│ │ │ ├── test-reads.bmtagger.bam
│ │ │ ├── test-reads.bwa.bam
│ │ │ ├── test-reads.revert.bam
│ │ │ ├── test-reads.rmdup.bam
│ │ │ ├── test-reads.taxfilt.bam
│ │ │ ├── test-reads.taxfilt.imperfect-2.bam
│ │ │ └── test-reads.taxfilt.imperfect.bam
│ │ ├── partial_pan-viral-9seqs-with-human-random-subset.fasta
│ │ ├── test-reads-aligned.bam
│ │ ├── test-reads-human.bam
│ │ └── test-reads.bam
│ ├── TestDifficultSampleNames/
│ │ ├── RunInfo.xml
│ │ ├── SampleSheet-inline-commas-strings.csv
│ │ └── SampleSheet.csv
│ ├── TestFastaFetch/
│ │ ├── JQ610675.1.fa
│ │ ├── JQ610675.1.fasta
│ │ ├── JQ610676.1.fa
│ │ ├── JQ610676.1.fasta
│ │ ├── JQ610677.1.fa
│ │ ├── JQ610677.1.fasta
│ │ ├── JQ610678.1.fa
│ │ ├── JQ610678.1.fasta
│ │ ├── JQ610679.1.fa
│ │ ├── JQ610679.1.fasta
│ │ ├── JQ610680.1.fa
│ │ ├── JQ610680.1.fasta
│ │ ├── JQ610681.1.fa
│ │ ├── JQ610681.1.fasta
│ │ ├── JQ610682.1.fa
│ │ ├── JQ610682.1.fasta
│ │ ├── JQ610683.1.fa
│ │ ├── JQ610683.1.fasta
│ │ ├── JQ610684.1.fa
│ │ ├── JQ610684.1.fasta
│ │ ├── orungo.fa
│ │ └── orungo.fasta
│ ├── TestFastqBam/
│ │ ├── expected.fastq1
│ │ ├── expected.java1_7.sam
│ │ ├── expected.java1_8.sam
│ │ ├── expected.java1_8_v1.5.sam
│ │ ├── in1.fastq
│ │ ├── in2.fastq
│ │ └── inHeader.txt
│ ├── TestFeatureReader/
│ │ ├── GU481072.1.tbl
│ │ ├── GU481073.1.tbl
│ │ ├── KM821772.1.tbl
│ │ ├── KM821773.1.tbl
│ │ ├── LC889323.1.tbl
│ │ ├── NC_026438.1.tbl
│ │ ├── test1-S.tbl
│ │ └── test2-L.tbl
│ ├── TestFeatureTableFetch/
│ │ ├── JQ610675.1.table
│ │ ├── JQ610675.1.tbl
│ │ ├── JQ610676.1.table
│ │ ├── JQ610676.1.tbl
│ │ ├── JQ610677.1.table
│ │ ├── JQ610677.1.tbl
│ │ ├── JQ610678.1.table
│ │ ├── JQ610678.1.tbl
│ │ ├── JQ610679.1.table
│ │ ├── JQ610679.1.tbl
│ │ ├── JQ610680.1.table
│ │ ├── JQ610680.1.tbl
│ │ ├── JQ610681.1.table
│ │ ├── JQ610681.1.tbl
│ │ ├── JQ610682.1.table
│ │ ├── JQ610682.1.tbl
│ │ ├── JQ610683.1.table
│ │ ├── JQ610683.1.tbl
│ │ ├── JQ610684.1.table
│ │ ├── JQ610684.1.tbl
│ │ ├── orungo.table
│ │ └── orungo.tbl
│ ├── TestFeatureTransfer/
│ │ ├── adenovirus_truncated/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ ├── internal_partials/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ ├── lasv/
│ │ │ ├── expected/
│ │ │ │ ├── LASV_NGA_2018_0026-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0026-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0097-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0097-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0541-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0541-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0611-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0611-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0664-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0664-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0959-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0959-2.tbl
│ │ │ │ ├── LASV_NGA_2018_0998-1.tbl
│ │ │ │ ├── LASV_NGA_2018_0998-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1024-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1024-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1079-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1079-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1177-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1177-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1375-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1375-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1381-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1381-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1392-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1392-2.tbl
│ │ │ │ ├── LASV_NGA_2018_1643-1.tbl
│ │ │ │ ├── LASV_NGA_2018_1643-2.tbl
│ │ │ │ └── test.tbl
│ │ │ └── input/
│ │ │ ├── KM821997.1.tbl
│ │ │ ├── KM821998.1.tbl
│ │ │ ├── align_mafft-ref-lasv-ISTH2376_1.fasta
│ │ │ ├── align_mafft-ref-lasv-ISTH2376_2.fasta
│ │ │ └── ref-lasv-ISTH2376.fasta
│ │ ├── negative_strand_partial/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ ├── synthetic/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ ├── synthetic_ignore_ambig_edges/
│ │ │ ├── expected/
│ │ │ │ └── mapped.tbl
│ │ │ └── input/
│ │ │ ├── aligned_1.fasta
│ │ │ ├── ref.fasta
│ │ │ └── ref.tbl
│ │ └── synthetic_oob_clip/
│ │ ├── expected/
│ │ │ └── mapped.tbl
│ │ └── input/
│ │ ├── aligned_1.fasta
│ │ ├── ref.fasta
│ │ └── ref.tbl
│ ├── TestFilterLastal/
│ │ ├── expected.fastq
│ │ └── in.fastq
│ ├── TestGap2Seq/
│ │ └── expected.ebov.doublehit.gapfill.fasta
│ ├── TestGenbankRecordFetch/
│ │ ├── JQ610675.1.gb
│ │ ├── JQ610675.1.gbk
│ │ ├── JQ610676.1.gb
│ │ ├── JQ610676.1.gbk
│ │ ├── JQ610677.1.gb
│ │ ├── JQ610677.1.gbk
│ │ ├── JQ610678.1.gb
│ │ ├── JQ610678.1.gbk
│ │ ├── JQ610679.1.gb
│ │ ├── JQ610679.1.gbk
│ │ ├── JQ610680.1.gb
│ │ ├── JQ610680.1.gbk
│ │ ├── JQ610681.1.gb
│ │ ├── JQ610681.1.gbk
│ │ ├── JQ610682.1.gb
│ │ ├── JQ610682.1.gbk
│ │ ├── JQ610683.1.gb
│ │ ├── JQ610683.1.gbk
│ │ ├── JQ610684.1.gb
│ │ ├── JQ610684.1.gbk
│ │ ├── orungo.gb
│ │ └── orungo.gbk
│ ├── TestIlluminaBarcodeHelper/
│ │ ├── ambiguous/
│ │ │ ├── barcodes.txt
│ │ │ ├── expected.txt
│ │ │ └── metrics.txt
│ │ ├── few_assigned/
│ │ │ ├── barcodes.txt
│ │ │ └── metrics.txt
│ │ ├── one_correction/
│ │ │ ├── barcodes.txt
│ │ │ ├── expected.txt
│ │ │ └── metrics.txt
│ │ ├── single_index/
│ │ │ ├── barcodes.txt
│ │ │ ├── expected.txt
│ │ │ └── metrics.txt
│ │ └── single_index_i5_only/
│ │ ├── barcodes.txt
│ │ ├── expected.txt
│ │ └── metrics.txt
│ ├── TestIlluminaDir/
│ │ ├── bcl-indented.tgz
│ │ ├── bcl-plain.tar.bz2
│ │ ├── bcl-plain.tar.lz4
│ │ ├── bcl-plain.tgz
│ │ └── empty_dir/
│ │ └── Data/
│ │ └── Intensities/
│ │ └── BaseCalls/
│ │ └── README
│ ├── TestImputeFromReference/
│ │ ├── contigs.sub.ebov.fasta
│ │ ├── expected.hhv3.mummer.fasta
│ │ ├── expected.hhv3.muscle.fasta
│ │ ├── expected.sub.ebov.impute.fasta
│ │ ├── ref.sub.ebov.fasta
│ │ └── test.pseudo.fasta
│ ├── TestKMA/
│ │ └── ref.fasta
│ ├── TestKbPython/
│ │ ├── palmdb.corona.idx
│ │ ├── palmdb_clustered_t2g.txt
│ │ └── palmdb_rdrp_seqs.corona.fa
│ ├── TestKmers/
│ │ ├── ambig_bases.fasta
│ │ ├── filt.fasta
│ │ ├── palindromic_kmers.fasta
│ │ ├── simple.fasta
│ │ ├── simple.fasta.kmers.k4.txt
│ │ └── tcgaattt.fasta
│ ├── TestLastalDbBuild/
│ │ └── expected/
│ │ ├── TestLastalDbBuild.bck
│ │ ├── TestLastalDbBuild.des
│ │ ├── TestLastalDbBuild.prj
│ │ ├── TestLastalDbBuild.sds
│ │ ├── TestLastalDbBuild.ssp
│ │ ├── TestLastalDbBuild.suf.md5
│ │ └── TestLastalDbBuild.tis
│ ├── TestManualSnpCaller/
│ │ ├── indel.vcf.gz.tbi
│ │ └── output.fasta
│ ├── TestMetagenomicsSimple/
│ │ ├── db/
│ │ │ ├── library/
│ │ │ │ ├── Viruses/
│ │ │ │ │ ├── Bundibugyo_ebolavirus/
│ │ │ │ │ │ ├── GCF_000889155.1_ViralProj51245_genomic.fna
│ │ │ │ │ │ └── GCF_000889155.1_ViralProj51245_protein.faa
│ │ │ │ │ ├── Reston_ebolavirus/
│ │ │ │ │ │ ├── GCF_000854085.1_ViralProj15006_genomic.fna
│ │ │ │ │ │ └── GCF_000854085.1_ViralProj15006_protein.faa
│ │ │ │ │ ├── Sudan_ebolavirus/
│ │ │ │ │ │ ├── GCF_000855585.1_ViralProj15012_genomic.fna
│ │ │ │ │ │ └── GCF_000855585.1_ViralProj15012_protein.faa
│ │ │ │ │ ├── Tai_Forest_ebolavirus/
│ │ │ │ │ │ ├── GCF_000888475.1_ViralProj51257_genomic.fna
│ │ │ │ │ │ └── GCF_000888475.1_ViralProj51257_protein.faa
│ │ │ │ │ └── Zaire_ebolavirus/
│ │ │ │ │ ├── GCF_000848505.1_ViralProj14703_genomic.fna
│ │ │ │ │ └── GCF_000848505.1_ViralProj14703_protein.faa
│ │ │ │ └── prelim_map.txt
│ │ │ └── taxonomy/
│ │ │ ├── accession2taxid/
│ │ │ │ ├── nucl_est.accession2taxid
│ │ │ │ ├── nucl_gb.accession2taxid
│ │ │ │ ├── nucl_gss.accession2taxid
│ │ │ │ ├── nucl_wgs.accession2taxid
│ │ │ │ ├── pdb.accession2taxid
│ │ │ │ └── prot.accession2taxid
│ │ │ ├── delnodes.dmp
│ │ │ ├── gi_taxid_nucl.dmp
│ │ │ ├── gi_taxid_prot.dmp
│ │ │ ├── merged.dmp
│ │ │ ├── names.dmp
│ │ │ └── nodes.dmp
│ │ ├── test-reads.bam
│ │ ├── zaire_ebola.1.fastq
│ │ ├── zaire_ebola.2.fastq
│ │ └── zaire_ebola.bam
│ ├── TestMetagenomicsViralMix/
│ │ ├── db/
│ │ │ ├── library/
│ │ │ │ ├── Viruses/
│ │ │ │ │ ├── Enterovirus_C/
│ │ │ │ │ │ ├── GCF_000861165.1_ViralProj15288_genomic.fna
│ │ │ │ │ │ └── GCF_000861165.1_ViralProj15288_protein.faa
│ │ │ │ │ ├── Hepatitis_C_virus/
│ │ │ │ │ │ ├── GCF_000861845.1_ViralProj15432_genomic.fna
│ │ │ │ │ │ └── GCF_000861845.1_ViralProj15432_protein.faa
│ │ │ │ │ ├── Tomato_mosaic_virus/
│ │ │ │ │ │ ├── GCF_000853705.1_ViralProj14926_genomic.fna
│ │ │ │ │ │ └── GCF_000853705.1_ViralProj14926_protein.faa
│ │ │ │ │ └── partial_pan-viral-9seqs-with-human-random-subset.fna
│ │ │ │ └── prelim_map.txt
│ │ │ └── taxonomy/
│ │ │ ├── accession2taxid/
│ │ │ │ ├── nucl_est.accession2taxid
│ │ │ │ ├── nucl_gb.accession2taxid
│ │ │ │ ├── nucl_gss.accession2taxid
│ │ │ │ ├── nucl_wgs.accession2taxid
│ │ │ │ ├── pdb.accession2taxid
│ │ │ │ └── prot.accession2taxid
│ │ │ ├── delnodes.dmp
│ │ │ ├── gi_taxid_nucl.dmp
│ │ │ ├── gi_taxid_prot.dmp
│ │ │ ├── merged.dmp
│ │ │ ├── names.dmp
│ │ │ └── nodes.dmp
│ │ └── test-reads.bam
│ ├── TestMinimap2Idxstats/
│ │ ├── multi-viral-reads.bam
│ │ └── multi-viral-refs.fasta
│ ├── TestMiseqToBam/
│ │ ├── RunInfo.xml
│ │ └── SampleSheet.csv
│ ├── TestMvicuna/
│ │ ├── expected_pairedOut.1.fastq
│ │ ├── expected_pairedOut.2.fastq
│ │ ├── expected_unpairedOut.fastq
│ │ ├── in.1.fastq
│ │ └── in.2.fastq
│ ├── TestOrderAndOrient/
│ │ ├── contig.mummer3_fail_lasv.fasta
│ │ ├── contigs.ebov.doublehit.fasta
│ │ ├── contigs.ebov.fasta
│ │ ├── contigs.hhv3.fasta
│ │ ├── contigs.hiv.big_indel.fasta
│ │ ├── contigs.hiv.wrapped.fasta
│ │ ├── contigs.influenza.fasta
│ │ ├── contigs.lasv.fasta
│ │ ├── contigs.lasv.one_small.fasta
│ │ ├── expected.ebov.ambig.fasta
│ │ ├── expected.ebov.doublehit.fasta
│ │ ├── expected.ebov.small.fasta
│ │ ├── expected.hhv3.fasta
│ │ ├── expected.hiv.big_indel.alternates.fasta
│ │ ├── expected.hiv.big_indel.fasta
│ │ ├── expected.hiv.wrapped.fasta
│ │ ├── expected.influenza.fasta
│ │ ├── expected.lasv.ambig.fasta
│ │ ├── expected.lasv.fasta
│ │ ├── expected.lasv.promer.fasta
│ │ ├── expected.refsel.ebov.stats.tsv
│ │ ├── expected.refsel.lasv.stats.tsv
│ │ ├── ref.ebov.gin.fasta
│ │ ├── ref.ebov.lbr.fasta
│ │ ├── ref.ebov.makona_C15.fasta
│ │ ├── ref.ebov.sle.fasta
│ │ ├── ref.ebov.small.fasta
│ │ ├── ref.hhv3.fasta
│ │ ├── ref.hiv.fasta
│ │ ├── ref.influenza.fasta
│ │ ├── ref.lasv.BNI_Nig08_A19.fasta
│ │ ├── ref.lasv.ISTH2376.fasta
│ │ ├── ref.lasv.KGH_G502.fasta
│ │ ├── ref.lasv.nomatch.fasta
│ │ ├── ref.lasv.pinneo.fasta
│ │ └── refs.ebov.fasta
│ ├── TestOrderOrientAndImputeFromReference/
│ │ ├── contigs.influenza.fasta
│ │ ├── expected.influenza.impute.mafft.fasta
│ │ ├── expected.influenza.impute.mummer.fasta
│ │ ├── expected.influenza.impute.muscle.fasta
│ │ └── ref.influenza_partial.fasta
│ ├── TestPerSample/
│ │ ├── in.2libs.bam
│ │ ├── in.2libs3rgs.bam
│ │ ├── in.3libs.bam
│ │ ├── in.bam
│ │ ├── in.indels.bam
│ │ ├── in.oneunmapped.bam
│ │ ├── ref.fasta
│ │ ├── ref.fasta.fai
│ │ ├── ref.indels.fasta
│ │ ├── ref.indels.fasta.fai
│ │ ├── vphaser_one_sample_2libs_expected.txt
│ │ ├── vphaser_one_sample_3libs_expected.txt
│ │ ├── vphaser_one_sample_expected.txt
│ │ └── vphaser_one_sample_indels_expected.txt
│ ├── TestPurgeUnmated/
│ │ ├── expected1.fastq
│ │ ├── expected2.fastq
│ │ ├── in1.fastq
│ │ ├── in2.fastq
│ │ ├── in_sra1.fastq
│ │ └── in_sra2.fastq
│ ├── TestRefineAssembly/
│ │ ├── expected.ebov.refine1.fasta
│ │ ├── expected.ebov.refine1.freebayes.fasta
│ │ ├── expected.ebov.refine1.new.fasta
│ │ ├── expected.ebov.refine2.fasta
│ │ ├── expected.ebov.refine2.freebayes.fasta
│ │ └── impute.ebov.fasta
│ ├── TestRmdupUnaligned/
│ │ ├── expected.bam
│ │ └── input.bam
│ ├── TestRunInfo/
│ │ ├── RunInfo-hiseq.xml
│ │ ├── RunInfo-miseq.xml
│ │ ├── RunInfo-nextseq-1000-2000-p1.xml
│ │ ├── RunInfo-nextseq550.xml
│ │ ├── RunInfo-novaseq-x-plus.xml
│ │ ├── RunInfo-novaseq.xml
│ │ ├── RunInfo-novel-fcid-and-tilecount.xml
│ │ ├── RunInfo-novel-fcid.xml
│ │ └── RunInfo-novel-tile-count.xml
│ ├── TestSampleSheet/
│ │ ├── SampleSheet-AEHWY-subset.csv
│ │ ├── SampleSheet-custom-1.txt
│ │ ├── SampleSheet-custom-1_macos9-endings.txt
│ │ ├── SampleSheet-custom-1_win-endings.txt
│ │ ├── SampleSheet-custom-2.txt
│ │ ├── SampleSheet-custom-2_win-endings.tsv
│ │ ├── SampleSheet-custom-inner-barcodes-outer-collapse.tsv
│ │ ├── SampleSheet-hiseq-1.csv
│ │ ├── SampleSheet-in-Broad-MiSeq-Format_with_Picard_Block.csv
│ │ ├── SampleSheet-miseq-1.csv
│ │ ├── SampleSheet-submit-1.csv
│ │ ├── SampleSheet-submit-2.csv
│ │ ├── SampleSheet-submit-3.csv
│ │ └── SampleSheet-with-blanklines.csv
│ ├── TestSkaniReferenceSelection/
│ │ ├── RVA_DQ473496.1_Rhinovirus_A49.fa
│ │ ├── RVA_DQ473498.1_Rhinovirus_A10.fa
│ │ ├── RVA_DQ473499.1_Human_rhinovirus_A44.fa
│ │ ├── RVA_DQ473501.1_Rhinovirus_A34.fa
│ │ ├── RVA_DQ473507.1_Rhinovirus_A53.fa
│ │ ├── RVA_FJ445116.1_Human_rhinovirus_13_strain_ATCC_VR-1123.fa
│ │ ├── RVA_FJ445140.1_Human_rhinovirus_56_strain_ATCC_VR-1166.fa
│ │ ├── RVA_FJ445177.1_Human_rhinovirus_9_strain_ATCC_VR-489.fa
│ │ ├── RVA_GQ223229.1_Human_rhinovirus_A_isolate_N13.fa
│ │ ├── RVA_L24917.1_Human_rhinovirus_type_16_polyprotein_gene.fa
│ │ └── USA-MA-Broad_BWH-19947-2023.l000013249603_C5.HTKJ7DRX3.1.acellular.dedup.assembly1-spades.fasta
│ ├── TestSnpEff/
│ │ ├── RBV16.fasta
│ │ ├── ann_eff.vcf.gz.tbi
│ │ ├── merged.vcf.gz.tbi
│ │ ├── msa.fasta
│ │ └── ref-rabies-JQ685920.fasta
│ ├── TestSplitReads/
│ │ ├── expected.fasta.01
│ │ ├── expected.fasta.02
│ │ ├── expected.fastq.01
│ │ ├── expected.fastq.02
│ │ ├── expected.fastq.03
│ │ ├── expected.fastq.1
│ │ ├── expected.fastq.2
│ │ ├── in.fasta
│ │ └── in.fastq
│ ├── TestSplitcodeDemuxFastqs/
│ │ ├── RunInfo.xml
│ │ ├── SampleSheet.csv
│ │ ├── samples_3bc.tsv
│ │ ├── samples_3bc_i5_rc_with_n.tsv
│ │ └── samples_3bc_i5_revcomp.tsv
│ ├── TestSplitcodeDemuxIntegration/
│ │ ├── RunInfo.xml
│ │ └── SampleSheet.tsv
│ ├── TestSplitcodeLookupTable/
│ │ ├── AAAAAAAA-TTTTTTTT.lLibA_summary.json
│ │ ├── ATCGATCG-GCTAGCTA.lB1_summary.json
│ │ ├── ATCGATCG-GCTAGCTA.lL1_summary.json
│ │ ├── GGGGGGGG-CCCCCCCC.lLibB_summary.json
│ │ ├── TTTTAAAA-CCCCGGGG.lB2_summary.json
│ │ ├── sample_sheet_basic.tsv
│ │ ├── sample_sheet_multi_pool.tsv
│ │ ├── sample_sheet_unique_lib_ids.tsv
│ │ └── sample_sheet_zero_reads.tsv
│ ├── TestTarballMerger/
│ │ ├── mixed-compressed-input/
│ │ │ ├── file2.tar.lz4
│ │ │ ├── file3.tar.zst
│ │ │ └── file4.tar.bz2
│ │ └── raw-input/
│ │ ├── file1
│ │ ├── file2
│ │ ├── file3
│ │ └── file4
│ ├── TestTaxonomy/
│ │ └── simple.m8
│ ├── TestToolKrakenExecute/
│ │ ├── empty-report.txt
│ │ ├── expected-kraken-mix.reads.txt
│ │ └── expected-kraken-mix.report.txt
│ ├── TestToolNovoalign/
│ │ └── ebov_reads.bam
│ ├── TestToolPicard/
│ │ ├── in.dict
│ │ ├── in.fasta
│ │ ├── messy-headers.fasta
│ │ └── simple.sam
│ ├── TestToolSamtools/
│ │ ├── in.fasta
│ │ ├── in.fasta.fai
│ │ ├── indel_cigar.sam
│ │ └── simple.sam
│ ├── TestTrimRmdupSubsamp/
│ │ └── clipDb.fasta
│ ├── TestTrimmomatic/
│ │ ├── clip.fasta
│ │ ├── empty.fastq
│ │ ├── expected1.fastq
│ │ ├── expected1.maxinfo.fastq
│ │ ├── expected2.fastq
│ │ ├── expected2.maxinfo.fastq
│ │ ├── in1.fastq
│ │ └── in2.fastq
│ ├── TestTsvJoin/
│ │ ├── expected-out.txt
│ │ ├── tab-1.txt
│ │ └── tab-2.txt
│ ├── TestUtilMisc/
│ │ ├── cfg1.yaml
│ │ ├── cfg2.yaml
│ │ ├── cfg_std.yaml
│ │ └── empty.yaml
│ ├── TestVPhaser2/
│ │ ├── expected.cpickle
│ │ ├── in.bam
│ │ └── in.bam.bai
│ ├── almost-empty-2.bam
│ ├── almost-empty.bam
│ ├── broken.bam
│ ├── ebola.fasta
│ ├── ebola.fasta.bz2
│ ├── ebola.fasta.lz4
│ ├── ebola.fasta.zst
│ ├── ebov-makona.fasta
│ ├── empty.bam
│ ├── empty.fasta
│ ├── one_gene.vcf.gz.tbi
│ ├── ref.lasv.fasta
│ └── s3/
│ └── sabeti-public-dbs/
│ ├── blast/
│ │ ├── hybsel_probe_adapters.fasta
│ │ ├── hybsel_probe_adapters.nhr
│ │ ├── hybsel_probe_adapters.nin
│ │ ├── hybsel_probe_adapters.nsq
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.dict
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.fasta.fai
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.nhr
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.nin
│ │ ├── metag_v3.ncRNA.mRNA.mitRNA.consensus.nix
│ │ └── metag_v3.ncRNA.mRNA.mitRNA.consensus.nsq
│ ├── bmtagger/
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.bitmask
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.amp
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.idx
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.imp
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.pmp
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.rmp
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.ss
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.ssa
│ │ ├── GRCh37.68_ncRNA-GRCh37.68_transcripts-HS_rRNA_mitRNA.srprism.ssd
│ │ ├── hg19.bitmask
│ │ ├── hg19.srprism.amp
│ │ ├── hg19.srprism.idx
│ │ ├── hg19.srprism.imp
│ │ ├── hg19.srprism.pmp
│ │ ├── hg19.srprism.rmp
│ │ ├── hg19.srprism.ss
│ │ ├── hg19.srprism.ssa
│ │ ├── hg19.srprism.ssd
│ │ ├── metagenomics_contaminants_v3.bitmask
│ │ ├── metagenomics_contaminants_v3.readme.txt
│ │ ├── metagenomics_contaminants_v3.srprism.amp
│ │ ├── metagenomics_contaminants_v3.srprism.idx
│ │ ├── metagenomics_contaminants_v3.srprism.imp
│ │ ├── metagenomics_contaminants_v3.srprism.pmp
│ │ ├── metagenomics_contaminants_v3.srprism.rmp
│ │ ├── metagenomics_contaminants_v3.srprism.ss
│ │ ├── metagenomics_contaminants_v3.srprism.ssa
│ │ └── metagenomics_contaminants_v3.srprism.ssd
│ ├── bwa/
│ │ ├── hg19.amb
│ │ ├── hg19.ann
│ │ ├── hg19.bwt
│ │ ├── hg19.pac
│ │ └── hg19.sa
│ ├── kaiju/
│ │ └── nr/
│ │ └── nr.fmi
│ ├── krakenuniq/
│ │ ├── database.idx
│ │ ├── database.jdb
│ │ ├── database.kdb
│ │ └── lca.complete
│ ├── krona/
│ │ └── taxonomy.tab
│ ├── rna_bwa/
│ │ ├── human_viral_rrna.amb
│ │ ├── human_viral_rrna.ann
│ │ ├── human_viral_rrna.bwt
│ │ ├── human_viral_rrna.pac
│ │ └── human_viral_rrna.sa
│ ├── spikeins/
│ │ └── ercc_spike-ins.fasta
│ ├── taxonomy/
│ │ ├── merged.dmp
│ │ ├── names.dmp
│ │ └── nodes.dmp
│ └── trim_clip/
│ └── contaminants.fasta
└── unit/
├── assemble/
│ ├── test_assembly.py
│ ├── test_assembly_integration.py
│ └── test_util_vcf.py
├── classify/
│ ├── __init__.py
│ ├── fixtures.py
│ ├── test_integration_kb.py
│ ├── test_integration_kraken2.py
│ ├── test_integration_taxon_filter.py
│ ├── test_kmer_utils.py
│ ├── test_metagenomics.py
│ ├── test_taxon_filter.py
│ ├── test_taxonomy.py
│ ├── test_tools_kb_python.py
│ ├── test_tools_kma.py
│ └── test_tools_krona.py
├── core/
│ ├── test_conftest.py
│ ├── test_file_utils.py
│ ├── test_illumina.py
│ ├── test_read_utils.py
│ ├── test_tools.py
│ ├── test_tools_bbmap.py
│ ├── test_tools_bwa.py
│ ├── test_tools_fastqc.py
│ ├── test_tools_minimap2.py
│ ├── test_tools_novoalign.py
│ ├── test_tools_picard.py
│ ├── test_tools_sambamba.py
│ ├── test_tools_samtools.py
│ ├── test_tools_splitcode.py
│ ├── test_tools_trimmomatic.py
│ ├── test_util_file.py
│ └── test_util_misc.py
└── phylo/
├── __init__.py
├── test_interhost.py
├── test_intrahost.py
├── test_ncbi.py
├── test_tools.py
├── test_tools_vphaser2.py
└── test_util_vcf.py
SYMBOL INDEX (2098 symbols across 106 files)
FILE: .agents/skills/regression-testing/compare_sample_pair.py
function gcloud_cat (line 29) | def gcloud_cat(gcs_uri):
function gcloud_cp (line 40) | def gcloud_cp(gcs_uri, local_path):
function parse_tsv (line 50) | def parse_tsv(content):
function parse_fasta (line 80) | def parse_fasta(content):
function run_mafft_pair (line 101) | def run_mafft_pair(old_seq, new_seq, work_dir, pair_id='0'):
function align_and_analyze_fastas (line 123) | def align_and_analyze_fastas(old_fasta_path, new_fasta_path, work_dir):
function analyze_alignment_seqs (line 210) | def analyze_alignment_seqs(old_seq_str, new_seq_str):
function analyze_alignment (line 346) | def analyze_alignment(aligned_fasta_path):
function compare_assembly (line 355) | def compare_assembly(assembly_id, old_row, new_row, work_dir):
function main (line 442) | def main():
FILE: .agents/skills/regression-testing/discover_pairs.py
function gcloud_ls (line 25) | def gcloud_ls(path):
function find_tsv_in_call_dir (line 41) | def find_tsv_in_call_dir(call_dir_uri):
function discover_submission_tsvs (line 73) | def discover_submission_tsvs(bucket, submission_id):
function main (line 103) | def main():
FILE: .agents/skills/regression-testing/generate_report.py
function get_deps (line 16) | def get_deps():
function load_results (line 24) | def load_results(results_dir):
function build_comparison_table (line 36) | def build_comparison_table(results, workspace_name):
function build_sample_summary (line 90) | def build_sample_summary(results):
function generate_plots (line 106) | def generate_plots(df, plot_dir):
function generate_markdown_report (line 267) | def generate_markdown_report(df, sample_df, workspace_name, report_dir, ...
function main (line 407) | def main():
FILE: docker/scripts/calc_mem.py
function available_cpu_count (line 34) | def available_cpu_count():
function mem_from_proc_meminfo (line 93) | def mem_from_proc_meminfo():
function mem_from_cgroups (line 104) | def mem_from_cgroups():
function mem_from_psutil (line 124) | def mem_from_psutil(metric_name="total"):
FILE: docs/conf.py
function _git_version (line 44) | def _git_version():
FILE: src/viral_ngs/assemble/freebayes.py
class FreeBayesTool (line 23) | class FreeBayesTool(Tool):
method __init__ (line 25) | def __init__(self):
method call (line 29) | def call(self, inBam, refFasta, outVcf, options=None):
FILE: src/viral_ngs/assemble/gap2seq.py
class Gap2SeqTool (line 28) | class Gap2SeqTool(viral_ngs.core.Tool):
method __init__ (line 31) | def __init__(self, install_methods=None):
method version (line 36) | def version(self):
method execute (line 39) | def execute(self, args): # pylint: disable=W0221
method _run_gap2seq (line 44) | def _run_gap2seq(self, reads, scaffolds, filled, *args, **kwargs):
method gapfill (line 59) | def gapfill(self, in_scaffold, in_bam, out_scaffold, solid_kmer_thresh...
FILE: src/viral_ngs/assemble/mafft.py
class MafftTool (line 23) | class MafftTool(viral_ngs.core.Tool):
method __init__ (line 25) | def __init__(self, install_methods=None):
method version (line 30) | def version(self):
method _get_tool_version (line 35) | def _get_tool_version(self):
method __seqIdsAreAllUnique (line 38) | def __seqIdsAreAllUnique(self, filePath, inputFormat="fasta"):
method execute (line 53) | def execute(
FILE: src/viral_ngs/assemble/mummer.py
class MummerTool (line 22) | class MummerTool(viral_ngs.core.Tool):
method __init__ (line 24) | def __init__(self, install_methods=None):
method version (line 29) | def version(self):
method _get_tool_version (line 34) | def _get_tool_version(self):
method executable_path (line 37) | def executable_path(self):
method execute (line 44) | def execute(self, refFasta, qryFastas):
method nucmer (line 50) | def nucmer(self, refFasta, qryFasta, outDelta, extend=None, breaklen=N...
method promer (line 75) | def promer(self, refFasta, qryFasta, outDelta, extend=None, breaklen=N...
method delta_filter (line 100) | def delta_filter(self, inDelta, outDelta):
method show_tiling (line 107) | def show_tiling(self, inDelta, outTiling, outFasta=None,
method trim_contigs (line 136) | def trim_contigs(self, refFasta, contigsFasta, outFasta,
method scaffold_contigs (line 182) | def scaffold_contigs(self, refFasta, contigsFasta, outFasta,
method scaffold_contigs_custom (line 210) | def scaffold_contigs_custom(self, refFasta, contigsFasta, outFasta,
method align_one_to_one (line 341) | def align_one_to_one(self, refFasta, otherFasta, outFasta):
function contig_chooser (line 378) | def contig_chooser(alt_seqs, ref_len, coords_debug=""):
class AmbiguousAlignmentException (line 454) | class AmbiguousAlignmentException(Exception):
class AlignsReader (line 457) | class AlignsReader(object):
method __init__ (line 460) | def __init__(self, aligns_file, ref_fasta=None):
method _load_align (line 469) | def _load_align(self):
method _load_fastas (line 525) | def _load_fastas(self):
method get_alignments (line 529) | def get_alignments(self):
method get_intervals (line 533) | def get_intervals(self):
method _dummy_row (line 554) | def _dummy_row(self, start, stop, filler='N'):
method get_ref_seq (line 559) | def get_ref_seq(self, start, stop):
method retrieve_alts_by_ref (line 566) | def retrieve_alts_by_ref(self, start, stop, aln_start=None, aln_stop=N...
method _aln_to_alt_seq (line 597) | def _aln_to_alt_seq(self, aln, start, stop):
FILE: src/viral_ngs/assemble/muscle.py
class MuscleTool (line 22) | class MuscleTool(viral_ngs.core.Tool):
method __init__ (line 24) | def __init__(self, install_methods=None):
method version (line 29) | def version(self):
method _get_tool_version (line 34) | def _get_tool_version(self):
method execute (line 38) | def execute(
FILE: src/viral_ngs/assemble/rasusa.py
class RasusaTool (line 19) | class RasusaTool(Tool):
method __init__ (line 21) | def __init__(self):
method downsample_bam (line 25) | def downsample_bam(self, inBam, outBam, coverage, seed=None):
FILE: src/viral_ngs/assemble/skani.py
class UndirectedGraph (line 31) | class UndirectedGraph:
method __init__ (line 34) | def __init__(self):
method add_node (line 37) | def add_node(self, node):
method add_edge (line 40) | def add_edge(self, node1, node2):
method _dfs (line 44) | def _dfs(self, node, visited):
method get_clusters (line 53) | def get_clusters(self):
class SkaniTool (line 62) | class SkaniTool(viral_ngs.core.Tool):
method __init__ (line 64) | def __init__(self, install_methods=None):
method version (line 69) | def version(self):
method _get_tool_version (line 74) | def _get_tool_version(self):
method _is_fasta_basically_empty (line 77) | def _is_fasta_basically_empty(self, inFasta, min_length=500):
method _sort_skani_table_by_product (line 87) | def _sort_skani_table_by_product(self, in_tsv, out_tsv):
method execute (line 106) | def execute(self, subcommand, args, outfile, threads=None):
method triangle (line 120) | def triangle(self, ref_fastas, outfile_ani, other_args = (), threads=N...
method dist (line 125) | def dist(self, query_fasta, ref_fastas, outfile, other_args = (), thre...
method find_reference_clusters (line 136) | def find_reference_clusters(self, ref_fastas,
method find_closest_reference (line 159) | def find_closest_reference(self, contigs_fasta, ref_fastas, out_file,
FILE: src/viral_ngs/assemble/spades.py
class SpadesTool (line 26) | class SpadesTool(viral_ngs.core.Tool):
method __init__ (line 29) | def __init__(self, install_methods=None):
method version (line 34) | def version(self):
method _get_tool_version (line 39) | def _get_tool_version(self):
method execute (line 42) | def execute(self, args): # pylint: disable=W0221
method assemble (line 47) | def assemble(self, reads_fwd, reads_bwd, contigs_out, reads_unpaired=N...
FILE: src/viral_ngs/assemble/vcf.py
function make_intervals (line 17) | def make_intervals(i, n, fasta, chr_prefix='', verbose=False):
function sliding_windows (line 58) | def sliding_windows(fasta, width, offset, chr_prefix=''):
class GenomePosition (line 74) | class GenomePosition(object):
method __init__ (line 80) | def __init__(self, seqDb):
method get_gpos (line 92) | def get_gpos(self, c, p):
method get_chr_pos (line 98) | def get_chr_pos(self, gpos):
function parse_contig_header (line 109) | def parse_contig_header(line):
function get_chrlens (line 127) | def get_chrlens(inFile):
function calc_maf (line 162) | def calc_maf(genos, ancestral=None, ploidy=1):
class TabixReader (line 200) | class TabixReader(pysam.TabixFile):
method __init__ (line 209) | def __init__(self, inFile, parser=pysam.asTuple()):
method __enter__ (line 225) | def __enter__(self):
method __exit__ (line 228) | def __exit__(self, exc_type, exc_val, exc_tb):
method chroms (line 236) | def chroms(self):
method get (line 239) | def get(self, chrom=None, start=None, stop=None, region=None):
function get_pos_from_vcf_record (line 245) | def get_pos_from_vcf_record(vcfrec):
function bytes_to_string (line 250) | def bytes_to_string(o):
class VcfReader (line 256) | class VcfReader(TabixReader):
method __init__ (line 264) | def __init__(self, inFile, ploidy=1, parser=pysam.asVCF()):
method samples (line 285) | def samples(self):
method chrlens (line 288) | def chrlens(self):
method get_positions (line 291) | def get_positions(self, c=None, start=None, stop=None, region=None):
method get_range (line 296) | def get_range(self, c=None, start=None, stop=None, region=None, as_str...
method get_snp_genos (line 328) | def get_snp_genos(self, c, p, as_strings=True):
method getFullSequences (line 337) | def getFullSequences(self, c, start, stop, samples,
function replaceAlleles (line 397) | def replaceAlleles(sample, seq, vcf_records):
FILE: src/viral_ngs/assemble/wgsim.py
class WgsimTool (line 25) | class WgsimTool(viral_ngs.core.Tool):
method __init__ (line 27) | def __init__(self, install_methods=None):
method version (line 32) | def version(self):
method _get_tool_version (line 37) | def _get_tool_version(self):
method slice_fasta (line 56) | def slice_fasta(self, in_fasta, out_fasta, seq_id=None, start=None, en...
method coverage_to_read_pairs (line 94) | def coverage_to_read_pairs(self, coverage, sequence_length, read_length):
method fastqs_to_bam (line 110) | def fastqs_to_bam(self, in_fastq1, in_fastq2, out_bam, sample_name='sa...
method execute (line 133) | def execute(self, in_fasta, out_fastq1, out_fastq2,
FILE: src/viral_ngs/assembly.py
class DenovoAssemblyError (line 55) | class DenovoAssemblyError(RuntimeError):
method __init__ (line 60) | def __init__(self, reason):
function assemble_spades (line 64) | def assemble_spades(
function parser_assemble_spades (line 108) | def parser_assemble_spades(parser=argparse.ArgumentParser()):
function gapfill_gap2seq (line 137) | def gapfill_gap2seq(in_scaffold, in_bam, out_scaffold, threads=None, mem...
function parser_gapfill_gap2seq (line 151) | def parser_gapfill_gap2seq(parser=argparse.ArgumentParser(description='C...
function cluster_references_ani (line 171) | def cluster_references_ani(inRefs, outClusters, m=15, s=50, c=10, min_af...
function parser_cluster_references_ani (line 180) | def parser_cluster_references_ani(parser=argparse.ArgumentParser(descrip...
function skani_contigs_to_refs (line 194) | def skani_contigs_to_refs(inContigs, inRefs, out_skani_dist, out_skani_d...
function parser_skani_contigs_to_refs (line 222) | def parser_skani_contigs_to_refs(parser=argparse.ArgumentParser(descript...
function _order_and_orient_orig (line 241) | def _order_and_orient_orig(inFasta, inReference, outFasta,
function _call_order_and_orient_orig (line 282) | def _call_order_and_orient_orig(inReference, outFasta, outAlternateConti...
function order_and_orient (line 285) | def order_and_orient(inFasta, inReference, outFasta,
function parser_order_and_orient (line 360) | def parser_order_and_orient(parser=argparse.ArgumentParser()):
class IncompleteAssemblyError (line 472) | class IncompleteAssemblyError(Exception):
method __init__ (line 473) | def __init__(self, actual_n, expected_n):
class PoorAssemblyError (line 479) | class PoorAssemblyError(Exception):
method __init__ (line 480) | def __init__(self, chr_idx, seq_len, non_n_count, min_length, segment_...
function impute_from_reference (line 488) | def impute_from_reference(
function parser_impute_from_reference (line 614) | def parser_impute_from_reference(parser=argparse.ArgumentParser()):
function refine_assembly (line 663) | def refine_assembly(
function parser_refine_assembly (line 801) | def parser_refine_assembly(parser=argparse.ArgumentParser()):
function normalize_coverage (line 892) | def normalize_coverage(inBam, outBam, max_coverage, seed=None, threads=N...
function parser_normalize_coverage (line 907) | def parser_normalize_coverage(parser=argparse.ArgumentParser()):
function parser_filter_short_seqs (line 925) | def parser_filter_short_seqs(parser=argparse.ArgumentParser()):
function main_filter_short_seqs (line 941) | def main_filter_short_seqs(args):
function parser_modify_contig (line 959) | def parser_modify_contig(parser=argparse.ArgumentParser()):
function main_modify_contig (line 1040) | def main_modify_contig(args):
class ContigModifier (line 1092) | class ContigModifier(object):
method __init__ (line 1098) | def __init__(self, ref, consensus):
method get_stripped_consensus (line 1105) | def get_stripped_consensus(self):
method call_reference_ns (line 1108) | def call_reference_ns(self):
method call_reference_ambiguous (line 1114) | def call_reference_ambiguous(self):
method trim_ends (line 1121) | def trim_ends(self):
method replace_end_gaps (line 1131) | def replace_end_gaps(self):
method replace_5ends (line 1140) | def replace_5ends(self, replace_length):
method replace_3ends (line 1154) | def replace_3ends(self, replace_length):
method remove_end_ns (line 1165) | def remove_end_ns(self):
class MutableSequence (line 1178) | class MutableSequence(object):
method __init__ (line 1180) | def __init__(self, name, start, stop, init_seq=None):
method modify (line 1194) | def modify(self, p, new_base):
method replace (line 1200) | def replace(self, start, stop, new_seq):
method __change__ (line 1205) | def __change__(self, start, stop, new_seq):
method replay_deletions (line 1224) | def replay_deletions(self):
method emit (line 1228) | def emit(self):
function alleles_to_ambiguity (line 1232) | def alleles_to_ambiguity(allelelist):
function vcfrow_parse_and_call_snps (line 1246) | def vcfrow_parse_and_call_snps(vcfrow, samples, min_dp=0, major_cutoff=0...
function vcf_to_seqs (line 1302) | def vcf_to_seqs(vcfIter, chrlens, samples, min_dp=0, major_cutoff=0.5, m...
function parser_vcf_to_fasta (line 1348) | def parser_vcf_to_fasta(parser=argparse.ArgumentParser()):
function main_vcf_to_fasta (line 1401) | def main_vcf_to_fasta(args):
function parser_trim_fasta (line 1449) | def parser_trim_fasta(parser=argparse.ArgumentParser()):
function trim_fasta (line 1457) | def trim_fasta(inFasta, outFasta):
function deambig_base (line 1473) | def deambig_base(base):
function deambig_fasta (line 1479) | def deambig_fasta(inFasta, outFasta):
function parser_deambig_fasta (line 1492) | def parser_deambig_fasta(parser=argparse.ArgumentParser()):
function vcf_dpdiff (line 1503) | def vcf_dpdiff(vcfs):
function parser_dpdiff (line 1521) | def parser_dpdiff(parser=argparse.ArgumentParser()):
function dpdiff (line 1529) | def dpdiff(inVcfs, outFile):
function alignment_summary (line 1546) | def alignment_summary(inFastaFileOne, inFastaFileTwo, outfileName=None, ...
function parser_alignment_summary (line 1653) | def parser_alignment_summary(parser=argparse.ArgumentParser()):
function simulate_illumina_reads (line 1664) | def simulate_illumina_reads(
function parser_simulate_illumina_reads (line 1798) | def parser_simulate_illumina_reads(parser=argparse.ArgumentParser()):
function full_parser (line 1823) | def full_parser():
function main (line 1827) | def main():
FILE: src/viral_ngs/broad_utils.py
function get_json_from_picard (line 25) | def get_json_from_picard(picardDir):
function get_run_date (line 35) | def get_run_date(jsonfile):
function get_bustard_dir (line 40) | def get_bustard_dir(jsonfile):
function parser_get_bustard_dir (line 46) | def parser_get_bustard_dir(parser=argparse.ArgumentParser()):
function main_get_bustard_dir (line 51) | def main_get_bustard_dir(args):
function parser_get_run_date (line 58) | def parser_get_run_date(parser=argparse.ArgumentParser()):
function main_get_run_date (line 63) | def main_get_run_date(args):
function iterate_wells (line 74) | def iterate_wells(runfile):
function get_all_samples (line 80) | def get_all_samples(runfile):
function get_all_libraries (line 84) | def get_all_libraries(runfile):
function get_run_id (line 89) | def get_run_id(well):
function get_all_runs (line 98) | def get_all_runs(runfile):
function parser_get_all_names (line 103) | def parser_get_all_names(parser=argparse.ArgumentParser()):
function main_get_all_names (line 109) | def main_get_all_names(args):
function full_parser (line 124) | def full_parser():
function main (line 128) | def main():
FILE: src/viral_ngs/classify/blast.py
class BlastTools (line 27) | class BlastTools(core.Tool):
method __init__ (line 31) | def __init__(self, install_methods=None):
method execute (line 42) | def execute(self, *args):
class BlastnTool (line 48) | class BlastnTool(BlastTools):
method get_hits_pipe (line 52) | def get_hits_pipe(self, inPipe, db, threads=None, task=None, outfmt='6...
method get_hits_bam (line 106) | def get_hits_bam(self, inBam, db, threads=None):
method get_hits_fasta (line 112) | def get_hits_fasta(self, inFasta, db, threads=None, task=None, outfmt=...
class MakeblastdbTool (line 119) | class MakeblastdbTool(BlastTools):
method build_database (line 123) | def build_database(self, fasta_files, database_prefix_path):
FILE: src/viral_ngs/classify/bmtagger.py
class BmtaggerTools (line 16) | class BmtaggerTools(core.Tool):
method __init__ (line 31) | def __init__(self, install_methods=None):
method execute (line 38) | def execute(self, *args):
method silent_execute (line 43) | def silent_execute(self, *args):
class BmtaggerShTool (line 50) | class BmtaggerShTool(BmtaggerTools):
class BmfilterTool (line 55) | class BmfilterTool(BmtaggerTools):
class BmtoolTool (line 60) | class BmtoolTool(BmtaggerTools):
method build_database (line 64) | def build_database(self, fasta_files, bitmask_file_path, max_ambig=0, ...
class ExtractFullseqTool (line 95) | class ExtractFullseqTool(BmtaggerTools):
class SrprismTool (line 100) | class SrprismTool(BmtaggerTools):
method build_database (line 104) | def build_database(self, fasta_files, database_prefix_path):
FILE: src/viral_ngs/classify/kb.py
class kb (line 25) | class kb(core.Tool):
method __init__ (line 28) | def __init__(self, install_methods=None):
method version (line 34) | def version(self):
method libexec (line 38) | def libexec(self):
method execute (line 44) | def execute(self, command,output, args=None, options=None):
method build (line 100) | def build(self, ref_fasta, index, workflow='standard', kmer_len=31, p...
method classify (line 123) | def classify(self, in_bam, index_file, out_dir, t2g_file, k=31, parity...
method extract (line 229) | def extract(self, in_bam, index_file, target_ids, out_dir, t2g_file, p...
method _extract_h5ad_from_tarball_to_tmpdir (line 309) | def _extract_h5ad_from_tarball_to_tmpdir(self, count_tar, tmp_dir):
method _add_sample_metadata_to_h5ad (line 331) | def _add_sample_metadata_to_h5ad(self, h5ad_or_tarball, sample_name=No...
method merge_h5ads (line 389) | def merge_h5ads(self, in_count_tars, out_h5ad, tmp_dir_parent=None):
method parse_h5ad_counts (line 433) | def parse_h5ad_counts(self, h5ad_file):
method extract_hit_ids_from_h5ad (line 454) | def extract_hit_ids_from_h5ad(self, h5ad_file, threshold=1):
FILE: src/viral_ngs/classify/kma.py
class KMA (line 19) | class KMA(core.Tool):
method __init__ (line 21) | def __init__(self, install_methods=None):
method version (line 27) | def version(self):
method libexec (line 41) | def libexec(self):
method execute (line 46) | def execute(self, command, args=None, options=None):
method build (line 70) | def build(self, ref_fasta, db_prefix, num_threads=None):
method classify (line 87) | def classify(self, in_bam, db, out_prefix, num_threads=None):
FILE: src/viral_ngs/classify/kmc.py
class KmcTool (line 36) | class KmcTool(core.Tool):
method __init__ (line 39) | def __init__(self, install_methods=None):
method version (line 45) | def version(self):
method _kmer_db_name (line 48) | def _kmer_db_name(self, kmer_db):
method is_kmer_db (line 53) | def is_kmer_db(self, kmer_db):
method _get_file_format_opt (line 58) | def _get_file_format_opt(self, fname):
method build_kmer_db (line 70) | def build_kmer_db(self, seq_files, kmer_db, kmer_size=DEFAULT_KMER_SIZ...
method execute (line 129) | def execute(self, args, threads=None, return_output=False): # pylint:...
method dump_kmer_counts (line 144) | def dump_kmer_counts(self, kmer_db, out_kmers, min_occs=1, max_occs=mi...
method read_kmer_counts (line 152) | def read_kmer_counts(self, kmer_counts_txt):
method get_kmer_counts (line 161) | def get_kmer_counts(self, kmer_db, **kwargs):
method get_kmer_db_info (line 168) | def get_kmer_db_info(self, kmer_db):
method filter_reads (line 188) | def filter_reads(self, kmer_db, in_reads, out_reads, db_min_occs=1, db...
method kmers_binary_op (line 285) | def kmers_binary_op(self, op, kmer_db1, kmer_db2, kmer_db_out,
method set_kmer_counts (line 298) | def set_kmer_counts(self, kmer_db_in, value, kmer_db_out, threads=None):
FILE: src/viral_ngs/classify/kraken2.py
class Kraken2 (line 24) | class Kraken2(core.Tool):
method __init__ (line 26) | def __init__(self, install_methods=None):
method version (line 32) | def version(self):
method libexec (line 36) | def libexec(self):
method build (line 41) | def build(self, db, standard_libraries=(), custom_libraries=(), taxdum...
method inspect (line 121) | def inspect(self, db, output, num_threads=None):
method execute (line 128) | def execute(self, command, db, output, args=None, options=None):
method pipeline (line 153) | def pipeline(self, db, in_bams, out_reports=None, out_reads=None,
method classify (line 165) | def classify(self, in_bam, db, out_reads=None, out_report=None,
FILE: src/viral_ngs/classify/krona.py
class Krona (line 10) | class Krona(core.Tool):
method __init__ (line 11) | def __init__(self, install_methods=None):
method opt (line 17) | def opt(self):
method import_taxonomy (line 25) | def import_taxonomy(self,
method create_db (line 60) | def create_db(self, db_dir):
method build_db (line 72) | def build_db(self, db_dir, taxdump_tar_gz=None, get_accessions=False):
FILE: src/viral_ngs/classify/last.py
class LastTools (line 20) | class LastTools(core.Tool):
method __init__ (line 26) | def __init__(self, install_methods=None):
class Lastal (line 33) | class Lastal(LastTools):
method get_hits (line 37) | def get_hits(self, inBam, db,
class Lastdb (line 85) | class Lastdb(LastTools):
method is_indexed (line 89) | def is_indexed(self, db_prefix):
method build_database (line 93) | def build_database(self, fasta_files, database_prefix_path=None): # py...
method execute (line 123) | def execute(self, inputFasta, outputDirectory, outputFilePrefix): #...
FILE: src/viral_ngs/classify/taxonomy.py
class TaxIdError (line 21) | class TaxIdError(ValueError):
function maybe_compressed (line 25) | def maybe_compressed(fn):
function blast_records (line 44) | def blast_records(f):
function paired_query_id (line 61) | def paired_query_id(record):
function blast_m8_taxids (line 72) | def blast_m8_taxids(record):
function extract_tax_id (line 76) | def extract_tax_id(sam1):
function coverage_lca (line 85) | def coverage_lca(query_ids, parents, lca_percent=100):
function parents_to_children (line 125) | def parents_to_children(parents):
function rank_code (line 140) | def rank_code(rank):
class TaxonomyDb (line 164) | class TaxonomyDb(object):
method __init__ (line 172) | def __init__(
method load_gi_single_dmp (line 218) | def load_gi_single_dmp(self, dmp_path):
method load_names (line 231) | def load_names(self, names_db, scientific_only=True):
method load_nodes (line 249) | def load_nodes(self, nodes_db):
method get_ordered_ancestors (line 263) | def get_ordered_ancestors(self, taxid):
method process_blast_hits (line 270) | def process_blast_hits(self, hits, top_percent):
method process_sam_hits (line 296) | def process_sam_hits(self, sam_hits, top_percent):
method translate_gi_to_tax_id (line 316) | def translate_gi_to_tax_id(self, record):
method sam_lca (line 324) | def sam_lca(self, sam_file, output=None, top_percent=10, unique_only=T...
method sam_lca_report (line 367) | def sam_lca_report(self, bam_aligned, outReport, outReads=None, unique...
method blast_lca (line 378) | def blast_lca(self,
method kraken_dfs (line 412) | def kraken_dfs(self, lines, taxa_hits, total_hits, taxid, level):
method kraken_dfs_report (line 424) | def kraken_dfs_report(self, taxa_hits):
FILE: src/viral_ngs/core/__init__.py
function iter_leaf_subclasses (line 28) | def iter_leaf_subclasses(a_class):
function all_tool_classes (line 40) | def all_tool_classes():
function get_tool_by_name (line 44) | def get_tool_by_name(name):
function skip_install_test (line 50) | def skip_install_test(condition=None):
function is_osx (line 62) | def is_osx():
class Tool (line 66) | class Tool(object):
method __init__ (line 70) | def __init__(self, install_methods=None):
method is_installed (line 78) | def is_installed(self):
method install (line 81) | def install(self):
method get_install_methods (line 92) | def get_install_methods(self):
method set_install_methods (line 95) | def set_install_methods(self, methods):
method version (line 98) | def version(self):
method _get_tool_version (line 103) | def _get_tool_version(self):
method executable_path (line 106) | def executable_path(self):
method execute (line 109) | def execute(self, *args):
method install_and_get_path (line 112) | def install_and_get_path(self):
class InstallMethod (line 119) | class InstallMethod(object):
method __init__ (line 126) | def __init__(self):
method is_attempted (line 129) | def is_attempted(self):
method attempt_install (line 132) | def attempt_install(self): # Override _attempt_install, not this.
method _attempt_install (line 136) | def _attempt_install(self):
method is_installed (line 139) | def is_installed(self):
method executable_path (line 142) | def executable_path(self):
class PrexistingUnixCommand (line 146) | class PrexistingUnixCommand(InstallMethod):
method __init__ (line 152) | def __init__(self, path, verifycmd=None, verifycode=0, require_executa...
method _attempt_install (line 160) | def _attempt_install(self):
method is_installed (line 169) | def is_installed(self):
method executable_path (line 174) | def executable_path(self):
FILE: src/viral_ngs/core/bbmap.py
class BBMapTool (line 19) | class BBMapTool(Tool):
method __init__ (line 22) | def __init__(self, install_methods=None):
method _get_tool_version (line 27) | def _get_tool_version(self):
method execute (line 30) | def execute(self, tool, **kwargs): # pylint: disable=arguments-differ
method align (line 39) | def align(self, inBam, refFasta, outBam, min_qual=0, nodisk=True, JVMm...
method bbnorm (line 65) | def bbnorm(self, inFastq, outFastq, tmpdir=None, target=None, k=None,
FILE: src/viral_ngs/core/bwa.py
class Bwa (line 25) | class Bwa(Tool):
method __init__ (line 27) | def __init__(self, install_methods=None):
method execute (line 32) | def execute(self, command, args, stdout=None, stdin=None, background=F...
method index (line 44) | def index(self, inFasta, output=None, algorithm=None):
method align_mem_bam (line 56) | def align_mem_bam(self, inBam, refDb, outBam, options=None,
method align_mem_one_rg (line 128) | def align_mem_one_rg(self, inBam, refDb, outBam, rgid=None, options=None,
method mem (line 206) | def mem(self, inReads, refDb, outAlign, options=None, min_score_to_fil...
method filter_sam_on_alignment_score (line 240) | def filter_sam_on_alignment_score(self, in_sam, out_sam, min_score_to_...
FILE: src/viral_ngs/core/cdhit.py
class CdHit (line 18) | class CdHit(Tool):
method __init__ (line 28) | def __init__(self, install_methods=None):
method execute (line 33) | def execute(self, command, input_fn, output_fn, options=None, option_s...
FILE: src/viral_ngs/core/cmd.py
class color (line 29) | class color(object):
function setup_logger (line 43) | def setup_logger(log_level):
function script_name (line 52) | def script_name():
function common_args (line 56) | def common_args(parser, arglist=(('tmp_dir', None), ('loglevel', None))):
function main_command (line 97) | def main_command(mainfunc):
function attach_main (line 112) | def attach_main(parser, cmd_main, split_args=False):
class _HelpAction (line 121) | class _HelpAction(argparse._HelpAction):
method __call__ (line 123) | def __call__(self, parser, namespace, values, option_string=None):
class storeMultiArgsOrFallBackToConst (line 156) | class storeMultiArgsOrFallBackToConst(argparse.Action):
method __call__ (line 180) | def __call__(self, parser, namespace, values, option_string=None):
function make_parser (line 189) | def make_parser(commands, description):
function main_argparse (line 220) | def main_argparse(commands, description):
function find_tmp_dir (line 264) | def find_tmp_dir():
class BadInputError (line 285) | class BadInputError(RuntimeError):
method __init__ (line 289) | def __init__(self, reason):
function check_input (line 292) | def check_input(condition, error_msg):
function parse_cmd (line 297) | def parse_cmd(module, cmd, args):
function run_cmd (line 307) | def run_cmd(module, cmd, args):
FILE: src/viral_ngs/core/errors.py
class QCError (line 3) | class QCError(RuntimeError):
method __init__ (line 6) | def __init__(self, reason):
class InvalidBamHeaderError (line 9) | class InvalidBamHeaderError(ValueError):
FILE: src/viral_ngs/core/fastqc.py
class FastQC (line 21) | class FastQC(Tool):
method __init__ (line 23) | def __init__(self, install_methods=None):
method _get_tool_version (line 28) | def _get_tool_version(self):
method execute (line 31) | def execute(self, inBam, out_html, out_zip=None, threads=None): # p...
FILE: src/viral_ngs/core/file.py
class StringNotFoundException (line 57) | class StringNotFoundException(Exception):
function get_project_path (line 62) | def get_project_path():
function get_build_path (line 96) | def get_build_path():
function get_test_path (line 101) | def get_test_path():
function get_test_input_path (line 106) | def get_test_input_path(testClassInstance=None):
function check_paths (line 115) | def check_paths(read=(), write=(), read_and_write=()):
function mkstempfname (line 137) | def mkstempfname(suffix='', prefix='tmp', directory=None, text=False):
function tempfname (line 149) | def tempfname(*args, **kwargs):
function tempfnames (line 162) | def tempfnames(suffixes, *args, **kwargs):
function tmp_dir (line 176) | def tmp_dir(*args, **kwargs):
function pushd_popd (line 199) | def pushd_popd(target_dir):
function keep_tmp (line 208) | def keep_tmp():
function set_tmp_dir (line 214) | def set_tmp_dir(name):
function destroy_tmp_dir (line 228) | def destroy_tmp_dir(tempdir=None):
function extract_tarball (line 237) | def extract_tarball(tarfile, out_dir=None, threads=None, compression='au...
function fifo (line 320) | def fifo(num_pipes=1, names=None, name=None):
function mkdir_p (line 343) | def mkdir_p(dirpath):
function touch_p (line 355) | def touch_p(path, times=None):
function zstd_open (line 362) | def zstd_open(fname, mode='r', **kwargs):
function open_or_gzopen (line 384) | def open_or_gzopen(fname, mode='r', **kwargs):
function read_tabfile_dict (line 409) | def read_tabfile_dict(inFile, header_prefix="#", skip_prefix=None, rowco...
function read_tabfile (line 440) | def read_tabfile(inFile):
function readFlatFileHeader (line 450) | def readFlatFileHeader(filename, headerPrefix='#', delim='\t'):
class FlatFileParser (line 458) | class FlatFileParser(object):
method __init__ (line 462) | def __init__(self, lineIter=None, name=None, outType='dict',
method __enter__ (line 477) | def __enter__(self):
method __exit__ (line 480) | def __exit__(self, exc_type, exc_val, exc_tb):
method __iter__ (line 483) | def __iter__(self):
method parse (line 490) | def parse(self, row):
method parseHeader (line 516) | def parseHeader(self, row):
method parseRow (line 522) | def parseRow(self, row):
function fastaMaker (line 534) | def fastaMaker(seqs, linewidth=60):
function makeFastaFile (line 549) | def makeFastaFile(seqs, outFasta):
function bam_is_sorted (line 557) | def bam_is_sorted(bam_file_path):
function concat (line 566) | def concat(inputFilePaths, outputFilePath, append=False):
function download_file (line 580) | def download_file(uriToGet, dest, destFileName=None):
function webfile_readlines (line 605) | def webfile_readlines(uriToGet):
function replace_in_file (line 613) | def replace_in_file(filename, original, new):
function cat (line 627) | def cat(output_file, input_files):
function temp_catted_files (line 636) | def temp_catted_files(input_files, suffix=None, prefix=None):
function _get_pathconf (line 645) | def _get_pathconf(file_system_path, param_suffix, default):
function max_file_name_length (line 656) | def max_file_name_length(file_system_path):
function max_path_length (line 660) | def max_path_length(file_system_path):
function sanitize_id_for_sam_rname (line 664) | def sanitize_id_for_sam_rname(string_in):
function write_fasta_with_sanitized_ids (line 683) | def write_fasta_with_sanitized_ids(fasta_in, out_filepath):
function fastas_with_sanitized_ids (line 696) | def fastas_with_sanitized_ids(input_fasta_paths, use_tmp=False):
class TranspositionError (line 728) | class TranspositionError(Exception):
method __init___ (line 729) | def __init___(self, *args, **kwargs):
function transposeChromosomeFiles (line 732) | def transposeChromosomeFiles(inputFilenamesList, sampleRelationFile=None...
function string_to_file_name (line 783) | def string_to_file_name(string_value, file_system_path=None, length_marg...
function grep_count (line 848) | def grep_count(file_path, to_match, additional_flags=None, fixed_mode=Tr...
function count_occurrences_in_tsv (line 879) | def count_occurrences_in_tsv(filePath,
function count_occurrences_in_tsv_sqlite_backed (line 892) | def count_occurrences_in_tsv_sqlite_backed(db_file_path,
function count_str_in_file (line 907) | def count_str_in_file(in_file, query_str, starts_with=False):
function fasta_length (line 923) | def fasta_length(fasta_path):
function count_fastq_reads (line 929) | def count_fastq_reads(inFastq):
function line_count (line 941) | def line_count(infname):
function touch (line 948) | def touch(fname, times=None):
function make_empty (line 952) | def make_empty(fname):
function dump_file (line 957) | def dump_file(fname, value):
function slurp_file (line 962) | def slurp_file(fname, maxSizeMb=50):
function is_broken_link (line 973) | def is_broken_link(filename):
function find_broken_symlinks (line 984) | def find_broken_symlinks(rootdir, followlinks=False):
function uncompressed_file_type (line 1008) | def uncompressed_file_type(fname):
function repack_tarballs (line 1015) | def repack_tarballs(out_compressed_tarball,
class DBConnection (line 1140) | class DBConnection:
method __init__ (line 1141) | def __init__(self, db_file=None):
method start (line 1158) | def start(self):
method __enter__ (line 1160) | def __enter__(self):
method __exit__ (line 1162) | def __exit__(self, exc_type, exc_val, exc_tb):
method close (line 1165) | def close(self):
class CountDB (line 1176) | class CountDB(DBConnection):
method __init__ (line 1182) | def __init__(self, db_file=None):
method start (line 1185) | def start(self):
method get_count_for_ID (line 1191) | def get_count_for_ID(self, idval):
method get_count_for_multiple_IDs (line 1196) | def get_count_for_multiple_IDs(self, idvals):
method set_count_for_ID (line 1201) | def set_count_for_ID(self, idval, count):
method set_count_for_multiple_IDs (line 1206) | def set_count_for_multiple_IDs(self, idvals, count):
method increment_count_for_multiple_IDs (line 1212) | def increment_count_for_multiple_IDs(self, idvals, increment_val=1):
method add_counts_from_other_db (line 1222) | def add_counts_from_other_db(self, other_db):
method increment_count_for_ID (line 1240) | def increment_count_for_ID(self, idval, increment_val=1):
method decrement_count_for_multiple_IDs (line 1243) | def decrement_count_for_multiple_IDs(self, idvals, decrement_val=-1):
method decrement_count_for_ID (line 1246) | def decrement_count_for_ID(self, idval, decrement_val=-1):
method get_counts_descending (line 1249) | def get_counts_descending(self):
method get_num_IDS (line 1254) | def get_num_IDS(self):
FILE: src/viral_ngs/core/illumina_indices.py
function memoize (line 22) | def memoize(obj):
class IlluminaIndexReference (line 33) | class IlluminaIndexReference(object):
method __init__ (line 35) | def __init__(self, kit=None, instrument=None):
method neighbors (line 2315) | def neighbors(cls, seq, distance=1):
method reverse_complement (line 2333) | def reverse_complement(seq):
method _barcodes_meta_all (line 2342) | def _barcodes_meta_all(self):
method kits (line 2355) | def kits(cls):
method instruments (line 2359) | def instruments(self):
method index_for_seq (line 2369) | def index_for_seq(self, seq, kit=None, instrument=None):
method seq_for_index (line 2385) | def seq_for_index(self, index, kit=None, instrument=None):
method guess_index (line 2402) | def guess_index(self, seq, distance=1, kit=None, instrument=None):
class UncertainSamplesheetError (line 2416) | class UncertainSamplesheetError(Exception):
class IlluminaBarcodeHelper (line 2419) | class IlluminaBarcodeHelper(object):
method __init__ (line 2421) | def __init__(self, barcode_counts, picard_metrics, sample_name, rows_l...
method outlier_barcodes (line 2456) | def outlier_barcodes(self, outlier_threshold=0.775, expected_assigned_...
method mean (line 2530) | def mean(cls, nums):
method stddevp (line 2534) | def stddevp(cls, nums):
method median (line 2541) | def median(cls, nums):
method guess_barcodes_for_sample (line 2555) | def guess_barcodes_for_sample(self, sample_name):
method find_uncertain_barcodes (line 2660) | def find_uncertain_barcodes(self, sample_names=None, outlier_threshold...
method write_guessed_barcodes (line 2721) | def write_guessed_barcodes(self, out_tsv, guessed_barcodes):
FILE: src/viral_ngs/core/illumina_utils.py
class IlluminaDirectory (line 29) | class IlluminaDirectory(object):
method __init__ (line 32) | def __init__(self, uri):
method __enter__ (line 39) | def __enter__(self):
method __exit__ (line 43) | def __exit__(self, exc_type, exc_val, exc_tb):
method load (line 47) | def load(self):
method _fix_path (line 58) | def _fix_path(self):
method _extract_tarball (line 84) | def _extract_tarball(self, tarfile):
method close (line 89) | def close(self):
method get_RunInfo (line 94) | def get_RunInfo(self):
method get_SampleSheet (line 101) | def get_SampleSheet(self, only_lane=None, append_run_id=None, **kwargs):
method get_intensities_dir (line 113) | def get_intensities_dir(self):
method get_BCLdir (line 116) | def get_BCLdir(self):
class RunInfo (line 125) | class RunInfo(object):
method __init__ (line 130) | def __init__(self, xml_fname):
method get_fname (line 134) | def get_fname(self):
method get_run_id (line 137) | def get_run_id(self):
method get_flowcell_raw (line 140) | def get_flowcell_raw(self):
method get_flowcell (line 143) | def get_flowcell(self):
method _get_rundate_obj (line 159) | def _get_rundate_obj(self):
method get_rundate_american (line 182) | def get_rundate_american(self):
method get_rundate_iso (line 185) | def get_rundate_iso(self):
method get_machine (line 188) | def get_machine(self):
method get_read_structure (line 191) | def get_read_structure(self):
method num_reads (line 201) | def num_reads(self):
method get_lane_count (line 208) | def get_lane_count(self):
method get_surface_count (line 212) | def get_surface_count(self):
method get_swath_count (line 216) | def get_swath_count(self):
method get_tile_count (line 220) | def get_tile_count(self):
method get_section_count (line 224) | def get_section_count(self):
method tile_count (line 228) | def tile_count(self):
method machine_model_from_tile_count (line 240) | def machine_model_from_tile_count(self):
method get_flowcell_chemistry (line 300) | def get_flowcell_chemistry(self):
method get_flowcell_lane_count (line 304) | def get_flowcell_lane_count(self):
method get_machine_model (line 311) | def get_machine_model(self):
method get_machines_for_flowcell_id (line 316) | def get_machines_for_flowcell_id(cls, fcid):
method infer_sequencer_model (line 325) | def infer_sequencer_model(self):
class SampleSheetError (line 479) | class SampleSheetError(Exception):
method __init__ (line 480) | def __init__(self, message, fname):
class SampleSheet (line 486) | class SampleSheet(object):
method __init__ (line 491) | def __init__(
method _detect_and_load_sheet (line 531) | def _detect_and_load_sheet(self, infile):
method can_be_collapsed (line 675) | def can_be_collapsed(self) -> bool:
method collapse_sample_index_duplicates (line 685) | def collapse_sample_index_duplicates(self, output_tsv=None, overwrite_...
method inner_demux_mapper (line 768) | def inner_demux_mapper(self):
method make_barcodes_file (line 825) | def make_barcodes_file(self, outFile):
method write_tsv (line 842) | def write_tsv(self, outFile, force=False):
method rev_comp_barcode_values (line 852) | def rev_comp_barcode_values(self, barcode_columns_to_revcomp=None, inp...
method make_params_file (line 884) | def make_params_file(self, bamDir, outFile):
method get_fname (line 909) | def get_fname(self):
method get_rows (line 912) | def get_rows(self):
method print_rows (line 915) | def print_rows(self, row_indices=None):
method num_indexes (line 926) | def num_indexes(self):
method num_samples (line 931) | def num_samples(self):
method fetch_by_index (line 934) | def fetch_by_index(self, idx):
FILE: src/viral_ngs/core/minimap2.py
class Minimap2 (line 25) | class Minimap2(Tool):
method __init__ (line 27) | def __init__(self, install_methods=None):
method _get_tool_version (line 33) | def _get_tool_version(self):
method execute (line 36) | def execute(self, args, stdout=None, stdin=None, background=False): ...
method align_bam (line 48) | def align_bam(self, inBam, refDb, outBam, options=None,
method align_one_rg (line 104) | def align_one_rg(self, inBam, refDb, outBam, rgid=None, preset=None, o...
method align_cmd (line 203) | def align_cmd(self, inReads, refDb, outAlign, options=None, threads=No...
method scaffold (line 226) | def scaffold(self, contigs_fasta, ref_fasta, outAlign, divergence=20, ...
method idxstats (line 252) | def idxstats(self, inReads, refDb, outIdxstats, outReadlist=None, thre...
FILE: src/viral_ngs/core/misc.py
function unambig_count (line 37) | def unambig_count(seq):
function timer (line 42) | def timer(prefix):
function memoize (line 50) | def memoize(obj):
function unique (line 62) | def unique(items):
function collapse_dup_strs_to_str_or_md5 (line 70) | def collapse_dup_strs_to_str_or_md5(values,
function md5_digest (line 123) | def md5_digest(in_str, last_n_chr=8):
function reverse_complement (line 127) | def reverse_complement(seq):
function histogram (line 135) | def histogram(items):
function freqs (line 144) | def freqs(items, zero_checks=None):
function intervals (line 170) | def intervals(i, n, l):
function pairwise (line 187) | def pairwise(iterable):
function batch_iterator (line 195) | def batch_iterator(iterator, batch_size):
function list_contains (line 214) | def list_contains(sublist, list_):
function run_and_print (line 223) | def run_and_print(args, stdout=None, stderr=subprocess.STDOUT,
function run_and_save (line 307) | def run_and_save(args, stdout=None, stdin=None,
class FeatureSorter (line 333) | class FeatureSorter(object):
method __init__ (line 337) | def __init__(self, collection=None):
method add (line 346) | def add(self, c, start, stop, strand='+', other=None):
method _cleanup (line 360) | def _cleanup(self):
method get_seqids (line 366) | def get_seqids(self):
method get_features (line 369) | def get_features(self, c=None, left=0, right=float('inf')):
method get_intervals (line 386) | def get_intervals(self, c=None):
function available_cpu_count (line 403) | def available_cpu_count():
function sanitize_thread_count (line 459) | def sanitize_thread_count(threads=None, tool_max_cores_value=available_c...
function which (line 492) | def which(application_binary_name):
function is_nonstr_iterable (line 506) | def is_nonstr_iterable(x, str_types=str):
function make_seq (line 510) | def make_seq(x, str_types=str):
function load_yaml_or_json (line 517) | def load_yaml_or_json(fname):
function load_config (line 525) | def load_config(cfg, include_directive='include', std_includes=(), param...
function as_type (line 630) | def as_type(val, types):
function subdict (line 641) | def subdict(d, keys):
function chk (line 647) | def chk(condition, message='Check failed', exc=RuntimeError):
function wraps (line 652) | def wraps(f):
function unwrap (line 658) | def unwrap(f):
function convert_size_str (line 662) | def convert_size_str(input_size_str, output_unit="m", round_number=True):
class ReadIdStore (line 690) | class ReadIdStore:
method __init__ (line 698) | def __init__(self, db_path):
method add_from_fastq (line 720) | def add_from_fastq(self, fastq_path):
method add_from_readlist (line 766) | def add_from_readlist(self, readlist_path):
method __len__ (line 777) | def __len__(self):
method __iter__ (line 782) | def __iter__(self):
method __contains__ (line 788) | def __contains__(self, read_id):
method contains_batch (line 796) | def contains_batch(self, read_ids, batch_size=10000):
method add (line 828) | def add(self, read_id):
method extend (line 845) | def extend(self, read_ids):
method __delitem__ (line 882) | def __delitem__(self, read_id):
method discard (line 900) | def discard(self, read_id):
method write_to_file (line 913) | def write_to_file(self, out_path, max_reads=None):
method shrink_to_subsample (line 941) | def shrink_to_subsample(self, n):
method filter_bam_by_ids (line 971) | def filter_bam_by_ids(self, inBam, outBam, include=True):
method close (line 1072) | def close(self):
method __enter__ (line 1076) | def __enter__(self):
method __exit__ (line 1079) | def __exit__(self, exc_type, exc_val, exc_tb):
class CoordMapperError (line 1086) | class CoordMapperError(Exception):
method __init___ (line 1087) | def __init___(self, *args, **kwargs):
class CoordMapper (line 1091) | class CoordMapper(collections.abc.MutableMapping):
method __init__ (line 1106) | def __init__(self, alignerTool=None):
method __getitem__ (line 1130) | def __getitem__(self, key):
method __setitem__ (line 1133) | def __setitem__(self, key, value):
method __delitem__ (line 1136) | def __delitem__(self, key):
method __len__ (line 1139) | def __len__(self):
method __iter__ (line 1142) | def __iter__(self):
method __contains__ (line 1146) | def __contains__(self, key):
method keys (line 1152) | def keys(self):
method mapAtoB (line 1155) | def mapAtoB(self, fromChrom, fromPos=None, side=0):
method mapBtoA (line 1169) | def mapBtoA(self, fromChrom, fromPos=None, side=0):
method mapChr (line 1183) | def mapChr(self, fromChrom, toChrom, fromPos=None, side=0):
method load_alignments (line 1206) | def load_alignments(self, aligned_files, a_idx=None, b_idx=None):
method align_and_load_sequences (line 1260) | def align_and_load_sequences(self, unaligned_fasta_files, aligner=None):
class CoordMapper2Seqs (line 1281) | class CoordMapper2Seqs(object):
method __init__ (line 1308) | def __init__(self, seq0, seq1):
method __call__ (line 1336) | def __call__(self, fromPos, fromWhich):
FILE: src/viral_ngs/core/mvicuna.py
class MvicunaTool (line 17) | class MvicunaTool(Tool):
method __init__ (line 19) | def __init__(self, install_methods=None):
method rmdup (line 24) | def rmdup(self, inPair, outPair, outUnpaired=None):
method rmdup_single (line 60) | def rmdup_single(self, inFastq, outFastq):
FILE: src/viral_ngs/core/novoalign.py
class NovoalignTool (line 27) | class NovoalignTool(Tool):
method __init__ (line 29) | def __init__(self, path=None, license_path=None):
method _get_tool_version (line 60) | def _get_tool_version(self):
method _fasta_to_idx_name (line 63) | def _fasta_to_idx_name(self, fasta):
method execute (line 68) | def execute(self, inBam, refFasta, outBam, options=None, min_qual=0, J...
method align_one_rg_bam (line 120) | def align_one_rg_bam(self, inBam, refFasta, outBam, rgid=None, rgs=Non...
method index_fasta (line 194) | def index_fasta(self, refFasta, k=None, s=None):
FILE: src/viral_ngs/core/picard.py
class PicardTools (line 25) | class PicardTools(Tool):
method is_installed (line 29) | def is_installed(self):
method __init__ (line 32) | def __init__(self, install_methods=None):
method _get_tool_version (line 37) | def _get_tool_version(self):
method execute (line 40) | def execute(self, command, picardOptions=None, JVMmemory=None, backgro...
method dict_to_picard_opts (line 59) | def dict_to_picard_opts(options):
class RevertSamTool (line 63) | class RevertSamTool(PicardTools):
method execute (line 66) | def execute(self, inBam, outBam, picardOptions=None, JVMmemory=None, b...
class CheckIlluminaDirectoryTool (line 75) | class CheckIlluminaDirectoryTool(PicardTools):
method execute (line 78) | def execute(self,
class MarkDuplicatesTool (line 123) | class MarkDuplicatesTool(PicardTools):
method execute (line 126) | def execute(
class SplitSamByLibraryTool (line 139) | class SplitSamByLibraryTool(PicardTools):
method execute (line 142) | def execute(
class SamToFastqTool (line 155) | class SamToFastqTool(PicardTools):
method execute (line 159) | def execute(self, inBam, outFastq1, outFastq2=None, outFastq0=None,
method execute_tmp (line 192) | def execute_tmp(self, inBam, sfx='', includeUnpaired=False, **kwargs):
method per_read_group (line 206) | def per_read_group(self, inBam, outDir, picardOptions=None, JVMmemory=...
class FastqToSamTool (line 217) | class FastqToSamTool(PicardTools):
method isFastqEmpty (line 222) | def isFastqEmpty(fastq_file):
method execute (line 252) | def execute(
class SortSamTool (line 322) | class SortSamTool(PicardTools):
method execute (line 327) | def execute(
class DownsampleSamTool (line 339) | class DownsampleSamTool(PicardTools):
method execute (line 346) | def execute(self,
method downsample_to_approx_count (line 386) | def downsample_to_approx_count(
class MergeSamFilesTool (line 414) | class MergeSamFilesTool(PicardTools):
method execute (line 417) | def execute(self, inBams, outBam, picardOptions=None, JVMmemory=None, ...
class ReplaceSamHeaderTool (line 427) | class ReplaceSamHeaderTool(PicardTools):
method execute (line 430) | def execute(self, inBam, headerBam, outBam, picardOptions=None, JVMmem...
class FilterSamReadsTool (line 438) | class FilterSamReadsTool(PicardTools):
method execute (line 450) | def execute(self, inBam, exclude, readList, outBam, picardOptions=None...
class CreateSequenceDictionaryTool (line 485) | class CreateSequenceDictionaryTool(PicardTools):
method execute (line 489) | def execute(
class BuildBamIndexTool (line 512) | class BuildBamIndexTool(PicardTools):
method execute (line 516) | def execute(self, inBam, picardOptions=None, JVMmemory=None): # pyl...
class CollectIlluminaLaneMetricsTool (line 522) | class CollectIlluminaLaneMetricsTool(PicardTools):
method execute (line 530) | def execute(
class ExtractIlluminaBarcodesTool (line 555) | class ExtractIlluminaBarcodesTool(PicardTools):
method execute (line 570) | def execute(
class AddCommentsToBamTool (line 602) | class AddCommentsToBamTool(PicardTools):
method execute (line 607) | def execute(self,
class IlluminaBasecallsToSamTool (line 637) | class IlluminaBasecallsToSamTool(PicardTools):
method execute (line 660) | def execute(self,
method execute_single_sample (line 691) | def execute_single_sample(self,
FILE: src/viral_ngs/core/prinseq.py
class PrinseqTool (line 15) | class PrinseqTool(Tool):
method __init__ (line 17) | def __init__(self, install_methods=None):
method _get_tool_version (line 22) | def _get_tool_version(self):
method rmdup_fastq_single (line 25) | def rmdup_fastq_single(self, inFastq, outFastq):
method rmdup_fastq_paired (line 45) | def rmdup_fastq_paired(self, inFastq1, inFastq2, outFastq1, outFastq2,...
FILE: src/viral_ngs/core/priorities.py
function compactify_sequences (line 16) | def compactify_sequences(sparse_matrix, sequence_names):
function sequence_to_int_array (line 30) | def sequence_to_int_array(s, fill_value=110, fill_gaps=True):
function calculate_snp_matrix (line 39) | def calculate_snp_matrix(fastafile, consensus=None, zipped=False, fill_v...
function calculate_distance_matrix (line 101) | def calculate_distance_matrix(sparse_matrix_A, sparse_matrix_B, consensus):
FILE: src/viral_ngs/core/sambamba.py
class SambambaTool (line 27) | class SambambaTool(Tool):
method __init__ (line 30) | def __init__(self, install_methods=None):
method _get_tool_version (line 37) | def _get_tool_version(self):
method execute (line 57) | def execute(self, command, args, stdout=None, stderr=None):
method sort (line 73) | def sort(self, inBam, outBam, sort_order='coordinate', threads=None):
method index (line 104) | def index(self, inBam, threads=None):
method merge (line 128) | def merge(self, inBams, outBam, threads=None):
method flagstat (line 148) | def flagstat(self, inBam, threads=None):
method _parse_flagstat (line 171) | def _parse_flagstat(self, output):
method markdup (line 230) | def markdup(self, inBam, outBam, remove_duplicates=False, threads=None,
FILE: src/viral_ngs/core/samtools.py
class SamtoolsTool (line 38) | class SamtoolsTool(Tool):
method __init__ (line 40) | def __init__(self, install_methods=None):
method _get_tool_version (line 45) | def _get_tool_version(self):
method execute (line 48) | def execute(self, command, args, stdout=None, stderr=None, background=...
method view (line 68) | def view(self, args, inFile, outFile, regions=None, threads=None, back...
method bam2fq (line 79) | def bam2fq(self, inBam, outFq1, outFq2=None):
method bam2fq_pipe (line 85) | def bam2fq_pipe(self, inBam, threads=None):
method bam2fa (line 94) | def bam2fa(self, inBam, outFa1, outFa2=None, outFa0=None, append_mate_...
method bam2fa_pipe (line 107) | def bam2fa_pipe(self, inBam):
method bam2fq_tmp (line 114) | def bam2fq_tmp(self, inBam):
method bam2fa_tmp (line 120) | def bam2fa_tmp(self, inBam):
method import_fastq (line 125) | def import_fastq(self, inFastq1, inFastq2, outBam,
method _is_fastq_empty (line 188) | def _is_fastq_empty(self, fastq_file):
method _create_empty_bam_with_header (line 209) | def _create_empty_bam_with_header(self, outBam, sample_name, library_n...
method sort (line 253) | def sort(self, inFile, outFile, args=None, threads=None):
method merge (line 265) | def merge(self, inFiles, outFile, options=None):
method index (line 274) | def index(self, inBam, threads=None):
method faidx (line 283) | def faidx(self, inFasta, overwrite=False):
method depth (line 297) | def depth(self, inBam, outFile, options=None):
method idxstats (line 303) | def idxstats(self, inBam, statsFile, threads=None):
method reheader (line 311) | def reheader(self, inBam, headerFile, outBam):
method dumpHeader (line 314) | def dumpHeader(self, inBam, outHeader):
method removeDoublyMappedReads (line 323) | def removeDoublyMappedReads(self, inBam, outBam):
method filter_to_proper_primary_mapped_reads (line 328) | def filter_to_proper_primary_mapped_reads(self, inBam, outBam, require...
method filterByCigarString (line 380) | def filterByCigarString(self, inBam, outBam,
method downsample (line 411) | def downsample(self, inBam, outBam, probability):
method downsample_to_approx_count (line 421) | def downsample_to_approx_count(self, inBam, outBam, read_count):
method getHeader (line 432) | def getHeader(self, inBam):
method getReadGroups (line 441) | def getReadGroups(self, inBam):
method count (line 453) | def count(self, inBam, opts=None, regions=None):
method mpileup (line 464) | def mpileup(self, inBam, outPileup, opts=None):
method isEmpty (line 469) | def isEmpty(self, inBam):
FILE: src/viral_ngs/core/splitcode.py
class SplitCodeTool (line 22) | class SplitCodeTool(Tool):
method __init__ (line 24) | def __init__(self, install_methods=None):
method _get_tool_version (line 29) | def _get_tool_version(self):
method execute (line 32) | def execute( self,
method check_installation (line 104) | def check_installation(self):
method run_splitcode (line 109) | def run_splitcode(self, **kwargs):
function create_splitcode_lookup_table (line 134) | def create_splitcode_lookup_table(sample_sheet_or_dataframe, csv_out, un...
function plot_read_counts (line 533) | def plot_read_counts(df_csv_path, outDir):
function plot_sorted_curve (line 607) | def plot_sorted_curve(df_csv_path, out_dir, unmatched_name, out_basename...
function run_splitcode_on_pool (line 697) | def run_splitcode_on_pool( pool_id,
function generate_splitcode_config_and_keep_files (line 799) | def generate_splitcode_config_and_keep_files(
function convert_splitcode_demux_metrics_to_picard_style (line 952) | def convert_splitcode_demux_metrics_to_picard_style(
FILE: src/viral_ngs/core/stats.py
function product (line 9) | def product(iterable):
function chi2_contingency (line 16) | def chi2_contingency(contingencyTable, correction=True):
function fisher_exact (line 64) | def fisher_exact(contingencyTable):
function log_choose (line 132) | def log_choose(n, k):
function gammainc_halfint (line 139) | def gammainc_halfint(s, x):
function pchisq (line 174) | def pchisq(x, k):
FILE: src/viral_ngs/core/trimmomatic.py
class TrimmomaticTool (line 15) | class TrimmomaticTool(Tool):
method __init__ (line 17) | def __init__(self, install_methods=None):
method _get_tool_version (line 22) | def _get_tool_version(self):
method execute (line 25) | def execute(self,
FILE: src/viral_ngs/core/version.py
function get_project_path (line 21) | def get_project_path(include_derived_modules=True):
function call_git_describe (line 63) | def call_git_describe():
function release_file (line 80) | def release_file():
function read_release_version (line 84) | def read_release_version():
function write_release_version (line 93) | def write_release_version(version):
function approx_version_number (line 98) | def approx_version_number():
function get_version (line 143) | def get_version():
FILE: src/viral_ngs/file_utils.py
function merge_tarballs (line 24) | def merge_tarballs(out_tarball, in_tarballs, threads=None, extract_to_di...
function parser_merge_tarballs (line 30) | def parser_merge_tarballs(parser=argparse.ArgumentParser()):
function parser_rename_fasta_sequences (line 60) | def parser_rename_fasta_sequences(parser=argparse.ArgumentParser()):
function main_rename_fasta_sequences (line 75) | def main_rename_fasta_sequences(args):
class Adder_Table_Map (line 103) | class Adder_Table_Map:
method __init__ (line 104) | def __init__(self, tab_file):
method _make_key_str (line 119) | def _make_key_str(self, row):
method extra_headers (line 122) | def extra_headers(self):
method modify_row (line 124) | def modify_row(self, row):
class Adder_Source_Lab_Subset (line 132) | class Adder_Source_Lab_Subset:
method __init__ (line 133) | def __init__(self, restrict_string):
method extra_headers (line 136) | def extra_headers(self):
method modify_row (line 138) | def modify_row(self, row):
function parser_tsv_derived_cols (line 144) | def parser_tsv_derived_cols(parser=argparse.ArgumentParser()):
function tsv_derived_cols (line 152) | def tsv_derived_cols(in_tsv, out_tsv, table_map=None, lab_highlight_loc=...
function parser_tsv_join (line 178) | def parser_tsv_join(parser=argparse.ArgumentParser()):
function tsv_join (line 185) | def tsv_join(in_tsvs, out_tsv, join_id=None):
function full_parser (line 235) | def full_parser():
function main (line 239) | def main():
FILE: src/viral_ngs/illumina.py
function parse_illumina_fastq_filename (line 49) | def parse_illumina_fastq_filename(filename):
function normalize_barcode (line 141) | def normalize_barcode(barcode):
function barcode_n_fraction (line 190) | def barcode_n_fraction(barcode):
function barcode_matches_with_n (line 204) | def barcode_matches_with_n(observed, expected):
function barcode_matches_fuzzy (line 245) | def barcode_matches_fuzzy(observed, expected, max_mismatches=1):
function match_barcodes_with_orientation (line 285) | def match_barcodes_with_orientation(target_bc1, target_bc2, sample_rows,
function build_run_info_json (line 465) | def build_run_info_json(
function illumina_metadata (line 549) | def illumina_metadata(
function parser_illumina_metadata (line 692) | def parser_illumina_metadata(parser=argparse.ArgumentParser()):
function run_picard_fastq_to_sam_for_splitcode_demux (line 753) | def run_picard_fastq_to_sam_for_splitcode_demux(
function _parse_barcode_from_header (line 824) | def _parse_barcode_from_header(header_line):
function consensus_barcode_from_fastq (line 854) | def consensus_barcode_from_fastq(fastq_path, num_reads=10):
function splitcode_demux_fastqs (line 944) | def splitcode_demux_fastqs(
function parser_splitcode_demux_fastqs (line 1684) | def parser_splitcode_demux_fastqs(parser=argparse.ArgumentParser()):
function parser_illumina_demux (line 1777) | def parser_illumina_demux(parser=argparse.ArgumentParser()):
function main_illumina_demux (line 1921) | def main_illumina_demux(args):
function parser_flowcell_metadata (line 2225) | def parser_flowcell_metadata(parser=argparse.ArgumentParser()):
function main_flowcell_metadata (line 2261) | def main_flowcell_metadata(args):
function parser_lane_metrics (line 2301) | def parser_lane_metrics(parser=argparse.ArgumentParser()):
function main_lane_metrics (line 2327) | def main_lane_metrics(args):
function parser_common_barcodes (line 2369) | def parser_common_barcodes(parser=argparse.ArgumentParser()):
function main_common_barcodes (line 2445) | def main_common_barcodes(args):
function count_and_sort_barcodes (line 2525) | def count_and_sort_barcodes(
function parser_guess_barcodes (line 2658) | def parser_guess_barcodes(parser=argparse.ArgumentParser()):
function main_guess_barcodes (line 2731) | def main_guess_barcodes(
function miseq_fastq_to_bam (line 2789) | def miseq_fastq_to_bam(
function parser_miseq_fastq_to_bam (line 2885) | def parser_miseq_fastq_to_bam(parser=argparse.ArgumentParser()):
function extract_fc_metadata (line 2920) | def extract_fc_metadata(flowcell, outRunInfo, outSampleSheet):
function parser_extract_fc_metadata (line 2929) | def parser_extract_fc_metadata(parser=argparse.ArgumentParser()):
function write_barcode_metrics_for_pools (line 2948) | def write_barcode_metrics_for_pools(input_csv_path,
function run_picard_fastq_to_ubam (line 3053) | def run_picard_fastq_to_ubam(fq1,
function splitcode_demux (line 3076) | def splitcode_demux(
function main_splitcode_demux (line 3587) | def main_splitcode_demux(args):
function parser_splitcode_demux (line 3619) | def parser_splitcode_demux(parser=None):
function add_constant_column_to_metrics (line 3799) | def add_constant_column_to_metrics(
function merge_demux_metrics (line 3872) | def merge_demux_metrics(
function parser_merge_demux_metrics (line 3991) | def parser_merge_demux_metrics(parser=argparse.ArgumentParser()):
function full_parser (line 4018) | def full_parser():
function main (line 4022) | def main():
FILE: src/viral_ngs/interhost.py
function parser_snpEff (line 33) | def parser_snpEff(parser=argparse.ArgumentParser()):
function parser_general_mafft (line 54) | def parser_general_mafft(parser=argparse.ArgumentParser()):
function parser_align_mafft (line 94) | def parser_align_mafft(parser):
function main_align_mafft (line 104) | def main_align_mafft(args):
function parser_multichr_mafft (line 131) | def parser_multichr_mafft(parser):
function multichr_mafft (line 155) | def multichr_mafft(args):
function call_snps_3 (line 203) | def call_snps_3(inFasta, outVcf, REF="KJ660346.2"):
function find_ref (line 212) | def find_ref(a, ref):
function vcf_header (line 219) | def vcf_header(a):
function make_vcf (line 229) | def make_vcf(a, ref_idx, chrom):
function full_parser (line 252) | def full_parser():
function main (line 256) | def main():
FILE: src/viral_ngs/intrahost.py
class AlleleFieldParser (line 40) | class AlleleFieldParser(object):
method __init__ (line 48) | def __init__(self, field=None, allele=None, fcount=None, rcount=None, ...
method __repr__ (line 64) | def __repr__(self):
method allele (line 69) | def allele(self):
method total (line 77) | def total(self):
method strand_counts (line 80) | def strand_counts(self):
method allele_and_strand_counts (line 84) | def allele_and_strand_counts(self):
method lib_counts (line 87) | def lib_counts(self):
method lib_bias_pval (line 94) | def lib_bias_pval(self):
function vphaser_one_sample (line 104) | def vphaser_one_sample(inBam, inConsFasta, outTab, vphaserNumThreads=None,
function filter_strand_bias (line 155) | def filter_strand_bias(isnvs, minReadsEach=None, maxBias=None):
function compute_library_bias (line 181) | def compute_library_bias(isnvs, inBam, inConsFasta):
function parse_alleles_string (line 264) | def parse_alleles_string(allelesStr):
function get_mpileup_allele_counts (line 298) | def get_mpileup_allele_counts(inBam, chrom, pos, inConsFasta, samtools=N...
function parser_vphaser_one_sample (line 344) | def parser_vphaser_one_sample(parser=argparse.ArgumentParser()):
function parser_vphaser (line 372) | def parser_vphaser(parser=argparse.ArgumentParser()):
function vphaser_main (line 381) | def vphaser_main(inBam, outTab, numThreads=None):
function tabfile_values_rename (line 396) | def tabfile_values_rename(inFile, mapFile, outFile, col=0):
function parser_tabfile_rename (line 417) | def parser_tabfile_rename(parser=argparse.ArgumentParser()):
function count_iter_items (line 439) | def count_iter_items(iterable):
function strip_accession_version (line 449) | def strip_accession_version(acc):
function merge_to_vcf (line 463) | def merge_to_vcf(
function parser_merge_to_vcf (line 902) | def parser_merge_to_vcf(parser=argparse.ArgumentParser()):
function compute_Fws (line 954) | def compute_Fws(vcfrow):
function add_Fws_vcf (line 978) | def add_Fws_vcf(inVcf, outVcf):
function parser_Fws (line 999) | def parser_Fws(parser=argparse.ArgumentParser()):
function parse_eff (line 1012) | def parse_eff(eff_field):
class SnpEffException (line 1033) | class SnpEffException(Exception):
function parse_ann (line 1037) | def parse_ann(ann_field, alleles, transcript_blacklist=None):
function iSNV_table (line 1084) | def iSNV_table(vcf_iter):
function parser_iSNV_table (line 1128) | def parser_iSNV_table(parser=argparse.ArgumentParser()):
function main_iSNV_table (line 1136) | def main_iSNV_table(args):
function iSNP_per_patient (line 1156) | def iSNP_per_patient(table, agg_fun=median):
function parser_iSNP_per_patient (line 1172) | def parser_iSNP_per_patient(parser=argparse.ArgumentParser()):
function main_iSNP_per_patient (line 1180) | def main_iSNP_per_patient(args):
function sampleIDMatch (line 1198) | def sampleIDMatch(inputString):
function full_parser (line 1212) | def full_parser():
function main (line 1216) | def main():
FILE: src/viral_ngs/kmer_utils.py
function build_kmer_db (line 27) | def build_kmer_db(seq_files, kmer_db, kmer_size=kmc.DEFAULT_KMER_SIZE, m...
function parser_build_kmer_db (line 33) | def parser_build_kmer_db(parser=argparse.ArgumentParser()):
function dump_kmer_counts (line 59) | def dump_kmer_counts(kmer_db, out_kmers, min_occs=1, max_occs=misc.MAX_I...
function parser_dump_kmer_counts (line 63) | def parser_dump_kmer_counts(parser=argparse.ArgumentParser()):
function filter_reads (line 80) | def filter_reads(kmer_db, in_reads, out_reads, db_min_occs=1, db_max_occ...
function parser_filter_reads (line 118) | def parser_filter_reads(parser=argparse.ArgumentParser()):
function kmers_binary_op (line 148) | def kmers_binary_op(op, kmer_db1, kmer_db2, kmer_db_out,
function parser_kmers_binary_op (line 155) | def parser_kmers_binary_op(parser=argparse.ArgumentParser()):
function kmers_set_counts (line 176) | def kmers_set_counts(kmer_db_in, value, kmer_db_out, threads=None):
function parser_kmers_set_counts (line 181) | def parser_kmers_set_counts(parser=argparse.ArgumentParser()):
function full_parser (line 194) | def full_parser():
function main (line 199) | def main():
FILE: src/viral_ngs/metagenomics.py
function tree_level_lookup (line 55) | def tree_level_lookup(parents, node, level_cache):
function push_up_tree_hits (line 77) | def push_up_tree_hits(parents, hits, min_support_percent=None, min_suppo...
function file_lines (line 125) | def file_lines(filename):
function collect_children (line 132) | def collect_children(children, original_taxids):
function collect_parents (line 142) | def collect_parents(parents, taxids):
function parser_subset_taxonomy (line 154) | def parser_subset_taxonomy(parser=argparse.ArgumentParser()):
function subset_taxonomy (line 204) | def subset_taxonomy(taxDb, outputDb, whitelistTaxids=None, whitelistTaxi...
function parser_filter_taxids_to_focal_hits (line 304) | def parser_filter_taxids_to_focal_hits(parser=argparse.ArgumentParser()):
function filter_taxids_to_focal_hits (line 314) | def filter_taxids_to_focal_hits(taxids_tsv, focal_report_tsv, taxdb_dir,...
function taxa_hits_from_tsv (line 344) | def taxa_hits_from_tsv(f, taxid_column=2):
function parser_kraken2 (line 353) | def parser_kraken2(parser=argparse.ArgumentParser()):
function main_kraken2 (line 370) | def main_kraken2(db, inBams, outReports=None, outReads=None, min_base_qu...
function parser_kb (line 383) | def parser_kb(parser=argparse.ArgumentParser()):
function kb_python (line 407) | def kb_python(in_bam, index=None, t2g=None, kmer_len=31, parity='single'...
function parser_kma (line 441) | def parser_kma(parser=argparse.ArgumentParser()):
function main_kma (line 450) | def main_kma(db, inBams, outPrefixes=None, threads=None):
function parser_kma_build (line 459) | def parser_kma_build(parser=argparse.ArgumentParser()):
function main_kma_build (line 467) | def main_kma_build(ref_fasta, db_prefix, threads=None):
function parser_krona (line 474) | def parser_krona(parser=argparse.ArgumentParser()):
function main_krona (line 489) | def main_krona(inReports, db, outHtml, queryColumn=None, taxidColumn=Non...
function parser_metagenomic_report_merge (line 541) | def parser_metagenomic_report_merge(parser=argparse.ArgumentParser()):
function metagenomic_report_merge (line 555) | def metagenomic_report_merge(metagenomic_reports, out_krona_input):
function parser_filter_bam_to_taxa (line 569) | def parser_filter_bam_to_taxa(parser=argparse.ArgumentParser()):
function filter_bam_to_taxa (line 586) | def filter_bam_to_taxa(in_bam, read_IDs_to_tax_IDs, out_bam,
function parser_kraken_taxlevel_summary (line 673) | def parser_kraken_taxlevel_summary(parser=argparse.ArgumentParser()):
function taxlevel_summary (line 689) | def taxlevel_summary(summary_files_in, json_out, csv_out, tax_headings, ...
function parser_kraken_taxlevel_plurality (line 944) | def parser_kraken_taxlevel_plurality(parser=argparse.ArgumentParser()):
function taxlevel_plurality (line 953) | def taxlevel_plurality(summary_file, tax_heading, out_report, min_reads):
function parser_kb_extract (line 1051) | def parser_kb_extract(parser=argparse.ArgumentParser()):
function kb_extract (line 1071) | def kb_extract(in_bam, index, t2g, targets, protein=False, out_dir=None,...
function parser_kb_top_taxa (line 1109) | def parser_kb_top_taxa(parser=argparse.ArgumentParser()):
function kb_top_taxa (line 1118) | def kb_top_taxa(counts_tar, out_report, id_to_tax_map=None, target_taxon...
function parser_kb_merge_h5ads (line 1249) | def parser_kb_merge_h5ads(parser=argparse.ArgumentParser()):
function kb_merge_h5ads (line 1255) | def kb_merge_h5ads(in_count_tars, out_h5ad, tmp_dir=None):
function parser_krona_build (line 1276) | def parser_krona_build(parser=argparse.ArgumentParser()):
function krona_build (line 1283) | def krona_build(db, taxdump_tar_gz=None, get_accessions=False):
function parser_kraken2_build (line 1292) | def parser_kraken2_build(parser=argparse.ArgumentParser()):
function kraken2_build (line 1312) | def kraken2_build(db,
function parser_kb_build (line 1341) | def parser_kb_build(parser=argparse.ArgumentParser()):
function kb_build (line 1351) | def kb_build(ref_fasta, index, workflow='standard', kmer_len=31, protein...
function full_parser (line 1373) | def full_parser():
function main (line 1377) | def main():
FILE: src/viral_ngs/ncbi.py
function fasta_chrlens (line 29) | def fasta_chrlens(fasta):
function tbl_transfer_common (line 36) | def tbl_transfer_common(cmap, ref_tbl, out_tbl, alt_chrlens, oob_clip=Fa...
function tbl_transfer (line 141) | def tbl_transfer(ref_fasta, ref_tbl, alt_fasta, out_tbl, oob_clip=False,...
function parser_tbl_transfer (line 152) | def parser_tbl_transfer(parser=argparse.ArgumentParser()):
function tbl_transfer_multichr (line 182) | def tbl_transfer_multichr(ref_fastas, ref_tbls, alt_fasta, out_dir, oob_...
function parser_tbl_transfer_multichr (line 209) | def parser_tbl_transfer_multichr(parser=argparse.ArgumentParser()):
function tbl_transfer_prealigned (line 241) | def tbl_transfer_prealigned(inputFasta, refFasta, refAnnotTblFiles, outp...
function parser_tbl_transfer_prealigned (line 321) | def parser_tbl_transfer_prealigned(parser=argparse.ArgumentParser()):
function fetch_fastas (line 356) | def fetch_fastas(accession_IDs, destinationDir, emailAddress, forceOverw...
function fetch_feature_tables (line 374) | def fetch_feature_tables(accession_IDs, destinationDir, emailAddress, fo...
function fetch_genbank_records (line 392) | def fetch_genbank_records(accession_IDs, destinationDir, emailAddress, f...
function parser_fetch_reference_common (line 410) | def parser_fetch_reference_common(parser=argparse.ArgumentParser()):
function parser_fetch_fastas (line 451) | def parser_fetch_fastas(parser):
function parser_fetch_feature_tables (line 462) | def parser_fetch_feature_tables(parser):
function parser_fetch_genbank_records (line 473) | def parser_fetch_genbank_records(parser):
function biosample_to_genbank (line 484) | def biosample_to_genbank(attributes, num_segments, taxid, out_genbank_sm...
function parser_biosample_to_genbank (line 566) | def parser_biosample_to_genbank(parser=argparse.ArgumentParser()):
function fasta2fsa (line 594) | def fasta2fsa(infname, outdir, biosample=None):
function multi_smt_table (line 615) | def multi_smt_table(in_table, out_cmt):
function make_structured_comment_file (line 628) | def make_structured_comment_file(cmt_fname, name=None, seq_tech=None, co...
function prep_genbank_files (line 651) | def prep_genbank_files(templateFile, fasta_files, annotDir,
function parser_prep_genbank_files (line 727) | def parser_prep_genbank_files(parser=argparse.ArgumentParser()):
function prep_sra_table (line 756) | def prep_sra_table(lib_fname, biosampleFile, md5_fname, outFile):
function parser_prep_sra_table (line 795) | def parser_prep_sra_table(parser=argparse.ArgumentParser()):
function full_parser (line 815) | def full_parser():
function main (line 819) | def main():
FILE: src/viral_ngs/phylo/feature_table.py
class SeqPosition (line 8) | class SeqPosition(object):
method __init__ (line 9) | def __init__(self, position, location_operator=None, allow_fuzzy=True):
method __str__ (line 14) | def __str__(self):
method __int__ (line 20) | def __int__(self):
method is_fuzzy (line 23) | def is_fuzzy(self):
method __eq__ (line 26) | def __eq__(self, other):
method __ne__ (line 29) | def __ne__(self, other):
method __lt__ (line 32) | def __lt__(self, other):
method __le__ (line 35) | def __le__(self, other):
method __gt__ (line 38) | def __gt__(self, other):
method __ge__ (line 41) | def __ge__(self, other):
class SeqQualifier (line 44) | class SeqQualifier(object):
method __init__ (line 45) | def __init__(self, qualifier_key, qualifier_value):
method __str__ (line 49) | def __str__(self):
class SeqLocation (line 55) | class SeqLocation(object):
method __init__ (line 56) | def __init__(self, start_pos, end_pos, feature_type=None):
method __str__ (line 61) | def __str__(self):
method __eq__ (line 69) | def __eq__(self, other):
method __ne__ (line 72) | def __ne__(self, other):
method __lt__ (line 75) | def __lt__(self, other):
method __le__ (line 78) | def __le__(self, other):
method __gt__ (line 81) | def __gt__(self, other):
method __ge__ (line 84) | def __ge__(self, other):
class SeqFeature (line 87) | class SeqFeature(object):
method __init__ (line 88) | def __init__(self, locations=None, feature_type=None):
method add_location (line 93) | def add_location(self, location):
method add_location (line 96) | def add_location(self, start, location_operator_start, end, location_o...
method sort (line 101) | def sort(self):
method add_qualifier (line 105) | def add_qualifier(self, qualifier_key, qualifier_value, *args, **kwargs):
method add_note (line 108) | def add_note(self, note_text):
method lines (line 112) | def lines(self):
class AttrDict (line 127) | class AttrDict(dict):
method __init__ (line 136) | def __init__(self, *args, **kwargs):
class FeatureTable (line 140) | class FeatureTable(object):
method __init__ (line 144) | def __init__(self, filepath=None, valid_feature_types=None):
method _parse_line (line 160) | def _parse_line(self, line):
method features (line 171) | def features(self):
method default_feature_types (line 175) | def default_feature_types(self):
method add_feature (line 179) | def add_feature(self, feature):
method read_feature_table (line 182) | def read_feature_table(self, filepath, map_function=None, allow_fuzzy=...
method _is_valid_location (line 233) | def _is_valid_location(self, location):
method remap_locations (line 247) | def remap_locations(self, map_function=None):
method lines (line 327) | def lines(self, exclude_patterns=None):
FILE: src/viral_ngs/phylo/genbank.py
function parse_accession_str (line 15) | def parse_accession_str(chr_ref):
function get_feature_table_id (line 25) | def get_feature_table_id(featureTableFile):
function _seq_chunks (line 46) | def _seq_chunks(seq, n):
function _fetch_from_nuccore (line 52) | def _fetch_from_nuccore(accessionList, destinationDir, emailAddress,
function fetch_fastas_from_genbank (line 176) | def fetch_fastas_from_genbank(
function fetch_feature_tables_from_genbank (line 193) | def fetch_feature_tables_from_genbank(
function fetch_full_records_from_genbank (line 210) | def fetch_full_records_from_genbank(
FILE: src/viral_ngs/phylo/mafft.py
class MafftTool (line 23) | class MafftTool(core.Tool):
method __init__ (line 25) | def __init__(self, install_methods=None):
method version (line 30) | def version(self):
method _get_tool_version (line 35) | def _get_tool_version(self):
method __seqIdsAreAllUnique (line 38) | def __seqIdsAreAllUnique(self, filePath, inputFormat="fasta"):
method execute (line 53) | def execute(
FILE: src/viral_ngs/phylo/mummer.py
class MummerTool (line 22) | class MummerTool(core.Tool):
method __init__ (line 24) | def __init__(self, install_methods=None):
method version (line 29) | def version(self):
method _get_tool_version (line 34) | def _get_tool_version(self):
method executable_path (line 37) | def executable_path(self):
method execute (line 44) | def execute(self, refFasta, qryFastas):
method nucmer (line 50) | def nucmer(self, refFasta, qryFasta, outDelta, extend=None, breaklen=N...
method promer (line 75) | def promer(self, refFasta, qryFasta, outDelta, extend=None, breaklen=N...
method delta_filter (line 100) | def delta_filter(self, inDelta, outDelta):
method show_tiling (line 107) | def show_tiling(self, inDelta, outTiling, outFasta=None,
method trim_contigs (line 136) | def trim_contigs(self, refFasta, contigsFasta, outFasta,
method scaffold_contigs (line 182) | def scaffold_contigs(self, refFasta, contigsFasta, outFasta,
method scaffold_contigs_custom (line 210) | def scaffold_contigs_custom(self, refFasta, contigsFasta, outFasta,
method align_one_to_one (line 341) | def align_one_to_one(self, refFasta, otherFasta, outFasta):
function contig_chooser (line 378) | def contig_chooser(alt_seqs, ref_len, coords_debug=""):
class AmbiguousAlignmentException (line 454) | class AmbiguousAlignmentException(Exception):
class AlignsReader (line 457) | class AlignsReader(object):
method __init__ (line 460) | def __init__(self, aligns_file, ref_fasta=None):
method _load_align (line 469) | def _load_align(self):
method _load_fastas (line 525) | def _load_fastas(self):
method get_alignments (line 529) | def get_alignments(self):
method get_intervals (line 533) | def get_intervals(self):
method _dummy_row (line 554) | def _dummy_row(self, start, stop, filler='N'):
method get_ref_seq (line 559) | def get_ref_seq(self, start, stop):
method retrieve_alts_by_ref (line 566) | def retrieve_alts_by_ref(self, start, stop, aln_start=None, aln_stop=N...
method _aln_to_alt_seq (line 597) | def _aln_to_alt_seq(self, aln, start, stop):
FILE: src/viral_ngs/phylo/muscle.py
class MuscleTool (line 22) | class MuscleTool(core.Tool):
method __init__ (line 24) | def __init__(self, install_methods=None):
method version (line 29) | def version(self):
method _get_tool_version (line 34) | def _get_tool_version(self):
method execute (line 38) | def execute(
FILE: src/viral_ngs/phylo/snpeff.py
class SnpEff (line 29) | class SnpEff(core.Tool):
method __init__ (line 31) | def __init__(self, install_methods=None, extra_genomes=None):
method version (line 39) | def version(self):
method execute (line 42) | def execute(self, command, args, JVMmemory=None, stdin=None, stdout=No...
method has_genome (line 60) | def has_genome(self, genome):
method download_db (line 67) | def download_db(self, dbname, verbose=False):
method create_db (line 75) | def create_db(self, accessions, emailAddress=None, JVMmemory=None):
method available_databases (line 135) | def available_databases(self):
method annotate_vcf (line 159) | def annotate_vcf(self, inVcf, genomes, outVcf, emailAddress=None, JVMm...
function get_data_dir (line 220) | def get_data_dir(config_file):
function add_genomes_to_snpeff_config_file (line 230) | def add_genomes_to_snpeff_config_file(config_file, new_genomes):
FILE: src/viral_ngs/phylo/vcf.py
function make_intervals (line 18) | def make_intervals(i, n, fasta, chr_prefix='', verbose=False):
function sliding_windows (line 59) | def sliding_windows(fasta, width, offset, chr_prefix=''):
class GenomePosition (line 75) | class GenomePosition(object):
method __init__ (line 81) | def __init__(self, seqDb):
method get_gpos (line 93) | def get_gpos(self, c, p):
method get_chr_pos (line 99) | def get_chr_pos(self, gpos):
function get_chrlens (line 110) | def get_chrlens(inFile):
function calc_maf (line 145) | def calc_maf(genos, ancestral=None, ploidy=1):
class TabixReader (line 183) | class TabixReader(pysam.TabixFile):
method __init__ (line 192) | def __init__(self, inFile, parser=pysam.asTuple()):
method __enter__ (line 208) | def __enter__(self):
method __exit__ (line 211) | def __exit__(self, exc_type, exc_val, exc_tb):
method chroms (line 219) | def chroms(self):
method get (line 222) | def get(self, chrom=None, start=None, stop=None, region=None):
function get_pos_from_vcf_record (line 228) | def get_pos_from_vcf_record(vcfrec):
function bytes_to_string (line 233) | def bytes_to_string(o):
class VcfReader (line 239) | class VcfReader(TabixReader):
method __init__ (line 247) | def __init__(self, inFile, ploidy=1, parser=pysam.asVCF()):
method samples (line 268) | def samples(self):
method chrlens (line 271) | def chrlens(self):
method get_positions (line 274) | def get_positions(self, c=None, start=None, stop=None, region=None):
method get_range (line 279) | def get_range(self, c=None, start=None, stop=None, region=None, as_str...
method get_snp_genos (line 311) | def get_snp_genos(self, c, p, as_strings=True):
method getFullSequences (line 320) | def getFullSequences(self, c, start, stop, samples,
function replaceAlleles (line 380) | def replaceAlleles(sample, seq, vcf_records):
FILE: src/viral_ngs/phylo/vphaser2.py
class Vphaser2Tool (line 19) | class Vphaser2Tool(core.Tool):
method __init__ (line 21) | def __init__(self, install_methods=None):
method execute (line 26) | def execute(self, inBam, outDir, numThreads=None): # pylint: disabl...
method iterate (line 51) | def iterate(self, inBam, numThreads=None):
FILE: src/viral_ngs/read_utils.py
function parser_index_fasta_samtools (line 51) | def parser_index_fasta_samtools(parser=argparse.ArgumentParser()):
function main_index_fasta_samtools (line 58) | def main_index_fasta_samtools(args):
function parser_index_fasta_picard (line 71) | def parser_index_fasta_picard(parser=argparse.ArgumentParser()):
function main_index_fasta_picard (line 89) | def main_index_fasta_picard(args):
function parser_mkdup_picard (line 106) | def parser_mkdup_picard(parser=argparse.ArgumentParser()):
function main_mkdup_picard (line 133) | def main_mkdup_picard(args):
function parser_revert_sam_common (line 152) | def parser_revert_sam_common(parser=argparse.ArgumentParser()):
function parser_revert_bam_picard (line 183) | def parser_revert_bam_picard(parser=argparse.ArgumentParser()):
function main_revert_bam_picard (line 203) | def main_revert_bam_picard(inBam, outBam, clear_tags=False, tags_to_clea...
function revert_bam_if_aligned (line 220) | def revert_bam_if_aligned(inBam, revert_bam=None, clear_tags=True, tags_...
function parser_picard (line 267) | def parser_picard(parser=argparse.ArgumentParser()):
function main_picard (line 284) | def main_picard(args):
function parser_sort_bam (line 297) | def parser_sort_bam(parser=argparse.ArgumentParser()):
function main_sort_bam (line 336) | def main_sort_bam(args):
function parser_downsample_bams (line 356) | def parser_downsample_bams(parser=argparse.ArgumentParser()):
function main_downsample_bams (line 381) | def main_downsample_bams(in_bams, out_path, specified_read_count=None, d...
function parser_merge_bams (line 477) | def parser_merge_bams(parser=argparse.ArgumentParser()):
function main_merge_bams (line 496) | def main_merge_bams(args):
function parser_filter_bam (line 510) | def parser_filter_bam(parser=argparse.ArgumentParser()):
function main_filter_bam (line 528) | def main_filter_bam(args):
function fastq_to_bam (line 547) | def fastq_to_bam(
function parser_fastq_to_bam (line 607) | def parser_fastq_to_bam(parser=argparse.ArgumentParser()):
function join_paired_fastq (line 642) | def join_paired_fastq(
function parser_join_paired_fastq (line 656) | def parser_join_paired_fastq(parser=argparse.ArgumentParser()):
function split_bam (line 675) | def split_bam(inBam, outBams):
function parser_split_bam (line 724) | def parser_split_bam(parser=argparse.ArgumentParser()):
function parser_reheader_bam (line 739) | def parser_reheader_bam(parser=argparse.ArgumentParser()):
function main_reheader_bam (line 748) | def main_reheader_bam(args):
function parser_reheader_bams (line 776) | def parser_reheader_bams(parser=argparse.ArgumentParser()):
function main_reheader_bams (line 783) | def main_reheader_bams(args):
function mvicuna_fastqs_to_readlist (line 820) | def mvicuna_fastqs_to_readlist(inFastq1, inFastq2, readList):
function rmdup_cdhit_bam (line 848) | def rmdup_cdhit_bam(inBam, outBam, max_mismatches=None, jvm_memory=None):
function parser_rmdup_cdhit_bam (line 897) | def parser_rmdup_cdhit_bam(parser=argparse.ArgumentParser()):
function _merge_fastqs_and_mvicuna (line 913) | def _merge_fastqs_and_mvicuna(lb, files):
function rmdup_mvicuna_bam (line 943) | def rmdup_mvicuna_bam(inBam, outBam, threads=None):
function parser_rmdup_mvicuna_bam (line 991) | def parser_rmdup_mvicuna_bam(parser=argparse.ArgumentParser()):
function rmdup_bbnorm_bam (line 1002) | def rmdup_bbnorm_bam(inBam, outBam,
function parser_rmdup_bbnorm_bam (line 1077) | def parser_rmdup_bbnorm_bam(parser=argparse.ArgumentParser()):
function parser_rmdup_prinseq_fastq (line 1126) | def parser_rmdup_prinseq_fastq(parser=argparse.ArgumentParser()):
function main_rmdup_prinseq_fastq (line 1144) | def main_rmdup_prinseq_fastq(args):
function filter_bam_mapped_only (line 1156) | def filter_bam_mapped_only(inBam, outBam):
function parser_filter_bam_mapped_only (line 1166) | def parser_filter_bam_mapped_only(parser=argparse.ArgumentParser()):
function parser_novoalign (line 1179) | def parser_novoalign(parser=argparse.ArgumentParser()):
function main_novoalign (line 1201) | def main_novoalign(args):
function parser_novoindex (line 1217) | def parser_novoindex(parser=argparse.ArgumentParser()):
function main_novoindex (line 1229) | def main_novoindex(args):
function align_and_fix (line 1239) | def align_and_fix(
function parser_align_and_fix (line 1352) | def parser_align_and_fix(parser=argparse.ArgumentParser()):
function filter_bam_to_proper_primary_mapped_reads (line 1407) | def filter_bam_to_proper_primary_mapped_reads(inBam, outBam, doNotRequir...
function parser_filter_bam_to_proper_primary_mapped_reads (line 1430) | def parser_filter_bam_to_proper_primary_mapped_reads(parser=argparse.Arg...
function minimap2_idxstats (line 1456) | def minimap2_idxstats(inBam, refFasta, outStats, outReadlist=None, threa...
function parser_minimap2_idxstats (line 1474) | def parser_minimap2_idxstats(parser=argparse.ArgumentParser()):
function bwamem_idxstats (line 1487) | def bwamem_idxstats(inBam, refFasta, outBam=None, outStats=None,
function parser_bwamem_idxstats (line 1530) | def parser_bwamem_idxstats(parser=argparse.ArgumentParser()):
function parser_extract_tarball (line 1584) | def parser_extract_tarball(parser=argparse.ArgumentParser()):
function main_extract_tarball (line 1597) | def main_extract_tarball(*args, **kwargs):
function fasta_read_names (line 1607) | def fasta_read_names(in_fasta, out_read_names):
function read_names (line 1621) | def read_names(in_reads, out_read_names, threads=None):
function parser_read_names (line 1630) | def parser_read_names(parser=argparse.ArgumentParser()):
function trim_rmdup_subsamp_reads (line 1645) | def trim_rmdup_subsamp_reads(inBam, clipDb, outBam, n_reads=100000, trim...
function parser_trim_rmdup_subsamp (line 1829) | def parser_trim_rmdup_subsamp(parser=argparse.ArgumentParser()):
function full_parser (line 1855) | def full_parser():
function main (line 1859) | def main():
FILE: src/viral_ngs/reports.py
function get_assembly_stats (line 35) | def get_assembly_stats(sample,
function genome_coverage_stats_only (line 131) | def genome_coverage_stats_only(mapped_bam, chr_name=None, cov_thresholds...
function assembly_stats (line 144) | def assembly_stats(samples, outFile, cov_thresholds, assembly_dir, assem...
function parser_assembly_stats (line 164) | def parser_assembly_stats(parser=argparse.ArgumentParser()):
function _get_samples_from_bam (line 195) | def _get_samples_from_bam(bam):
function _get_chrs_from_bam (line 198) | def _get_chrs_from_bam(bam):
function parser_coverage_only (line 202) | def parser_coverage_only(parser=argparse.ArgumentParser()):
function coverage_only (line 213) | def coverage_only(mapped_bams, out_report, cov_thresholds=(1, 5, 20, 100)):
function consolidate_fastqc (line 244) | def consolidate_fastqc(inDirs, outFile):
function parser_consolidate_fastqc (line 266) | def parser_consolidate_fastqc(parser=argparse.ArgumentParser()):
function get_bam_info (line 277) | def get_bam_info(bamstats_dir):
function get_lib_info (line 290) | def get_lib_info(runfile):
function get_earliest_date (line 306) | def get_earliest_date(inDir):
function consolidate_spike_count (line 312) | def consolidate_spike_count(in_dir, out_file):
function parser_consolidate_spike_count (line 328) | def parser_consolidate_spike_count(parser=argparse.ArgumentParser()):
function aggregate_spike_count (line 339) | def aggregate_spike_count(in_dir, out_file):
function parser_aggregate_spike_count (line 371) | def parser_aggregate_spike_count(parser=argparse.ArgumentParser()):
function aggregate_alignment_counts (line 380) | def aggregate_alignment_counts(in_reports, out_file):
function parser_aggregate_alignment_counts (line 408) | def parser_aggregate_alignment_counts(parser=argparse.ArgumentParser()):
function parser_plot_coverage_common (line 420) | def parser_plot_coverage_common(parser=argparse.ArgumentParser()): # ...
function plot_coverage (line 543) | def plot_coverage(
function parser_plot_coverage (line 758) | def parser_plot_coverage(parser=argparse.ArgumentParser()):
function align_and_plot_coverage (line 774) | def align_and_plot_coverage(
function parser_align_and_plot_coverage (line 890) | def parser_align_and_plot_coverage(parser=argparse.ArgumentParser()):
function parser_fastqc (line 952) | def parser_fastqc(parser=argparse.ArgumentParser()):
function full_parser (line 964) | def full_parser():
function main (line 968) | def main():
FILE: src/viral_ngs/taxon_filter.py
function parser_deplete (line 58) | def parser_deplete(parser=argparse.ArgumentParser()):
function main_deplete (line 106) | def main_deplete(args):
function filter_lastal_bam (line 179) | def filter_lastal_bam(
function parser_filter_lastal_bam (line 226) | def parser_filter_lastal_bam(parser=argparse.ArgumentParser()):
function deplete_bmtagger_bam (line 291) | def deplete_bmtagger_bam(inBam, db, outBam, srprism_memory=7168):
function parser_deplete_bam_bmtagger (line 340) | def parser_deplete_bam_bmtagger(parser=argparse.ArgumentParser()):
function main_deplete_bam_bmtagger (line 356) | def main_deplete_bam_bmtagger(args):
function multi_db_deplete_bam (line 377) | def multi_db_deplete_bam(inBam, refDbs, deplete_method, outBam, **kwargs):
function _run_blastn_chunk (line 413) | def _run_blastn_chunk(db, input_fasta, out_hits, blast_threads, task=Non...
function blastn_chunked_fasta (line 428) | def blastn_chunked_fasta(fasta, db, out_hits, chunkSize=1000000, threads...
function deplete_blastn_bam (line 518) | def deplete_blastn_bam(inBam, db, outBam, threads=None, chunkSize=1000000):
function chunk_blast_hits (line 545) | def chunk_blast_hits(inFasta, db, blast_hits_output, threads=None, chunk...
function parser_chunk_blast_hits (line 564) | def parser_chunk_blast_hits(parser=argparse.ArgumentParser()):
function parser_deplete_blastn_bam (line 578) | def parser_deplete_blastn_bam(parser=argparse.ArgumentParser()):
function main_deplete_blastn_bam (line 590) | def main_deplete_blastn_bam(args):
function extract_build_or_use_database (line 607) | def extract_build_or_use_database(db, db_build_command, db_extension_to_...
function deplete_bwa_bam (line 647) | def deplete_bwa_bam(inBam, db, outBam, threads=None, clear_tags=True, ta...
function parser_deplete_bwa_bam (line 675) | def parser_deplete_bwa_bam(parser=argparse.ArgumentParser()):
function main_deplete_bwa_bam (line 690) | def main_deplete_bwa_bam(args):
function deplete_minimap2_bam (line 710) | def deplete_minimap2_bam(inBam, db, outBam, threads=None):
function parser_deplete_minimap2_bam (line 736) | def parser_deplete_minimap2_bam(parser=argparse.ArgumentParser()):
function main_deplete_minimap2_bam (line 746) | def main_deplete_minimap2_bam(args):
function lastal_build_db (line 771) | def lastal_build_db(inputFasta, outputDirectory, outputFilePrefix):
function parser_lastal_build_db (line 783) | def parser_lastal_build_db(parser=argparse.ArgumentParser()):
function merge_compressed_files (line 801) | def merge_compressed_files(inFiles, outFile, sep=''):
function bwa_build_db (line 821) | def bwa_build_db(inputFasta, outputDirectory, outputFilePrefix):
function parser_bwa_build_db (line 848) | def parser_bwa_build_db(parser=argparse.ArgumentParser()):
function blastn_build_db (line 868) | def blastn_build_db(inputFasta, outputDirectory, outputFilePrefix):
function parser_blastn_build_db (line 892) | def parser_blastn_build_db(parser=argparse.ArgumentParser()):
function bmtagger_build_db (line 911) | def bmtagger_build_db(inputFasta, outputDirectory, outputFilePrefix, wor...
function parser_bmtagger_build_db (line 941) | def parser_bmtagger_build_db(parser=argparse.ArgumentParser()):
function full_parser (line 967) | def full_parser():
function main (line 971) | def main():
FILE: tests/__init__.py
function assert_equal_contents (line 38) | def assert_equal_contents(testCase, filename1, filename2):
function assert_equal_bam_reads (line 43) | def assert_equal_bam_reads(testCase, bam_filename1, bam_filename2):
function assert_md5_equal_to_line_in_file (line 92) | def assert_md5_equal_to_line_in_file(testCase, filename, checksum_file, ...
class TestCaseWithTmp (line 114) | class TestCaseWithTmp(unittest.TestCase):
method assertEqualContents (line 117) | def assertEqualContents(self, f1, f2):
method assertEqualFasta (line 120) | def assertEqualFasta(self, f1, f2):
method assertEqualFastaSeqs (line 126) | def assertEqualFastaSeqs(self, f1, f2):
method input (line 132) | def input(self, fname):
method inputs (line 136) | def inputs(self, *fnames):
method assertEqualSamHeaders (line 140) | def assertEqualSamHeaders(self, tested_samfile, expected_samfile, othe...
function assert_valid_feature_table (line 190) | def assert_valid_feature_table(testCase, tbl_file, fasta_file, temp_dir):
function assert_none_executable (line 211) | def assert_none_executable():
FILE: tests/conftest.py
function timer (line 19) | def timer():
function pytest_addoption (line 25) | def pytest_addoption(parser):
function pytest_configure (line 40) | def pytest_configure(config):
function pytest_collection_modifyitems (line 49) | def pytest_collection_modifyitems(config, items):
function _tmpdir_aux (line 67) | def _tmpdir_aux(base_dir, scope, name):
function tmpdir_session (line 74) | def tmpdir_session(request, tmpdir_factory):
function tmpdir_module (line 81) | def tmpdir_module(request, tmpdir_session):
function tmpdir_class (line 87) | def tmpdir_class(request, tmpdir_module):
function tmpdir_function (line 94) | def tmpdir_function(request, tmpdir_class, monkeypatch):
function monkeypatch_function_result (line 103) | def monkeypatch_function_result(monkeypatch):
class FixtureReporter (line 146) | class FixtureReporter:
method __init__ (line 148) | def __init__(self, config):
method pytest_fixture_setup (line 156) | def pytest_fixture_setup(self, fixturedef, request):
method pytest_terminal_summary (line 167) | def pytest_terminal_summary(self, terminalreporter, exitstatus):
FILE: tests/unit/assemble/test_assembly.py
function makeFasta (line 31) | def makeFasta(seqs, outFasta):
class TestCommandHelp (line 37) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 39) | def test_help_parser_for_each_command(self):
class TestRefineAssemble (line 45) | class TestRefineAssemble(TestCaseWithTmp):
method _align_with_minimap2 (line 48) | def _align_with_minimap2(self, refFasta, inBam):
method test_empty_input_bam_assembly (line 57) | def test_empty_input_bam_assembly(self):
method test_aligned_empty_input_bam_assembly (line 74) | def test_aligned_empty_input_bam_assembly(self):
method test_empty_input_fasta_assembly (line 97) | def test_empty_input_fasta_assembly(self):
method test_empty_input_succeed (line 111) | def test_empty_input_succeed(self):
class TestNormalizeCoverage (line 126) | class TestNormalizeCoverage(TestCaseWithTmp):
method test_help_parser (line 129) | def test_help_parser(self):
method test_normalize_coverage_on_aligned_bam (line 135) | def test_normalize_coverage_on_aligned_bam(self):
class TestAssembleSpades (line 159) | class TestAssembleSpades(TestCaseWithTmp):
method test_assembly (line 162) | def test_assembly(self):
method test_assembly_with_previously_assembled_contigs (line 174) | def test_assembly_with_previously_assembled_contigs(self):
method test_empty_input_succeed (line 188) | def test_empty_input_succeed(self):
method test_always_succeed (line 196) | def test_always_succeed(self):
class TestAmbiguityBases (line 206) | class TestAmbiguityBases(unittest.TestCase):
method test_non_failure (line 208) | def test_non_failure(self):
class TestUndirectedGraph (line 220) | class TestUndirectedGraph(unittest.TestCase):
method test_simple (line 221) | def test_simple(self):
method test_disconnected (line 229) | def test_disconnected(self):
method test_both (line 236) | def test_both(self):
class TestOrderAndOrient (line 251) | class TestOrderAndOrient(TestCaseWithTmp):
method test_varicella_big (line 254) | def test_varicella_big(self):
method test_lassa_multisegment (line 266) | def test_lassa_multisegment(self):
method test_lassa_multisegment_refsel (line 277) | def test_lassa_multisegment_refsel(self):
method test_influenza_multisegment (line 291) | def test_influenza_multisegment(self):
method test_ebov_palindrome (line 302) | def test_ebov_palindrome(self):
method test_ebov_palindrome_refsel (line 315) | def test_ebov_palindrome_refsel(self):
method test_hiv_wraparound (line 326) | def test_hiv_wraparound(self):
method test_alternate_contigs (line 339) | def test_alternate_contigs(self):
method test_lassa_protein (line 355) | def test_lassa_protein(self):
method test_multi_overlap (line 367) | def test_multi_overlap(self):
method test_ambig_align (line 379) | def test_ambig_align(self):
method test_ambig_align_ebov (line 396) | def test_ambig_align_ebov(self):
method test_obscure_mummer3_bug (line 413) | def test_obscure_mummer3_bug(self):
method test_not_all_segments_fail (line 426) | def test_not_all_segments_fail(self):
method test_not_all_segments_succeed (line 436) | def test_not_all_segments_succeed(self):
method test_empty_output_succeed (line 446) | def test_empty_output_succeed(self):
class TestGap2Seq (line 459) | class TestGap2Seq(TestCaseWithTmp):
method test_gapfill (line 462) | def test_gapfill(self):
method test_empty_fasta_input (line 473) | def test_empty_fasta_input(self):
class TestImputeFromReference (line 483) | class TestImputeFromReference(TestCaseWithTmp):
method test_varicella_big_muscle (line 487) | def test_varicella_big_muscle(self):
method test_varicella_big_mummer (line 504) | def test_varicella_big_mummer(self):
method test_small_muscle (line 522) | def test_small_muscle(self):
method test_small_mafft (line 539) | def test_small_mafft(self):
method test_small_mummer (line 556) | def test_small_mummer(self):
method test_empty_fasta_input (line 573) | def test_empty_fasta_input(self):
class TestSkaniReferenceSelection (line 588) | class TestSkaniReferenceSelection(TestCaseWithTmp):
method test_skani_contigs_to_refs (line 591) | def test_skani_contigs_to_refs(self):
method test_skani_no_big_contigs (line 618) | def test_skani_no_big_contigs(self):
method test_skani_no_matches (line 639) | def test_skani_no_matches(self):
method test_output_sorted_by_product (line 660) | def test_output_sorted_by_product(self):
method test_sort_skani_table_empty_file (line 682) | def test_sort_skani_table_empty_file(self):
method test_sort_skani_table_header_only (line 699) | def test_sort_skani_table_header_only(self):
class TestMutableSequence (line 721) | class TestMutableSequence(unittest.TestCase):
method test_bad_coords (line 724) | def test_bad_coords(self):
method test_good_coords (line 730) | def test_good_coords(self):
method test_modify_one (line 738) | def test_modify_one(self):
method test_modify_blank (line 756) | def test_modify_blank(self):
method test_modify_insertions (line 762) | def test_modify_insertions(self):
method test_modify_deletions (line 771) | def test_modify_deletions(self):
method test_modify_deletions_simple (line 787) | def test_modify_deletions_simple(self):
method test_modify_deletions_remember (line 792) | def test_modify_deletions_remember(self):
class TestManualSnpCaller (line 802) | class TestManualSnpCaller(unittest.TestCase):
method test_missing_dp (line 805) | def test_missing_dp(self):
method test_dp_inaccurate (line 811) | def test_dp_inaccurate(self):
method test_invariant_sites (line 823) | def test_invariant_sites(self):
method test_het_edgecases (line 841) | def test_het_edgecases(self):
method test_indels (line 872) | def test_indels(self):
method test_vcf_to_seqs_indels1 (line 887) | def test_vcf_to_seqs_indels1(self):
method test_vcf_to_seqs_indels2 (line 896) | def test_vcf_to_seqs_indels2(self):
class TestDeambigAndTrimFasta (line 909) | class TestDeambigAndTrimFasta(TestCaseWithTmp):
method run_method (line 912) | def run_method(self, inseqs, parser_fun):
method test_trim_fasta (line 920) | def test_trim_fasta(self):
method test_deambig_fasta (line 931) | def test_deambig_fasta(self):
class TestContigChooser (line 948) | class TestContigChooser(unittest.TestCase):
method test_no_seqs (line 951) | def test_no_seqs(self):
method test_one_seq (line 956) | def test_one_seq(self):
method test_most_popular_seq (line 961) | def test_most_popular_seq(self):
method test_most_popular_seq_len (line 969) | def test_most_popular_seq_len(self):
method test_same_as_ref_len (line 985) | def test_same_as_ref_len(self):
class TestWgsimTool (line 991) | class TestWgsimTool(TestCaseWithTmp):
method test_slice_fasta_whole_sequence (line 994) | def test_slice_fasta_whole_sequence(self):
method test_slice_fasta_with_coordinates (line 1013) | def test_slice_fasta_with_coordinates(self):
method test_slice_fasta_no_params (line 1031) | def test_slice_fasta_no_params(self):
method test_coverage_to_read_pairs (line 1050) | def test_coverage_to_read_pairs(self):
class TestSimulateIlluminaReads (line 1075) | class TestSimulateIlluminaReads(TestCaseWithTmp):
method setUp (line 1078) | def setUp(self):
method count_bam_reads (line 1086) | def count_bam_reads(self, bam_file):
method expected_read_count (line 1094) | def expected_read_count(self, coverage, seq_length, read_length=150):
method test_simulate_uniform_coverage (line 1102) | def test_simulate_uniform_coverage(self):
method test_simulate_per_sequence_coverage (line 1131) | def test_simulate_per_sequence_coverage(self):
method test_simulate_bed_coverage (line 1164) | def test_simulate_bed_coverage(self):
function _find_script (line 1204) | def _find_script(script_name):
class TestFastaTrimTerminalAmbigs (line 1217) | class TestFastaTrimTerminalAmbigs(TestCaseWithTmp):
method test_script_runs_successfully (line 1220) | def test_script_runs_successfully(self):
method test_script_with_3rules_option (line 1247) | def test_script_with_3rules_option(self):
FILE: tests/unit/assemble/test_assembly_integration.py
function _align_with_minimap2 (line 23) | def _align_with_minimap2(refFasta, inBam):
class TestRefineAssembly (line 33) | class TestRefineAssembly(TestCaseWithTmp):
method test_ebov_refine1 (line 34) | def test_ebov_refine1(self):
method test_ebov_refine2 (line 52) | def test_ebov_refine2(self):
class TestOrderOrientAndImputeFromReference (line 71) | class TestOrderOrientAndImputeFromReference(TestCaseWithTmp):
method setUp (line 73) | def setUp(self):
method tearDown (line 85) | def tearDown(self):
method test_impute_from_oriented_muscle (line 89) | def test_impute_from_oriented_muscle(self):
method test_impute_from_oriented_mafft (line 92) | def test_impute_from_oriented_mafft(self):
method test_impute_from_oriented_mummer (line 95) | def test_impute_from_oriented_mummer(self):
method influenza_impute (line 99) | def influenza_impute(self, aligner):
FILE: tests/unit/assemble/test_util_vcf.py
class StubGenome (line 22) | class StubGenome:
method __init__ (line 28) | def __init__(self, chromlist):
method chrlens (line 33) | def chrlens(self):
class TestGenomePosition (line 37) | class TestGenomePosition(unittest.TestCase):
method test_fail_OOB_get_gpos (line 40) | def test_fail_OOB_get_gpos(self):
method test_fail_OOB_get_chr_pos (line 48) | def test_fail_OOB_get_chr_pos(self):
method test_fail_non_int_pos (line 55) | def test_fail_non_int_pos(self):
method test_spotcheck_edges (line 63) | def test_spotcheck_edges(self):
method test_equality_1chrGenome (line 72) | def test_equality_1chrGenome(self):
method test_equality_3chrGenome (line 80) | def test_equality_3chrGenome(self):
method test_gpos_inbounds (line 96) | def test_gpos_inbounds(self):
method test_chr_pos_inbounds (line 107) | def test_chr_pos_inbounds(self):
method test_unique_gpos (line 119) | def test_unique_gpos(self):
method test_unique_chr_pos (line 130) | def test_unique_chr_pos(self):
class TestVcfContigHeaderParsing (line 141) | class TestVcfContigHeaderParsing(unittest.TestCase):
method test_simple_contig_header (line 144) | def test_simple_contig_header(self):
method test_contig_header_with_assembly (line 149) | def test_contig_header_with_assembly(self):
method test_contig_header_with_multiple_extra_attrs (line 154) | def test_contig_header_with_multiple_extra_attrs(self):
method test_multiple_contigs_mixed_formats (line 159) | def test_multiple_contigs_mixed_formats(self):
method test_contig_header_without_length_raises (line 168) | def test_contig_header_without_length_raises(self):
method test_invalid_header_raises (line 172) | def test_invalid_header_raises(self):
class TestVcfReaderPositions (line 177) | class TestVcfReaderPositions(unittest.TestCase):
method setUp (line 180) | def setUp(self):
method test_sample_names (line 188) | def test_sample_names(self):
method test_get_one_base (line 195) | def test_get_one_base(self):
method test_get_positions_edges (line 204) | def test_get_positions_edges(self):
method test_get_range_edges (line 210) | def test_get_range_edges(self):
FILE: tests/unit/classify/fixtures.py
function krona (line 10) | def krona():
function db_type (line 17) | def db_type(request):
function krona_db (line 22) | def krona_db(request, tmpdir_module, krona, db_type):
function taxonomy_db (line 37) | def taxonomy_db(request, tmpdir_module, db_type):
FILE: tests/unit/classify/test_integration_kb.py
function kb_inputs (line 20) | def kb_inputs():
function kb_bam (line 33) | def kb_bam(tmp_path_factory, kb_inputs):
function kb_count_result (line 45) | def kb_count_result(tmp_path_factory, kb_inputs, kb_bam):
function kb_extract_result (line 71) | def kb_extract_result(tmp_path_factory, kb_inputs, kb_bam):
function kb_ref_result (line 92) | def kb_ref_result(tmp_path_factory, kb_inputs):
function _run_metagenomics (line 111) | def _run_metagenomics(parser_fn, argv, cwd=None):
function test_kb_help_reports_usage (line 124) | def test_kb_help_reports_usage():
function test_kb_count_produces_h5ad (line 132) | def test_kb_count_produces_h5ad(kb_count_result):
function test_kb_extract_yields_expected_reads (line 138) | def test_kb_extract_yields_expected_reads(kb_extract_result):
function test_kb_ref_builds_index (line 142) | def test_kb_ref_builds_index(kb_ref_result):
FILE: tests/unit/classify/test_integration_kraken2.py
function is_gz_file (line 22) | def is_gz_file(filepath):
function input_bam (line 32) | def input_bam(db_type):
function kraken2_tool (line 37) | def kraken2_tool():
function db_type (line 44) | def db_type(request):
function kraken2_db (line 49) | def kraken2_db(request, tmpdir_module, kraken2_tool, db_type):
function test_kraken2 (line 65) | def test_kraken2(kraken2_db, input_bam):
function test_kraken2_krona (line 94) | def test_kraken2_krona(kraken2_db, krona_db, input_bam):
function test_kraken2_on_empty (line 121) | def test_kraken2_on_empty(kraken2_db, input_bam):
FILE: tests/unit/classify/test_integration_taxon_filter.py
class TestDepleteHuman (line 22) | class TestDepleteHuman(TestCaseWithTmp):
method setUp (line 32) | def setUp(self):
method test_deplete_human (line 45) | def test_deplete_human(self):
method test_deplete_human_aligned_input (line 76) | def test_deplete_human_aligned_input(self):
method test_deplete_empty (line 106) | def test_deplete_empty(self):
FILE: tests/unit/classify/test_kmer_utils.py
class TestCommandHelp (line 29) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 31) | def test_help_parser_for_each_command(self):
function _seq_as_str (line 40) | def _seq_as_str(s): # pylint: disable=invalid-name
function _yield_seq_recs (line 48) | def _yield_seq_recs(seq_file):
function _list_seq_recs (line 59) | def _list_seq_recs(seq_file):
function _yield_seqs_as_strs (line 64) | def _yield_seqs_as_strs(seqs):
function _list_seqs_as_strs (line 75) | def _list_seqs_as_strs(seqs):
function _getargs (line 80) | def _getargs(args, valid_args):
function _strip_mate_num (line 85) | def _strip_mate_num(rec_id):
class KmcPy (line 92) | class KmcPy(object):
method _revcomp (line 100) | def _revcomp(self, kmer):
method _canonicalize (line 105) | def _canonicalize(self, kmer):
method _compute_kmers_iter (line 109) | def _compute_kmers_iter(self, seq_strs, kmer_size, single_strand, **ig...
method _compute_kmers (line 131) | def _compute_kmers(self, *args, **kw):
method compute_kmer_counts (line 139) | def compute_kmer_counts(self, seq_files, kmer_size, min_occs, max_occs,
method _filter_kmer_counts (line 149) | def _filter_kmer_counts(self, counts, min_occs=None, max_occs=None, co...
method filter_reads (line 158) | def filter_reads(self, db_kmer_counts, in_reads, kmer_size, single_str...
method binary_op (line 216) | def binary_op(self, op, kmer_counts_1, kmer_counts_2, result_counter_c...
function _inp (line 237) | def _inp(fname):
function _stringify (line 241) | def _stringify(arg):
function _do_build_kmer_db (line 245) | def _do_build_kmer_db(t_dir, val_cache, seq_files, kmer_db_opts):
function dict_module (line 294) | def dict_module():
function kmer_db_fixture (line 298) | def kmer_db_fixture(request, tmpdir_module, dict_module):
function kmer_db_fixture2 (line 302) | def kmer_db_fixture2(request, tmpdir_module, dict_module):
function test_build_kmer_db (line 307) | def test_build_kmer_db(kmer_db_fixture):
function _test_build_kmer_db (line 310) | def _test_build_kmer_db(kmer_db_fixture):
function test_build_kmer_db_combo (line 346) | def test_build_kmer_db_combo(kmer_db_fixture):
function _test_filter_reads (line 352) | def _test_filter_reads(kmer_db_fixture, reads_file, filter_opts, tmpdir_...
function test_filter_with_empty_db (line 392) | def test_filter_with_empty_db(kmer_db_fixture, reads_file, filter_opts, ...
function test_filter_reads (line 401) | def test_filter_reads(kmer_db_fixture, reads_file, filter_opts, tmpdir_f...
function test_kmer_set_counts (line 407) | def test_kmer_set_counts(kmer_db_fixture, tmpdir_function, set_to_val):
function test_kmers_binary_op (line 417) | def test_kmers_binary_op(kmer_db_fixture, kmer_db_fixture2, op, tmpdir_f...
FILE: tests/unit/classify/test_metagenomics.py
class TestCommandHelp (line 29) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 31) | def test_help_parser_for_each_command(self):
class TestKronaCalls (line 37) | class TestKronaCalls(TestCaseWithTmp):
method setUp (line 39) | def setUp(self):
method test_krona_import_taxonomy (line 48) | def test_krona_import_taxonomy(self):
function taxa_db_simple (line 58) | def taxa_db_simple():
function taxa_db (line 66) | def taxa_db(parents, names, ranks):
function parents (line 75) | def parents():
function names (line 90) | def names():
function ranks (line 108) | def ranks():
function simple_m8 (line 125) | def simple_m8():
function test_tree_level_lookup (line 131) | def test_tree_level_lookup(parents):
function test_push_up_tree_hits (line 141) | def test_push_up_tree_hits(parents):
function test_parents_to_children (line 160) | def test_parents_to_children(parents):
function test_rank_code (line 165) | def test_rank_code():
function test_blast_records (line 172) | def test_blast_records(simple_m8):
function test_blast_lca (line 182) | def test_blast_lca(taxa_db_simple, simple_m8):
function test_paired_query_id (line 201) | def test_paired_query_id():
function test_translate_gi_to_tax_id (line 224) | def test_translate_gi_to_tax_id(taxa_db_simple):
function test_ancestor_lookup (line 234) | def test_ancestor_lookup(taxa_db_simple):
function test_kraken_dfs_report (line 238) | def test_kraken_dfs_report(taxa_db):
function test_coverage_lca (line 255) | def test_coverage_lca(taxa_db):
function test_krakenuniq (line 263) | def test_krakenuniq():
class TestBamFilter (line 276) | class TestBamFilter(TestCaseWithTmp):
method test_bam_filter_simple (line 277) | def test_bam_filter_simple(self):
method test_bam_filter_by_tax_id (line 298) | def test_bam_filter_by_tax_id(self):
FILE: tests/unit/classify/test_taxon_filter.py
class TestCommandHelp (line 34) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 36) | def test_help_parser_for_each_command(self):
class TestFilterLastal (line 43) | class TestFilterLastal(TestCaseWithTmp):
method setUp (line 45) | def setUp(self):
method test_filter_lastal_bam_polio (line 54) | def test_filter_lastal_bam_polio(self):
method test_lastal_empty_input (line 63) | def test_lastal_empty_input(self):
method test_lastal_empty_output (line 73) | def test_lastal_empty_output(self):
method test_lastal_unbuilt_db (line 84) | def test_lastal_unbuilt_db(self):
class TestBmtagger (line 97) | class TestBmtagger(TestCaseWithTmp):
method setUp (line 105) | def setUp(self):
method test_deplete_bmtagger_bam (line 112) | def test_deplete_bmtagger_bam(self):
method test_deplete_bmtagger_fasta_db (line 122) | def test_deplete_bmtagger_fasta_db(self):
method test_deplete_bmtagger_tar_db (line 132) | def test_deplete_bmtagger_tar_db(self):
method test_bmtagger_empty_input (line 145) | def test_bmtagger_empty_input(self):
method test_bmtagger_empty_output (line 153) | def test_bmtagger_empty_output(self):
class TestBlastnDbBuild (line 163) | class TestBlastnDbBuild(TestCaseWithTmp):
method test_blastn_db_build (line 165) | def test_blastn_db_build(self):
method test_blastn_db_build_gz (line 193) | def test_blastn_db_build_gz(self):
class TestBmtaggerDbBuild (line 238) | class TestBmtaggerDbBuild(TestCaseWithTmp):
method test_bmtagger_db_build (line 240) | def test_bmtagger_db_build(self):
method test_bmtagger_db_build_gz (line 277) | def test_bmtagger_db_build_gz(self):
class TestLastalDbBuild (line 312) | class TestLastalDbBuild(TestCaseWithTmp):
method test_lastal_db_build (line 314) | def test_lastal_db_build(self):
class TestDepleteBlastnBam (line 346) | class TestDepleteBlastnBam(TestCaseWithTmp):
method setUp (line 355) | def setUp(self):
method test_deplete_blastn_bam (line 381) | def test_deplete_blastn_bam(self):
method test_deplete_blastn_bam_chunked (line 402) | def test_deplete_blastn_bam_chunked(self):
method test_blastn_empty_input (line 423) | def test_blastn_empty_input(self):
method test_blastn_empty_output (line 434) | def test_blastn_empty_output(self):
class TestDepleteMinimap2Bam (line 446) | class TestDepleteMinimap2Bam(TestCaseWithTmp):
method setUp (line 452) | def setUp(self):
method test_deplete_minimap2_bam (line 459) | def test_deplete_minimap2_bam(self):
method test_minimap2_empty_input (line 470) | def test_minimap2_empty_input(self):
method test_minimap2_empty_output (line 482) | def test_minimap2_empty_output(self):
class TestDepletePipeline (line 495) | class TestDepletePipeline(TestCaseWithTmp):
method setUp (line 500) | def setUp(self):
method test_deplete_pipeline_with_minimap (line 506) | def test_deplete_pipeline_with_minimap(self):
method test_deplete_pipeline_empty_minimap_dbs (line 531) | def test_deplete_pipeline_empty_minimap_dbs(self):
FILE: tests/unit/classify/test_taxonomy.py
function test_taxonomy_subset_zaire (line 8) | def test_taxonomy_subset_zaire(request, tmpdir_factory):
FILE: tests/unit/classify/test_tools_kb_python.py
function kb_tool (line 13) | def kb_tool():
function kb_inputs (line 19) | def kb_inputs():
function test_build_invokes_kb_ref_with_expected_arguments (line 32) | def test_build_invokes_kb_ref_with_expected_arguments(kb_tool, kb_inputs):
function test_classify_runs_kb_count_single_end_from_bam (line 51) | def test_classify_runs_kb_count_single_end_from_bam(kb_tool, kb_inputs):
function test_classify_runs_kb_count_with_fastq_input (line 100) | def test_classify_runs_kb_count_with_fastq_input(kb_tool, kb_inputs):
function test_classify_returns_early_when_bam_is_empty (line 125) | def test_classify_returns_early_when_bam_is_empty(kb_tool, kb_inputs):
FILE: tests/unit/classify/test_tools_kma.py
function kma_tool (line 13) | def kma_tool():
function kma_inputs (line 19) | def kma_inputs():
function test_build_invokes_kma_index_with_expected_arguments (line 29) | def test_build_invokes_kma_index_with_expected_arguments(kma_tool, kma_i...
function test_classify_single_end_from_bam (line 41) | def test_classify_single_end_from_bam(kma_tool, kma_inputs):
function test_classify_paired_end_from_bam (line 84) | def test_classify_paired_end_from_bam(kma_tool, kma_inputs):
function test_classify_returns_early_when_bam_is_empty (line 127) | def test_classify_returns_early_when_bam_is_empty(kma_tool, kma_inputs):
FILE: tests/unit/classify/test_tools_krona.py
class TestToolKrona (line 13) | class TestToolKrona(TestCaseWithTmp):
method setUp (line 15) | def setUp(self):
method test_import_taxonomy (line 26) | def test_import_taxonomy(self):
method test_create_db (line 51) | def test_create_db(self):
FILE: tests/unit/core/test_conftest.py
function test_monkeypatch_function_result (line 13) | def test_monkeypatch_function_result(monkeypatch_function_result):
FILE: tests/unit/core/test_file_utils.py
class TestCommandHelp (line 16) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 17) | def test_help_parser_for_each_command(self):
class TestTarballMerger (line 22) | class TestTarballMerger(TestCaseWithTmp):
method setUp (line 23) | def setUp(self):
method test_simple_merge (line 30) | def test_simple_merge(self):
class TestTsvJoin (line 41) | class TestTsvJoin(TestCaseWithTmp):
method test_join (line 43) | def test_join(self):
FILE: tests/unit/core/test_illumina.py
class TestCommandHelp (line 23) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 25) | def test_help_parser_for_each_command(self):
class TestSampleSheet (line 31) | class TestSampleSheet(TestCaseWithTmp):
method test_miseq (line 33) | def test_miseq(self):
method test_broad_platform (line 39) | def test_broad_platform(self):
method test_walkup_submission_no_header_no_lf (line 47) | def test_walkup_submission_no_header_no_lf(self):
method test_walkup_submission (line 53) | def test_walkup_submission(self):
method test_walkup_submission_no_lf (line 59) | def test_walkup_submission_no_lf(self):
method test_tabfile (line 65) | def test_tabfile(self):
method test_dup_index_collapse_at_init (line 71) | def test_dup_index_collapse_at_init(self):
method test_dup_index_collapse (line 81) | def test_dup_index_collapse(self):
method test_tabfile_win_endings (line 113) | def test_tabfile_win_endings(self):
method test_gz_tabfile_win_endings (line 119) | def test_gz_tabfile_win_endings(self):
method test_tabfile_macos9_endings (line 125) | def test_tabfile_macos9_endings(self):
method test_tabfile_extras (line 131) | def test_tabfile_extras(self):
method test_tabfile_extras_win (line 137) | def test_tabfile_extras_win(self):
method test_blank_line_in_tabular_section (line 143) | def test_blank_line_in_tabular_section(self):
method test_picard_block (line 149) | def test_picard_block(self):
method test_rev_comp_barcode_values (line 155) | def test_rev_comp_barcode_values(self):
method test_rev_comp_barcode_value_barcode2_at_load (line 162) | def test_rev_comp_barcode_value_barcode2_at_load(self):
method test_rev_comp_barcode_values_specified_at_load_two_columns (line 170) | def test_rev_comp_barcode_values_specified_at_load_two_columns(self):
method test_rev_comp_barcode_values_specified_one_column (line 179) | def test_rev_comp_barcode_values_specified_one_column(self):
method test_rev_comp_barcode_values_specified_two_columns (line 190) | def test_rev_comp_barcode_values_specified_two_columns(self):
method test_rev_comp_barcode_values_undo (line 201) | def test_rev_comp_barcode_values_undo(self):
method test_rev_comp_barcode_values_not_inplace (line 210) | def test_rev_comp_barcode_values_not_inplace(self):
class TestRunInfo (line 220) | class TestRunInfo(TestCaseWithTmp):
method test_miseq (line 222) | def test_miseq(self):
method test_hiseq (line 234) | def test_hiseq(self):
method test_novaseq (line 246) | def test_novaseq(self):
method test_nextseq550 (line 258) | def test_nextseq550(self):
method test_novaseq_x_plus (line 270) | def test_novaseq_x_plus(self):
method test_nextseq_1000_2000_p1 (line 282) | def test_nextseq_1000_2000_p1(self):
method test_novel_tile_count_but_known_fcid (line 294) | def test_novel_tile_count_but_known_fcid(self):
method test_novel_fcid_but_known_tile_count (line 299) | def test_novel_fcid_but_known_tile_count(self):
method test_novel_tile_count_and_fcid (line 304) | def test_novel_tile_count_and_fcid(self):
class TestIlluminaDir (line 310) | class TestIlluminaDir(TestCaseWithTmp):
method test_directory (line 312) | def test_directory(self):
method test_tarball_normal (line 318) | def test_tarball_normal(self):
method test_tarball_indented (line 327) | def test_tarball_indented(self):
method test_tarball_sample_sheet (line 332) | def test_tarball_sample_sheet(self):
method test_tarball_uncompressed (line 341) | def test_tarball_uncompressed(self):
method test_tarball_deep_dir_tree (line 347) | def test_tarball_deep_dir_tree(self):
method test_zip_archive (line 353) | def test_zip_archive(self):
method test_tarball_run_info (line 359) | def test_tarball_run_info(self):
method test_tarball_fail_missing_data (line 371) | def test_tarball_fail_missing_data(self):
class TestDifficultSampleNames (line 379) | class TestDifficultSampleNames(TestCaseWithTmp):
method test_paired_1 (line 381) | def test_paired_1(self):
method test_inline_commas_strings (line 401) | def test_inline_commas_strings(self):
class TestIlluminaBarcodeHelper (line 417) | class TestIlluminaBarcodeHelper(TestCaseWithTmp):
method test_one_correction (line 418) | def test_one_correction(self):
method test_ambiguous (line 432) | def test_ambiguous(self):
method test_single_index_run (line 446) | def test_single_index_run(self):
method test_single_index_i5_only_run (line 460) | def test_single_index_i5_only_run(self):
method test_few_assigned (line 474) | def test_few_assigned(self):
class TestMiseqToBam (line 483) | class TestMiseqToBam(TestCaseWithTmp):
method test_paired_1 (line 485) | def test_paired_1(self):
method test_paired_2 (line 505) | def test_paired_2(self):
method test_paired_custom_seq_center (line 525) | def test_paired_custom_seq_center(self):
method test_fail_missing_pair (line 545) | def test_fail_missing_pair(self):
method test_fail_backwards_pair (line 553) | def test_fail_backwards_pair(self):
method test_fail_mismatched_pair (line 562) | def test_fail_mismatched_pair(self):
method test_fail_oob_index (line 571) | def test_fail_oob_index(self):
method test_fail_bad_format (line 580) | def test_fail_bad_format(self):
class TestSplitcodeDemuxIntegration (line 596) | class TestSplitcodeDemuxIntegration(TestCaseWithTmp):
method setUp (line 607) | def setUp(self):
method create_test_bam_with_inline_barcodes (line 612) | def create_test_bam_with_inline_barcodes(self, output_bam, barcode_rea...
method create_expected_output_bam (line 660) | def create_expected_output_bam(self, output_bam, sample_name, barcode,...
method test_splitcode_demux_basic (line 717) | def test_splitcode_demux_basic(self):
class TestParseIlluminaFastqFilename (line 827) | class TestParseIlluminaFastqFilename(unittest.TestCase):
method test_standard_dragen_format_r1 (line 834) | def test_standard_dragen_format_r1(self):
method test_standard_dragen_format_r2 (line 848) | def test_standard_dragen_format_r2(self):
method test_sample_name_with_underscores (line 857) | def test_sample_name_with_underscores(self):
method test_different_flowcells (line 870) | def test_different_flowcells(self):
method test_different_lane_numbers (line 883) | def test_different_lane_numbers(self):
method test_different_sample_numbers (line 891) | def test_different_sample_numbers(self):
method test_different_chunk_numbers (line 902) | def test_different_chunk_numbers(self):
method test_with_full_path (line 912) | def test_with_full_path(self):
method test_without_gz_extension (line 921) | def test_without_gz_extension(self):
method test_simple_format_without_flowcell (line 929) | def test_simple_format_without_flowcell(self):
method test_simple_format_r2 (line 945) | def test_simple_format_r2(self):
method test_malformed_invalid_format (line 955) | def test_malformed_invalid_format(self):
method test_empty_filename (line 962) | def test_empty_filename(self):
method test_malformed_missing_read_number (line 967) | def test_malformed_missing_read_number(self):
method test_index_reads_not_supported (line 974) | def test_index_reads_not_supported(self):
class TestNormalizeBarcode (line 985) | class TestNormalizeBarcode(unittest.TestCase):
method test_uppercase_conversion (line 988) | def test_uppercase_conversion(self):
method test_whitespace_trimming (line 994) | def test_whitespace_trimming(self):
method test_combined_normalization (line 1000) | def test_combined_normalization(self):
method test_already_normalized (line 1005) | def test_already_normalized(self):
method test_empty_string (line 1010) | def test_empty_string(self):
method test_valid_characters_only (line 1015) | def test_valid_characters_only(self):
method test_invalid_characters (line 1023) | def test_invalid_characters(self):
method test_mixed_case_with_n (line 1038) | def test_mixed_case_with_n(self):
method test_typical_illumina_barcodes (line 1043) | def test_typical_illumina_barcodes(self):
method test_none_input (line 1055) | def test_none_input(self):
method test_non_string_input (line 1060) | def test_non_string_input(self):
class TestBarcodeOrientationAutoDetection (line 1069) | class TestBarcodeOrientationAutoDetection(unittest.TestCase):
method test_direct_match (line 1081) | def test_direct_match(self):
method test_barcode2_revcomp_match (line 1094) | def test_barcode2_revcomp_match(self):
method test_no_match_returns_empty (line 1107) | def test_no_match_returns_empty(self):
method test_single_barcode_matching (line 1119) | def test_single_barcode_matching(self):
method test_case_insensitive_matching (line 1129) | def test_case_insensitive_matching(self):
method test_multiple_samples_same_outer_barcodes (line 1139) | def test_multiple_samples_same_outer_barcodes(self):
method test_barcode2_revcomp_with_multiple_samples (line 1152) | def test_barcode2_revcomp_with_multiple_samples(self):
method test_n_wildcard_in_fastq_barcode (line 1165) | def test_n_wildcard_in_fastq_barcode(self):
method test_n_wildcard_in_barcode2 (line 1182) | def test_n_wildcard_in_barcode2(self):
method test_n_wildcard_with_revcomp (line 1194) | def test_n_wildcard_with_revcomp(self):
method test_multiple_n_wildcards (line 1212) | def test_multiple_n_wildcards(self):
method test_high_n_fraction_returns_empty (line 1223) | def test_high_n_fraction_returns_empty(self):
method test_high_n_fraction_in_bc2_returns_empty (line 1235) | def test_high_n_fraction_in_bc2_returns_empty(self):
method test_exactly_50_percent_n_allowed (line 1247) | def test_exactly_50_percent_n_allowed(self):
method test_ambiguous_n_match_returns_empty (line 1267) | def test_ambiguous_n_match_returns_empty(self):
method test_n_in_samplesheet_does_not_match_with_strict (line 1280) | def test_n_in_samplesheet_does_not_match_with_strict(self):
method test_n_in_samplesheet_matches_with_fuzzy (line 1297) | def test_n_in_samplesheet_matches_with_fuzzy(self):
method test_n_wildcard_no_false_positives (line 1311) | def test_n_wildcard_no_false_positives(self):
class TestBarcodeFuzzyMatching (line 1325) | class TestBarcodeFuzzyMatching(unittest.TestCase):
method test_exact_match_zero_mismatches (line 1328) | def test_exact_match_zero_mismatches(self):
method test_one_mismatch_allowed (line 1333) | def test_one_mismatch_allowed(self):
method test_two_mismatches_rejected_at_threshold_1 (line 1338) | def test_two_mismatches_rejected_at_threshold_1(self):
method test_two_mismatches_allowed_at_threshold_2 (line 1343) | def test_two_mismatches_allowed_at_threshold_2(self):
method test_n_not_counted_as_mismatch (line 1348) | def test_n_not_counted_as_mismatch(self):
method test_n_plus_real_mismatch (line 1353) | def test_n_plus_real_mismatch(self):
method test_length_mismatch (line 1359) | def test_length_mismatch(self):
method test_zero_max_mismatches_is_strict (line 1363) | def test_zero_max_mismatches_is_strict(self):
method test_all_n_matches_anything (line 1374) | def test_all_n_matches_anything(self):
class TestFuzzyMatchInOrientation (line 1380) | class TestFuzzyMatchInOrientation(unittest.TestCase):
method test_fuzzy_match_one_mismatch_bc1 (line 1383) | def test_fuzzy_match_one_mismatch_bc1(self):
method test_fuzzy_match_one_mismatch_bc2 (line 1394) | def test_fuzzy_match_one_mismatch_bc2(self):
method test_fuzzy_match_rejected_at_zero_tolerance (line 1404) | def test_fuzzy_match_rejected_at_zero_tolerance(self):
method test_fuzzy_prefers_exact_over_fuzzy (line 1415) | def test_fuzzy_prefers_exact_over_fuzzy(self):
method test_fuzzy_ambiguous_same_distance (line 1427) | def test_fuzzy_ambiguous_same_distance(self):
method test_fuzzy_match_with_revcomp (line 1440) | def test_fuzzy_match_with_revcomp(self):
class TestSamplesheetAuthoritativeBarcodes (line 1454) | class TestSamplesheetAuthoritativeBarcodes(unittest.TestCase):
method test_matched_bc_contains_samplesheet_values (line 1457) | def test_matched_bc_contains_samplesheet_values(self):
method test_matched_bc_with_n_contains_samplesheet_values (line 1468) | def test_matched_bc_with_n_contains_samplesheet_values(self):
method test_matched_bc_with_mismatch_contains_samplesheet_values (line 1480) | def test_matched_bc_with_mismatch_contains_samplesheet_values(self):
method test_matched_bc_with_revcomp_contains_samplesheet_values (line 1492) | def test_matched_bc_with_revcomp_contains_samplesheet_values(self):
class TestConsensusBarcodeFromFastq (line 1507) | class TestConsensusBarcodeFromFastq(unittest.TestCase):
method setUp (line 1510) | def setUp(self):
method tearDown (line 1513) | def tearDown(self):
method _write_fastq (line 1516) | def _write_fastq(self, path, reads):
method test_identical_reads (line 1530) | def test_identical_reads(self):
method test_consensus_resolves_single_n (line 1542) | def test_consensus_resolves_single_n(self):
method test_consensus_majority_vote (line 1553) | def test_consensus_majority_vote(self):
method test_consensus_all_n_stays_n (line 1565) | def test_consensus_all_n_stays_n(self):
method test_empty_fastq_returns_none (line 1576) | def test_empty_fastq_returns_none(self):
method test_single_barcode_no_plus (line 1585) | def test_single_barcode_no_plus(self):
method test_single_read (line 1597) | def test_single_read(self):
method test_consensus_resolves_mismatch (line 1608) | def test_consensus_resolves_mismatch(self):
method test_consensus_tie_returns_n (line 1621) | def test_consensus_tie_returns_n(self):
method test_unparseable_headers_raises (line 1634) | def test_unparseable_headers_raises(self):
class TestParseBarcode (line 1648) | class TestParseBarcode(unittest.TestCase):
method test_dual_index (line 1651) | def test_dual_index(self):
method test_single_index (line 1658) | def test_single_index(self):
method test_invalid_header (line 1665) | def test_invalid_header(self):
class TestBuildRunInfoJson (line 1671) | class TestBuildRunInfoJson(TestCaseWithTmp):
method test_build_with_all_parameters (line 1674) | def test_build_with_all_parameters(self):
method test_build_with_minimal_parameters (line 1706) | def test_build_with_minimal_parameters(self):
method test_integer_to_string_conversion (line 1731) | def test_integer_to_string_conversion(self):
method test_consistency_with_existing_output_schema (line 1750) | def test_consistency_with_existing_output_schema(self):
method test_none_values_handled (line 1777) | def test_none_values_handled(self):
class TestIlluminaMetadata (line 1796) | class TestIlluminaMetadata(TestCaseWithTmp):
method setUp (line 1811) | def setUp(self):
method test_runinfo_xml_parsing (line 1826) | def test_runinfo_xml_parsing(self):
method test_samplesheet_parsing (line 1841) | def test_samplesheet_parsing(self):
method test_metadata_json_generation (line 1859) | def test_metadata_json_generation(self):
method test_metadata_generation_with_optional_params (line 1929) | def test_metadata_generation_with_optional_params(self):
method test_metadata_consistency_with_existing_demux (line 1954) | def test_metadata_consistency_with_existing_demux(self):
method test_invalid_runinfo_path (line 1988) | def test_invalid_runinfo_path(self):
method test_invalid_samplesheet_path (line 1997) | def test_invalid_samplesheet_path(self):
method test_illumina_metadata_via_parser (line 2006) | def test_illumina_metadata_via_parser(self):
method test_three_barcode_samplesheet (line 2052) | def test_three_barcode_samplesheet(self):
method test_mixed_two_and_three_barcode_samplesheet (line 2125) | def test_mixed_two_and_three_barcode_samplesheet(self):
method test_three_barcode_barcode_uniqueness (line 2191) | def test_three_barcode_barcode_uniqueness(self):
method test_optional_lane_parameter (line 2241) | def test_optional_lane_parameter(self):
method test_optional_lane_backwards_compatibility (line 2293) | def test_optional_lane_backwards_compatibility(self):
method test_metadata_with_append_run_id (line 2338) | def test_metadata_with_append_run_id(self):
method test_metadata_without_append_run_id (line 2383) | def test_metadata_without_append_run_id(self):
method test_metadata_append_run_id_via_parser (line 2420) | def test_metadata_append_run_id_via_parser(self):
method test_illumina_metadata_runinfo_only (line 2458) | def test_illumina_metadata_runinfo_only(self):
method test_illumina_metadata_sample_output_requires_samplesheet (line 2493) | def test_illumina_metadata_sample_output_requires_samplesheet(self):
method test_illumina_metadata_optional_samplesheet_via_parser (line 2520) | def test_illumina_metadata_optional_samplesheet_via_parser(self):
class TestSplitcodeDemuxFastqs (line 2553) | class TestSplitcodeDemuxFastqs(TestCaseWithTmp):
method setUp (line 2578) | def setUp(self):
method test_parse_fastq_filename_from_test_data (line 2596) | def test_parse_fastq_filename_from_test_data(self):
method test_barcode_normalization_on_samplesheet (line 2608) | def test_barcode_normalization_on_samplesheet(self):
method test_basic_demux_workflow (line 2627) | def test_basic_demux_workflow(self):
method test_barcode_matching_perfect_match (line 2672) | def test_barcode_matching_perfect_match(self):
method test_unmatched_barcodes (line 2710) | def test_unmatched_barcodes(self):
method test_empty_barcode_sample (line 2750) | def test_empty_barcode_sample(self):
method test_output_schema_consistency (line 2792) | def test_output_schema_consistency(self):
method test_multi_pool_samplesheet_collapsibility_check (line 2830) | def test_multi_pool_samplesheet_collapsibility_check(self):
method test_fastq_filename_parsing (line 2894) | def test_fastq_filename_parsing(self):
method test_two_barcode_sample_bypass_splitcode (line 2925) | def test_two_barcode_sample_bypass_splitcode(self):
method test_splitcode_demux_fastqs_via_parser (line 2984) | def test_splitcode_demux_fastqs_via_parser(self):
method test_i5_reverse_complement_3bc_demux (line 3020) | def test_i5_reverse_complement_3bc_demux(self):
method test_splitcode_demux_3bc_with_n_bases_in_bc2_and_i5_rc (line 3102) | def test_splitcode_demux_3bc_with_n_bases_in_bc2_and_i5_rc(self):
method test_no_barcode_match_produces_zero_bams (line 3193) | def test_no_barcode_match_produces_zero_bams(self):
method test_append_run_id_3bc (line 3264) | def test_append_run_id_3bc(self):
method test_append_run_id_2bc (line 3308) | def test_append_run_id_2bc(self):
method test_append_run_id_requires_flowcell (line 3345) | def test_append_run_id_requires_flowcell(self):
method test_splitcode_demux_fastqs_metadata_output (line 3369) | def test_splitcode_demux_fastqs_metadata_output(self):
method test_splitcode_demux_fastqs_metadata_lane_from_fastq (line 3426) | def test_splitcode_demux_fastqs_metadata_lane_from_fastq(self):
method test_splitcode_demux_fastqs_metadata_run_matches_bam (line 3462) | def test_splitcode_demux_fastqs_metadata_run_matches_bam(self):
method test_splitcode_demux_fastqs_2bc_metadata (line 3504) | def test_splitcode_demux_fastqs_2bc_metadata(self):
class TestMergeDemuxMetrics (line 3555) | class TestMergeDemuxMetrics(TestCaseWithTmp):
method test_merge_demux_metrics_via_parser (line 3562) | def test_merge_demux_metrics_via_parser(self):
FILE: tests/unit/core/test_read_utils.py
class TestCommandHelp (line 26) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 28) | def test_help_parser_for_each_command(self):
class TestBwamemIdxstats (line 34) | class TestBwamemIdxstats(TestCaseWithTmp):
method setUp (line 36) | def setUp(self):
method test_bwamem_idxstats (line 45) | def test_bwamem_idxstats(self):
method test_bwamem_idxstats_with_filtering (line 55) | def test_bwamem_idxstats_with_filtering(self):
method test_bwamem_idxstats_no_bam_output (line 74) | def test_bwamem_idxstats_no_bam_output(self):
class TestMinimap2Idxstats (line 83) | class TestMinimap2Idxstats(TestCaseWithTmp):
method setUp (line 86) | def setUp(self):
method test_minimap2_idxstats (line 92) | def test_minimap2_idxstats(self):
method test_minimap2_idxstats_with_readlist (line 104) | def test_minimap2_idxstats_with_readlist(self):
class TestFastqBam (line 129) | class TestFastqBam(TestCaseWithTmp):
method test_fastq_bam (line 132) | def test_fastq_bam(self):
method test_fastq_to_bam_empty_inputs (line 195) | def test_fastq_to_bam_empty_inputs(self):
class TestRmdupUnaligned (line 243) | class TestRmdupUnaligned(TestCaseWithTmp):
method test_mvicuna_canned_input (line 245) | def test_mvicuna_canned_input(self):
method test_mvicuna_empty_input (line 259) | def test_mvicuna_empty_input(self):
method test_cdhit_canned_input (line 270) | def test_cdhit_canned_input(self):
method test_cdhit_empty_input (line 284) | def test_cdhit_empty_input(self):
class TestReadIdStore (line 295) | class TestReadIdStore(TestCaseWithTmp):
method test_add_from_fastq_paired (line 298) | def test_add_from_fastq_paired(self):
method test_add_from_fastq_single_end (line 314) | def test_add_from_fastq_single_end(self):
method test_deduplication (line 327) | def test_deduplication(self):
method test_write_to_file (line 340) | def test_write_to_file(self):
method test_write_to_file_with_downsampling (line 361) | def test_write_to_file_with_downsampling(self):
method test_empty_fastq (line 382) | def test_empty_fastq(self):
method test_add_single (line 394) | def test_add_single(self):
method test_extend (line 407) | def test_extend(self):
method test_extend_generator (line 421) | def test_extend_generator(self):
method test_contains (line 430) | def test_contains(self):
method test_iter (line 439) | def test_iter(self):
method test_delitem (line 448) | def test_delitem(self):
method test_discard (line 465) | def test_discard(self):
method test_shrink_to_subsample_basic (line 479) | def test_shrink_to_subsample_basic(self):
method test_shrink_to_subsample_larger_than_store (line 496) | def test_shrink_to_subsample_larger_than_store(self):
method test_shrink_to_subsample_randomness (line 507) | def test_shrink_to_subsample_randomness(self):
method test_filter_bam_by_ids_include (line 530) | def test_filter_bam_by_ids_include(self):
method test_filter_bam_by_ids_exclude (line 555) | def test_filter_bam_by_ids_exclude(self):
method test_filter_bam_by_ids_header_preserved (line 582) | def test_filter_bam_by_ids_header_preserved(self):
method test_filter_bam_by_ids_empty_input (line 619) | def test_filter_bam_by_ids_empty_input(self):
method test_filter_bam_by_ids_empty_store_include (line 633) | def test_filter_bam_by_ids_empty_store_include(self):
method test_filter_bam_by_ids_empty_store_exclude (line 652) | def test_filter_bam_by_ids_empty_store_exclude(self):
class TestRmdupBbnorm (line 669) | class TestRmdupBbnorm(TestCaseWithTmp):
method setUp (line 672) | def setUp(self):
method test_bbnorm_canned_input (line 676) | def test_bbnorm_canned_input(self):
method test_bbnorm_empty_input (line 690) | def test_bbnorm_empty_input(self):
method test_bbnorm_multi_library (line 700) | def test_bbnorm_multi_library(self):
method test_bbnorm_single_end (line 715) | def test_bbnorm_single_end(self):
method test_bbnorm_min_input_reads_skip (line 730) | def test_bbnorm_min_input_reads_skip(self):
method test_bbnorm_min_input_reads_process (line 744) | def test_bbnorm_min_input_reads_process(self):
method test_bbnorm_max_output_reads_downsample (line 760) | def test_bbnorm_max_output_reads_downsample(self):
class TestMvicuna (line 780) | class TestMvicuna(TestCaseWithTmp):
method test_mvicuna (line 793) | def test_mvicuna(self):
class TestAlignAndFix (line 813) | class TestAlignAndFix(TestCaseWithTmp):
method setUp (line 814) | def setUp(self):
method test_novoalign (line 821) | def test_novoalign(self):
method test_bwa (line 824) | def test_bwa(self):
method test_minimap2 (line 827) | def test_minimap2(self):
method simple_execution (line 830) | def simple_execution(self, aligner):
method test_empty_reads (line 839) | def test_empty_reads(self):
method test_dup_marker_sambamba (line 847) | def test_dup_marker_sambamba(self):
method test_dup_marker_picard_explicit (line 861) | def test_dup_marker_picard_explicit(self):
method test_dup_marker_default_is_sambamba (line 875) | def test_dup_marker_default_is_sambamba(self):
method test_align_and_fix_full_sambamba_pipeline (line 881) | def test_align_and_fix_full_sambamba_pipeline(self):
class TestDownsampleBams (line 900) | class TestDownsampleBams(TestCaseWithTmp):
method setUp (line 901) | def setUp(self):
method test_normalization_to_lowest_cardinality (line 915) | def test_normalization_to_lowest_cardinality(self):
method test_downsample_to_target_count (line 929) | def test_downsample_to_target_count(self):
method test_downsample_to_target_count_without_subdir (line 942) | def test_downsample_to_target_count_without_subdir(self):
method test_downsample_with_dedup_after (line 954) | def test_downsample_with_dedup_after(self):
method test_downsample_with_dedup_before (line 968) | def test_downsample_with_dedup_before(self):
method test_downsample_to_too_large_target_count (line 981) | def test_downsample_to_too_large_target_count(self):
class TestTrimRmdupSubsamp (line 991) | class TestTrimRmdupSubsamp(TestCaseWithTmp):
method test_subsamp_empty (line 998) | def test_subsamp_empty(self):
method test_subsamp_small_50 (line 1008) | def test_subsamp_small_50(self):
method test_subsamp_small_90 (line 1018) | def test_subsamp_small_90(self):
method test_subsamp_small_200 (line 1029) | def test_subsamp_small_200(self):
method test_subsamp_big_500 (line 1039) | def test_subsamp_big_500(self):
FILE: tests/unit/core/test_tools.py
function stub_conda (line 18) | def stub_conda():
function tool_class (line 23) | def tool_class(request):
function test_tool_install (line 27) | def test_tool_install(tool_class):
FILE: tests/unit/core/test_tools_bbmap.py
class TestToolBBMap (line 15) | class TestToolBBMap(TestCaseWithTmp):
method setUp (line 17) | def setUp(self):
method test_align (line 23) | def test_align(self):
method test_bbnorm_paired_interleaved (line 36) | def test_bbnorm_paired_interleaved(self):
method test_bbnorm_single_end (line 71) | def test_bbnorm_single_end(self):
FILE: tests/unit/core/test_tools_bwa.py
class TestToolBwa (line 13) | class TestToolBwa(TestCaseWithTmp):
method setUp (line 15) | def setUp(self):
method test_working_bam (line 30) | def test_working_bam(self):
method test_corrupt_bam (line 40) | def test_corrupt_bam(self):
FILE: tests/unit/core/test_tools_fastqc.py
class HTMLValidator (line 14) | class HTMLValidator(HTMLParser):
method __init__ (line 16) | def __init__(self):
method error (line 21) | def error(self, message):
class TestToolFastQC (line 26) | class TestToolFastQC(TestCaseWithTmp):
method _validate_html (line 28) | def _validate_html(self, html_path):
method _validate_zip (line 40) | def _validate_zip(self, zip_path):
method test_fastqc_nonempty_bam (line 50) | def test_fastqc_nonempty_bam(self):
method test_fastqc_empty_bam (line 78) | def test_fastqc_empty_bam(self):
method test_fastqc_without_zip (line 113) | def test_fastqc_without_zip(self):
FILE: tests/unit/core/test_tools_minimap2.py
class TestToolMinimap2 (line 15) | class TestToolMinimap2(TestCaseWithTmp):
method setUp (line 17) | def setUp(self):
method test_human_bam (line 25) | def test_human_bam(self):
method test_ebola_bam (line 31) | def test_ebola_bam(self):
method test_corrupt_bam (line 38) | def test_corrupt_bam(self):
class TestMinimap2Idxstats (line 45) | class TestMinimap2Idxstats(TestCaseWithTmp):
method setUp (line 48) | def setUp(self):
method test_idxstats_basic (line 55) | def test_idxstats_basic(self):
method test_idxstats_with_readlist (line 85) | def test_idxstats_with_readlist(self):
method test_idxstats_no_readlist (line 118) | def test_idxstats_no_readlist(self):
method test_idxstats_empty_input (line 133) | def test_idxstats_empty_input(self):
method test_idxstats_empty_reference (line 151) | def test_idxstats_empty_reference(self):
method test_idxstats_multi_reference (line 165) | def test_idxstats_multi_reference(self):
FILE: tests/unit/core/test_tools_novoalign.py
class TestToolNovoalign (line 21) | class TestToolNovoalign(TestCaseWithTmp):
method setUp (line 23) | def setUp(self):
method test_index (line 29) | def test_index(self):
method test_align (line 39) | def test_align(self):
method test_align_filter (line 51) | def test_align_filter(self):
method test_multi_read_groups (line 63) | def test_multi_read_groups(self):
method test_multi_read_groups_filter (line 130) | def test_multi_read_groups_filter(self):
FILE: tests/unit/core/test_tools_picard.py
class TestToolPicard (line 18) | class TestToolPicard(TestCaseWithTmp):
method test_fasta_index (line 20) | def test_fasta_index(self):
method test_messy_fasta_index (line 39) | def test_messy_fasta_index(self):
method test_sam_downsample (line 56) | def test_sam_downsample(self):
method test_revert_bam_empty_input (line 72) | def test_revert_bam_empty_input(self):
FILE: tests/unit/core/test_tools_sambamba.py
class TestToolSambamba (line 19) | class TestToolSambamba(TestCaseWithTmp):
method setUp (line 22) | def setUp(self):
method test_sambamba_installed (line 27) | def test_sambamba_installed(self):
method test_version (line 32) | def test_version(self):
class TestSambambaSort (line 40) | class TestSambambaSort(TestCaseWithTmp):
method setUp (line 43) | def setUp(self):
method test_sort_coordinate (line 48) | def test_sort_coordinate(self):
method test_sort_queryname (line 64) | def test_sort_queryname(self):
method test_sort_with_threads (line 80) | def test_sort_with_threads(self):
class TestSambambaIndex (line 94) | class TestSambambaIndex(TestCaseWithTmp):
method setUp (line 97) | def setUp(self):
method test_index_basic (line 102) | def test_index_basic(self):
method test_index_with_threads (line 119) | def test_index_with_threads(self):
method test_index_file_exists (line 130) | def test_index_file_exists(self):
class TestSambambaMerge (line 149) | class TestSambambaMerge(TestCaseWithTmp):
method setUp (line 152) | def setUp(self):
method test_merge_two_bams (line 157) | def test_merge_two_bams(self):
method test_merge_preserves_reads (line 175) | def test_merge_preserves_reads(self):
class TestSambambaFlagstat (line 195) | class TestSambambaFlagstat(TestCaseWithTmp):
method setUp (line 198) | def setUp(self):
method test_flagstat_basic (line 203) | def test_flagstat_basic(self):
method test_flagstat_returns_dict (line 212) | def test_flagstat_returns_dict(self):
method test_flagstat_empty_bam (line 221) | def test_flagstat_empty_bam(self):
class TestSambambaMarkdup (line 231) | class TestSambambaMarkdup(TestCaseWithTmp):
method setUp (line 234) | def setUp(self):
method test_markdup_basic (line 239) | def test_markdup_basic(self):
method test_markdup_with_threads (line 260) | def test_markdup_with_threads(self):
method test_markdup_empty_input (line 272) | def test_markdup_empty_input(self):
method test_markdup_removes_duplicates (line 287) | def test_markdup_removes_duplicates(self):
FILE: tests/unit/core/test_tools_samtools.py
class TestToolSamtools (line 17) | class TestToolSamtools(TestCaseWithTmp):
method test_count_bam (line 19) | def test_count_bam(self):
method test_fasta_index (line 24) | def test_fasta_index(self):
method test_messy_fasta_index (line 35) | def test_messy_fasta_index(self):
method test_isEmpty (line 51) | def test_isEmpty(self):
method test_sam_downsample (line 58) | def test_sam_downsample(self):
method test_filterByCigarString (line 73) | def test_filterByCigarString(self):
method test_bam2fa (line 89) | def test_bam2fa(self):
class TestSamtoolsImport (line 100) | class TestSamtoolsImport(TestCaseWithTmp):
method test_import_paired_fastq_basic (line 103) | def test_import_paired_fastq_basic(self):
method test_import_with_full_read_group (line 123) | def test_import_with_full_read_group(self):
method test_import_rg_defaults (line 157) | def test_import_rg_defaults(self):
method test_import_read_flags (line 185) | def test_import_read_flags(self):
method test_import_read_rg_tag (line 205) | def test_import_read_rg_tag(self):
method test_import_empty_fastq (line 225) | def test_import_empty_fastq(self):
method test_import_multithreaded (line 259) | def test_import_multithreaded(self):
FILE: tests/unit/core/test_tools_splitcode.py
class TestSplitcodeLookupTable (line 21) | class TestSplitcodeLookupTable(TestCaseWithTmp):
method test_basic_single_pool (line 24) | def test_basic_single_pool(self):
method test_zero_reads_pool (line 87) | def test_zero_reads_pool(self):
method test_multi_pool (line 119) | def test_multi_pool(self):
method test_append_run_id (line 164) | def test_append_run_id(self):
method test_unique_library_ids_per_sample (line 194) | def test_unique_library_ids_per_sample(self):
class TestSplitcodeIntegration (line 255) | class TestSplitcodeIntegration(TestCaseWithTmp):
method setUp (line 263) | def setUp(self):
method tearDown (line 268) | def tearDown(self):
method create_test_bam_with_inline_barcodes (line 272) | def create_test_bam_with_inline_barcodes(self, output_bam, num_reads=1...
method test_run_splitcode_on_pool_basic (line 309) | def test_run_splitcode_on_pool_basic(self):
method test_run_splitcode_on_pool_empty_output (line 388) | def test_run_splitcode_on_pool_empty_output(self):
method test_splitcode_json_output_format (line 469) | def test_splitcode_json_output_format(self):
method test_splitcode_output_file_locations (line 537) | def test_splitcode_output_file_locations(self):
method test_splitcode_barcode_trimming (line 600) | def test_splitcode_barcode_trimming(self):
method test_splitcode_with_append_run_id (line 661) | def test_splitcode_with_append_run_id(self):
class TestGenerateSplitcodeConfigAndKeepFiles (line 728) | class TestGenerateSplitcodeConfigAndKeepFiles(TestCaseWithTmp):
method setUp (line 736) | def setUp(self):
method tearDown (line 740) | def tearDown(self):
method test_basic_single_pool (line 744) | def test_basic_single_pool(self):
method test_variable_barcode_lengths (line 802) | def test_variable_barcode_lengths(self):
method test_hamming_distance_parameter (line 826) | def test_hamming_distance_parameter(self):
method test_r1_trim_bp_right_of_barcode (line 859) | def test_r1_trim_bp_right_of_barcode(self):
method test_multi_pool_filtering (line 892) | def test_multi_pool_filtering(self):
method test_empty_pool_raises_error (line 925) | def test_empty_pool_raises_error(self):
method test_config_keep_id_matching (line 944) | def test_config_keep_id_matching(self):
method test_output_prefix_path_construction (line 981) | def test_output_prefix_path_construction(self):
method test_complex_realistic_scenario (line 1006) | def test_complex_realistic_scenario(self):
class TestSplitcodeSummaryJSONErrorHandling (line 1064) | class TestSplitcodeSummaryJSONErrorHandling(TestCaseWithTmp):
method setUp (line 1072) | def setUp(self):
method tearDown (line 1076) | def tearDown(self):
method test_missing_json_file_provides_debugging_info (line 1080) | def test_missing_json_file_provides_debugging_info(self):
method test_malformed_json_provides_file_preview (line 1116) | def test_malformed_json_provides_file_preview(self):
class TestConvertSplitcodeMetricsToPicardStyle (line 1147) | class TestConvertSplitcodeMetricsToPicardStyle(TestCaseWithTmp):
method setUp (line 1166) | def setUp(self):
method tearDown (line 1170) | def tearDown(self):
method test_basic_conversion (line 1174) | def test_basic_conversion(self):
method test_missing_required_column_raises_error (line 1234) | def test_missing_required_column_raises_error(self):
method test_empty_required_column_raises_error (line 1261) | def test_empty_required_column_raises_error(self):
method test_barcode_combination_without_inline (line 1288) | def test_barcode_combination_without_inline(self):
method test_library_name_fallback (line 1322) | def test_library_name_fallback(self):
method test_library_name_uses_barcode_name_fallback (line 1353) | def test_library_name_uses_barcode_name_fallback(self):
method test_combine_innerbarcode_unmatched_false (line 1383) | def test_combine_innerbarcode_unmatched_false(self):
method test_combine_innerbarcode_unmatched_true_with_report_within_pools_true (line 1417) | def test_combine_innerbarcode_unmatched_true_with_report_within_pools_...
method test_combine_innerbarcode_unmatched_true_with_report_within_pools_false (line 1460) | def test_combine_innerbarcode_unmatched_true_with_report_within_pools_...
method test_stats_computation_global (line 1498) | def test_stats_computation_global(self):
method test_stats_computation_within_pools (line 1536) | def test_stats_computation_within_pools(self):
method test_pct_matches_computation (line 1583) | def test_pct_matches_computation(self):
method test_normalized_matches_excludes_all_n (line 1621) | def test_normalized_matches_excludes_all_n(self):
method test_custom_demux_function_name (line 1657) | def test_custom_demux_function_name(self):
method test_zero_reads_handling (line 1686) | def test_zero_reads_handling(self):
FILE: tests/unit/core/test_tools_trimmomatic.py
class TestTrimmomatic (line 12) | class TestTrimmomatic(TestCaseWithTmp):
method test_trimmomatic_paired (line 14) | def test_trimmomatic_paired(self):
method test_trimmomatic_paired_maxinfo (line 30) | def test_trimmomatic_paired_maxinfo(self):
method test_trimmomatic_single (line 46) | def test_trimmomatic_single(self):
FILE: tests/unit/core/test_util_file.py
function testTempFiles (line 29) | def testTempFiles():
function test_check_paths (line 71) | def test_check_paths(tmpdir):
function test_uncompressed_file_type (line 93) | def test_uncompressed_file_type():
function test_string_to_file_name (line 108) | def test_string_to_file_name():
function compressed_input_file (line 129) | def compressed_input_file(request):
function expected_plaintext (line 133) | def expected_plaintext():
function test_decompress_shutil_copyfileobj (line 136) | def test_decompress_shutil_copyfileobj(request, expected_plaintext, comp...
function test_decompress_line_by_line (line 142) | def test_decompress_line_by_line(request, expected_plaintext, compressed...
class TestExtractTarball (line 150) | class TestExtractTarball(TestCaseWithTmp):
method setUp (line 151) | def setUp(self):
method test_simple_extract (line 159) | def test_simple_extract(self):
class TestTarballMerger (line 167) | class TestTarballMerger(TestCaseWithTmp):
method setUp (line 168) | def setUp(self):
method test_simple_merge (line 177) | def test_simple_merge(self):
method test_merge_with_extract (line 197) | def test_merge_with_extract(self):
method test_merge_with_extract_repack_from_disk (line 227) | def test_merge_with_extract_repack_from_disk(self):
method test_piped_in_merge (line 258) | def test_piped_in_merge(self):
method capsys (line 282) | def capsys(self, capsys):
method test_piped_out_merge (line 285) | def test_piped_out_merge(self):
method test_merge_piped_in_and_out (line 309) | def test_merge_piped_in_and_out(self):
FILE: tests/unit/core/test_util_misc.py
class TestRunAndPrint (line 13) | class TestRunAndPrint(unittest.TestCase):
method testBasicRunSuccess (line 15) | def testBasicRunSuccess(self):
method testBasicRunFailDontCare (line 33) | def testBasicRunFailDontCare(self):
method testBasicRunFailAndCatch (line 43) | def testBasicRunFailAndCatch(self):
class TestFeatureSorter (line 58) | class TestFeatureSorter(unittest.TestCase):
method testBasicSortingWithOverlap (line 60) | def testBasicSortingWithOverlap(self):
method testBasicIntervalsWithOverlap (line 75) | def testBasicIntervalsWithOverlap(self):
method testDisjointAndOverlappingIntervals (line 92) | def testDisjointAndOverlappingIntervals(self):
method testMultiChrWindowedFeatures (line 112) | def testMultiChrWindowedFeatures(self):
method testOpenWindowRight (line 128) | def testOpenWindowRight(self):
method testOpenWindowLeft (line 145) | def testOpenWindowLeft(self):
method testMultiChrWithPayloadIntervals (line 161) | def testMultiChrWithPayloadIntervals(self):
class TestConfigIncludes (line 184) | class TestConfigIncludes(unittest.TestCase):
method testConfigIncludes (line 186) | def testConfigIncludes(self):
function test_as_type (line 213) | def test_as_type():
function test_subdict (line 251) | def test_subdict(iter_d, iter_subset):
function test_chk (line 275) | def test_chk():
function test_available_cpu_count (line 285) | def test_available_cpu_count(monkeypatch_function_result):
FILE: tests/unit/phylo/test_interhost.py
class TestCommandHelp (line 14) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 16) | def test_help_parser_for_each_command(self):
function makeTempFasta (line 22) | def makeTempFasta(seqs):
class TestCoordMapper (line 30) | class TestCoordMapper(TestCaseWithTmp):
method setUp (line 32) | def setUp(self):
method test_no_indels (line 39) | def test_no_indels(self):
method test_map_indels (line 44) | def test_map_indels(self):
method test_side_param (line 56) | def test_side_param(self):
method test_oob_errors (line 66) | def test_oob_errors(self):
method test_invalid_pos_error (line 72) | def test_invalid_pos_error(self):
method test_invalid_chr_error (line 78) | def test_invalid_chr_error(self):
method test_unequal_genomes_error (line 84) | def test_unequal_genomes_error(self):
method test_map_chr_only (line 91) | def test_map_chr_only(self):
class TestCoordMapperMultipleSeqs (line 100) | class TestCoordMapperMultipleSeqs(TestCaseWithTmp):
method setUp (line 102) | def setUp(self):
method test_legacy_call (line 117) | def test_legacy_call(self):
method test_no_indels (line 125) | def test_no_indels(self):
method test_map_indels (line 130) | def test_map_indels(self):
method test_side_param (line 142) | def test_side_param(self):
method test_oob_errors (line 152) | def test_oob_errors(self):
method test_invalid_pos_error (line 158) | def test_invalid_pos_error(self):
method test_invalid_chr_error (line 164) | def test_invalid_chr_error(self):
method test_unequal_genomes_error (line 168) | def test_unequal_genomes_error(self):
method test_duplicate_chr_names_error (line 175) | def test_duplicate_chr_names_error(self):
method test_multiple_input_genomes (line 182) | def test_multiple_input_genomes(self):
method test_single_chr_error (line 191) | def test_single_chr_error(self):
method test_map_chr_only (line 198) | def test_map_chr_only(self):
class TestSpecificAlignments (line 208) | class TestSpecificAlignments(TestCaseWithTmp):
method test_basic_alignment (line 214) | def test_basic_alignment(self):
method test_unequal_len (line 219) | def test_unequal_len(self):
method test_no_real_bases_in_sample (line 225) | def test_no_real_bases_in_sample(self):
method test_no_real_bases_at_position (line 238) | def test_no_real_bases_at_position(self):
method test_aligned_gaps (line 250) | def test_aligned_gaps(self):
method test_adjacent_gaps (line 264) | def test_adjacent_gaps(self):
method test_one_real_base (line 291) | def test_one_real_base(self):
method test_exactly_two_pairs (line 300) | def test_exactly_two_pairs(self):
FILE: tests/unit/phylo/test_intrahost.py
class TestCommandHelp (line 29) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 31) | def test_help_parser_for_each_command(self):
function makeTempFasta (line 37) | def makeTempFasta(seqs):
class MockVphaserOutput (line 45) | class MockVphaserOutput:
method __init__ (line 50) | def __init__(self):
method add_snp (line 54) | def add_snp(self, chrom, pos, acounts, libinfo=None):
method add_indel (line 77) | def add_indel(self, chrom, pos, acounts, libinfo=None):
method __iter__ (line 119) | def __iter__(self):
class TestIntrahostFilters (line 132) | class TestIntrahostFilters(unittest.TestCase):
method test_single_strand_bias_hard_filter (line 141) | def test_single_strand_bias_hard_filter(self):
class TestPerSample (line 156) | class TestPerSample(TestCaseWithTmp):
method test_vphaser_one_sample (line 163) | def test_vphaser_one_sample(self):
class VcfMergeRunner (line 185) | class VcfMergeRunner:
method __init__ (line 189) | def __init__(self, ref_genome=None):
method set_ref (line 199) | def set_ref(self, genome):
method add_genome (line 202) | def add_genome(self, sample_name, genome):
method add_snp (line 209) | def add_snp(self, sample, chrom, pos, acounts, libinfo=None):
method add_indel (line 216) | def add_indel(self, sample, chrom, pos, acounts, libinfo=None):
method dump_isnv_tmp_file (line 225) | def dump_isnv_tmp_file(self, sample):
method run_and_get_vcf_rows (line 232) | def run_and_get_vcf_rows(self, retree=1, omit_samplenames=False):
method multi_align_samples (line 249) | def multi_align_samples(self, retree=1):
class TestVcfMerge (line 293) | class TestVcfMerge(TestCaseWithTmp):
method test_empty_output (line 300) | def test_empty_output(self):
method test_headers_with_two_samps (line 319) | def test_headers_with_two_samps(self):
method test_simple_snps (line 337) | def test_simple_snps(self):
method test_simple_snps_guess_samplenames (line 365) | def test_simple_snps_guess_samplenames(self):
method test_snps_with_varying_read_depth (line 393) | def test_snps_with_varying_read_depth(self):
method test_snps_downstream_of_indels (line 421) | def test_snps_downstream_of_indels(self):
method test_sample_major_allele_not_ref_allele (line 439) | def test_sample_major_allele_not_ref_allele(self):
method test_backfill_sample_from_assembly (line 453) | def test_backfill_sample_from_assembly(self):
method test_simple_insertions (line 475) | def test_simple_insertions(self):
method test_simple_deletions (line 502) | def test_simple_deletions(self):
method test_deletion_spans_deletion (line 528) | def test_deletion_spans_deletion(self):
method test_insertion_spans_deletion (line 545) | def test_insertion_spans_deletion(self):
method test_snp_within_insertion (line 570) | def test_snp_within_insertion(self):
method test_2snps_within_insertion_same_sample (line 594) | def test_2snps_within_insertion_same_sample(self):
method test_deletion_past_end_of_some_consensus (line 609) | def test_deletion_past_end_of_some_consensus(self):
method test_snp_past_end_of_some_consensus (line 639) | def test_snp_past_end_of_some_consensus(self):
method test_deletion_within_insertion (line 660) | def test_deletion_within_insertion(self):
method test_insertion_within_insertion (line 690) | def test_insertion_within_insertion(self):
method test_indel_collapse (line 714) | def test_indel_collapse(self):
FILE: tests/unit/phylo/test_ncbi.py
class TestCommandHelp (line 19) | class TestCommandHelp(unittest.TestCase):
method test_help_parser_for_each_command (line 21) | def test_help_parser_for_each_command(self):
class TestFeatureReader (line 26) | class TestFeatureReader(TestCaseWithTmp):
method setUp (line 27) | def setUp(self):
method test_read_seq_id_simple (line 31) | def test_read_seq_id_simple(self):
method test_read_seq_id_different_fnames (line 37) | def test_read_seq_id_different_fnames(self):
method test_read_seq_id_refseq (line 43) | def test_read_seq_id_refseq(self):
method test_read_seq_id_ddbj (line 47) | def test_read_seq_id_ddbj(self):
method test_seq_location_str_format (line 51) | def test_seq_location_str_format(self):
class TestFeatureTransfer (line 63) | class TestFeatureTransfer(TestCaseWithTmp):
method setUp (line 64) | def setUp(self):
method test_synthetic_feature_table (line 68) | def test_synthetic_feature_table(self):
method test_synthetic_feature_table_oob_clip (line 85) | def test_synthetic_feature_table_oob_clip(self):
method test_synthetic_feature_table_ignore_ambiguous_edges (line 111) | def test_synthetic_feature_table_ignore_ambiguous_edges(self):
method test_severely_truncated_assembly_oob_clip (line 129) | def test_severely_truncated_assembly_oob_clip(self):
method test_partial_symbols_column_placement (line 180) | def test_partial_symbols_column_placement(self):
method test_internal_partials_dropped (line 229) | def test_internal_partials_dropped(self):
method test_lasv_oob_clip (line 264) | def test_lasv_oob_clip(self):
FILE: tests/unit/phylo/test_tools.py
function tool_class (line 10) | def tool_class(request):
function test_tool_install (line 14) | def test_tool_install(tool_class):
FILE: tests/unit/phylo/test_tools_vphaser2.py
class TestVPhaser2 (line 16) | class TestVPhaser2(TestCaseWithTmp):
method test_vphaser2 (line 18) | def test_vphaser2(self):
FILE: tests/unit/phylo/test_util_vcf.py
class StubGenome (line 22) | class StubGenome:
method __init__ (line 28) | def __init__(self, chromlist):
method chrlens (line 33) | def chrlens(self):
class TestGenomePosition (line 37) | class TestGenomePosition(unittest.TestCase):
method test_fail_OOB_get_gpos (line 40) | def test_fail_OOB_get_gpos(self):
method test_fail_OOB_get_chr_pos (line 48) | def test_fail_OOB_get_chr_pos(self):
method test_fail_non_int_pos (line 55) | def test_fail_non_int_pos(self):
method test_spotcheck_edges (line 63) | def test_spotcheck_edges(self):
method test_equality_1chrGenome (line 72) | def test_equality_1chrGenome(self):
method test_equality_3chrGenome (line 80) | def test_equality_3chrGenome(self):
method test_gpos_inbounds (line 96) | def test_gpos_inbounds(self):
method test_chr_pos_inbounds (line 107) | def test_chr_pos_inbounds(self):
method test_unique_gpos (line 119) | def test_unique_gpos(self):
method test_unique_chr_pos (line 130) | def test_unique_chr_pos(self):
class TestVcfReaderPositions (line 141) | class TestVcfReaderPositions(unittest.TestCase):
method setUp (line 144) | def setUp(self):
method test_sample_names (line 152) | def test_sample_names(self):
method test_get_one_base (line 159) | def test_get_one_base(self):
method test_get_positions_edges (line 168) | def test_get_positions_edges(self):
method test_get_range_edges (line 174) | def test_get_range_edges(self):
Copy disabled (too large)
Download .json
Condensed preview — 719 files, each showing path, character count, and a content snippet. Download the .json file for the full structured content (11,765K chars).
[
{
"path": ".agents/skills/claude-on-vertex-ci/SKILL.md",
"chars": 8851,
"preview": "# Claude on Vertex AI from GitHub Actions\n\nReusable infrastructure for invoking Claude (via [`anthropics/claude-code-act"
},
{
"path": ".agents/skills/container-vulns/SKILL.md",
"chars": 5230,
"preview": "# Container Vulnerability Management\n\nGuidance for scanning, triaging, and mitigating container image vulnerabilities\nin"
},
{
"path": ".agents/skills/dsub-batch-jobs/SKILL.md",
"chars": 4698,
"preview": "# Running Batch Jobs on GCP via dsub\n\nUse dsub to run one-off compute jobs on Google Cloud when your analysis requires\nm"
},
{
"path": ".agents/skills/regression-testing/SKILL.md",
"chars": 6295,
"preview": "# Assembly Regression Testing\n\nEnd-to-end regression testing for assembly pipeline changes against Terra submissions.\n\n#"
},
{
"path": ".agents/skills/regression-testing/compare_sample_pair.py",
"chars": 19306,
"preview": "#!/usr/bin/env python3\n\"\"\"Compare assembly outputs between old and new code for a single sample pair.\n\nTakes two GCS URI"
},
{
"path": ".agents/skills/regression-testing/discover_pairs.py",
"chars": 5677,
"preview": "#!/usr/bin/env python3\n\"\"\"Discover comparable old/new sample pairs by crawling GCS Cromwell output directories.\n\nFor eac"
},
{
"path": ".agents/skills/regression-testing/generate_report.py",
"chars": 20993,
"preview": "#!/usr/bin/env python3\n\"\"\"Generate regression testing report with plots from per-sample JSON results.\n\nAggregates all co"
},
{
"path": ".agents/skills/regression-testing/run_vadr.sh",
"chars": 1947,
"preview": "#!/bin/bash\n# Run VADR on a single FASTA file.\n# Inputs (env vars set by dsub):\n# FASTA - input FASTA file (local"
},
{
"path": ".claude/rules/container-vulns.md",
"chars": 295,
"preview": "---\npaths:\n - \"docker/**\"\n - \".trivyignore\"\n - \".trivy-ignore-policy.rego\"\n - \"vulnerability-mitigation-status.md\"\n "
},
{
"path": ".codecov.yml",
"chars": 1065,
"preview": "# Codecov configuration\n# https://docs.codecov.com/docs/codecov-yaml\n\ncoverage:\n status:\n project:\n default:\n "
},
{
"path": ".dockerignore",
"chars": 18,
"preview": "*.pyc\n__pycache__\n"
},
{
"path": ".gitattributes",
"chars": 33,
"preview": "*.rules linguist-language=Python\n"
},
{
"path": ".github/actions/create-manifest/action.yml",
"chars": 2501,
"preview": "name: 'Create Multi-Arch Manifest'\ndescription: 'Create multi-arch Docker manifest'\ninputs:\n ghcr-repo:\n description"
},
{
"path": ".github/actions/pull-with-retry/action.yml",
"chars": 927,
"preview": "name: 'Pull Docker Image with Retry'\ndescription: 'Pull a Docker image with retry logic'\ninputs:\n image:\n descriptio"
},
{
"path": ".github/actions/setup-docker-build/action.yml",
"chars": 928,
"preview": "name: 'Setup Docker Build'\ndescription: 'Checkout, setup buildx, and login to GHCR'\ninputs:\n github-token:\n descript"
},
{
"path": ".github/copilot-instructions.md",
"chars": 1235,
"preview": "# Copilot Instructions\n\nThis file provides guidance to GitHub Copilot when working with code in this repository.\n\n**IMPO"
},
{
"path": ".github/workflows/audit-quay-tags.yml",
"chars": 1930,
"preview": "name: Audit Quay.io Tags\n\non:\n schedule:\n - cron: '0 8 * * 1' # Monday 8:00 UTC\n workflow_dispatch:\n\npermissions: "
},
{
"path": ".github/workflows/cleanup-images.yml",
"chars": 2352,
"preview": "name: Cleanup Feature Branch Images\n\non:\n delete:\n\njobs:\n cleanup-quay-images:\n # Only run for branch deletions (no"
},
{
"path": ".github/workflows/container-scan.yml",
"chars": 16490,
"preview": "name: Scheduled Container Vulnerability Scan\n\non:\n schedule:\n # Weekly scan of main branch mega image every Monday a"
},
{
"path": ".github/workflows/docker.yml",
"chars": 58720,
"preview": "name: Build and Test\n\non:\n push:\n branches:\n - main\n - '**'\n tags:\n - '**'\n pull_request:\n bra"
},
{
"path": ".github/workflows/docs.yml",
"chars": 1149,
"preview": "name: Documentation\n\non:\n push:\n branches:\n - main\n - '**'\n paths:\n - 'docs/**'\n - 'src/viral"
},
{
"path": ".gitignore",
"chars": 1139,
"preview": "VERSION\ntest/output/\n\n# Sphinx documentation\ndocs/_build/\n\n# Mac OSX\n.DS_Store\n\n# Byte-compiled / optimized / DLL files\n"
},
{
"path": ".readthedocs.yml",
"chars": 713,
"preview": "# .readthedocs.yml\n# Read the Docs configuration file\n# See https://docs.readthedocs.io/en/stable/config-file/v2.html fo"
},
{
"path": ".trivy-ignore-policy.rego",
"chars": 15070,
"preview": "#\n# bioinformatics-platform.rego\n#\n# Conservative Trivy ignore policy for bioinformatics container images\n# running on g"
},
{
"path": ".trivyignore",
"chars": 4381,
"preview": "# Trivy ignore file — per-CVE exceptions with justifications\n#\n# This file documents individual CVE exceptions that cann"
},
{
"path": "AGENTS.md",
"chars": 17146,
"preview": "# AGENTS.md\n\nThis document provides guidance for AI assistants (Claude Code, GitHub Copilot, etc.) working on this repos"
},
{
"path": "CLAUDE.md",
"chars": 1217,
"preview": "# CLAUDE.md\n\nThis file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.\n\n**I"
},
{
"path": "LICENSE",
"chars": 1083,
"preview": "MIT License\n\nCopyright (c) 2014-2025 Broad Institute, Inc.\n\nPermission is hereby granted, free of charge, to any person "
},
{
"path": "README.md",
"chars": 5922,
"preview": "# viral-ngs\n\n[](htt"
},
{
"path": "docker/Dockerfile.assemble",
"chars": 3279,
"preview": "# viral-ngs assemble image\n# Builds on core, adds genome assembly tools\n#\n# This provides:\n# - All core bioinformatics t"
},
{
"path": "docker/Dockerfile.baseimage",
"chars": 3555,
"preview": "# viral-ngs baseimage\n# Base image with micromamba, Python, and general utilities for viral-ngs tools\n#\n# This provides "
},
{
"path": "docker/Dockerfile.classify",
"chars": 2136,
"preview": "# viral-ngs classify image\n# Builds on core, adds metagenomic classification tools\n#\n# This provides:\n# - All core bioin"
},
{
"path": "docker/Dockerfile.core",
"chars": 2027,
"preview": "# viral-ngs core image\n# Builds on baseimage, adds core bioinformatics tools and viral-ngs package\n#\n# This provides:\n# "
},
{
"path": "docker/Dockerfile.mega",
"chars": 4025,
"preview": "# viral-ngs mega image\n# Builds on core, adds ALL tools (assembly, classification, phylogenetics)\n#\n# This provides:\n# -"
},
{
"path": "docker/Dockerfile.phylo",
"chars": 3100,
"preview": "# viral-ngs phylo image\n# Builds on core, adds phylogenetic analysis tools\n#\n# This provides:\n# - All core bioinformatic"
},
{
"path": "docker/install-conda-deps.sh",
"chars": 3691,
"preview": "#!/bin/bash\n#\n# Install conda dependencies from one or more requirements files.\n#\n# Usage: install-conda-deps.sh [option"
},
{
"path": "docker/requirements/assemble-x86.txt",
"chars": 338,
"preview": "# Assembly tools - x86-only (ARM64 build has resolver conflicts)\n# These packages are skipped on ARM64 architecture\n# In"
},
{
"path": "docker/requirements/assemble.txt",
"chars": 780,
"preview": "# Conda dependencies for viral-ngs assemble module\n#\n# These are bioinformatics tools used for genome assembly.\n# Instal"
},
{
"path": "docker/requirements/baseimage.txt",
"chars": 942,
"preview": "# Conda dependencies for viral-ngs baseimage\n#\n# These are general-purpose tools installed in the base image that are\n# "
},
{
"path": "docker/requirements/classify-x86.txt",
"chars": 396,
"preview": "# Classification tools - x86-only (no ARM64 builds available)\n# These packages are skipped on ARM64 architecture\n# Insta"
},
{
"path": "docker/requirements/classify.txt",
"chars": 205,
"preview": "# Metagenomic classification tools\n# Note: x86-only packages (bmtagger, kallisto, kb-python) are in classify-x86.txt\nann"
},
{
"path": "docker/requirements/core-x86.txt",
"chars": 421,
"preview": "# x86-only bioinformatics tools\n# These packages are only available on x86_64 architecture (no ARM64 builds in bioconda)"
},
{
"path": "docker/requirements/core.txt",
"chars": 982,
"preview": "# Conda dependencies for viral-ngs core\n#\n# All Python runtime dependencies are installed via conda for faster\n# install"
},
{
"path": "docker/requirements/phylo-x86.txt",
"chars": 242,
"preview": "# Phylogenetic tools - x86-only (no ARM64 builds available)\n# These packages are skipped on ARM64 architecture\n\n# table2"
},
{
"path": "docker/requirements/phylo.txt",
"chars": 129,
"preview": "# Phylogenetic analysis tools\nbamtools>=2.5.3\nlofreq>=2.1.5\nmafft>=7.525\nmummer4>=4.0.1\nmuscle>=3.8,<4\nsnpeff>=5.2\nvphas"
},
{
"path": "docker/scripts/calc_mem.py",
"chars": 5703,
"preview": "#!/usr/bin/env python\n\n\"\"\"Calculate the memory allocated to the process, taking account of cgroups.\nPrint result to stdo"
},
{
"path": "docker/scripts/fasta-trim-terminal-ambigs.pl",
"chars": 11580,
"preview": "#!/usr/bin/env perl\n\nuse strict;\nuse warnings;\nuse Getopt::Long;\nuse Bio::Easel::MSA;\nuse Bio::Easel::SqFile;\nrequire \"s"
},
{
"path": "docs/Makefile",
"chars": 6786,
"preview": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS = --jobs auto\n"
},
{
"path": "docs/assembly.rst",
"chars": 206,
"preview": "assembly.py - viral sequence assembly from NGS reads\n====================================================\n\n.. argparse::"
},
{
"path": "docs/broad_utils.rst",
"chars": 224,
"preview": "broad_utils.py - for data generated at the Broad Institute\n==========================================================\n\n."
},
{
"path": "docs/cmdline.rst",
"chars": 211,
"preview": "Command line tools\n==================\n\n.. toctree::\n\n illumina\n read_utils\n assembly\n taxon_filter\n metagenomi"
},
{
"path": "docs/conf.py",
"chars": 9518,
"preview": "#!/usr/bin/env python3\n# -*- coding: utf-8 -*-\n#\n# viral-ngs documentation build configuration file, created by\n# sphinx"
},
{
"path": "docs/description.rst",
"chars": 167,
"preview": "Description of the methods\n==========================\n\nThis is a base module that provides basic utility functions,\nsome"
},
{
"path": "docs/file_utils.rst",
"chars": 233,
"preview": "file_utils.py - utilities to perform various file manipulations\n========================================================"
},
{
"path": "docs/illumina.rst",
"chars": 178,
"preview": "illumina.py - for raw Illumina outputs\n======================================\n\n.. argparse::\n :module: viral_ngs.ill"
},
{
"path": "docs/index.rst",
"chars": 432,
"preview": ".. viral-ngs documentation master file, created by\n sphinx-quickstart on Fri Jan 16 00:23:17 2015.\n You can adapt th"
},
{
"path": "docs/interhost.rst",
"chars": 228,
"preview": "interhost.py - SNP calling, multi-alignment, and phylogenetics\n========================================================="
},
{
"path": "docs/intrahost.rst",
"chars": 214,
"preview": "intrahost.py - intrahost variant calling and annotation\n=======================================================\n\n.. argp"
},
{
"path": "docs/kmer_utils.rst",
"chars": 216,
"preview": "kmer_utils.py - commands for working with sets of kmers\n=======================================================\n\n.. argp"
},
{
"path": "docs/metagenomics.rst",
"chars": 214,
"preview": "metagenomics.py - utilities for metagenomic analyses\n====================================================\n\n.. argparse::"
},
{
"path": "docs/ncbi.rst",
"chars": 174,
"preview": "ncbi.py - NCBI Genbank and SRA utilities\n========================================\n\n.. argparse::\n :module: viral_ngs"
},
{
"path": "docs/read_utils.rst",
"chars": 228,
"preview": "read_utils.py - utilities that manipulate bam and fastq files\n=========================================================="
},
{
"path": "docs/reports.rst",
"chars": 196,
"preview": "reports.py - produce various metrics and reports\n================================================\n\n.. argparse::\n :mo"
},
{
"path": "docs/requirements.txt",
"chars": 343,
"preview": "jinja2==3.1.4 # https://github.com/readthedocs/readthedocs.org/issues/9037#issuecomment-1077818554\nSphinx==7.4.7 #overri"
},
{
"path": "docs/taxon_filter.rst",
"chars": 218,
"preview": "taxon_filter.py - filter reads by taxonomic membership\n======================================================\n\n.. argpar"
},
{
"path": "pyproject.toml",
"chars": 2545,
"preview": "[build-system]\nrequires = [\"setuptools>=61.0\", \"setuptools-scm\"]\nbuild-backend = \"setuptools.build_meta\"\n\n[project]\nname"
},
{
"path": "src/viral_ngs/__init__.py",
"chars": 714,
"preview": "\"\"\"\nviral-ngs: Consolidated tools for viral NGS data analysis.\n\nThis package provides utilities for viral genome sequenc"
},
{
"path": "src/viral_ngs/assemble/__init__.py",
"chars": 427,
"preview": "\"\"\"\nviral_ngs.assemble - Tool wrappers for genome assembly.\n\nContains wrappers for assembly-related bioinformatics tools"
},
{
"path": "src/viral_ngs/assemble/freebayes.py",
"chars": 1687,
"preview": "'''\n FreeBayes variant caller — a Bayesian genetic variant detector.\n\n Replaces GATK3 UnifiedGenotyper for consens"
},
{
"path": "src/viral_ngs/assemble/gap2seq.py",
"chars": 5841,
"preview": "'''\n Gap2Seq - assembly gap closing tool\n'''\n\nimport itertools\nimport functools\nimport operator\nimport logging\nimport"
},
{
"path": "src/viral_ngs/assemble/mafft.py",
"chars": 5364,
"preview": "'''\n MAFFT - Multiple alignment program for amino acid or nucleotide sequences\n http://mafft.cbrc.jp/alignment/sof"
},
{
"path": "src/viral_ngs/assemble/mummer.py",
"chars": 29162,
"preview": "'''\n The MUMMER aligner\n http://mummer.sourceforge.net/\n'''\n\nimport logging\nimport viral_ngs.core\nimport viral_ngs"
},
{
"path": "src/viral_ngs/assemble/muscle.py",
"chars": 1821,
"preview": "'''\n The MUSCLE aligner\n http://www.drive5.com/muscle\n'''\n\n\nimport viral_ngs.core\nimport viral_ngs.core.file\nimpor"
},
{
"path": "src/viral_ngs/assemble/rasusa.py",
"chars": 1460,
"preview": "'''\n Rasusa — randomly subsample sequencing reads or alignments to a\n specified coverage depth.\n\n https://githu"
},
{
"path": "src/viral_ngs/assemble/skani.py",
"chars": 7615,
"preview": "'''\n SKANI - accurate, fast nucleotide identity calculation for MAGs and databases\n https://github.com/bluenote-15"
},
{
"path": "src/viral_ngs/assemble/spades.py",
"chars": 6449,
"preview": "'''\n Tool wrapper for SPAdes, St. Petersburg Assembler ( http://cab.spbu.ru/software/spades/ )\n'''\n\nimport logging\nim"
},
{
"path": "src/viral_ngs/assemble/vcf.py",
"chars": 18037,
"preview": "'''This gives a number of useful quick methods for dealing with VCF files.\n'''\n\n__author__ = \"dpark@broadinstitute.org\"\n"
},
{
"path": "src/viral_ngs/assemble/wgsim.py",
"chars": 7122,
"preview": "'''\n WGSIM - read simulator for next-generation sequencing\n https://github.com/lh3/wgsim\n'''\n\n__author__ = \"dpark@"
},
{
"path": "src/viral_ngs/assembly.py",
"chars": 81725,
"preview": "#!/usr/bin/env python3\n''' This script contains a number of utilities for viral sequence assembly\n from NGS reads. P"
},
{
"path": "src/viral_ngs/broad_utils.py",
"chars": 4143,
"preview": "#!/usr/bin/env python3\n\"\"\"\nUtilities for getting sequences out of the Broad walk-up sequencing pipeline.\nThese utilities"
},
{
"path": "src/viral_ngs/classify/__init__.py",
"chars": 256,
"preview": "\"\"\"viral_ngs.classify - Metagenomic classification tools and utilities.\"\"\"\n\nfrom . import blast\nfrom . import bmtagger\nf"
},
{
"path": "src/viral_ngs/classify/blast.py",
"chars": 6179,
"preview": "\"Tools in the blast+ suite.\"\n\nimport logging\nimport os\nimport shutil\nimport subprocess\n\nfrom viral_ngs import core\nfrom "
},
{
"path": "src/viral_ngs/classify/bmtagger.py",
"chars": 4412,
"preview": "\"core.Tool for bmtagger.sh.\"\n\nfrom viral_ngs import core\nfrom viral_ngs.core import file\nimport os\nimport logging\nimport"
},
{
"path": "src/viral_ngs/classify/kb.py",
"chars": 22022,
"preview": "'''\nkb_python classification tool\n'''\nimport itertools\nimport glob\nimport logging\nimport os\nimport os.path\nimport shutil"
},
{
"path": "src/viral_ngs/classify/kma.py",
"chars": 4762,
"preview": "'''\nKMA k-mer alignment tool\n'''\nimport logging\nimport os\nimport os.path\nimport shutil\nimport subprocess\n\nfrom viral_ngs"
},
{
"path": "src/viral_ngs/classify/kmc.py",
"chars": 15629,
"preview": "'''\n Tool wrapper for the KMC kmer counter\n ( http://sun.aei.polsl.pl/REFRESH/index.php?page=projects&project=kmc&"
},
{
"path": "src/viral_ngs/classify/kraken2.py",
"chars": 8622,
"preview": "'''\nKRAKEN metagenomics classifier\n'''\nimport collections\nimport concurrent.futures\nimport itertools\nimport logging\nimpo"
},
{
"path": "src/viral_ngs/classify/krona.py",
"chars": 3911,
"preview": "from viral_ngs import core\nimport os.path\nimport subprocess\nimport shutil\nfrom builtins import super\nfrom viral_ngs.core"
},
{
"path": "src/viral_ngs/classify/last.py",
"chars": 5616,
"preview": "\"Tools in the 'last' suite.\"\n\n# built-ins\nimport os\nimport logging\nimport shutil\nimport subprocess\n\n# within this module"
},
{
"path": "src/viral_ngs/classify/taxonomy.py",
"chars": 15819,
"preview": "\"\"\"\nNCBI Taxonomy database utilities.\n\nThis module provides classes and functions for working with NCBI taxonomy data,\ni"
},
{
"path": "src/viral_ngs/core/__init__.py",
"chars": 5625,
"preview": "'''Core module containing Tool base classes, utilities, and bioinformatics tool wrappers.\n\nThis module consolidates what"
},
{
"path": "src/viral_ngs/core/bbmap.py",
"chars": 4528,
"preview": "'''\n Tool wrapper for the BBMap aligner and related tools.\n'''\n\nimport logging\nimport os\nimport os.path\nimport shutil"
},
{
"path": "src/viral_ngs/core/bwa.py",
"chars": 13720,
"preview": "'''\n The BWA aligner.\n\n'''\n\nfrom collections import defaultdict\nimport logging\nimport os\nimport os.path\nimport subpro"
},
{
"path": "src/viral_ngs/core/cdhit.py",
"chars": 1664,
"preview": "'''\nCD-HIT\n'''\nfrom builtins import super\nimport itertools\nimport logging\nimport os\nimport os.path\nimport shlex\nimport s"
},
{
"path": "src/viral_ngs/core/cmd.py",
"chars": 13053,
"preview": "'''This gives a main() function that serves as a nice wrapper\naround other commands and presents the ability to serve up"
},
{
"path": "src/viral_ngs/core/errors.py",
"chars": 299,
"preview": "#!/usr/bin/env python\n\nclass QCError(RuntimeError):\n '''Indicates a failure at a QC step.'''\n \n def __init__(se"
},
{
"path": "src/viral_ngs/core/fastqc.py",
"chars": 2137,
"preview": "'''\n FastQC\n'''\n\nimport logging\nimport os\nimport os.path\nimport shutil\nimport subprocess\nimport sys\nimport zipfile\n\nf"
},
{
"path": "src/viral_ngs/core/file.py",
"chars": 50115,
"preview": "'''This gives a number of useful quick methods for dealing with\ntab-text files and gzipped files, as well as fasta files"
},
{
"path": "src/viral_ngs/core/illumina_indices.py",
"chars": 391231,
"preview": "#!/usr/bin/env python\n\n\"\"\"\n Related only to sequence data within this file: \n \"Oligonucleotide sequences Copyr"
},
{
"path": "src/viral_ngs/core/illumina_utils.py",
"chars": 37206,
"preview": "#!/usr/bin/env python3\n\"\"\"\nUtilities for Illumina data handling, including directory management,\nrun information parsing"
},
{
"path": "src/viral_ngs/core/minimap2.py",
"chars": 18228,
"preview": "'''\n The minimap2 aligner.\n\n'''\n\nimport collections\nimport logging\nimport os\nimport os.path\nimport shutil\nimport subp"
},
{
"path": "src/viral_ngs/core/misc.py",
"chars": 53070,
"preview": "'''A few miscellaneous tools. '''\nimport array\nimport bisect\nimport collections\nimport collections.abc\nimport contextlib"
},
{
"path": "src/viral_ngs/core/mvicuna.py",
"chars": 3954,
"preview": "\"Tool for mvicuna.\"\n\nimport logging\nimport os\nimport subprocess\nimport shutil\n\nfrom . import samtools, picard # was: fr"
},
{
"path": "src/viral_ngs/core/novoalign.py",
"chars": 9049,
"preview": "'''\n Novoalign aligner by Novocraft\n\n This is commercial software that has different licenses depending\n on use"
},
{
"path": "src/viral_ngs/core/picard.py",
"chars": 28593,
"preview": "'''\n Tools in the Picard suite\n'''\n\nimport logging\nimport os\nimport os.path\nimport tempfile\nimport shutil\nimport subp"
},
{
"path": "src/viral_ngs/core/prinseq.py",
"chars": 4308,
"preview": "\"Tool for prinseq.\"\n\nimport logging\nimport os.path\nimport shutil\nimport subprocess\nfrom . import samtools, picard # was"
},
{
"path": "src/viral_ngs/core/priorities.py",
"chars": 7635,
"preview": "#!/usr/bin/env python3\n\"\"\"\nMask initial bases from alignment FASTA\n\"\"\"\nimport argparse\nfrom random import shuffle\nfrom c"
},
{
"path": "src/viral_ngs/core/sambamba.py",
"chars": 9323,
"preview": "'''\n Sambamba - high-performance BAM processing tool.\n\n Sambamba is a tool for processing BAM files that provides "
},
{
"path": "src/viral_ngs/core/samtools.py",
"chars": 19680,
"preview": "'''\n The Samtools package.\n\n TO DO: much of this stuff can be all eliminated by using pysam instead, as\n pysam "
},
{
"path": "src/viral_ngs/core/splitcode.py",
"chars": 56225,
"preview": "import csv\nimport glob\nimport json\nimport logging\nimport os\nimport shutil\nimport subprocess\nimport tempfile\n\nimport matp"
},
{
"path": "src/viral_ngs/core/stats.py",
"chars": 7089,
"preview": "'''A few pure-python statistical tools to avoid the need to install scipy. '''\n__author__ = \"dpark@broadinstitute.org, i"
},
{
"path": "src/viral_ngs/core/trimmomatic.py",
"chars": 3466,
"preview": "\"Tool for trimmomatic.\"\n\nimport logging\nimport os\nimport shutil\nimport subprocess\nfrom . import samtools, picard # was:"
},
{
"path": "src/viral_ngs/core/version.py",
"chars": 6122,
"preview": "#!/usr/bin/python\n''' This gets the git version into python-land\n'''\n\n__author__ = \"dpark@broadinstitute.org\"\n__version_"
},
{
"path": "src/viral_ngs/file_utils.py",
"chars": 10651,
"preview": "#!/usr/bin/env python3\n\"\"\"\nUtilities for dealing with files.\n\"\"\"\n\n__author__ = \"tomkinsc@broadinstitute.org\"\n__commands_"
},
{
"path": "src/viral_ngs/illumina.py",
"chars": 165532,
"preview": "#!/usr/bin/env python3\n\"\"\"\nUtilities for demultiplexing Illumina data.\n\"\"\"\n\n__author__ = \"dpark@broadinstitute.org\"\n__co"
},
{
"path": "src/viral_ngs/interhost.py",
"chars": 9633,
"preview": "#!/usr/bin/env python\n''' This script contains a number of utilities for SNP calling, multi-alignment,\n phylogenetics"
},
{
"path": "src/viral_ngs/intrahost.py",
"chars": 59200,
"preview": "#!/usr/bin/env python\n'''This script contains a number of utilities for intrahost variant calling\nand annotation for vir"
},
{
"path": "src/viral_ngs/kmer_utils.py",
"chars": 10441,
"preview": "#!/usr/bin/env python3\n\n\"\"\"Commands for working with sets of kmers\"\"\"\n\n\nfrom __future__ import print_function\n__author__"
},
{
"path": "src/viral_ngs/metagenomics.py",
"chars": 67314,
"preview": "#!/usr/bin/env python3\n''' This script contains a number of utilities for metagenomic analyses.\n'''\nfrom __future__ impo"
},
{
"path": "src/viral_ngs/ncbi.py",
"chars": 40847,
"preview": "#!/usr/bin/env python\n'''This script contains a number of utilities for submitting our analyses\nto NCBI's Genbank and SR"
},
{
"path": "src/viral_ngs/phylo/__init__.py",
"chars": 498,
"preview": "\"\"\"\nviral_ngs.phylo - Phylogenetic analysis tools and utilities.\n\nThis package contains tool wrappers and utilities for:"
},
{
"path": "src/viral_ngs/phylo/feature_table.py",
"chars": 14530,
"preview": "#!/usr/bin/env python\n\nimport re\nimport os\n\nfrom . import feature_table_types\n\nclass SeqPosition(object):\n def __init"
},
{
"path": "src/viral_ngs/phylo/feature_table_types.py",
"chars": 46591,
"preview": "#!/usr/bin/env python\n\n# This is intended to assist with parsing Sequin-format feature tables.\n# \n# Note: Molecule scope"
},
{
"path": "src/viral_ngs/phylo/genbank.py",
"chars": 8560,
"preview": "#!/usr/bin/python\n\n# built-ins\nimport time\nimport os\nimport re\nimport logging\n\n# third-party\nfrom Bio import Entrez\n\nlog"
},
{
"path": "src/viral_ngs/phylo/mafft.py",
"chars": 5314,
"preview": "'''\n MAFFT - Multiple alignment program for amino acid or nucleotide sequences\n http://mafft.cbrc.jp/alignment/sof"
},
{
"path": "src/viral_ngs/phylo/mummer.py",
"chars": 28816,
"preview": "'''\n The MUMMER aligner\n http://mummer.sourceforge.net/\n'''\n\nimport logging\nfrom viral_ngs import core\nfrom viral_"
},
{
"path": "src/viral_ngs/phylo/muscle.py",
"chars": 1816,
"preview": "'''\n The MUSCLE aligner\n http://www.drive5.com/muscle\n'''\n\n\nfrom viral_ngs import core\nfrom viral_ngs.core import "
},
{
"path": "src/viral_ngs/phylo/snpeff.py",
"chars": 10159,
"preview": "'''\nsnpEff - a tool for annotating genetic consequences of variants in VCF format\nhttp://snpeff.sourceforge.net/\n'''\n\n# "
},
{
"path": "src/viral_ngs/phylo/vcf.py",
"chars": 17417,
"preview": "'''This gives a number of useful quick methods for dealing with VCF files.\n'''\n\n__author__ = \"dpark@broadinstitute.org\"\n"
},
{
"path": "src/viral_ngs/phylo/vphaser2.py",
"chars": 2870,
"preview": "'''\n V-Phaser 2 variant caller\n'''\n\nimport logging\nimport subprocess\nimport os\nimport tempfile\nimport shutil\nimport p"
},
{
"path": "src/viral_ngs/py.typed",
"chars": 0,
"preview": ""
},
{
"path": "src/viral_ngs/read_utils.py",
"chars": 77954,
"preview": "#!/usr/bin/env python3\n\"\"\"\nUtilities for working with sequence reads, such as converting formats and\nfixing mate pairs.\n"
},
{
"path": "src/viral_ngs/reports.py",
"chars": 40141,
"preview": "#!/usr/bin/env python3\n''' Functions to create reports from genomics pipeline data.\n'''\n\n__author__ = \"dpark@broadinstit"
},
{
"path": "src/viral_ngs/taxon_filter.py",
"chars": 41384,
"preview": "#!/usr/bin/env python3\n'''This script contains a number of utilities for filtering NGS reads based\non membership or non-"
},
{
"path": "tests/__init__.py",
"chars": 8964,
"preview": "'''utilities for tests'''\n\n__author__ = \"irwin@broadinstitute.org\"\n\n# built-ins\nimport filecmp\nimport os\nimport platform"
},
{
"path": "tests/conftest.py",
"chars": 6879,
"preview": "import operator\nimport os\nimport shutil\nimport sys\nimport tempfile\nimport time\nimport contextlib\nimport string\nimport in"
},
{
"path": "tests/input/5kb_human_from_chr6.fasta",
"chars": 5123,
"preview": ">human_5000bp random human subsequence\nGAAGTTTGGATTATGATATGCTCATTAATTATAAAACAGACTTCAGTAGTCAGAGAATAG\nTACACATAGGCCATTGAGCA"
},
{
"path": "tests/input/G5012.3.fasta",
"chars": 19196,
"preview": ">G5012.3\nATTTTCCTCTCATTGAAATTTATATCGGAATTTAAATTGAAATTGTTACTGTAATCATAC\nCTGGTTTGTTTCAGAGCCATATCACCAAGATAGAGAACAACCTAGGTCTC"
},
{
"path": "tests/input/README.md",
"chars": 1167,
"preview": "Description of input files:\n\n- ebola.fasta is a collection of Ebolavirus genomes from many Ebolavirus \nspecies (EBOV, SU"
},
{
"path": "tests/input/TestAssembleSpades/clipDb.fasta",
"chars": 18932,
"preview": ">NuGEN\nTCTATAGTTTAGGTAACTTTGTGTTTGA\n>TruSeq3_IndexedAdapter\nAGATCGGAAGAGCACACGTCTGAACTCCAGTCAC\n>TruSeq3_UniversalAdapter"
},
{
"path": "tests/input/TestAssembleSpades/trinity_contigs.fasta",
"chars": 1611,
"preview": ">comp0_c0_seq1 len=381 path=[0:0-380]\nTGAGACTCGGCGTCATCCAGACTTTCTGAGGACGAGAATCCATACTCGGAATTTTGTGAT\nTCCGAGCAATTTGTCTTTAAA"
},
{
"path": "tests/input/TestBmtagger/expected.Match.1.fastq",
"chars": 353,
"preview": "@from_chr1_1/1\nTCAATAAAAAAAAAAAAGAAAGAAAAAAAAATTCTCCTCATTTTTGTTGT\n+\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n@"
},
{
"path": "tests/input/TestBmtagger/expected.Match.2.fastq",
"chars": 353,
"preview": "@from_chr1_1/2\nAATTATATTATTTCTTTGATAATTTCCTCTCCTCTTGTTTCTTTGTTTCT\n+\nABABABABABABABABABABABABABABABABABABABABABABABABAB\n@"
},
{
"path": "tests/input/TestBmtagger/expected.NoMatch.1.fastq",
"chars": 236,
"preview": "@from_ebov_1/1\nGTGTGCTCAGTTGAAAATCCCTTGTCAACACCTAGGTCTTATCACATCAC\n+\nBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\n@"
},
{
"path": "tests/input/TestBmtagger/expected.NoMatch.2.fastq",
"chars": 236,
"preview": "@from_ebov_1/2\nTTATTGTTAAAGGACAGCATTAGTTCACAGTCAAACAAGCAAGATTGAGA\n+\nBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBCBC\n@"
},
{
"path": "tests/input/TestBmtagger/humanChr1Subset.fa",
"chars": 207,
"preview": ">chr1\nTCAATAAAAAAAAAAAAGAAAGAAAAAAAAATTCTCCTCATTTTTGTTGTTTTGAATTTTCAGTGATGAATCTGGAGTGGTTCTTTTTTTATTTAATGTGGTTATTTTTTGAAG"
},
{
"path": "tests/input/TestBmtagger/humanChr9Subset.fa",
"chars": 207,
"preview": ">chr9\nACAAGTGTGCCTTTGAAGGAGGAGGAGGAAGAGAGGAAGAAAGGAGAAGTGAAGACGGAGACTCTCTTTGGGCAAAGCGGGGCTTTCAGCACGTTCGGTGGTACAGCTCTGACC"
},
{
"path": "tests/input/TestBmtagger/in1.fastq",
"chars": 589,
"preview": "@from_chr1_1/1\nTCAATAAAAAAAAAAAAGAAAGAAAAAAAAATTCTCCTCATTTTTGTTGT\n+\nAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n@"
},
{
"path": "tests/input/TestBmtagger/in2.fastq",
"chars": 589,
"preview": "@from_chr1_1/2\nAATTATATTATTTCTTTGATAATTTCCTCTCCTCTTGTTTCTTTGTTTCT\n+\nABABABABABABABABABABABABABABABABABABABABABABABABAB\n@"
},
{
"path": "tests/input/TestBmtaggerDbBuild/expected/TestBmtaggerDbBuild.srprism.idx.md5",
"chars": 33,
"preview": "2e9748024dabadd49dc0862aae5f7180\n"
},
{
"path": "tests/input/TestBmtaggerDbBuild/expected/TestBmtaggerDbBuild.srprism.imp",
"chars": 1269,
"preview": "20\nBundibugyo_ebolavirus_complete_genome\nBundibugyo_ebolavirus_isolate_EboBund-14_2012_complete_genome\nCote_dIvoire_ebol"
},
{
"path": "tests/input/TestBmtaggerDbBuild/expected/TestBmtaggerDbBuild.srprism.map.md5",
"chars": 33,
"preview": "7ecc55c01c27f9569f3926587d5b1743\n"
},
{
"path": "tests/input/TestBmtaggerDbBuild/expected/TestBmtaggerDbBuild.srprism.rmp",
"chars": 0,
"preview": ""
},
{
"path": "tests/input/TestBmtaggerDbBuild/expected/TestBmtaggerDbBuild.srprism.ss.md5",
"chars": 33,
"preview": "143908e3c1d360f5c3f1dae078a8f811\n"
},
{
"path": "tests/input/TestBmtaggerDbBuild/expected/TestBmtaggerDbBuild.srprism.ssa",
"chars": 1,
"preview": "R"
},
{
"path": "tests/input/TestDepleteBlastnBam/expected.sam",
"chars": 368,
"preview": "@HD\tVN:1.4\tSO:queryname\n@RG\tID:A\tPL:9.75\tLB:Alexandria\tSM:FreeSample\tCN:KareemAbdul-Jabbar\nfrom_ebov_1\t77\t*\t0\t0\t*\t*\t0\t0\t"
},
{
"path": "tests/input/TestDepleteBlastnBam/humanChr1Subset.fa",
"chars": 207,
"preview": ">chr1\nTCAATAAAAAAAAAAAAGAAAGAAAAAAAAATTCTCCTCATTTTTGTTGTTTTGAATTTTCAGTGATGAATCTGGAGTGGTTCTTTTTTTATTTAATGTGGTTATTTTTTGAAG"
},
{
"path": "tests/input/TestDepleteBlastnBam/humanChr9Subset.fa",
"chars": 207,
"preview": ">chr9\nACAAGTGTGCCTTTGAAGGAGGAGGAGGAAGAGAGGAAGAAAGGAGAAGTGAAGACGGAGACTCTCTTTGGGCAAAGCGGGGCTTTCAGCACGTTCGGTGGTACAGCTCTGACC"
},
{
"path": "tests/input/TestDepleteHuman/partial_pan-viral-9seqs-with-human-random-subset.fasta",
"chars": 31387,
"preview": ">gi|190684443|gb|EU255973.1| Hepatitis C virus subtype 1a isolate HCV-1a/US/BID-V180/1990, complete genome\nGAGCCATGGCGTT"
},
{
"path": "tests/input/TestDifficultSampleNames/RunInfo.xml",
"chars": 681,
"preview": "<?xml version=\"1.0\"?>\r\n<RunInfo xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema"
},
{
"path": "tests/input/TestDifficultSampleNames/SampleSheet-inline-commas-strings.csv",
"chars": 1859,
"preview": "[Header],,,,,,,,,\r\nIEMFileVersion,4,,,,,,,,\r\nInvestigator Name,Adrianne,,,,,,,,\r\nExperiment Name,Zika run,,,,,,,,\r\nDate,"
},
{
"path": "tests/input/TestDifficultSampleNames/SampleSheet.csv",
"chars": 2408,
"preview": "[Header],,,,,,,,,\r\nIEMFileVersion,4,,,,,,,,\r\nInvestigator Name,James,,,,,,,,\r\nExperiment Name,EBoV HS ,,,,,,,,\r\nDate,8/1"
},
{
"path": "tests/input/TestFastaFetch/JQ610675.1.fa",
"chars": 4071,
"preview": ">JQ610675.1 Orungo virus isolate UGMP 359 segment 1, complete sequence\nGTATAATTAGGAATGGTCATCACTGTGGAAGGTGCGTGGCTGGTGAGAC"
},
{
"path": "tests/input/TestFastaFetch/JQ610675.1.fasta",
"chars": 4071,
"preview": ">JQ610675.1 Orungo virus isolate UGMP 359 segment 1, complete sequence\nGTATAATTAGGAATGGTCATCACTGTGGAAGGTGCGTGGCTGGTGAGAC"
},
{
"path": "tests/input/TestFastaFetch/JQ610676.1.fa",
"chars": 3047,
"preview": ">JQ610676.1 Orungo virus isolate UGMP 359 segment 2, complete sequence\nGTATAATTGATCCGCGATGGCTTTCGAGCGTACGGTAGCCTTAACAAGA"
},
{
"path": "tests/input/TestFastaFetch/JQ610676.1.fasta",
"chars": 3047,
"preview": ">JQ610676.1 Orungo virus isolate UGMP 359 segment 2, complete sequence\nGTATAATTGATCCGCGATGGCTTTCGAGCGTACGGTAGCCTTAACAAGA"
},
{
"path": "tests/input/TestFastaFetch/JQ610677.1.fa",
"chars": 2893,
"preview": ">JQ610677.1 Orungo virus isolate UGMP 359 segment 3, complete sequence\nGTATATATTCCTTCGCGATGGCGCATGCGGATGCTAAAGGAAGTGATCC"
},
{
"path": "tests/input/TestFastaFetch/JQ610677.1.fasta",
"chars": 2893,
"preview": ">JQ610677.1 Orungo virus isolate UGMP 359 segment 3, complete sequence\nGTATATATTCCTTCGCGATGGCGCATGCGGATGCTAAAGGAAGTGATCC"
},
{
"path": "tests/input/TestFastaFetch/JQ610678.1.fa",
"chars": 2057,
"preview": ">JQ610678.1 Orungo virus isolate UGMP 359 segment 4, complete sequence\nGTATATTTCGAAATGGAGCCCTTTGCTGTGCTGCACCTATCAGCCAAGC"
},
{
"path": "tests/input/TestFastaFetch/JQ610678.1.fasta",
"chars": 2057,
"preview": ">JQ610678.1 Orungo virus isolate UGMP 359 segment 4, complete sequence\nGTATATTTCGAAATGGAGCCCTTTGCTGTGCTGCACCTATCAGCCAAGC"
},
{
"path": "tests/input/TestFastaFetch/JQ610679.1.fa",
"chars": 1832,
"preview": ">JQ610679.1 Orungo virus isolate UGMP 359 segment 5, complete sequence\nGTAAAAAGAATCCTCTGCGTTCACTGGGCACGAACATGGATCAATTCTT"
},
{
"path": "tests/input/TestFastaFetch/JQ610679.1.fasta",
"chars": 1832,
"preview": ">JQ610679.1 Orungo virus isolate UGMP 359 segment 5, complete sequence\nGTAAAAAGAATCCTCTGCGTTCACTGGGCACGAACATGGATCAATTCTT"
},
{
"path": "tests/input/TestFastaFetch/JQ610680.1.fa",
"chars": 1700,
"preview": ">JQ610680.1 Orungo virus isolate UGMP 359 segment 6, complete sequence\nGTATAAAATACCTCGGTCATCATGGGCAAATTCGTAAAGGCCCTGAAGA"
},
{
"path": "tests/input/TestFastaFetch/JQ610680.1.fasta",
"chars": 1700,
"preview": ">JQ610680.1 Orungo virus isolate UGMP 359 segment 6, complete sequence\nGTATAAAATACCTCGGTCATCATGGGCAAATTCGTAAAGGCCCTGAAGA"
},
{
"path": "tests/input/TestFastaFetch/JQ610681.1.fa",
"chars": 1255,
"preview": ">JQ610681.1 Orungo virus isolate UGMP 359 segment 7, complete sequence\nGTATAAATATCTAGAGATGGACGCCATTGCGGCACGTGCTCTGTCCGTG"
},
{
"path": "tests/input/TestFastaFetch/JQ610681.1.fasta",
"chars": 1255,
"preview": ">JQ610681.1 Orungo virus isolate UGMP 359 segment 7, complete sequence\nGTATAAATATCTAGAGATGGACGCCATTGCGGCACGTGCTCTGTCCGTG"
},
{
"path": "tests/input/TestFastaFetch/JQ610682.1.fa",
"chars": 1177,
"preview": ">JQ610682.1 Orungo virus isolate UGMP 359 segment 8, complete sequence\nGTAAAAAATCTCCTCTCTTTTCACGATGGCTCAAGAAGTTAAGAGAAAG"
},
{
"path": "tests/input/TestFastaFetch/JQ610682.1.fasta",
"chars": 1177,
"preview": ">JQ610682.1 Orungo virus isolate UGMP 359 segment 8, complete sequence\nGTAAAAAATCTCCTCTCTTTTCACGATGGCTCAAGAAGTTAAGAGAAAG"
},
{
"path": "tests/input/TestFastaFetch/JQ610683.1.fa",
"chars": 1011,
"preview": ">JQ610683.1 Orungo virus isolate UGMP 359 segment 9, complete sequence\nGTATAAAACATCCTCCCATGTCATGTTGGTGTTGGCACCGGGTGATCTC"
},
{
"path": "tests/input/TestFastaFetch/JQ610683.1.fasta",
"chars": 1011,
"preview": ">JQ610683.1 Orungo virus isolate UGMP 359 segment 9, complete sequence\nGTATAAAACATCCTCCCATGTCATGTTGGTGTTGGCACCGGGTGATCTC"
},
{
"path": "tests/input/TestFastaFetch/JQ610684.1.fa",
"chars": 845,
"preview": ">JQ610684.1 Orungo virus isolate UGMP 359 segment 10, complete sequence\nGTATAAAATTCCGTTCGCAATGTACCGTGACTTGCTGAATATTCACAC"
},
{
"path": "tests/input/TestFastaFetch/JQ610684.1.fasta",
"chars": 845,
"preview": ">JQ610684.1 Orungo virus isolate UGMP 359 segment 10, complete sequence\nGTATAAAATTCCGTTCGCAATGTACCGTGACTTGCTGAATATTCACAC"
},
{
"path": "tests/input/TestFastaFetch/orungo.fa",
"chars": 19888,
"preview": ">JQ610675.1 Orungo virus isolate UGMP 359 segment 1, complete sequence\nGTATAATTAGGAATGGTCATCACTGTGGAAGGTGCGTGGCTGGTGAGAC"
},
{
"path": "tests/input/TestFastaFetch/orungo.fasta",
"chars": 19888,
"preview": ">JQ610675.1 Orungo virus isolate UGMP 359 segment 1, complete sequence\nGTATAATTAGGAATGGTCATCACTGTGGAAGGTGCGTGGCTGGTGAGAC"
},
{
"path": "tests/input/TestFastqBam/expected.fastq1",
"chars": 111,
"preview": "@myseq/1\nCAATAAAAAAAAAAAAGAAAGAAAAAAAAATTCTCCTCATTTTTGTTGT\n+\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n"
},
{
"path": "tests/input/TestFastqBam/expected.java1_7.sam",
"chars": 356,
"preview": "@HD\tVN:1.4\tSO:queryname\n@RG\tID:A\tPL:9.75\tLB:Alexandria\tSM:FreeSample\tCN:KareemAbdul-Jabbar\nmyseq\t77\t*\t0\t0\t*\t*\t0\t0\tTCAATA"
},
{
"path": "tests/input/TestFastqBam/expected.java1_8.sam",
"chars": 356,
"preview": "@HD\tVN:1.4\tSO:queryname\n@RG\tID:A\tLB:Alexandria\tSM:FreeSample\tCN:KareemAbdul-Jabbar\tPL:9.75\nmyseq\t77\t*\t0\t0\t*\t*\t0\t0\tTCAATA"
},
{
"path": "tests/input/TestFastqBam/expected.java1_8_v1.5.sam",
"chars": 356,
"preview": "@HD\tVN:1.5\tSO:queryname\n@RG\tID:A\tSM:FreeSample\tLB:Alexandria\tPL:9.75\tCN:KareemAbdul-Jabbar\nmyseq\t77\t*\t0\t0\t*\t*\t0\t0\tTCAATA"
},
{
"path": "tests/input/TestFastqBam/in1.fastq",
"chars": 113,
"preview": "@myseq/1\nTCAATAAAAAAAAAAAAGAAAGAAAAAAAAATTCTCCTCATTTTTGTTGT\n+\n\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\n"
},
{
"path": "tests/input/TestFastqBam/in2.fastq",
"chars": 113,
"preview": "@myseq/2\nAATTATATTATTTCTTTGATAATTTCCTCTCCTCTTGTTTCTTTGTTTCT\n+\n\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\"#\n"
},
{
"path": "tests/input/TestFastqBam/inHeader.txt",
"chars": 98,
"preview": "@HD\tVN:1.4\tSO:queryname\n@RG\tID:A\tPL:txtPlatform\tLB:txtLib\tDT:2014-11-10\tSM:txtSample\tCN:txtCenter\n"
},
{
"path": "tests/input/TestFeatureReader/GU481072.1.tbl",
"chars": 265,
"preview": ">Feature gb|GU481072.1|\n52\t1524\tgene\n\t\t\tgene_desc\tGPC\n52\t1524\tCDS\n\t\t\tproduct\tglycoprotein precursor\n\t\t\ttransl_table\t1\n\t\t"
},
{
"path": "tests/input/TestFeatureReader/GU481073.1.tbl",
"chars": 242,
"preview": ">Feature gb|GU481073.1|\n46\t345\tgene\n\t\t\tgene_desc\tZ\n46\t345\tCDS\n\t\t\tproduct\tZ protein\n\t\t\ttransl_table\t1\n\t\t\tprotein_id\tgb|AD"
},
{
"path": "tests/input/TestFeatureReader/KM821772.1.tbl",
"chars": 232,
"preview": ">Feature gb|KM821772.1|\n57\t356\tgene\n\t\t\tgene\tZ\n57\t356\tCDS\n\t\t\tproduct\tZ protein\n\t\t\ttransl_table\t1\n\t\t\tprotein_id\tgb|AIT1712"
},
{
"path": "tests/input/TestFeatureReader/KM821773.1.tbl",
"chars": 255,
"preview": ">Feature gb|KM821773.1|\n48\t1523\tgene\n\t\t\tgene\tGPC\n48\t1523\tCDS\n\t\t\tproduct\tglycoprotein precursor\n\t\t\ttransl_table\t1\n\t\t\tprot"
},
{
"path": "tests/input/TestFeatureReader/LC889323.1.tbl",
"chars": 142,
"preview": ">Feature dbj|LC889323.1|\n1\t2280\tgene\n\t\t\tgene\ttest_gene\n\t\t\tlocus_tag\tTEST_gp1\n1\t2280\tCDS\n\t\t\tproduct\ttest protein\n\t\t\tprote"
},
{
"path": "tests/input/TestFeatureReader/NC_026438.1.tbl",
"chars": 173,
"preview": ">Feature ref|NC_026438.1|\n1\t2280\tgene\n\t\t\tgene\tPB2\n\t\t\tlocus_tag\tUJ99_s1gp1\n\t\t\tdb_xref\tGeneID:23308131\n1\t2280\tCDS\n\t\t\tprodu"
},
{
"path": "tests/input/TestFeatureReader/test1-S.tbl",
"chars": 255,
"preview": ">Feature gb|KM821998.1|\n59\t1531\tgene\n\t\t\tgene\tGPC\n59\t1531\tCDS\n\t\t\tproduct\tglycoprotein precursor\n\t\t\ttransl_table\t1\n\t\t\tprot"
},
{
"path": "tests/input/TestFeatureReader/test2-L.tbl",
"chars": 232,
"preview": ">Feature gb|KM821997.1|\n76\t375\tgene\n\t\t\tgene\tZ\n76\t375\tCDS\n\t\t\tproduct\tZ protein\n\t\t\ttransl_table\t1\n\t\t\tprotein_id\tgb|AIT1757"
},
{
"path": "tests/input/TestFeatureTableFetch/JQ610675.1.table",
"chars": 123,
"preview": ">Feature gb|JQ610675.1|\n13\t3921\tCDS\n\t\t\tproduct\tVP1\n\t\t\tprot_desc\tRNA-dependent RNA polymerase\n\t\t\tprotein_id\tgb|AFX73387.1"
}
]
// ... and 519 more files (download for full content)
About this extraction
This page contains the full source code of the broadinstitute/viral-ngs GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 719 files (10.3 MB), approximately 2.7M tokens, and a symbol index with 2098 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.