Showing preview only (1,890K chars total). Download the full file or copy to clipboard to get everything.
Repository: chapmanb/cloudbiolinux
Branch: master
Commit: cd1b727402e2
Files: 458
Total size: 1.7 MB
Directory structure:
gitextract_z4wsjush/
├── .gitignore
├── .gitmodules
├── LICENSE.txt
├── MANIFEST.in
├── README.rst
├── cloudbio/
│ ├── __init__.py
│ ├── biodata/
│ │ ├── __init__.py
│ │ ├── galaxy.py
│ │ ├── genomes.py
│ │ ├── ggd.py
│ │ └── rnaseq.py
│ ├── cloudbiolinux.py
│ ├── cloudman.py
│ ├── config_management/
│ │ ├── __init__.py
│ │ ├── chef.py
│ │ ├── puppet.py
│ │ └── utils.py
│ ├── custom/
│ │ ├── __init__.py
│ │ ├── bio_general.py
│ │ ├── bio_nextgen.py
│ │ ├── bio_proteomics.py
│ │ ├── bio_proteomics_wine.py
│ │ ├── cloudman.py
│ │ ├── distributed.py
│ │ ├── galaxy.py
│ │ ├── galaxy_tools.py
│ │ ├── galaxyp.py
│ │ ├── java.py
│ │ ├── millstone.py
│ │ ├── phylogeny.py
│ │ ├── python.py
│ │ ├── shared.py
│ │ ├── system.py
│ │ ├── vcr.py
│ │ └── versioncheck.py
│ ├── deploy/
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── main.py
│ │ ├── plugins/
│ │ │ ├── __init__.py
│ │ │ ├── cloudman.py
│ │ │ ├── galaxy.py
│ │ │ └── gvl.py
│ │ ├── util.py
│ │ ├── vmlauncher/
│ │ │ ├── __init__.py
│ │ │ ├── config.md
│ │ │ └── transfer.py
│ │ └── volume.py
│ ├── distribution.py
│ ├── fabutils.py
│ ├── flavor/
│ │ ├── __init__.py
│ │ └── config.py
│ ├── galaxy/
│ │ ├── __init__.py
│ │ ├── applications.py
│ │ ├── r.py
│ │ ├── tools.py
│ │ └── utils.py
│ ├── libraries.py
│ ├── manifest.py
│ ├── package/
│ │ ├── __init__.py
│ │ ├── brew.py
│ │ ├── conda.py
│ │ ├── cpan.py
│ │ ├── deb.py
│ │ ├── nix.py
│ │ ├── rpm.py
│ │ └── shared.py
│ └── utils.py
├── config/
│ ├── README.md
│ ├── biodata.yaml
│ ├── chef/
│ │ └── cookbooks/
│ │ └── .gitkeep
│ ├── chef_recipes.yaml
│ ├── custom.yaml
│ ├── fabricrc.txt
│ ├── haskell-libs.yaml
│ ├── main.yaml
│ ├── node_extra.json
│ ├── packages-debian.yaml
│ ├── packages-homebrew.yaml
│ ├── packages-nix.yaml
│ ├── packages-scientificlinux.yaml
│ ├── packages-yum.yaml
│ ├── packages.yaml
│ ├── perl-libs.yaml
│ ├── puppet/
│ │ └── modules/
│ │ └── .gitkeep
│ ├── puppet_classes.yaml
│ ├── python-libs.yaml
│ ├── r-libs.yaml
│ └── ruby-libs.yaml
├── contrib/
│ ├── __init__.py
│ └── flavor/
│ ├── __init__.py
│ ├── biocloudcentral/
│ │ └── main.yaml
│ ├── biopython/
│ │ ├── custom.yaml
│ │ ├── fabricrc.txt
│ │ ├── main.yaml
│ │ ├── packages-homebrew.yaml
│ │ ├── packages.yaml
│ │ └── python-libs.yaml
│ ├── boinc/
│ │ ├── __init__.py
│ │ ├── boincflavor.py
│ │ ├── fabricrc_debian.txt
│ │ └── main.yaml
│ ├── cloudman/
│ │ ├── README.md
│ │ ├── cloudman/
│ │ │ └── main.yaml
│ │ ├── cloudman_and_galaxy/
│ │ │ └── main.yaml
│ │ ├── cloudman_and_galaxyp/
│ │ │ └── main.yaml
│ │ ├── cloudman_desktop_and_galaxyp/
│ │ │ ├── main.yaml
│ │ │ └── ruby-libs.yaml
│ │ ├── migration_checklist.md
│ │ └── tools.yaml
│ ├── cwl_dockers/
│ │ └── packages-bcbio-alignment.yaml
│ ├── demo/
│ │ ├── README.md
│ │ ├── custom.yaml
│ │ ├── fabricrc.txt
│ │ ├── main.yaml
│ │ └── packages-homebrew.yaml
│ ├── edx_course/
│ │ ├── custom.yaml
│ │ ├── edx_setup.sh
│ │ ├── fabricrc.txt
│ │ ├── main.yaml
│ │ ├── packages-homebrew.yaml
│ │ └── python-libs.yaml
│ ├── globus/
│ │ └── main.yaml
│ ├── millstone/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── custom.yaml
│ │ ├── installer.py
│ │ ├── main.yaml
│ │ ├── millstoneflavor.py
│ │ └── python-libs.yaml
│ ├── minimal/
│ │ ├── fabricrc_debian.txt
│ │ └── main.yaml
│ ├── neuro/
│ │ ├── __init__.py
│ │ ├── custom.yaml
│ │ ├── fabricrc.txt
│ │ ├── main.yaml
│ │ ├── neuro.py
│ │ └── packages.yaml
│ ├── ngs_pipeline_minimal/
│ │ ├── custom.yaml
│ │ ├── main.yaml
│ │ ├── packages-conda.yaml
│ │ ├── packages-homebrew.yaml
│ │ ├── perl-libs.yaml
│ │ └── r-libs.yaml
│ ├── phylogeny/
│ │ ├── __init__.py
│ │ ├── fabricrc_debian.txt
│ │ ├── fabricrc_ubuntu.txt
│ │ ├── install_debian.sh
│ │ ├── install_ubuntu.sh
│ │ ├── main.yaml
│ │ ├── phylogenyflavor.py
│ │ └── virtualbox.md
│ ├── pjotrp/
│ │ ├── __init__.py
│ │ └── biotest/
│ │ ├── __init__.py
│ │ ├── biotestflavor.py
│ │ ├── fabricrc_debian.txt
│ │ └── main.yaml
│ ├── proteomics/
│ │ ├── galaxyp/
│ │ │ ├── README.md
│ │ │ ├── main.yaml
│ │ │ ├── settings-sample-galaxyp.yaml
│ │ │ └── tools.yaml
│ │ └── swift/
│ │ ├── custom.yaml
│ │ ├── main.yaml
│ │ └── r-libs.yaml
│ ├── seal/
│ │ ├── __init__.py
│ │ ├── fabricrc_sl.txt
│ │ ├── main.yaml
│ │ └── sealflavor.py
│ └── variantviz/
│ ├── custom.yaml
│ ├── fabricrc.txt
│ ├── main.yaml
│ └── packages-yum.yaml
├── contributors.mkd
├── data_fabfile.py
├── deploy/
│ ├── README.md
│ ├── TODO
│ ├── Vagrantfile
│ ├── cloudman.html
│ ├── cloudman.md
│ ├── config/
│ │ └── tool_data_table_conf.xml
│ ├── deploy.sh
│ ├── deploy_bourne.sh
│ ├── deploy_no_deps.sh
│ ├── requirements.txt
│ ├── settings-sample-cm.yaml
│ ├── settings-sample-minimal.yaml
│ ├── settings-sample-oldgalaxyvmlauncher.yaml
│ ├── test_install_galaxy_tool.py
│ └── update_dependencies.sh
├── doc/
│ ├── Makefile
│ ├── hacking.md
│ ├── intro/
│ │ ├── FAQ.tex
│ │ ├── README
│ │ ├── basicTerminology.aux
│ │ ├── basicTerminology.tex
│ │ ├── cloudbl_desktopIntro.aux
│ │ ├── cloudbl_desktopIntro.tex
│ │ ├── getReady.aux
│ │ ├── getReady.tex
│ │ ├── gettingStarted_CloudBioLinux.aux
│ │ ├── gettingStarted_CloudBioLinux.out
│ │ ├── gettingStarted_CloudBioLinux.tex
│ │ ├── gettingStarted_CloudBioLinux.toc
│ │ ├── images/
│ │ │ ├── createAndMountVol-1.odg
│ │ │ ├── nutshell.odg
│ │ │ └── unmountDetach-1.odg
│ │ ├── tips.tex
│ │ ├── usefulLinks.tex
│ │ ├── workingOnCloudBL.aux
│ │ ├── workingOnCloudBL.tex
│ │ ├── workingWithData.aux
│ │ └── workingWithData.tex
│ ├── linux_kvm.md
│ ├── private_cloud.md
│ ├── remote_gui.md
│ ├── source/
│ │ ├── conf.py
│ │ ├── framework.rst
│ │ └── index.rst
│ └── virtualbox.md
├── fabfile.py
├── ggd-recipes/
│ ├── BDGP6/
│ │ ├── gtf.yaml
│ │ ├── mirbase.yaml
│ │ ├── seq.yaml
│ │ └── transcripts.yaml
│ ├── GRCh37/
│ │ ├── 1000g.yaml
│ │ ├── 1000g_omni_snps.yaml
│ │ ├── 1000g_snps.yaml
│ │ ├── ACMG56_genes.yaml
│ │ ├── GA4GH_problem_regions.yaml
│ │ ├── GRCh37_NCBI2ensembl.txt
│ │ ├── MIG.yaml
│ │ ├── RADAR.yaml
│ │ ├── af_only_gnomad.yaml
│ │ ├── ancestral.yaml
│ │ ├── battenberg.yaml
│ │ ├── capture_regions.yaml
│ │ ├── clinvar.yaml
│ │ ├── cosmic.yaml
│ │ ├── dbnsfp.yaml
│ │ ├── dbscsnv.yaml
│ │ ├── dbsnp.yaml
│ │ ├── dream-syn3.yaml
│ │ ├── dream-syn4.yaml
│ │ ├── ericscript.yaml
│ │ ├── esp.yaml
│ │ ├── exac.yaml
│ │ ├── fusion-blacklist.yaml
│ │ ├── genesplicer.yaml
│ │ ├── giab-NA12878-NA24385-somatic.yaml
│ │ ├── giab-NA12878.yaml
│ │ ├── giab-NA24143.yaml
│ │ ├── giab-NA24149.yaml
│ │ ├── giab-NA24385.yaml
│ │ ├── giab-NA24631.yaml
│ │ ├── giab-NA24694.yaml
│ │ ├── giab-NA24695.yaml
│ │ ├── gnomad.yaml
│ │ ├── gnomad_exome.yaml
│ │ ├── gnomad_sv.yaml
│ │ ├── hapmap.yaml
│ │ ├── mills_indels.yaml
│ │ ├── prioritize.yaml
│ │ ├── qsignature.yaml
│ │ ├── seq.yaml
│ │ ├── topmed.yaml
│ │ ├── transcripts.yaml
│ │ ├── twobit.yaml
│ │ ├── varpon.yaml
│ │ ├── vcfanno.yaml
│ │ └── viral.yaml
│ ├── GRCz11/
│ │ ├── seq.yaml
│ │ ├── transcripts.yaml
│ │ └── twobit.yaml
│ ├── README.md
│ ├── Sscrofa11.1/
│ │ ├── seq.yaml
│ │ ├── transcripts.yaml
│ │ └── twobit.yaml
│ ├── TAIR10/
│ │ └── mirbase.yaml
│ ├── canFam3/
│ │ ├── dbsnp.yaml
│ │ ├── mirbase.yaml
│ │ ├── transcripts.yaml
│ │ └── twobit.yaml
│ ├── hg19/
│ │ ├── 1000g.yaml
│ │ ├── 1000g_omni_snps.yaml
│ │ ├── 1000g_snps.yaml
│ │ ├── ACMG56_genes.yaml
│ │ ├── GA4GH_problem_regions.yaml
│ │ ├── MIG.yaml
│ │ ├── RADAR.yaml
│ │ ├── af_only_gnomad.yaml
│ │ ├── battenberg.yaml
│ │ ├── capture_regions.yaml
│ │ ├── clinvar.yaml
│ │ ├── cosmic.yaml
│ │ ├── dbsnp.yaml
│ │ ├── effects_transcripts.yaml
│ │ ├── esp.yaml
│ │ ├── exac.yaml
│ │ ├── fusion-blacklist.yaml
│ │ ├── giab-NA12878.yaml
│ │ ├── giab-NA24143.yaml
│ │ ├── giab-NA24149.yaml
│ │ ├── giab-NA24385.yaml
│ │ ├── giab-NA24631.yaml
│ │ ├── gnomad.yaml
│ │ ├── gnomad_exome.yaml
│ │ ├── gnomad_genome.grch37_to_hg19.sh
│ │ ├── gtf.yaml
│ │ ├── hapmap.yaml
│ │ ├── mills_indels.yaml
│ │ ├── mirbase.yaml
│ │ ├── platinum-genome-NA12878.yaml
│ │ ├── prioritize.yaml
│ │ ├── purecn_mappability.yaml
│ │ ├── rmsk.yaml
│ │ ├── seq.yaml
│ │ ├── simple_repeat.yaml
│ │ ├── topmed.yaml
│ │ ├── transcripts.yaml
│ │ ├── twobit.yaml
│ │ ├── varpon.yaml
│ │ └── viral.yaml
│ ├── hg38/
│ │ ├── 1000g_indels.yaml
│ │ ├── 1000g_omni_snps.yaml
│ │ ├── 1000g_snps.yaml
│ │ ├── ACMG56_genes.yaml
│ │ ├── RADAR.yaml
│ │ ├── README.md
│ │ ├── af_only_gnomad.yaml
│ │ ├── bwa.yaml
│ │ ├── canonical_cancer_99.txt
│ │ ├── capture_regions.yaml
│ │ ├── ccds.yaml
│ │ ├── clinvar.yaml
│ │ ├── coverage.yaml
│ │ ├── dbnsfp.yaml
│ │ ├── dbscsnv.yaml
│ │ ├── dbsnp.yaml
│ │ ├── dream-syn3-crossmap.yaml
│ │ ├── dream-syn4-crossmap.yaml
│ │ ├── effects_transcripts.yaml
│ │ ├── ericscript.yaml
│ │ ├── esp.yaml
│ │ ├── exac.yaml
│ │ ├── fusion-blacklist.yaml
│ │ ├── genesplicer.yaml
│ │ ├── genotype2phenotype.yaml
│ │ ├── giab-NA12878-NA24385-somatic.yaml
│ │ ├── giab-NA12878-crossmap.yaml
│ │ ├── giab-NA12878-remap.yaml
│ │ ├── giab-NA12878.yaml
│ │ ├── giab-NA24143.yaml
│ │ ├── giab-NA24149.yaml
│ │ ├── giab-NA24385.yaml
│ │ ├── giab-NA24631.yaml
│ │ ├── giab-NA24694.yaml
│ │ ├── giab-NA24695.yaml
│ │ ├── gnomad.yaml
│ │ ├── gnomad_exome.yaml
│ │ ├── gnomad_fields_to_keep.txt
│ │ ├── gtf.yaml
│ │ ├── hapmap_snps.yaml
│ │ ├── hisat2.yaml
│ │ ├── mills_indels.yaml
│ │ ├── mirbase.yaml
│ │ ├── platinum-genome-NA12878.yaml
│ │ ├── prioritize.yaml
│ │ ├── purecn_mappability.yaml
│ │ ├── qsignature.yaml
│ │ ├── rmsk.yaml
│ │ ├── salmon-decoys.yaml
│ │ ├── seq.yaml
│ │ ├── simple_repeat.yaml
│ │ ├── topmed.yaml
│ │ ├── transcripts.yaml
│ │ ├── twobit.yaml
│ │ ├── varpon.yaml
│ │ ├── vcfanno.yaml
│ │ └── viral.yaml
│ ├── hg38-noalt/
│ │ ├── RADAR.yaml
│ │ ├── README.md
│ │ ├── bowtie2.yaml
│ │ ├── bwa.yaml
│ │ ├── gtf.yaml
│ │ ├── mirbase.yaml
│ │ ├── seq.yaml
│ │ └── transcripts.yaml
│ ├── mm10/
│ │ ├── dbsnp.yaml
│ │ ├── mirbase.yaml
│ │ ├── prioritize.yaml
│ │ ├── problem_regions.yaml
│ │ ├── rmsk.yaml
│ │ ├── seq.yaml
│ │ ├── transcripts.yaml
│ │ ├── twobit.yaml
│ │ └── vcfanno.yaml
│ ├── rn6/
│ │ ├── mirbase.yaml
│ │ ├── seq.yaml
│ │ ├── transcripts.yaml
│ │ └── twobit.yaml
│ └── sacCer3/
│ ├── seq.yaml
│ └── transcripts.yaml
├── installed_files/
│ ├── bash_history
│ ├── bash_login
│ ├── ec2autorun.py
│ ├── galaxy_default.template
│ ├── galaxy_init
│ ├── galaxyp_nginx.conf.template
│ ├── image_user_data
│ ├── ipython_config.py
│ ├── jwmrc.xml
│ ├── nginx.conf.template
│ ├── nginx_init
│ ├── novnc_default.template
│ ├── novnc_init
│ ├── pg_ctl
│ ├── proftpd.conf.template
│ ├── protvis_default.template
│ ├── protvis_init
│ ├── psql
│ ├── setupnx.sh
│ ├── tool_data_table_conf.xml
│ ├── vncserver_default.template
│ ├── vncserver_init
│ ├── xstartup
│ ├── xvfb_default
│ └── xvfb_init
├── manifest/
│ ├── custom-packages.yaml
│ ├── debian-packages.yaml
│ ├── python-packages.yaml
│ └── r-packages.yaml
├── setup.py
├── test/
│ ├── README
│ ├── test_biolinux
│ ├── test_vagrant
│ └── testlib/
│ ├── test_biolinux.rb
│ └── test_support.rb
└── utils/
├── bootstrap.sh
├── cbl_exome_setup.py
├── cbl_installed_software.py
├── convert_to_xz.py
├── cwl2yaml_packages.py
├── get_biolinux_packages.py
├── get_yum_packages.py
├── images_and_snapshots.py
├── prep_esp_hg38.py
├── prepare_cosmic.py
├── prepare_dbsnp.py
├── prepare_tx_gff.py
├── prioritize/
│ ├── AZ300.txt
│ ├── AZ300_with_known.txt
│ ├── az-cancer-panel.txt
│ ├── az300_to_bed.py
│ ├── prep_ccds_genes.py
│ └── prep_prioritize_downloads.sh
├── query_conda_deps.py
├── s3_multipart_upload.py
└── sv/
├── NA24385_crowd_dels.py
└── NA24385_giab_dels.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.pyc
tags
cloudbiolinux.egg-info/
build/
doc/build
dist/
*.pem
dpkg.lst
*.log
venv/
venv_cbl/
deploy/build
deploy/keys
deploy/settings.yaml
deploy/.venv-deploy
deploy/.vagrant
.ropeproject
.idea
================================================
FILE: .gitmodules
================================================
[submodule "config/puppet/modules/apache"]
path = config/puppet/modules/apache
url = git://github.com/puppetlabs/puppetlabs-apache.git
[submodule "config/puppet/modules/concat"]
path = config/puppet/modules/concat
url = git://github.com/puppetlabs/puppetlabs-concat.git
[submodule "config/puppet/modules/firewall"]
path = config/puppet/modules/firewall
url = git://github.com/puppetlabs/puppetlabs-firewall.git
[submodule "config/puppet/modules/stdlib"]
path = config/puppet/modules/stdlib
url = git://github.com/puppetlabs/puppetlabs-stdlib.git
[submodule "config/puppet/modules/vcsrepo"]
path = config/puppet/modules/vcsrepo
url = git://github.com/puppetlabs/puppetlabs-vcsrepo.git
[submodule "config/puppet/modules/biocloudcentral"]
path = config/puppet/modules/biocloudcentral
url = git://github.com/bioconfig/puppet-biocloudcentral.git
[submodule "config/puppet/modules/python"]
path = config/puppet/modules/python
url = git://github.com/bioconfig/puppet-python.git
[submodule "config/chef/cookbooks/globus"]
path = config/chef/cookbooks/globus
url = git://github.com/bioconfig/chef-globus.git
[submodule "config/puppet/modules/lwr"]
path = config/puppet/modules/lwr
url = git://github.com/bioconfig/puppet-lwr.git
================================================
FILE: LICENSE.txt
================================================
Copyright (c) 2013 CloudBioLinux contributors
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
================================================
FILE: MANIFEST.in
================================================
include *fabfile.py
include *.md
include *.mkd
include config/*.yaml
include config/fabricrc.txt
include config/README.md
include doc/*.md
include installed_files/*
include utils/*
================================================
FILE: README.rst
================================================
CloudBioLinux is a build and deployment system which installs an easily
customizable selection of bioinformatics and machine learning libraries on a
linux container, bare virtual machine (VM) image, freshly installed PC, or in
the cloud. CloudBioLinux is a curated and community developed set of
instructions for tools provided by operating system packages (debs and RPMs),
external packaging efforts (`bioconda <https://bioconda.github.io/>`_ and
`homebrew-science <https://github.com/Homebrew/homebrew-science>`_)
and language specific library installers (Python, R, Perl and Ruby).
CloudBioLinux included software packages are fully customizable. In
addition to the default configuration, we support custom configuration
builds through flavors. Flavors support overriding default package
installations, making it simple to create derived installs for specific
purposes.
CloudBioLinux is a single install route for `Docker containers <http://www.docker.com/>`_,
desktop VMs such as `VirtualBox <http://digitizor.com/2011/01/07/virtualbox-4-0-install-ubuntu/>`_,
cloud providers such as `Amazon EC2 <http://aws.amazon.com/ec2/>`_ or
desktop machines. This works equally well for other virtual machines and
private cloud environments, including `XEN <http://xen.org/>`_, Linux
`KVM <http://www.linux-kvm.org/>`_,
`Eucalyptus <http://open.eucalyptus.com/>`_ and
`Openstack <http://www.openstack.org/>`_.
Quick start
===========
`bcbio <http://bcbio-nextgen.readthedocs.io/en/latest/>`_ uses CloudBioLinux as
the basis for tool installation and provides a large set of supported and tested
tools. If you're looking to bootstrap a system with tools and data for high
throughput sequencing analysis, we suggest using the `bcbio installer
<http://bcbio-nextgen.readthedocs.io/en/latest/contents/installation.html#automated>`_
which fully wraps CloudBioLinux and provides an easy path to customize install
directories, organisms and biological data installed.
To modify/add data/package recipes to bcbio, edit the configs below:
- bcbio data recipes: https://github.com/chapmanb/cloudbiolinux/tree/master/ggd-recipes
- bcbio data index1: https://github.com/chapmanb/cloudbiolinux/blob/master/config/biodata.yaml
- bcbio data index2: https://github.com/bcbio/bcbio-nextgen/tree/master/config/genomes
- bcbio conda packages index: https://github.com/chapmanb/cloudbiolinux/blob/master/contrib/flavor/ngs_pipeline_minimal/packages-conda.yaml
We recommend using, or developing, a custom flavor to choose tools of interest
to install. The amount of bioinformatics software continues to increase -- there are
`over 1000 recipes in bioconda <https://github.com/bioconda/bioconda-recipes>`_
-- and it's difficult to come up with a default installation that includes
everything for everyone. The ``ngs_pipeline_minimal`` flavor has the set of NGS
analysis tools installed with bcbio and is a good starting point for
understanding the CloudBioLinux install process. To install inside an isolated
conda environment on a bare machine do::
git clone https://github.com/chapmanb/cloudbiolinux.git
cd cloudbiolinux
wget https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh
bash Miniconda2-latest-Linux-x86_64.sh -b -p ~/cblenv
~/cblenv/bin/conda install -y -c bioconda -c conda-forge pip fabric pyyaml
~/cblenv/bin/fab -f fabfile.py -H localhost install_biolinux:flavor=ngs_pipeline_minimal --set keep_isolated=true
Installing CloudBioLinux on a local machine
===========================================
The install process for CloudBioLinux is fully automated through a `Fabric build
file <http://fabfile.org/>`_ written in Python. Everything is fully configurable
through plain text YAML configuration files, and custom build targets allow
installation of a subset of the total available packages.
Setup
-----
Retrieve the CloudBioLinux code base and install fabric::
pip install fabric
git clone git://github.com/chapmanb/cloudbiolinux.git
cd cloudbiolinux
Usage
-----
The basic usage specifies the hostname of a machine accessible via ssh or the
local machine::
fab -f fabfile.py -H localhost install_biolinux
Fabric contains some other useful commandline arguments for customizing
this to your environments:
- ``-c your_fabricrc.txt`` -- Specify the path to a fabricrc
configuration file. This allows customization of install directories
and other server specific details. See the default
``config/fabricrc.txt`` for a full list of options.
- ``-u username`` -- The username on a remote machine, overriding the
default of your current username.
Customization with flavors
--------------------------
In most cases you want to customize a specific set of packages,
or install into an isolated directory without root access, using flavors::
fab -f fabfile.py -H localhost install_biolinux:flavor=my_flavor
``my_flavor`` can be the name of an existing flavor in
``contrib/flavor`` or the path to a directory with customization
information. The files in your flavor directory replace those in the
standard ``config`` directory, allowing replacement of any of the
configuration files like ``main.yaml`` with customized copies.
If you desire even more control, flavors allow custom python hooks. See
``doc/hacking.md`` for more details.
The best place to get started is the `demo flavor
<https://github.com/chapmanb/cloudbiolinux/tree/master/contrib/flavor/demo>`_
included with CloudBioLinux. This installs a small number of common packages
into an isolated directory (``~/tmp/cbl_demo`` by default), without root access.
Run the example with::
fab -f fabfile.py -H localhost install_biolinux:flavor=demo
Specific install targets
------------------------
You can substitute ``install_biolinux`` with more specific targets to
only build portions of CloudBioLinux:
- ``install_biolinux:packages`` -- Install all of the defined system
packages.
- ``install_biolinux:libraries`` -- Install all libraries for various
programming languages.
- ``install_biolinux:brew`` -- Install homebrew packages only.
- ``install_libraries:language`` -- Install libraries for a specific
language.
- ``install_biolinux:custom`` -- Install all custom programs.
- ``install_brew:a_package_name`` -- Install a specific brew package.
- ``install_custom:a_package_name`` -- Install a specific custom
program.
Homebrew package installation
-----------------------------
`Homebrew <https://github.com/Homebrew/homebrew>`_ and `Linuxbrew
<https://github.com/Homebrew/linuxbrew>`_ provide a Ruby-based environment for
installing packages on MacOSX and Linux. The active
`homebrew-science <https://github.com/Homebrew/homebrew-science>`_ packaging
community maintains a number of common scientific tools. We also maintain a
`homebrew-cbl <https://github.com/chapmanb/homebrew-cbl>`_ repository with tools
not yet integrated into homebrew-science.
CloudBioLinux manages installation of the Linuxbrew or Homebrew framework and
pulls in the ``homebrew/science`` and ``chapmanb/cbl`` taps, as well as
injecting your current compilers into the homebrew build scripts. To install a
`supported package
<https://github.com/chapmanb/cloudbiolinux/blob/master/config/packages-homebrew.yaml>`_
using CloudBioLinux::
fab -f fabfile.py -H localhost install_custom:bedtools
Specific package installation
-----------------------------
The custom directory contains installation instructions for programs
that are not available from standard package repositories, written in Python
using the `Fabric <http://fabfile.org/>`_ remote deployment tool. To install
individual `custom packages
<https://github.com/chapmanb/cloudbiolinux/blob/master/config/custom.yaml>`_::
fab -f fabfile.py -H localhost install_custom:your_package_name
We prefer using the Homebrew framework for new packages over writing custom
packages.
Biological data
---------------
We manage a repository of useful public biological data on an `Amazon S3
bucket <http://s3.amazonaws.com/biodata>`_. Currently this includes
whole genomes pre-indexed for a number of popular aligners. Downloading
and installing these saves a ton of time over running the indexing steps
yourself, and eases running next-generation analyses on cloud machines.
A Fabric build script is provided to install this data on your local
machine. A `biodata configuration file in YAML
format <https://github.com/chapmanb/cloudbiolinux/blob/master/config/biodata.yaml>`_,
``config/biodata.yaml``, specifies the genomes of interest and the
aligner indexes to use. The ``config/fabricrc.txt`` file specifies
details about the system and where to install the data.
The basic commandline is::
fab -f data_fabfile.py -H your_machine install_data_s3
and you can pass in custom biodata and fabricrc files with::
fab -f data_fabfile.py -H your_machine -c your_fabricrc.txt install_data_s3:your_biodata.yaml
In addition to downloading and preparing the data, the script will
integrate these files with a Galaxy instance by updating appropriate
Galaxy configuration files. This makes it useful for installing data to
a local or
`cloud-based <https://bitbucket.org/galaxy/galaxy-central/wiki/cloud>`_
Galaxy server.
Not all of the supported genomes are hosted on the S3 bucket. If your
genome fails to install with install_data_s3, you might be able to download the genome
from Ensembl, etc and prepare it::
fab -f data_fabfile.py -H your_machine -c your_fabricrc.txt install_data:your_biodata.yaml
Using pre-built cloud images
============================
Amazon
------
See the 'Getting Started with CloudBioLinux' guide on the `CloudBioLinux
website <http://cloudbiolinux.org/>`_ for a detailed description. The
short version for users familiar with Amazon is:
- Login to the `Amazon EC2
console <https://console.aws.amazon.com/ec2/home>`_.
- Click Launch Instance, and choose the latest CloudBioLinux AMI from
the `website <http://cloudbiolinux.org/>`_ in the community AMI
section (search for 'CloudBioLinux').
- After launching the instance, find the host details of your running
instance from the Instances section.
- Connect to your machine via ssh or VNC (using the Amazon PEM keys)
Supported environments
======================
Docker
------
`Docker <http://www.docker.com/>`_ provides lightweight local containers for
Linux machines, allowing isolation without the associated overhead of full
virtual machines. Include any of the standard CloudBioLinux commands inside
a `Dockerfile <http://docs.docker.com/reference/builder/>`_ to use CloudBioLinux
to build up the set of tools on your instance. See the
`Dockerfile examples <http://docs.docker.com/installation/#examples>`_ for
information how to write Dockerfiles.
To use a pre-built Docker image made with CloudBioLinux infrastructure, using
this `bcbio-nextgen Dockerfile
<https://github.com/chapmanb/bcbio-nextgen/blob/master/Dockerfile>`_, you can
import the `bcbio-nextgen <https://github.com/chapmanb/bcbio-nextgen>`_
container into your local docker environment::
docker import https://s3.amazonaws.com/bcbio_nextgen/bcbio-nextgen-docker-image.gz chapmanb/bcbio-nextgen-cbl
Amazon
------
A bare Linux image launched in Amazon EC2 is configured from another
machine, i.e. your local desktop, using ssh and cloudbiolinux. See the
Installation section for installing CloudBioLinux with fabric.
Any cloudbiolinux distribution can be used, including Ubuntu, Debian
Linux and CentOS. We recommend using m1.medium or better instance for building a
CloudBioLinux image from scratch, due to resource usage while compiling
software.
1. Go to the cloudbiolinux source and edit the ``config/fabricrc.txt``,
to match the system you plan to install on. Specifically,
``distribution`` and ``dist_name`` parameters specify details about
the type of target.
2. Start an Amazon EC2 base instance and retrieve its DNS hostname:
- `Alestic Ubuntu images <http://alestic.com/>`_
- `Camptocamp Debian
images <http://www.camptocamp.com/en/infrastructure-solutions/amazon-images>`_
3. From your local machine, have CloudBioLinux install your Amazon
instance:
::
fab -f fabfile.py -H hostname -u username -i private_key_file install_biolinux
4. When finished, use the `Amazon
console <https://console.aws.amazon.com/ec2/home>`_ to create an AMI.
Thereafter make it public so it can be used by others.
Vagrant and VirtualBox
----------------------
Vagrant allows easy deploying and connecting to VirtualBox images. The
setup is ideal for running CloudBioLinux on a desktop computer. Install
`VirtualBox <https://www.virtualbox.org/>`_
and `vagrant <http://vagrantup.com/>`_.
See `the VirtualBox and Vagrant documentation
<https://github.com/chapmanb/cloudbiolinux/blob/master/doc/virtualbox.md>`_ for
details on creating a local virtual machine from scratch with CloudBioLinux.
Through Vagrant additional facilities are available, such as a shared
network drive. It is also possible to tweak the image (e.g. RAM/CPU
settings, and getting the all important guest additions) by firing up
virtualbox itself. For more information, see the
documentation on the `Vagrant website <http://vagrantup.com/>`_.
OpenStack/XEN/KVM/Eucalyptus private Cloud
------------------------------------------
As long as there is an 'ssh' entry to a running VM, CloudBioLinux can
install itself.
For more on private Cloud and CloudBioLinux see ./doc/private\_cloud.md.
EC2 quickstart
==============
This provides a quick cheat sheet of commands for getting up and running
on EC2 using Amazon's command line tools.
Initial set up
--------------
The first time using EC2, you'll need to install the toolkit and
credentials for connecting on your local machine, following the `getting
started
guide <http://docs.amazonwebservices.com/AWSEC2/latest/GettingStartedGuide/>`_.
Login to your `Amazon EC2 account <http://aws.amazon.com/account/>`_ and
go to Security Credentials/X.509. Create a new certificate and download
the public ``cert-*.pem`` and private ``pk-*.pem`` files. Put these in
``~/.ec2``.
Install the `ec2 api
tools <http://developer.amazonwebservices.com/connect/entry.jspa?externalID=351&categoryID=88>`_,
which require java.
Set up .zshrc/.bashrc:
::
export EC2_PRIVATE_KEY=~/.ec2/pk-UBH43XTAWVNQMIZRAV3RP5IIBAPBIFVP.pem
export EC2_CERT=~/.ec2/cert-UBH43XTAWVNQMIZRAV3RP5IIBAPBIFVP.pem
export AWS_ACCESS_KEY_ID=<your access key>
export AWS_SECRET_ACCESS_KEY=<your secret access key>
To test, you should be able to run the command:
::
% ec2-describe-regions
Now generate a private key for logging in:
::
% ec2-add-keypair yourmachine-keypair
This will produce an RSA private key. You should copy and paste this to
your .ec2 directory for future use:
::
% vim ~/.ec2/id-yourmachine.keypair
% chmod 600 ~/.ec2/id-yourmachine.keypair
Allow ssh and web access to your instances:
::
% ec2-authorize default -p 22
% ec2-authorize default -p 80
Starting an instance
--------------------
Each time you'd like to use EC2, you need to create a remote instance to
work with; the `AWS console <http://alestic.com/>`_ is useful for
managing this process.
When building from scratch with Alestic images, you will need to
increase the size of the root filesystem to fit all of the CloudBioLinux
data and libraries. This is done by starting the instance from the
commandline with:
::
% ec2-run-instances ami-1aad5273 -k kunkel-keypair -t m1.large
-b /dev/sda1=:20
% ec2-describe-instances i-0ca39764
On Ubuntu 10.04, you then need to ssh into the instance and resize the
filesystem with:
::
% sudo resize2fs /dev/sda1
On 11.04 the resize happens automatically and this is not required.
Testing
=======
BioLinux comes with an integration testing framework - currently based
on Vagrant. Try:
::
cd test
./test_vagrant --help
Target VMs can be listed with
::
./test_vagrant --list
Build a minimal VM
::
./test_vagrant Minimal
Documentation
=============
Additional documentation can be found in the `./doc
directory <https://github.com/chapmanb/cloudbiolinux>`_ in the BioLinux
source tree.
LICENSE
=======
The code is freely available under the `MIT
license <http://www.opensource.org/licenses/mit-license.html>`_.
================================================
FILE: cloudbio/__init__.py
================================================
"""Module level code supporting CloudBioLinux installations.
This provides a reusable architecture allowing definitions and helper code to be used
in other projects.
"""
# Register this package as a pkg_resources-style namespace package.
# __import__ is used so that no module-level ``pkg_resources`` name is bound.
__import__('pkg_resources').declare_namespace(__name__)
================================================
FILE: cloudbio/biodata/__init__.py
================================================
"""Download, installation and configuration of biological data.
"""
================================================
FILE: cloudbio/biodata/galaxy.py
================================================
"""Retrieve indexed genomes using Galaxy's rsync server resources.
http://wiki.galaxyproject.org/Admin/Data%20Integration
"""
from __future__ import print_function
import os
import shutil
import subprocess
from xml.etree import ElementTree
from cloudbio.custom import shared
# ## Compatibility definitions
# Root of the Galaxy rsync data cache; _rsync_genome_index builds its
# "{server}/indexes{subdir}/{gid}/{idx}/" locations from it.
server = "rsync://datacache.g2.bx.psu.edu"
# Local index name -> index directory name on the Galaxy rsync server.
# Looked up in _get_galaxy_genomes; index names without an entry are
# reported there as not supported by Galaxy.
index_map = {"bismark": "bismark_index",
             "bowtie": "bowtie_index",
             "bowtie2": "bowtie2_index",
             "bwa": "bwa_index",
             "novoalign": "novoalign_index",
             "ucsc": "seq",
             "seq": "sam_index"}
# Local genome identifier -> identifier used on the Galaxy server (applied
# in rsync_genomes); identifiers not listed here are used unchanged.
org_remap = {"phix": "phiX",
             "GRCh37": "hg_g1k_v37",
             "araTha_tair9": "Arabidopsis_thaliana_TAIR9",
             "araTha_tair10": "Arabidopsis_thaliana_TAIR10",
             "WS210": "ce10",
             "WS220": "ce10"}
# Sub-directories of "indexes" on the server that _rsync_genome_index probes.
galaxy_subdirs = ["", "/microbes"]
# ## Galaxy location files
class LocCols(object):
    """Default values for every column a Galaxy .loc file may request.

    Attribute names match the column names used in Galaxy's
    tool_data_table_conf.xml, so a column default can be fetched with
    ``getattr(loc_cols, column_name)`` (see _build_galaxy_loc_line).

    config -- dict of per-genome settings; a key named after a column
              overrides that column's default.
    dbkey -- genome build identifier, the fallback for most columns.
    file_path -- path to the index, stored as the ``path`` column.
    """
    def __init__(self, config, dbkey, file_path):
        self.dbkey = dbkey
        self.path = file_path
        self.value = config.get("value", dbkey)
        self.name = config.get("name", dbkey)
        self.species = config.get('species', '')
        self.index = config.get('index', 'index')
        # Each column reads its own key. Previously these three all read the
        # 'index' key, so a configured index value leaked into them.
        self.formats = config.get('formats', 'fastqsanger')
        self.dbkey1 = config.get('dbkey1', dbkey)
        self.dbkey2 = config.get('dbkey2', dbkey)
def _get_tool_conf(env, tool_name):
"""
Parse the tool_data_table_conf.xml from installed_files subfolder and extract
values for the 'columns' tag and 'path' parameter for the 'file' tag, returning
those as a dict.
"""
tool_conf = {}
tdtc = ElementTree.parse(env.tool_data_table_conf_file)
tables = tdtc.getiterator('table')
for t in tables:
if tool_name in t.attrib.get('name', ''):
tool_conf['columns'] = t.find('columns').text.replace(' ', '').split(',')
tool_conf['file'] = t.find('file').attrib.get('path', '')
return tool_conf
def _build_galaxy_loc_line(env, dbkey, file_path, config, prefix, tool_name):
"""Prepare genome information to write to a Galaxy *.loc config file.
"""
if tool_name:
str_parts = []
tool_conf = _get_tool_conf(env, tool_name)
loc_cols = LocCols(config, dbkey, file_path)
# Compose the .loc file line as str_parts list by looking for column values
# from the retrieved tool_conf (as defined in tool_data_table_conf.xml).
# Any column values required but missing in the tool_conf are
# supplemented by the defaults defined in LocCols class
for col in tool_conf.get('columns', []):
str_parts.append(config.get(col, getattr(loc_cols, col)))
else:
str_parts = [dbkey, file_path]
if prefix:
str_parts.insert(0, prefix)
return str_parts
def update_loc_file(env, ref_file, line_parts):
    """Add a reference to the given genome to the base index file.

    Does nothing unless ``env`` has a non-None ``galaxy_home``. Otherwise:

    - makes sure ``<galaxy_home>/tool-data`` exists,
    - copies ``env.tool_data_table_conf_file`` to
      ``<galaxy_home>/tool_data_table_conf.xml`` when that file is missing,
    - appends the tab-joined ``line_parts`` to ``tool-data/<ref_file>``
      unless an identical line (ignoring surrounding whitespace) is already
      present.

    Directories and files are handled with the os module instead of
    shell-interpolated ``mkdir``/``touch`` commands, so paths containing
    spaces or shell metacharacters work, and the process working directory
    is left alone.
    """
    galaxy_home = getattr(env, "galaxy_home", None)
    if galaxy_home is None:
        return
    tools_dir = os.path.join(galaxy_home, "tool-data")
    if not os.path.exists(tools_dir):
        os.makedirs(tools_dir)
    dt_file = os.path.join(galaxy_home, "tool_data_table_conf.xml")
    if not os.path.exists(dt_file):
        shutil.copy(env.tool_data_table_conf_file, dt_file)
    add_str = "\t".join(line_parts)
    loc_path = os.path.join(tools_dir, ref_file)
    has_line = False
    if os.path.exists(loc_path):
        wanted = add_str.strip()
        with open(loc_path) as in_handle:
            # any() stops reading at the first matching line
            has_line = any(line.strip() == wanted for line in in_handle)
    if not has_line:
        # append mode creates the file when it does not exist yet
        with open(loc_path, "a") as out_handle:
            out_handle.write(add_str + "\n")
def prep_locs(env, gid, indexes, config):
    """Write Galaxy location file entries for every index that is available.

    ``indexes`` maps index names to index paths. Each row of the table below
    names a .loc file, the index it is fed from, an optional line prefix and
    the data table name used to look up the column layout. Rows whose index
    is absent (or empty) in ``indexes`` are skipped.
    """
    loc_table = (
        ("alignseq.loc", "ucsc", "seq", None),
        ("bismark_indices.loc", "bismark", "", 'bismark_indexes'),
        ("bowtie2_indices.loc", "bowtie2", "", 'bowtie2_indexes'),
        ("bowtie_indices.loc", "bowtie", "", 'bowtie_indexes'),
        ("bwa_index.loc", "bwa", "", 'bwa_indexes'),
        ("gatk_sorted_picard_index.loc", "seq", "", "gatk_picard_indexes"),
        ("mosaik_index.loc", "mosaik", "", "mosaik_indexes"),
        ("novoalign_indices.loc", "novoalign", "", "novoalign_indexes"),
        ("picard_index.loc", "seq", "", "picard_indexes"),
        ("sam_fa_indices.loc", "seq", "", 'sam_fa_indexes'),
        ("twobit.loc", "ucsc", "", None),
    )
    for loc_file, index_key, prefix, tool_name in loc_table:
        index_path = indexes.get(index_key, None)
        if not index_path:
            continue
        line = _build_galaxy_loc_line(env, gid, index_path, config, prefix, tool_name)
        update_loc_file(env, loc_file, line)
# ## Finalize downloads
def index_picard(ref_file):
    """Make sure a Picard sequence dictionary exists for a reference genome.

    The dictionary lives next to ``ref_file`` with the final extension
    replaced by ``.dict``. It is created with Picard's
    CreateSequenceDictionary when missing. Returns the dictionary path.
    """
    base, _ = os.path.splitext(ref_file)
    dict_file = "%s.dict" % base
    if os.path.exists(dict_file):
        return dict_file
    cmd = ("picard -Xms500m -Xmx3500m CreateSequenceDictionary REFERENCE={ref} OUTPUT={out}"
           .format(ref=ref_file, out=dict_file))
    subprocess.check_call(cmd, shell=True)
    return dict_file
def _finalize_index_seq(fname):
"""Convert UCSC 2bit file into fasta file.
"""
out_fasta = fname + ".fa"
if not os.path.exists(out_fasta):
subprocess.check_call("twoBitToFa {base}.2bit {out}".format(
base=fname, out=out_fasta), shell=True)
# Post-download handlers keyed by index name. _finalize_index looks the
# handler up here; index names without an entry get no extra processing.
finalize_fns = {"ucsc": _finalize_index_seq,
                "seq": index_picard}
def _finalize_index(idx, fname):
    """Run the post-processing step registered for an rsync'ed index, if any.

    ``idx`` selects the handler from ``finalize_fns``; ``fname`` is handed
    to it unchanged.
    """
    handler = finalize_fns.get(idx)
    if not handler:
        return
    handler(fname)
# ## Retrieve data from Galaxy
def rsync_genomes(genome_dir, genomes, genome_indexes, env=None):
    """Top level entry point to retrieve rsync'ed indexes from Galaxy.

    genome_dir -- directory the per-genome index directories are created in.
    genomes -- sequence whose items carry the genome identifier at position 1.
    genome_indexes -- names of the indexes to retrieve.
    env -- optional settings object handed to prep_locs. Galaxy .loc files
           can only be written with it (prep_locs reads ``galaxy_home`` and
           ``tool_data_table_conf_file`` from it), so that step is skipped
           when no env is supplied.
    """
    for gid in (x[1] for x in genomes):
        galaxy_gid = org_remap.get(gid, gid)
        indexes = _get_galaxy_genomes(galaxy_gid, genome_dir, genomes, genome_indexes)
        # The "ucsc" entry is finalized ahead of the others. The loop below
        # visits it again, which is harmless because _finalize_index_seq
        # skips output that already exists.
        if "ucsc" in indexes:
            _finalize_index("ucsc", indexes["ucsc"])
        # dict.iteritems() only exists on Python 2
        for idx, fname in indexes.items():
            _finalize_index(idx, fname)
        if env is not None:
            # prep_locs takes env as its first argument
            prep_locs(env, galaxy_gid, indexes, {})
def _get_galaxy_genomes(gid, genome_dir, genomes, genome_indexes):
"""Retrieve the provided genomes and indexes from Galaxy rsync.
"""
out = {}
org_dir = os.path.join(genome_dir, gid)
if not os.path.exists(org_dir):
subprocess.check_call('mkdir -p %s' % org_dir, shell=True)
for idx in genome_indexes:
galaxy_index_name = index_map.get(idx)
index_file = None
if galaxy_index_name:
index_file = _rsync_genome_index(gid, galaxy_index_name, org_dir)
if index_file:
out[idx] = index_file
else:
print("Galaxy does not support {0} for {1}".format(idx, gid))
return out
def _rsync_genome_index(gid, idx, org_dir):
"""Retrieve index for a genome from rsync server, returning path to files.
"""
idx_dir = os.path.join(org_dir, idx)
if not os.path.exists(idx_dir):
org_rsync = None
for subdir in galaxy_subdirs:
test_rsync = "{server}/indexes{subdir}/{gid}/{idx}/".format(
server=server, subdir=subdir, gid=gid, idx=idx)
try:
subprocess.check_output("rsync --list-only {server}".format(server=test_rsync))
org_rsync = test_rsync
except subprocess.CalledProcessError:
pass
if org_rsync is None:
raise ValueError("Could not find genome %s on Galaxy rsync" % gid)
try:
subprocess.check_call("rsync --list-only {server}".format(server=org_rsync), shell=True)
if not os.path.exists(idx_dir):
subprocess.check_call('mkdir -p %s' % idx_dir, shell=True)
with cd(idx_dir):
subprocess.check_call("rsync -avzP {server} {idx_dir}".format(server=org_rsync,
idx_dir=idx_dir), shell=True)
except subprocess.CalledProcessError:
pass
if os.path.exists(idx_dir):
try:
subprocess.check_call("ls {idx_dir}/{gid}.fa*".format(idx_dir=idx_dir,
gid=gid), shell=True)
ext = ".fa" if (has_fa_ext.succeeded and idx not in ["seq"]) else ""
except subprocess.CalledProcessError:
pass
return os.path.join(idx_dir, gid + ext)
================================================
FILE: cloudbio/biodata/genomes.py
================================================
"""Download and install structured genome data and aligner index files.
Downloads prepared FASTA, indexes for aligners like BWA, Bowtie and novoalign
and other genome data in automated pipelines. Specify the genomes and aligners
to use in an input biodata.yaml configuration file.
The main targets are fabric functions:
- install_data -- Install biological data from scratch, including indexing genomes.
- install_data_s3 -- Install biological data, downloading pre-computed indexes from S3.
- upload_s3 -- Upload created indexes to biodata S3 bucket.
"""
from __future__ import print_function
import collections
import os
import operator
import socket
import subprocess
import sys
import traceback
from math import log
try:
import yaml
except ImportError:
yaml = None
try:
import boto
except ImportError:
boto = None
from cloudbio.biodata import galaxy, ggd, rnaseq
from cloudbio.custom import shared
# -- Configuration for genomes to download and prepare
class _DownloadHelper:
def __init__(self):
self.config = {}
def ucsc_name(self):
return None
def _exists(self, fname, seq_dir):
"""Check if a file exists in either download or final destination.
"""
return os.path.exists(fname) or os.path.exists(os.path.join(seq_dir, fname))
class UCSCGenome(_DownloadHelper):
    """Download a genome from the UCSC goldenPath bigZips directory.

    genome_name -- UCSC genome name; used in the download URL and as the
                   base name of the resulting FASTA file.
    dl_name -- stored as ``dl_name``; defaults to genome_name.
    """
    def __init__(self, genome_name, dl_name=None):
        _DownloadHelper.__init__(self)
        self.data_source = "UCSC"
        self._name = genome_name
        self.dl_name = dl_name if dl_name is not None else genome_name
        self._url = "ftp://hgdownload.cse.ucsc.edu/goldenPath/%s/bigZips" % \
            genome_name

    def ucsc_name(self):
        """Return the UCSC genome name."""
        return self._name

    def _karyotype_sort(self, xs):
        """Sort reads in karyotypic order to work with GATK's defaults.

        ``xs`` holds per-chromosome FASTA file names. Numbered chromosomes
        come first in numeric order, followed by the remaining names.
        """
        def karyotype_keyfn(x):
            base = os.path.splitext(os.path.basename(x))[0]
            if base.startswith("chr"):
                base = base[3:]
            parts = base.split("_")
            try:
                base = int(parts[0])
            except ValueError:
                # non-numeric names sort after every numbered chromosome
                base = sys.maxsize
            # unplaced at the very end
            if parts[0] == "Un":
                parts.insert(0, "z")
            # mitochondrial special case -- after X/Y
            elif parts[0] in ["M", "MT"]:
                parts.insert(0, "x")
            # sort random and extra chromosomes after M
            elif len(parts) > 1:
                parts.insert(0, "y")
            # standard integers, sort first
            else:
                parts.insert(0, "a")
            return [base] + parts
        return sorted(xs, key=karyotype_keyfn)

    def _split_multifasta(self, fasta_file):
        """Split a multi-record FASTA file into one file per record.

        Each record is written to ``<header>.fa`` in the directory of
        ``fasta_file``, where <header> is the text after ">" on the record's
        header line. Returns the created file names, without directory, in
        input order. An empty input gives an empty list.

        Raises ValueError when sequence data precedes the first header.
        """
        file_handle = None
        file_names = []
        out_dir = os.path.dirname(fasta_file)
        try:
            with open(fasta_file) as in_handle:
                for line in in_handle:
                    if line.startswith(">"):
                        chrom = line.split(">")[1].strip()
                        if file_handle:
                            file_handle.close()
                        file_names.append(chrom + ".fa")
                        file_handle = open(os.path.join(out_dir, chrom + ".fa"), "w")
                    elif file_handle is None:
                        if not line.strip():
                            # blank lines ahead of the first record carry no data
                            continue
                        raise ValueError("Sequence data before first FASTA header in %s"
                                         % fasta_file)
                    file_handle.write(line)
        finally:
            # also closes the last record when reading fails half way
            if file_handle:
                file_handle.close()
        return file_names

    def download(self, seq_dir):
        """Download the genome and assemble it into a single FASTA file.

        Work happens in a "seq_prep" directory below the current directory.
        The per-chromosome FASTA files of the archive (or the records of a
        single multi-FASTA download) are concatenated in karyotypic order.

        Returns ``(genome_file, zip_files)``. When the genome FASTA already
        exists, nothing is downloaded and ``zip_files`` is empty.
        """
        zipped_file = None
        genome_file = "%s.fa" % self._name
        if not self._exists(genome_file, seq_dir):
            prep_dir = "seq_prep"
            subprocess.check_call("mkdir -p %s" % prep_dir, shell=True)
            with shared.chdir(prep_dir):
                zipped_file = self._download_zip(seq_dir)
                if zipped_file.endswith(".tar.gz"):
                    subprocess.check_call("tar -xzpf %s" % zipped_file, shell=True)
                elif zipped_file.endswith(".zip"):
                    subprocess.check_call("unzip %s" % zipped_file, shell=True)
                elif zipped_file.endswith(".gz"):
                    if not os.path.exists("out.fa"):
                        subprocess.check_call("gunzip -c %s > out.fa" % zipped_file, shell=True)
                else:
                    raise ValueError("Do not know how to handle: %s" % zipped_file)
                tmp_file = genome_file.replace(".fa", ".txt")
                found = subprocess.check_output("find `pwd` -name '*.fa'", shell=True).decode()
                # find output ends with a newline. Without dropping the
                # empty entry the single-file test below could never match.
                result = [x.strip() for x in found.split("\n") if x.strip()]
                if len(result) == 1:
                    orig_result = result[0]
                    # Split from a renamed copy so that a record named like
                    # the input file cannot overwrite the data being read.
                    split_input = orig_result + ".split_input"
                    os.rename(orig_result, split_input)
                    split_dir = os.path.dirname(split_input)
                    result = [os.path.join(split_dir, x)
                              for x in self._split_multifasta(split_input)]
                    os.remove(split_input)
                result = self._karyotype_sort(result)
                subprocess.check_call("rm -f inputs.txt", shell=True)
                for fname in result:
                    subprocess.check_output("echo '%s' >> inputs.txt" % fname, shell=True).decode()
                subprocess.check_call("cat `cat inputs.txt` > %s" % (tmp_file), shell=True)
                for fname in result:
                    subprocess.check_output("rm -f %s" % fname, shell=True).decode()
                subprocess.check_call("mv %s %s" % (tmp_file, genome_file), shell=True)
            zipped_file = os.path.join(prep_dir, zipped_file)
            genome_file = os.path.join(prep_dir, genome_file)
        # Only real paths go into the list; a None entry makes
        # os.path.exists() raise TypeError further down the line.
        return genome_file, ([zipped_file] if zipped_file else [])

    def _download_zip(self, seq_dir):
        """Fetch the first available genome archive and return its file name.

        Candidates are tried in order. One that already exists locally is
        used as is; otherwise it is fetched and the search stops at the
        first fetch that reports a result. When every fetch fails, the last
        candidate name is returned.
        """
        for zipped_file in ["chromFa.tar.gz", "%s.fa.gz" % self._name,
                            "chromFa.zip"]:
            if not self._exists(zipped_file, seq_dir):
                result = shared._remote_fetch(None, "%s/%s" % (self._url, zipped_file), allow_fail=True)
                if result:
                    break
            else:
                break
        return zipped_file
class NCBIRest(_DownloadHelper):
    """Fetch reference sequences through the TogoWS REST server for NCBI.

    name -- base name of the FASTA file to produce.
    refs -- identifiers of the sequences to download and concatenate.
    dl_name -- stored as ``dl_name``; defaults to name.
    """
    def __init__(self, name, refs, dl_name=None):
        _DownloadHelper.__init__(self)
        self.data_source = "NCBI"
        self._name = name
        self._refs = refs
        self.dl_name = name if dl_name is None else dl_name
        self._base_url = "http://togows.dbcls.jp/entry/ncbi-nucleotide/%s.fasta"

    def download(self, seq_dir):
        """Download all references into ``<name>.fa``.

        Each reference is fetched and its header line rewritten to just the
        reference identifier. Nothing is done when the genome file already
        exists. Returns ``(genome_file, [])``.
        """
        genome_file = "%s.fa" % self._name
        if self._exists(genome_file, seq_dir):
            return genome_file, []
        for accession in self._refs:
            shared._remote_fetch(None, self._base_url % accession)
            subprocess.check_call("ls -l", shell=True)
            subprocess.check_call(r"sed -i 's/^>.*$/>%s/' %s.fasta" % (accession, accession),
                                  shell=True)
        combined = genome_file.replace(".fa", ".txt")
        for cmd in ("cat *.fasta > %s" % combined,
                    "rm -f *.fasta",
                    "rm -f *.bak",
                    "mv %s %s" % (combined, genome_file)):
            subprocess.check_call(cmd, shell=True)
        return genome_file, []
class VectorBase(_DownloadHelper):
    """Retrieve genomes from VectorBase.

    One gzipped FASTA file is downloaded per entry of ``assembly_types``;
    its name is built from genus, species, strain, assembly type and
    release.
    """
    def __init__(self, name, genus, species, strain, release, assembly_types):
        _DownloadHelper.__init__(self)
        self._name = name
        self.data_source = "VectorBase"
        self._base_url = ("http://www.vectorbase.org/sites/default/files/ftp/"
                          "downloads/")
        file_template = ("{genus}-{species}-{strain}_{assembly}"
                         "_{release}.fa.gz")
        self._to_get = [
            file_template.format(genus=genus, species=species, strain=strain,
                                 assembly=assembly, release=release)
            for assembly in assembly_types]

    def download(self, seq_dir):
        """Fetch every missing assembly file and append it to ``<name>.fa``.

        Returns ``(genome_file, [])``.
        """
        genome_file = "%s.fa" % self._name
        for gz_name in self._to_get:
            url = self._base_url + gz_name
            if self._exists(gz_name, seq_dir):
                continue
            shared._remote_fetch(None, url)
            subprocess.check_call("gunzip -c %s >> %s" % (gz_name, genome_file), shell=True)
        return genome_file, []
class EnsemblGenome(_DownloadHelper):
    """Retrieve genome FASTA files from Ensembl.

    Example locations:

    ftp://ftp.ensemblgenomes.org/pub/plants/release-22/fasta/
    arabidopsis_thaliana/dna/Arabidopsis_thaliana.TAIR10.22.dna.toplevel.fa.gz
    ftp://ftp.ensembl.org/pub/release-75/fasta/
    caenorhabditis_elegans/dna/Caenorhabditis_elegans.WBcel235.75.dna.toplevel.fa.gz
    ftp://ftp.ensemblgenomes.org/pub/bacteria/release-23/bacteria/fasta/
    bacteria_17_collection/pseudomonas_aeruginosa_ucbpp_pa14/dna/
    Pseudomonas_aeruginosa_ucbpp_pa14.GCA_000014625.1.23.dna.toplevel.fa.gz

    The section "standard" selects ftp.ensembl.org, whose file names carry
    no release number. Any other section is a sub-directory of
    ftp.ensemblgenomes.org.
    """
    def __init__(self, ensembl_section, release, organism, name, subsection=None):
        _DownloadHelper.__init__(self)
        self.data_source = "Ensembl"
        is_standard = ensembl_section == "standard"
        if is_standard:
            url = "ftp://ftp.ensembl.org/pub/"
            get_file = "%s.%s.dna.toplevel.fa.gz" % (organism, name)
        else:
            url = "ftp://ftp.ensemblgenomes.org/pub/%s/" % ensembl_section
            get_file = "%s.%s.%s.dna.toplevel.fa.gz" % (organism, name, release)
        url += "release-%s/fasta/" % release
        if subsection:
            url += "%s/" % subsection
        url += "%s/dna/" % organism.lower()
        self._url = url
        self._get_file = get_file
        self._name = name
        self.dl_name = name

    def download(self, seq_dir):
        """Fetch the gzipped genome when missing and unpack it to ``<name>.fa``.

        Returns ``(genome_file, [gzipped_file])``.
        """
        genome_file = "%s.fa" % self._name
        gz_name = self._get_file
        if not self._exists(gz_name, seq_dir):
            shared._remote_fetch(None, "%s%s" % (self._url, gz_name))
        if not self._exists(genome_file, seq_dir):
            subprocess.check_call("gunzip -c %s > %s" % (gz_name, genome_file), shell=True)
        return genome_file, [gz_name]
class BroadGenome(_DownloadHelper):
    """Retrieve genomes organized and sorted by Broad for use with GATK.

    Uses the UCSC-name compatible versions of the GATK bundles.

    name -- base name of the FASTA file to produce.
    target_fasta -- name of the FASTA file inside the bundle directory.
    dl_name -- bundle directory name; defaults to name.
    """
    def __init__(self, name, target_fasta, dl_name=None):
        _DownloadHelper.__init__(self)
        self.data_source = "UCSC"
        self._name = name
        self.dl_name = name if dl_name is None else dl_name
        self._target = target_fasta
        bundle_root = "ftp://gsapubftp-anonymous:@ftp.broadinstitute.org/bundle/"
        self._ftp_url = bundle_root + "{org}/".format(org=self.dl_name)

    def download(self, seq_dir):
        """Fetch and unpack the bundle FASTA, renaming it to ``<name>.fa``.

        Nothing is done when the genome file already exists. Returns
        ``(genome_file, [])``.
        """
        org_file = "%s.fa" % self._name
        if self._exists(org_file, seq_dir):
            return org_file, []
        shared._remote_fetch(None, "%s%s.gz" % (self._ftp_url, self._target))
        subprocess.check_call("gunzip %s.gz" % self._target, shell=True)
        subprocess.check_call("mv %s %s" % (self._target, org_file), shell=True)
        return org_file, []
class GGDGenome:
    """Genome with download specified via a GGD recipe.

    Provides the same ``config`` attribute and ``ucsc_name`` method as the
    other download managers, because code that walks the configured genomes
    (_install_additional_data, _prep_genomes) uses them on every manager.
    """
    def __init__(self, name):
        self._name = name
        # replaced by _get_genomes with the genome's configuration entry
        self.config = {}

    def ucsc_name(self):
        """Return None: no UCSC name is associated with this manager."""
        return None
# Known genomes as (organism name, genome identifier, download manager)
# tuples. _get_genomes matches the "dbkey" of each configured genome against
# the identifier in the second position.
GENOMES_SUPPORTED = [
    ("phiX174", "phix", NCBIRest("phix", ["NC_001422.1"])),
    ("Scerevisiae", "sacCer3", UCSCGenome("sacCer3")),
    ("Mmusculus", "mm10", UCSCGenome("mm10")),
    ("Mmusculus", "mm9", UCSCGenome("mm9")),
    ("Mmusculus", "mm8", UCSCGenome("mm8")),
    ("Hsapiens", "hg18", BroadGenome("hg18", "Homo_sapiens_assembly18.fasta")),
    ("Hsapiens", "hg19", BroadGenome("hg19", "ucsc.hg19.fasta")),
    ("Hsapiens", "GRCh37", BroadGenome("GRCh37", "human_g1k_v37.fasta", "b37")),
    ("Hsapiens", "hg38", GGDGenome("hg38")),
    ("Hsapiens", "hg38-noalt", GGDGenome("hg38-noalt")),
    ("Rnorvegicus", "rn6", GGDGenome("rn6")),
    ("Rnorvegicus", "rn5", UCSCGenome("rn5")),
    ("Rnorvegicus", "rn4", UCSCGenome("rn4")),
    ("Xtropicalis", "xenTro3", UCSCGenome("xenTro3")),
    ("Athaliana", "TAIR10", EnsemblGenome("plants", "26",
                                          "Arabidopsis_thaliana", "TAIR10")),
    ("Dmelanogaster", "dm3", UCSCGenome("dm3")),
    ("Dmelanogaster", "BDGP6", GGDGenome("BDGP6")),
    ("Celegans", "WBcel235", EnsemblGenome("standard", "80",
                                           "Caenorhabditis_elegans", "WBcel235")),
    ("Mtuberculosis_H37Rv", "mycoTube_H37RV", NCBIRest("mycoTube_H37RV",
                                                       ["NC_000962"])),
    ("Msmegmatis", "92", NCBIRest("92", ["NC_008596.1"])),
    ("Paeruginosa_UCBPP-PA14", "pseudomonas_aeruginosa_ucbpp_pa14",
     EnsemblGenome("bacteria", "26", "Pseudomonas_aeruginosa_ucbpp_pa14",
                   "GCA_000014625.1", "bacteria_17_collection")),
    ("Ecoli", "eschColi_K12", NCBIRest("eschColi_K12", ["U00096.2"])),
    ("Amellifera_Honeybee", "apiMel3", UCSCGenome("apiMel3")),
    ("Cfamiliaris_Dog", "canFam3", UCSCGenome("canFam3")),
    ("Cfamiliaris_Dog", "canFam2", UCSCGenome("canFam2")),
    ("Drerio_Zebrafish", "Zv9", EnsemblGenome("standard", "80", "Danio_rerio", "Zv9")),
    ("Drerio_Zebrafish", "GRCz10", EnsemblGenome("standard", "81", "Danio_rerio", "GRCz10")),
    ("Drerio_Zebrafish", "GRCz11", EnsemblGenome("standard", "92", "Danio_rerio", "GRCz11")),
    ("Sscrofa", "Sscrofa11.1", EnsemblGenome("standard", "92", "Sus_scrofa", "Sscrofa11.1")),
    ("Ecaballus_Horse", "equCab2", UCSCGenome("equCab2")),
    ("Fcatus_Cat", "felCat3", UCSCGenome("felCat3")),
    ("Ggallus_Chicken", "galGal4", UCSCGenome("galGal4")),
    ("Tguttata_Zebra_finch", "taeGut1", UCSCGenome("taeGut1")),
    ("Aalbimanus", "AalbS1", VectorBase("AalbS1", "Anopheles",
                                        "albimanus", "STECLA",
                                        "AalbS1", ["SCAFFOLDS"])),
    ("Agambiae", "AgamP3", VectorBase("AgamP3", "Anopheles",
                                      "gambiae", "PEST",
                                      "AgamP3", ["CHROMOSOMES"]))]

# Index names; together with DEFAULT_GENOME_INDEXES they are the directory
# names _clean_genome_directory removes.
GENOME_INDEXES_SUPPORTED = ["bowtie", "bowtie2", "bwa", "maq", "minimap2", "novoalign",
                            "novoalign-cs", "ucsc", "mosaik", "snap", "star",
                            "rtg", "hisat2", "bbmap", "bismark"]
# Indexes the install entry points add to the configured list when missing.
DEFAULT_GENOME_INDEXES = ["seq"]
# -- Fabric instructions
def _check_version(env):
version = env.version
if int(version.split(".")[0]) < 1:
raise NotImplementedError("Please install fabric version 1 or better")
def install_data(config_source, approaches=None):
    """Fabric entry point for installing biological data, back compatible.

    Checks the fabric version, then delegates to install_data_local with
    the settings held on the fabric ``env``.
    """
    from fabric.api import env
    _check_version(env)
    install_data_local(
        config_source,
        env.system_install,
        env.data_files,
        env.galaxy_home,
        env.tool_data_table_conf_file,
        env.cores,
        approaches,
    )
def install_data_local(config_source, system_installdir, data_filedir,
                       galaxy_home=None, tool_data_table_conf_file=None,
                       cores=None, approaches=None):
    """Install biological data on the local machine without using fabric.

    config_source -- configuration dict or path to a YAML configuration.
    system_installdir -- install prefix; its bin directory is put in front
                         of PATH so that installed tools are found.
    data_filedir -- directory that receives the "genomes" tree.
    approaches -- retrieval methods to try, in order; defaults to
                  ["ggd", "s3", "raw"].
    """
    cores = cores or 1
    prep_fns = {"s3": _download_s3_index,
                "ggd": _install_with_ggd,
                "raw": _prep_raw_index}
    if approaches is None:
        approaches = ["ggd", "s3", "raw"]
    Env = collections.namedtuple("Env", "system_install, galaxy_home, tool_data_table_conf_file, cores")
    env = Env(system_installdir, galaxy_home, tool_data_table_conf_file, cores)
    ready_approaches = [(approach, prep_fns[approach]) for approach in approaches]
    # Append a potentially custom system install path to PATH so tools are found
    os.environ["PATH"] = "%s/bin:%s" % (os.path.join(system_installdir), os.environ["PATH"])
    genomes, genome_indexes, config = _get_genomes(config_source)
    missing_defaults = [x for x in DEFAULT_GENOME_INDEXES if x not in genome_indexes]
    genome_indexes = missing_defaults + genome_indexes
    _make_genome_directories(genomes, data_filedir)
    rnaseq.cleanup(genomes, data_filedir)
    _prep_genomes(env, genomes, genome_indexes, ready_approaches, data_filedir)
    rnaseq.finalize(genomes, data_filedir)
def install_data_s3(config_source):
    """Install genomes from the pre-built data available on Amazon s3.

    Uses the fabric ``env`` for the data directory and finishes with the
    additional data (custom genomes, liftover, uniref) requested in the
    configuration.
    """
    from fabric.api import env
    _check_version(env)
    genomes, genome_indexes, config = _get_genomes(config_source)
    missing_defaults = [x for x in DEFAULT_GENOME_INDEXES if x not in genome_indexes]
    genome_indexes += missing_defaults
    data_dir = env.data_files
    _make_genome_directories(genomes, data_dir)
    rnaseq.cleanup(genomes, env.data_files)
    _download_genomes(env, genomes, genome_indexes)
    rnaseq.finalize(genomes, env.data_files)
    _install_additional_data(env, genomes, genome_indexes, config)
def install_data_rsync(config_source):
    """Install genomes from the data kept on the Galaxy rsync servers.

    The "ucsc" index is always requested in addition to the configured and
    default indexes.
    """
    from fabric.api import env
    _check_version(env)
    genomes, genome_indexes, config = _get_genomes(config_source)
    missing_defaults = [x for x in DEFAULT_GENOME_INDEXES if x not in genome_indexes]
    genome_indexes += missing_defaults
    # Galaxy stores FASTAs in ucsc format and generates on the fly
    if "ucsc" not in genome_indexes:
        genome_indexes.append("ucsc")
    target_dir = _make_genome_dir(env.data_files)
    galaxy.rsync_genomes(target_dir, genomes, genome_indexes)
def upload_s3(config_source):
    """Prepare genome files and upload them to Amazon s3 buckets.

    Requires boto and must run against the local machine: the fabric host
    has to be "localhost" or start with this machine's host name.
    """
    from fabric.api import env
    if boto is None:
        raise ImportError("install boto to upload to Amazon s3")
    runs_locally = env.host == "localhost" or env.host.startswith(socket.gethostname())
    if not runs_locally:
        raise ValueError("Need to run S3 upload on a local machine")
    _check_version(env)
    genomes, genome_indexes, config = _get_genomes(config_source)
    missing_defaults = [x for x in DEFAULT_GENOME_INDEXES if x not in genome_indexes]
    genome_indexes += missing_defaults
    _data_ngs_genomes(env, genomes, genome_indexes)
    _upload_genomes(env, genomes, genome_indexes)
def _install_additional_data(env, genomes, genome_indexes, config):
for custom in (config.get("custom") or []):
_prep_custom_genome(custom, genomes, genome_indexes, env)
if config.get("install_liftover", False):
lift_over_genomes = [g.ucsc_name() for (_, _, g) in genomes if g.ucsc_name()]
_data_liftover(env, lift_over_genomes)
if config.get("install_uniref", False):
_data_uniref(env)
def _get_genomes(config_source):
if isinstance(config_source, dict):
config = config_source
else:
if yaml is None:
raise ImportError("install yaml to read configuration from %s" % config_source)
with open(config_source) as in_handle:
config = yaml.load(in_handle)
genomes = []
genomes_config = config["genomes"] or []
print("List of genomes to get (from the config file at '{0}'): {1}"
.format(config_source, ', '.join(g.get('name', g["dbkey"]) for g in genomes_config)))
for g in genomes_config:
ginfo = None
for info in GENOMES_SUPPORTED:
if info[1] == g["dbkey"]:
ginfo = info
break
assert ginfo is not None, "Did not find download info for %s" % g["dbkey"]
name, gid, manager = ginfo
manager.config = g
genomes.append((name, gid, manager))
indexes = config["genome_indexes"] or []
if "seq" in indexes:
indexes.remove("seq")
indexes.insert(0, "seq")
return genomes, indexes, config
# ## Decorators and context managers
def _if_installed(pname):
"""Run if the given program name is installed.
"""
def argcatcher(func):
def decorator(*args, **kwargs):
envs = [x for x in args if hasattr(x, "system_install")]
env = envs[0] if envs else None
if shared.which(pname, env):
return func(*args, **kwargs)
return decorator
return argcatcher
# ## Generic preparation functions
def _make_genome_dir(data_filedir):
genome_dir = os.path.join(data_filedir, "genomes")
subprocess.check_output("mkdir -p %s" % genome_dir, shell=True).decode()
return genome_dir
def _make_genome_directories(genomes, data_filedir):
    """Create ``genomes/<organism>/<genome id>`` for every configured genome."""
    genome_dir = _make_genome_dir(data_filedir)
    for orgname, gid, _manager in genomes:
        org_dir = os.path.join(genome_dir, orgname, gid)
        if os.path.exists(org_dir):
            continue
        subprocess.check_call('mkdir -p %s' % org_dir, shell=True)
def _prep_genomes(env, genomes, genome_indexes, retrieve_fns, data_filedir):
    """Prepare genomes with the given indexes, supporting multiple retrieval methods.

    env -- settings object passed through to the retrieval functions.
    genomes -- (organism name, genome id, manager) tuples.
    genome_indexes -- index names to prepare for every genome.
    retrieve_fns -- (method name, function) pairs tried in order; each
                    function is called as ``fn(env, manager, gid, idx)``.
    data_filedir -- directory holding the "genomes" tree.

    Raises IOError when no method manages to prepare an index. An error in
    the "ggd" method for a GGD recipe is re-raised immediately.
    """
    genome_dir = _make_genome_dir(data_filedir)
    for (orgname, gid, manager) in genomes:
        org_dir = os.path.join(genome_dir, orgname, gid)
        if not os.path.exists(org_dir):
            subprocess.check_call('mkdir -p %s' % org_dir, shell=True)
        # GGD recipes: annotations and validation entries of the genome's
        # configuration plus its own "indexes" that were also requested.
        ggd_recipes = manager.config.get("annotations", []) + manager.config.get("validation", [])
        ggd_recipes += [x for x in manager.config.get("indexes", []) if x in genome_indexes]
        for idx in genome_indexes + ggd_recipes:
            # NOTE(review): shared.chdir is presumably a context manager
            # that switches the working directory -- confirm in cloudbio.custom.shared
            with shared.chdir(org_dir):
                # plain indexes are skipped once their directory exists;
                # GGD recipes are always attempted
                if idx in ggd_recipes or not os.path.exists(idx):
                    finished = False
                    last_exc = None
                    for method, retrieve_fn in retrieve_fns:
                        try:
                            retrieve_fn(env, manager, gid, idx)
                            finished = True
                            break
                        except KeyboardInterrupt:
                            raise
                        except BaseException as e:
                            # Fail on incorrect GGD recipes
                            if idx in ggd_recipes and method == "ggd":
                                raise
                            else:
                                # remember the failure and fall through to
                                # the next retrieval method
                                last_exc = traceback.format_exc()
                                print("Moving on to next genome prep method after trying {0}\n{1}".format(
                                    method, str(e)))
                    if not finished:
                        raise IOError("Could not prepare index {0} for {1} by any method\n{2}"
                                      .format(idx, gid, last_exc))
        # the reference is named after the genome id or, failing that,
        # after the manager's own name
        ref_file = os.path.join(org_dir, "seq", "%s.fa" % gid)
        if not os.path.exists(ref_file):
            ref_file = os.path.join(org_dir, "seq", "%s.fa" % manager._name)
        assert os.path.exists(ref_file), ref_file
        _index_to_galaxy(env, org_dir, ref_file, gid, genome_indexes, manager.config)
# ## Genomes index for next-gen sequencing tools
def _get_ref_seq(manager):
"""Check for or retrieve the reference sequence.
"""
seq_dir = os.path.join(os.getcwd(), "seq")
ref_file = os.path.join(seq_dir, "%s.fa" % manager._name)
if not os.path.exists(ref_file):
ref_file, base_zips = manager.download(seq_dir)
ref_file = _move_seq_files(ref_file, base_zips, seq_dir)
return ref_file
def _prep_raw_index(env, manager, gid, idx):
    """Build one index from the raw downloaded reference sequence."""
    print("Preparing genome {0} with index {1}".format(gid, idx))
    reference = _get_ref_seq(manager)
    build_index = get_index_fn(idx)
    build_index(env, reference)
def _data_ngs_genomes(env, genomes, genome_indexes):
    """Download next generation genomes and create their index files.

    For every (organism, genome, manager) entry the reference is downloaded
    into ``genomes/<organism>/<genome>/seq`` below ``env.data_files`` and
    indexed. A manager's own "indexes" setting takes precedence over
    ``genome_indexes``. Existing index directories are removed first when
    ``env.remove_old_genomes`` is set.
    """
    genome_dir = _make_genome_dir(env.data_files)
    for organism, genome, manager in genomes:
        cur_dir = os.path.join(genome_dir, organism, genome)
        print("Processing genome {0} and putting it to {1}".format(organism, cur_dir))
        if not os.path.exists(cur_dir):
            subprocess.check_call('mkdir -p %s' % cur_dir, shell=True)
        with shared.chdir(cur_dir):
            if getattr(env, "remove_old_genomes", False):
                _clean_genome_directory()
            seq_dir = 'seq'
            downloaded, base_zips = manager.download(seq_dir)
            ref_file = _move_seq_files(downloaded, base_zips, seq_dir)
        wanted_indexes = manager.config.get("indexes", genome_indexes)
        _index_to_galaxy(env, cur_dir, ref_file, genome, wanted_indexes, manager.config)
def _index_to_galaxy(env, work_dir, ref_file, gid, genome_indexes, config):
    """Build the requested indexes and register them in Galaxy loc files.

    Every index function runs inside ``work_dir``. The index paths that come
    back are made relative to ``work_dir`` and passed to galaxy.prep_locs.
    Index functions that return nothing are left out.
    """
    built = {}
    with shared.chdir(work_dir):
        for idx in genome_indexes:
            build_index = get_index_fn(idx)
            index_file = build_index(env, ref_file)
            if not index_file:
                continue
            built[idx] = os.path.join(work_dir, index_file)
    galaxy.prep_locs(env, gid, built, config)
class CustomMaskManager:
    """Create a custom genome based on masking an existing genome.

    custom -- dict with the keys "mask" (location of the mask file), "base"
              (identifier of the genome to mask) and "dbkey" (identifier of
              the genome to create).
    config -- configuration stored as ``config``, as on the other managers.
    """
    def __init__(self, custom, config):
        assert "mask" in custom
        self._custom = custom
        self.config = config

    def download(self, seq_dir):
        """Produce the masked FASTA unless it is already in seq_dir.

        Expects the base genome at ``../<base>/seq/<base>.fa`` relative to
        the current directory. The mask is fetched when missing, complemented
        against the base genome's ``.fai`` index with bedtools and applied
        with ``bedtools maskfasta``.

        Returns ``(masked_fasta, [mask_file, complemented_mask])``.
        """
        base_seq = os.path.join(os.pardir, self._custom["base"],
                                "seq", "{0}.fa".format(self._custom["base"]))
        assert os.path.exists(base_seq)
        mask_file = os.path.basename(self._custom["mask"])
        # The apply() builtin is gone in Python 3; argument unpacking gives
        # the same "<name>-complement<ext>" result.
        ready_mask = "{0}-complement{1}".format(*os.path.splitext(mask_file))
        out_fasta = "{0}.fa".format(self._custom["dbkey"])
        if not os.path.exists(os.path.join(seq_dir, out_fasta)):
            if not os.path.exists(mask_file):
                shared._remote_fetch(None, self._custom["mask"])
            if not os.path.exists(ready_mask):
                subprocess.check_call("bedtools complement -i {i} -g {g}.fai > {o}".format(
                    i=mask_file, g=base_seq, o=ready_mask), shell=True)
            if not os.path.exists(out_fasta):
                subprocess.check_call("bedtools maskfasta -fi {fi} -bed {bed} -fo {fo}".format(
                    fi=base_seq, bed=ready_mask, fo=out_fasta), shell=True)
        return out_fasta, [mask_file, ready_mask]
def _prep_custom_genome(custom, genomes, genome_indexes, env):
"""Prepare a custom genome derived from existing genome.
Allows creation of masked genomes for specific purposes.
"""
cur_org = None
cur_manager = None
for org, gid, manager in genomes:
if gid == custom["base"]:
cur_org = org
cur_manager = manager
break
assert cur_org is not None
_data_ngs_genomes(env, [[cur_org, custom["dbkey"],
CustomMaskManager(custom, cur_manager.config)]],
genome_indexes)
def _clean_genome_directory():
    """Delete existing index directories from the current directory.

    Every name in GENOME_INDEXES_SUPPORTED and DEFAULT_GENOME_INDEXES that
    exists in the working directory is removed recursively.
    """
    known_dirs = GENOME_INDEXES_SUPPORTED + DEFAULT_GENOME_INDEXES
    for dirname in known_dirs:
        if not os.path.exists(dirname):
            continue
        subprocess.check_call("rm -rf %s" % dirname, shell=True)
def _move_seq_files(ref_file, base_zips, seq_dir):
if not os.path.exists(seq_dir):
subprocess.check_call('mkdir %s' % seq_dir, shell=True)
for move_file in [ref_file] + base_zips:
if os.path.exists(move_file):
subprocess.check_call("mv %s %s" % (move_file, seq_dir), shell=True)
path, fname = os.path.split(ref_file)
moved_ref = os.path.join(path, seq_dir, fname)
assert os.path.exists(moved_ref), moved_ref
return moved_ref
# ## Indexing for specific aligners
def _index_w_command(env, dir_name, command, ref_file, pre=None, post=None, ext=None):
    """Run an indexing command inside its own index directory.

    dir_name -- index directory; the command only runs when it does not
                exist yet.
    command -- template with ``{ref_file}`` and ``{index_name}``
               placeholders.
    ref_file -- reference path relative to the current directory.
    pre -- optional callable receiving the reference path as seen from
           inside dir_name; its return value replaces that path.
    post -- optional callable invoked with the reference path after the
            command has run.
    ext -- optional suffix appended to the index name.

    Returns ``<dir_name>/<index name>``, the index name being the base name
    of ref_file without its extension (plus ext).
    """
    path_export = _get_path_export(env)
    stem = os.path.splitext(os.path.basename(ref_file))[0]
    index_name = stem if ext is None else stem + ext
    ref_from_index_dir = os.path.join(os.pardir, ref_file)
    if not os.path.exists(dir_name):
        subprocess.check_call("mkdir %s" % dir_name, shell=True)
        with shared.chdir(dir_name):
            if pre:
                ref_from_index_dir = pre(ref_from_index_dir)
            build_cmd = path_export + command.format(ref_file=ref_from_index_dir,
                                                     index_name=index_name)
            subprocess.check_call(build_cmd, shell=True)
            if post:
                post(ref_from_index_dir)
    return os.path.join(dir_name, index_name)
@_if_installed("faToTwoBit")
def _index_twobit(env, ref_file):
    """Build a UCSC 2bit index of the reference for random access.

    Only runs when faToTwoBit is installed. Returns the index path inside
    the "ucsc" directory.
    """
    return _index_w_command(env, "ucsc", "faToTwoBit {ref_file} {index_name}", ref_file)
def _index_bowtie(env, ref_file):
    """Build a bowtie index in the "bowtie" directory and return its path."""
    build_cmd = "bowtie-build -f {ref_file} {index_name}"
    return _index_w_command(env, "bowtie", build_cmd, ref_file)
def _index_bowtie2(env, ref_file):
    """Build a bowtie2 index and link the reference FASTA next to it.

    After indexing, ``<index path>.fa`` is created as a relative symlink to
    the reference unless it already exists. Returns the index path inside
    the "bowtie2" directory.
    """
    out_suffix = _index_w_command(env, "bowtie2", "bowtie2-build {ref_file} {index_name}",
                                  ref_file)
    ref_dir = os.path.dirname(ref_file)
    bowtie_link = os.path.normpath(
        os.path.join(ref_dir, os.path.pardir, out_suffix + ".fa"))
    link_target = os.path.relpath(ref_file, os.path.dirname(bowtie_link))
    if not os.path.exists(bowtie_link):
        subprocess.check_call("ln -sf %s %s" % (link_target, bowtie_link), shell=True)
    return out_suffix
def _index_bwa(env, ref_file):
dir_name = "bwa"
local_ref = os.path.split(ref_file)[-1]
if not os.path.exists(os.path.join(dir_name, "%s.bwt" % local_ref)):
subprocess.check_call("mkdir -p %s" % dir_name, shell=True)
with shared.chdir(dir_name):
subprocess.check_call("ln -sf %s" % os.path.join(os.pardir, ref_file), shell=True)
try:
subprocess.check_call("bwa index -a bwtsw %s" % local_ref, shell=True)
except subprocess.CalledProcessError:
# work around a bug in bwa indexing for small files
subprocess.check_call("bwa index %s" % local_ref, shell=True)
subprocess.check_call("rm -f %s" % local_ref, shell=True)
return os.path.join(dir_name, local_ref)
def _index_bbmap(env, ref_file):
    """Build a bbmap index for ``ref_file`` inside the ``bbmap`` directory.

    The build is skipped when ``bbmap/ref/genome/1/summary.txt`` already
    exists. Returns the directory name ``"bbmap"``.
    """
    dir_name = "bbmap"
    # Fall back to 1 only when the attribute is missing; other errors should
    # surface instead of being swallowed.
    cores = getattr(env, "cores", 1)
    summary_file = os.path.join(dir_name, "ref", "genome", "1", "summary.txt")
    if not os.path.exists(summary_file):
        subprocess.check_call("mkdir -p %s" % dir_name, shell=True)
        # NOTE(review): the initial heap (-Xms, in GB) is taken from the core
        # count while the maximum is fixed at 24g -- confirm this is intended.
        subprocess.check_call("bbmap.sh -Xms%sg -Xmx24g path=%s ref=%s" %
                              (cores, dir_name, ref_file), shell=True)
    return dir_name
def _index_bismark(env, ref_file):
    """Prepare a bismark bisulfite genome inside the ``bismark`` directory.

    Returns ``bismark/Bisulfite_Genome``; preparation is skipped when that
    directory already exists.
    """
    dir_name = "bismark"
    subprocess.check_call("mkdir -p %s" % dir_name, shell=True)
    out_dir = os.path.join(dir_name, "Bisulfite_Genome")
    if os.path.exists(out_dir):
        return out_dir
    with shared.chdir(dir_name):
        local = os.path.basename(ref_file)
        # We are now one level below the starting directory, so a relative
        # ref_file must be reached through the parent directory (as the bwa
        # indexer does). os.path.join leaves an absolute ref_file unchanged.
        link_target = os.path.join(os.pardir, ref_file)
        subprocess.check_call("ln -sf {0} {1}".format(link_target, local), shell=True)
        subprocess.check_call("bismark_genome_preparation .", shell=True)
    return out_dir
def _index_maq(env, ref_file):
    """Build a maq binary FASTA index inside the ``maq`` directory.

    The reference is symlinked locally before ``maq fasta2bfa`` runs and the
    link is removed afterwards; both steps are handed to ``_index_w_command``
    as its ``pre`` and ``post`` hooks.
    """
    def _symlink_here(ref_path):
        # Link the reference into the working directory; return the link name.
        link_name = os.path.basename(ref_path)
        subprocess.check_call("ln -sf {0} {1}".format(ref_path, link_name), shell=True)
        return link_name

    def _remove_link(link_name):
        subprocess.check_call("rm -f {0}".format(link_name), shell=True)

    return _index_w_command(env, "maq", "maq fasta2bfa {ref_file} {index_name}", ref_file,
                            pre=_symlink_here, post=_remove_link)
def _index_minimap2(env, ref_file):
    """Build minimap2 ``.mmi`` indexes, one per preset, in ``minimap2``.

    Only the ``sr`` preset is built. Returns the path of the first index.
    """
    dir_name = "minimap2"
    genome_base = os.path.splitext(os.path.basename(ref_file))[0]
    built = []
    for preset in ["sr"]:
        mmi_name = "%s-%s.mmi" % (genome_base, preset)
        cmd = "minimap2 -x %s -d %s {ref_file}" % (preset, mmi_name)
        helper_out = _index_w_command(env, dir_name, cmd, ref_file)
        # The helper returns a path in the index directory; keep its
        # directory part and substitute the real .mmi file name.
        built.append(os.path.join(os.path.dirname(helper_out), mmi_name))
    return built[0]
@_if_installed("novoindex")
def _index_novoalign(env, ref_file):
    """Build a novoalign index for ``ref_file`` in the ``novoalign`` directory.

    Returns whatever ``_index_w_command`` returns for the build.
    """
    return _index_w_command(env, "novoalign", "novoindex {index_name} {ref_file}", ref_file)
@_if_installed("novoalignCS")
def _index_novoalign_cs(env, ref_file):
    """Build a novoalign index with the ``-c`` flag in ``novoalign_cs``.

    Returns whatever ``_index_w_command`` returns for the build.
    """
    return _index_w_command(env, "novoalign_cs", "novoindex -c {index_name} {ref_file}",
                            ref_file)
def _index_sam(env, ref_file):
    """Create the samtools ``.fai`` index and the picard index for a FASTA.

    ``samtools faidx`` runs only when the ``.fai`` file is missing;
    ``galaxy.index_picard`` is always called. Returns ``ref_file`` unchanged.
    """
    seq_dir, fasta_name = os.path.split(ref_file)
    with shared.chdir(seq_dir):
        fai_present = os.path.exists("%s.fai" % fasta_name)
        if not fai_present:
            subprocess.check_call("samtools faidx %s" % fasta_name, shell=True)
    galaxy.index_picard(ref_file)
    return ref_file
@_if_installed("STAR")
def _index_star(env, ref_file):
    """Build a STAR genome index in a ``star`` directory beside the sequence directory.

    Returns the index directory. The build is skipped when the ``SA`` file is
    already present there. For an ``hg38`` build a temporary "simple"
    reference (no alts, decoys or HLA) is generated, indexed, and then removed.
    """
    ref_dir = os.path.dirname(ref_file)
    build = os.path.basename(os.path.splitext(ref_file)[0])
    dir_name = os.path.normpath(os.path.join(ref_dir, os.pardir, "star"))
    sentinel_file = os.path.join(dir_name, "SA")
    if os.path.exists(sentinel_file):
        return dir_name
    if build == "hg38":
        simple_file = os.path.splitext(ref_file)[0] + "-simple.fa"
        print(f"hg38 detected, building a simple reference with no alts, decoys or HLA from {ref_file} to {simple_file}.")
        # From here on ref_file refers to the simplified FASTA.
        ref_file = prepare_simple_reference(ref_file, simple_file)
    # The FASTA file size in bytes is used as the genome length estimate.
    GenomeLength = os.path.getsize(ref_file)
    Nbases = int(round(min(14, log(GenomeLength, 2) / 2 - 2), 0))
    # if there is a large number of contigs, scale nbits down
    # https://github.com/alexdobin/STAR/issues/103#issuecomment-173009628
    # if there is a small genome, scale nbits down
    # https://groups.google.com/forum/#!topic/rna-star/9g8Uoe1Igho
    cmd = 'grep ">" {ref_file} | wc -l'.format(ref_file=ref_file)
    nrefs = float(subprocess.check_output(cmd, shell=True).decode())
    nbits = int(round(min(14, log(GenomeLength / nrefs, 2), log(GenomeLength, 2) / 2 - 1)))
    # first we estimate the number of bits we need to hold the genome and allocate
    # double that plus some padding to build the index
    mem = ((GenomeLength + 1) / nbits + 1) * nbits
    mem = (mem + 10000) * 2
    mem = mem + mem / 3
    mem = max(mem, 30000000000)
    # Default to a single thread only when env carries no ``cores`` attribute.
    cpu = getattr(env, "cores", 1)
    print(f"Preparing STAR index from {ref_file}.")
    # {ref_file} is left unformatted here; _index_w_command fills it in.
    # NOTE(review): Nbases is passed to --genomeChrBinNbits and nbits to
    # --genomeSAindexNbases; the variable names suggest the opposite pairing.
    # Confirm against the STAR manual before changing.
    cmd = ("STAR --genomeDir %s --genomeFastaFiles {ref_file} "
           "--runThreadN %s "
           "--limitGenomeGenerateRAM %s "
           "--genomeChrBinNbits %s "
           "--runMode genomeGenerate "
           "--genomeSAindexNbases %s" % (dir_name, str(cpu), str(mem), Nbases,
                                         nbits))
    if not os.path.exists(os.path.join(dir_name, "SA")):
        _index_w_command(env, dir_name, cmd, ref_file)
    if build == "hg38":
        # Remove the temporary simplified reference created above.
        print(f"Removing {ref_file}.")
        os.remove(ref_file)
    return dir_name
@_if_installed("hisat2-build")
def _index_hisat2(env, ref_file):
    """Build a hisat2 index in a ``hisat2`` directory beside the sequence directory.

    Returns the index directory. The build is skipped when
    ``<build>.1.ht2`` already exists there. When
    ``../rnaseq/ref-transcripts.gtf`` is present, exon and splice-site files
    are extracted from it and passed to ``hisat2-build`` if both are non-empty.
    """
    path_export = _get_path_export(env)
    build = os.path.splitext(os.path.basename(ref_file))[0]
    ref_dir = os.path.dirname(ref_file)
    gtf_file = os.path.join(ref_dir, os.pardir, "rnaseq", "ref-transcripts.gtf")
    dir_name = os.path.normpath(os.path.join(ref_dir, os.pardir, "hisat2"))
    index_prefix = os.path.join(dir_name, build)
    first_index_file = index_prefix + ".1.ht2"
    if os.path.exists(first_index_file):
        return dir_name
    if not os.path.exists(dir_name):
        subprocess.check_call('mkdir -p %s' % dir_name, shell=True)
    # Default to a single thread only when env carries no ``cores`` attribute.
    cpu = getattr(env, "cores", 1)
    cmd = "{path_export}hisat2-build -p {cpu} "
    exons_file = index_prefix + ".exons"
    splicesites_file = index_prefix + ".splicesites"
    if os.path.exists(gtf_file):
        if not os.path.exists(exons_file):
            with open(exons_file, "w") as out_handle:
                exons_cmd = ["hisat2_extract_exons.py", gtf_file]
                subprocess.check_call(path_export + " ".join(exons_cmd), stdout=out_handle, shell=True)
        if not os.path.exists(splicesites_file):
            with open(splicesites_file, "w") as out_handle:
                splicesites_cmd = ["hisat2_extract_splice_sites.py", gtf_file]
                subprocess.check_call(path_export + " ".join(splicesites_cmd), stdout=out_handle, shell=True)
        # Only use the annotation files when both extractions produced output.
        if os.stat(exons_file).st_size > 0 and os.stat(splicesites_file).st_size > 0:
            cmd += "--exon {exons_file} --ss {splicesites_file} "
    cmd += "{ref_file} {index_prefix} "
    if not os.path.exists(first_index_file):
        # Explicit keywords instead of locals(), so renaming a local cannot
        # silently break the command template.
        subprocess.check_call(cmd.format(path_export=path_export, cpu=cpu,
                                         exons_file=exons_file,
                                         splicesites_file=splicesites_file,
                                         ref_file=ref_file,
                                         index_prefix=index_prefix), shell=True)
    return dir_name
def _index_snap(env, ref_file):
    """Build a SNAP index in the ``snap`` directory and return that directory.

    The original note on this function: snap indexing is computationally
    expensive and requests all cores and 64Gb of memory. The build is skipped
    when ``snap/GenomeIndex`` already exists.
    """
    dir_name = "snap"
    genome_base = os.path.splitext(os.path.basename(ref_file))[0]
    # hg19/GRCh37 get the dedicated -hg19 switch; other builds get no extra flag.
    if genome_base in ["hg19", "GRCh37"]:
        org_arg = "-hg19"
    else:
        org_arg = ""
    if os.path.exists(os.path.join(dir_name, "GenomeIndex")):
        return dir_name
    cmd = "snap-aligner index {ref_file} {dir_name} -bSpace {org_arg}"
    subprocess.check_call(cmd.format(ref_file=ref_file, dir_name=dir_name, org_arg=org_arg),
                          shell=True)
    return dir_name
def _get_path_export(env):
    """Return a shell prefix that puts the local install ``bin`` first on PATH.

    The result is ``"export PATH=<system_install>/bin:$PATH && "`` when
    ``env.system_install`` is set and its ``bin`` directory exists; otherwise
    an empty string.
    """
    install_root = getattr(env, "system_install", None)
    if not install_root:
        return ""
    local_bin = os.path.join(install_root, 'bin')
    if not os.path.exists(local_bin):
        return ""
    return "export PATH=%s:$PATH && " % local_bin
def _index_rtg(env, ref_file):
    """Format the reference as an SDF for Real Time Genomics tools.

    https://github.com/RealTimeGenomics/rtg-tools

    Writes ``rtg/<build>.sdf`` unless its ``done`` marker already exists.
    Returns the directory name ``"rtg"``.
    """
    path_export = _get_path_export(env)
    dir_name = "rtg"
    index_name = "%s.sdf" % os.path.splitext(os.path.basename(ref_file))[0]
    if os.path.exists(os.path.join(dir_name, index_name, "done")):
        return dir_name
    cmd = ("{path_export}export RTG_JAVA_OPTS='-Xms1g' && export RTG_MEM=2g && "
           "rtg format -o {dir_name}/{index_name} {ref_file}")
    subprocess.check_call(cmd.format(path_export=path_export, dir_name=dir_name,
                                     index_name=index_name, ref_file=ref_file),
                          shell=True)
    return dir_name
@_if_installed("MosaikJump")
def _index_mosaik(env, ref_file):
    """Build a Mosaik reference archive and its jump database in ``mosaik``.

    ``MosaikBuild`` runs through ``_index_w_command``; a ``post`` hook then
    runs ``MosaikJump`` with hash size 15 unless ``<base>_keys.jmp`` exists.
    """
    hash_size = 15

    def _build_jump_db(ref_path):
        # Runs from wherever _index_w_command invokes its post hook.
        jump_base = os.path.splitext(os.path.basename(ref_path))[0]
        archive = "{0}.dat".format(jump_base)
        if os.path.exists("{0}_keys.jmp".format(jump_base)):
            return
        jump_cmd = "export MOSAIK_TMP=`pwd` && MosaikJump -hs {hash_size} -ia {ref_file} -out {index_name}".format(
            hash_size=hash_size, ref_file=archive, index_name=jump_base)
        subprocess.check_call(jump_cmd, shell=True)

    return _index_w_command(env, "mosaik", "MosaikBuild -fr {ref_file} -oa {index_name}",
                            ref_file, post=_build_jump_db, ext=".dat")
# -- Retrieve using GGD recipes
def _install_with_ggd(env, manager, gid, recipe):
    """Install a data recipe from the bundled ``ggd-recipes`` directory.

    Looks for ``ggd-recipes/<gid>/<recipe>.yaml`` two levels above this module
    and installs it into the current working directory.

    Raises NotImplementedError when no such recipe file exists.
    """
    repo_root = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
    recipe_dir = os.path.normpath(os.path.join(repo_root, "ggd-recipes"))
    recipe_file = os.path.join(recipe_dir, gid, "%s.yaml" % recipe)
    if not os.path.exists(recipe_file):
        raise NotImplementedError("GGD recipe not available for %s %s" % (gid, recipe))
    ggd.install_recipe(os.getcwd(), env.system_install, recipe_file, gid)
# -- Genome upload and download to Amazon s3 buckets
def _download_s3_index(env, manager, gid, idx):
    """Download and unpack a pre-built index tarball from the biodata bucket.

    Only GRCh37, hg19 and mm10 with bowtie2, bwa or novoalign are available.

    Raises NotImplementedError for any other genome/index combination.
    """
    print("Downloading genome from s3: {0} {1}".format(gid, idx))
    url = "https://s3.amazonaws.com/biodata/genomes/%s-%s.tar.xz" % (gid, idx)
    has_prebuilt = (gid in ["GRCh37", "hg19", "mm10"]
                    and idx in ["bowtie2", "bwa", "novoalign"])
    if not has_prebuilt:
        raise NotImplementedError("No pre-computed indices for %s %s" % (gid, idx))
    archive = shared._remote_fetch(env, url, samedir=True)
    subprocess.check_call("xz -dc %s | tar -xvpf -" % archive, shell=True)
    subprocess.check_call("rm -f %s" % archive, shell=True)
def _download_genomes(env, genomes, genome_indexes):
    """Download pre-built indexes for each genome and register them with Galaxy.

    For every ``(orgname, gid, manager)`` entry, each missing index directory
    under ``<genome dir>/<orgname>/<gid>`` is fetched from s3. The reference
    FASTA is then located (``seq/<gid>.fa``, falling back to
    ``seq/<manager._name>.fa``) and passed to ``_index_to_galaxy``.
    """
    genome_dir = _make_genome_dir(env.data_files)
    for orgname, gid, manager in genomes:
        org_dir = os.path.join(genome_dir, orgname, gid)
        if not os.path.exists(org_dir):
            subprocess.check_call('mkdir -p %s' % org_dir, shell=True)
        for idx in genome_indexes:
            with shared.chdir(org_dir):
                already_there = os.path.exists(idx)
                if not already_there:
                    _download_s3_index(env, manager, gid, idx)
        seq_dir = os.path.join(org_dir, "seq")
        ref_file = os.path.join(seq_dir, "%s.fa" % gid)
        if not os.path.exists(ref_file):
            ref_file = os.path.join(seq_dir, "%s.fa" % manager._name)
        assert os.path.exists(ref_file), ref_file
        # A genome's own config may restrict which indexes are registered.
        cur_indexes = manager.config.get("indexes", genome_indexes)
        _index_to_galaxy(env, org_dir, ref_file, gid, cur_indexes, manager.config)
def _upload_genomes(env, genomes, genome_indexes):
    """Tar up every configured genome index and upload it to the s3 bucket.

    Each genome directory is cleaned first; after all uploads the ``biodata``
    bucket is made public.
    """
    bucket = boto.connect_s3().create_bucket("biodata")
    genome_dir = os.path.join(env.data_files, "genomes")
    for orgname, gid, _ in genomes:
        cur_dir = os.path.join(genome_dir, orgname, gid)
        _clean_directory(cur_dir, gid)
        for idx in genome_indexes:
            tarball = _tar_directory(os.path.join(cur_dir, idx), "%s-%s" % (gid, idx))
            _upload_to_s3(tarball, bucket)
    bucket.make_public()
def _upload_to_s3(tarball, bucket):
    """Upload a genome tarball to s3 unless its key is already in the bucket.

    The transfer is delegated to ``utils/s3_multipart_upload.py``, run with
    ``python`` as a subprocess.
    """
    s3_key_name = os.path.join("genomes", os.path.basename(tarball))
    if bucket.get_key(s3_key_name):
        return
    upload_script = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
                                 "utils", "s3_multipart_upload.py")
    # du -sm reports megabytes; dividing by 1000 gives an approximate Gb figure.
    du_output = subprocess.check_output("du -sm %s" % tarball, shell=True).decode()
    gb_size = int(du_output.split()[0]) / 1000.0
    print("Uploading %s %.1fGb" % (s3_key_name, gb_size))
    subprocess.check_call(["python", upload_script, tarball, bucket.name, s3_key_name,
                           "--public"])
def _tar_directory(dir, tar_name):
    """Create ``<tar_name>.tar.xz`` of a directory, next to that directory.

    An existing tarball is reused. Returns the tarball path.
    """
    parent_dir, dir_basename = os.path.split(dir)
    tarball = os.path.join(parent_dir, "%s.tar.xz" % tar_name)
    if os.path.exists(tarball):
        return tarball
    with shared.chdir(parent_dir):
        # Run from the parent so the archive holds relative paths only.
        subprocess.check_call("tar -cvpf - %s | xz -zc - > %s" %
                              (dir_basename, os.path.basename(tarball)), shell=True)
    return tarball
def _clean_directory(dir, gid):
    """Remove duplicate files from a genome directory before tar and upload.

    Deletes the ``<gid>.fa`` entries in the ``bowtie`` and ``maq``
    directories, then every ``*.gz`` and ``*.zip`` file found under ``seq``.
    """
    # get rid of softlinks
    fasta_name = "%s.fa" % gid
    for aligner in ("bowtie", "maq"):
        link_path = os.path.join(dir, aligner, fasta_name)
        if os.path.exists(link_path):
            subprocess.check_call("rm -f %s" % link_path, shell=True)
    # remove any downloaded original sequence files
    with shared.chdir(os.path.join(dir, "seq")):
        for pattern in ["*.gz", "*.zip"]:
            listing = subprocess.check_output("find . -name '%s'" % pattern, shell=True).decode()
            for entry in listing.split("\n"):
                fname = entry.strip()
                if fname:
                    subprocess.check_call("rm -f %s" % fname, shell=True)
# == Liftover files
def _data_liftover(env, lift_over_genomes):
    """Download UCSC chain files for running liftOver between genome pairs.

    Does not install liftOver binaries automatically. Every ordered pair of
    distinct genomes is attempted; each chain file that is newly downloaded is
    unzipped and added to ``liftOver.loc``.
    """
    lo_dir = os.path.join(env.data_files, "liftOver")
    if not os.path.exists(lo_dir):
        subprocess.check_call("mkdir %s" % lo_dir, shell=True)
    lo_base_url = "ftp://hgdownload.cse.ucsc.edu/goldenPath/%s/liftOver/%s"
    lo_base_file = "%sTo%s.over.chain.gz"
    for g1 in lift_over_genomes:
        for g2 in [g for g in lift_over_genomes if g != g1]:
            # The target genome has its first letter upper-cased in the file name.
            target_cap = g2[0].upper() + g2[1:]
            cur_file = lo_base_file % (g1, target_cap)
            non_zip = os.path.splitext(cur_file)[0]
            fetched = None
            with shared.chdir(lo_dir):
                if not os.path.exists(non_zip):
                    fetched = shared._remote_fetch(env, "%s" % (lo_base_url % (g1, cur_file)),
                                                   allow_fail=True)
                    # Lift over back and forths don't always exist
                    # Only move forward if we found the file
                    if fetched:
                        subprocess.check_call("gunzip %s" % fetched, shell=True)
            if fetched:
                galaxy.update_loc_file(env, "liftOver.loc",
                                       [g1, g2, os.path.join(lo_dir, non_zip)])
# == UniRef
def _data_uniref(env):
    """Retrieve and index UniRef databases for protein searches.

    http://www.ebi.ac.uk/uniref/

    Each of uniref50, uniref90 and uniref100 is downloaded into its own
    directory under ``<data_files>/uniref`` (when the unzipped FASTA is
    missing) and then handed to ``_index_blast_db`` as a protein database.

    Open questions kept from the original notes: whether other indexes are
    wanted, whether this should be organized by program like the genome data,
    and that release notes should be checked so older versions are replaced.
    """
    base_url = "ftp://ftp.uniprot.org" + "/pub/databases/uniprot/" \
               "current_release/uniref/%s/%s"
    for uniref_db in ["uniref50", "uniref90", "uniref100"]:
        work_dir = os.path.join(env.data_files, "uniref", uniref_db)
        if not os.path.exists(work_dir):
            subprocess.check_call("mkdir -p %s" % work_dir, shell=True)
        base_work_url = base_url % (uniref_db, uniref_db)
        fasta_url = base_work_url + ".fasta.gz"
        # File name of the download with the .gz suffix dropped.
        base_file = os.path.splitext(os.path.basename(fasta_url))[0]
        with shared.chdir(work_dir):
            if not os.path.exists(base_file):
                gz_file = shared._remote_fetch(env, fasta_url)
                subprocess.check_call("gunzip %s" % gz_file, shell=True)
                shared._remote_fetch(env, base_work_url + ".release_note")
        _index_blast_db(work_dir, base_file, "prot")
def _index_blast_db(work_dir, base_file, db_type):
    """Index a database using blast+ for similarity searching.

    ``db_type`` is ``"prot"`` or ``"nucl"``. ``makeblastdb`` runs only when
    none of the index files expected for that type exist in ``work_dir``.
    """
    type_to_ext = dict(prot=("phr", "pal"), nucl=("nhr", "nal"))
    db_name = os.path.splitext(base_file)[0]
    with shared.chdir(work_dir):
        already_indexed = any(os.path.exists("%s.%s" % (db_name, ext))
                              for ext in type_to_ext[db_type])
        if not already_indexed:
            subprocess.check_call("makeblastdb -in %s -dbtype %s -out %s" %
                                  (base_file, db_type, db_name), shell=True)
def get_index_fn(index):
    """Return the indexing function registered for ``index`` in INDEX_FNS.

    An unknown index name yields a no-op function taking ``(env, ref_file)``
    and returning None.
    """
    def noop(env, ref_file):
        return None

    if index in INDEX_FNS:
        return INDEX_FNS[index]
    return noop
def prepare_simple_reference(ref_file, out_file):
    """Create a FASTA with no alts, HLA or decoys from an hg38 FASTA file.

    Contig names are read from ``ref_file + ".fai"`` and the retained ones
    are extracted with ``samtools faidx``. An existing ``out_file`` is reused.

    The output is written to a temporary sibling file and renamed into place
    only after samtools succeeds, so a failed run cannot leave a truncated
    ``out_file`` that a later call would mistake for a finished one.

    Returns ``out_file``. Raises subprocess.CalledProcessError when samtools
    fails.
    """
    if os.path.exists(out_file):
        return out_file
    with open(ref_file + ".fai") as in_handle:
        # First whitespace-separated field is the contig name; skip blank lines.
        chroms = [fields[0] for fields in (line.split() for line in in_handle) if fields]
    chroms = [x for x in chroms if not (is_alt(x) or is_decoy(x) or is_HLA(x))]
    cmd = ["samtools", "faidx", ref_file] + chroms
    tmp_file = out_file + ".tmp"
    try:
        with open(tmp_file, "w") as out_handle:
            subprocess.check_call(cmd, stdout=out_handle)
        os.replace(tmp_file, out_file)
    finally:
        # Only still present if extraction or the rename failed.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    return out_file
def is_alt(chrom: str) -> bool:
    """Return True when the contig name ends with ``_alt``."""
    return chrom.endswith("_alt")
def is_decoy(chrom: str) -> bool:
    """Return True when the contig name ends with ``_decoy``."""
    return chrom.endswith("_decoy")
def is_HLA(chrom: str) -> bool:
    """Return True when the contig name starts with ``HLA``."""
    return chrom.startswith("HLA")
# Registry mapping an index name to the function that builds it; consulted by
# get_index_fn. "ucsc" and "twobit" are two names for the same 2bit builder.
INDEX_FNS = {
    "seq": _index_sam,
    "bbmap": _index_bbmap,
    "bismark": _index_bismark,
    "bwa": _index_bwa,
    "bowtie": _index_bowtie,
    "bowtie2": _index_bowtie2,
    "maq": _index_maq,
    "mosaik": _index_mosaik,
    "minimap2": _index_minimap2,
    "novoalign": _index_novoalign,
    "novoalign_cs": _index_novoalign_cs,
    "ucsc": _index_twobit,
    "twobit": _index_twobit,
    "star": _index_star,
    "snap": _index_snap,
    "rtg": _index_rtg,
    "hisat2": _index_hisat2
}
================================================
FILE: cloudbio/biodata/ggd.py
================================================
"""Process GGD (Get Genomics Data) configurations for installation in biodata directories.
Builds off work done by Aaron Quinlan to define and install genomic data:
https://github.com/arq5x/ggd
"""
from __future__ import print_function
import collections
import contextlib
from distutils.version import LooseVersion
import os
import shutil
import subprocess
import yaml
def install_recipe(base_dir, system_install, recipe_file, genome_build):
    """Install data into a biodata directory by following a GGD YAML recipe.

    Nothing happens when the recorded version is already up to date, or when
    a program listed under the recipe's ``required`` key is not on PATH.
    Otherwise the recipe runs inside a temporary directory, its declared
    output files are moved into ``base_dir``, and the version is recorded.
    """
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    recipe = _read_recipe(recipe_file)
    if version_uptodate(base_dir, recipe):
        return
    full = recipe["recipe"]["full"]
    if not _has_required_programs(full.get("required", [])):
        return
    with tx_tmpdir(base_dir) as tmpdir:
        with chdir(tmpdir):
            print("Running GGD recipe: %s %s %s" % (genome_build, recipe["attributes"]["name"],
                                                    recipe["attributes"]["version"]))
            _run_recipe(tmpdir, full["recipe_cmds"], full["recipe_type"], system_install)
        # Outputs must be moved before tx_tmpdir removes the directory.
        _move_files(tmpdir, base_dir, full["recipe_outfiles"])
    add_version(base_dir, recipe)
def _has_required_programs(programs):
    """Return True when every listed program is an executable file on PATH.

    http://stackoverflow.com/questions/377017/test-if-executable-exists-in-python
    """
    def _is_executable(candidate):
        return os.path.isfile(candidate) and os.access(candidate, os.X_OK)

    def _on_path(program):
        # PATH entries may be wrapped in double quotes; strip them first.
        search_dirs = (entry.strip('"') for entry in os.environ["PATH"].split(os.pathsep))
        return any(_is_executable(os.path.join(d, program)) for d in search_dirs)

    return all(_on_path(p) for p in programs)
def _run_recipe(work_dir, recipe_cmds, recipe_type, system_install):
    """Write the recipe commands to ``ggd-run.sh`` in ``work_dir`` and run it.

    The script starts with strict bash settings and prepends
    ``<system_install>/bin`` to PATH. Only ``bash`` recipes are supported.
    """
    assert recipe_type == "bash", "Can only currently run bash recipes"
    run_file = os.path.join(work_dir, "ggd-run.sh")
    header = "#!/bin/bash\nset -eu -o pipefail\nexport PATH=%s/bin:$PATH\n" % system_install
    with open(run_file, "w") as script:
        script.write(header)
        script.write("\n".join(recipe_cmds))
    # The script is closed (and therefore flushed) before bash reads it.
    subprocess.check_output(["bash", run_file])
def _move_files(tmp_dir, final_dir, targets):
    """Move recipe outputs from ``tmp_dir`` to the same relative paths in ``final_dir``.

    A target that is a directory has each of its direct entries moved
    individually; any other target is moved as a single file. Whatever
    already exists at a destination is replaced, whether it is a file, a
    symlink or a directory.

    Raises AssertionError when an expected output is missing from ``tmp_dir``.
    """
    for target in targets:
        src_target = os.path.join(tmp_dir, target)
        if os.path.isdir(src_target):
            out_files = [os.path.join(target, f) for f in os.listdir(src_target)]
        else:
            out_files = [target]
        for out_file in out_files:
            orig = os.path.join(tmp_dir, out_file)
            final = os.path.join(final_dir, out_file)
            assert os.path.exists(orig), ("Did not find expected output file %s in %s" %
                                          (out_file, tmp_dir))
            cur_dir = os.path.dirname(final)
            if not os.path.exists(cur_dir):
                os.makedirs(cur_dir)
            if os.path.isdir(final) and not os.path.islink(final):
                # os.remove cannot delete a directory, and shutil.move would
                # otherwise nest the new copy inside the old one.
                shutil.rmtree(final)
            elif os.path.lexists(final):
                os.remove(final)
            shutil.move(orig, final)
def _read_recipe(in_file):
    """Load a GGD recipe from YAML and return the parsed object.

    ``~`` is expanded and the path made absolute before the file is opened.
    """
    recipe_path = os.path.abspath(os.path.expanduser(in_file))
    with open(recipe_path) as handle:
        return yaml.safe_load(handle)
# ## Versioning
def version_uptodate(base_dir, recipe):
    """Check whether ``base_dir`` already holds this recipe at its version or newer.

    Returns False when the recipe name has no recorded version.
    """
    installed = _get_versions(base_dir)
    name = recipe["attributes"]["name"]
    if name not in installed:
        return False
    wanted = LooseVersion(str(recipe["attributes"]["version"]))
    return LooseVersion(installed[name]) >= wanted
def add_version(base_dir, recipe):
    """Record the recipe's name and version in the directory's version file.

    The whole file is rewritten, one ``name,version`` line per entry.
    """
    recorded = _get_versions(base_dir)
    attributes = recipe["attributes"]
    recorded[attributes["name"]] = attributes["version"]
    with open(_get_version_file(base_dir), "w") as out_handle:
        out_handle.writelines("%s,%s\n" % entry for entry in recorded.items())
def _get_versions(base_dir):
    """Read the recorded recipe versions for ``base_dir``.

    Returns an OrderedDict mapping recipe name to version string, in file
    order; it is empty when the version file does not exist.
    """
    versions = collections.OrderedDict()
    version_file = _get_version_file(base_dir)
    if not os.path.exists(version_file):
        return versions
    with open(version_file) as in_handle:
        for line in in_handle:
            # Each line is expected to be exactly "name,version".
            name, version = line.strip().split(",")
            versions[name] = version
    return versions
def _get_version_file(base_dir):
    """Return the path of ``versions.csv`` inside ``base_dir``."""
    version_name = "versions.csv"
    return os.path.join(base_dir, version_name)
# ## Transactional utilities
@contextlib.contextmanager
def tx_tmpdir(base_dir):
    """Context manager to create and remove a transactional temporary directory.

    Yields ``<base_dir>/txtmp``, creating it if needed. The directory is
    removed on exit even when the body raises, so a failed run cannot leave
    partial files behind for a later run to pick up.
    """
    tmp_dir = os.path.join(base_dir, "txtmp")
    if not os.path.exists(tmp_dir):
        os.makedirs(tmp_dir)
    try:
        yield tmp_dir
    finally:
        shutil.rmtree(tmp_dir, ignore_errors=True)
@contextlib.contextmanager
def chdir(new_dir):
    """Context manager that runs its body with ``new_dir`` as working directory.

    The previous working directory is restored on exit, including when the
    body raises.
    http://lucentbeing.com/blog/context-managers-and-the-with-statement-in-python/
    """
    previous_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield
    finally:
        os.chdir(previous_dir)
================================================
FILE: cloudbio/biodata/rnaseq.py
================================================
"""Infrastructure for RNA-seq supporting files.
"""
import os
import subprocess
from cloudbio.custom import shared
def finalize(genomes, data_filedir):
    """Symlink reference FASTA files into the bowtie and bowtie2 index directories.

    The links point back at ``../seq/<gid>.fa`` and its ``.fai`` so tophat
    avoids generating FASTA genomes. A link is only created when the source
    exists and nothing with that name is already in the aligner directory.
    """
    genome_dir = os.path.join(data_filedir, "genomes")
    for orgname, gid, manager in genomes:
        org_dir = os.path.join(genome_dir, orgname)
        for aligner in ["bowtie", "bowtie2"]:
            aligner_dir = os.path.join(org_dir, gid, aligner)
            if not os.path.exists(aligner_dir):
                continue
            with shared.chdir(aligner_dir):
                for ext in ["", ".fai"]:
                    seq_path = os.path.join(os.pardir, "seq", "%s.fa%s" % (gid, ext))
                    link_missing = not os.path.exists(os.path.basename(seq_path))
                    if os.path.exists(seq_path) and link_missing:
                        subprocess.check_call("ln -sf %s" % seq_path, shell=True)
def cleanup(genomes, data_filedir):
    """Remove old ``rnaseq`` symlinks ahead of GGD recipe installation.

    For each genome, ``<data_filedir>/genomes/<orgname>/<gid>/rnaseq`` is
    deleted only when it is a symbolic link; real directories are untouched.
    """
    genome_dir = os.path.join(data_filedir, "genomes")
    for orgname, gid, manager in genomes:
        target_dir = os.path.join(genome_dir, orgname, gid, "rnaseq")
        # islink is False for paths that do not exist at all.
        if os.path.islink(target_dir):
            os.remove(target_dir)
================================================
FILE: cloudbio/cloudbiolinux.py
================================================
"""CloudBioLinux specific scripts
"""
import os
from fabric.api import *
from fabric.contrib.files import *
from cloudbio.custom import shared
def _freenx_scripts(env):
    """Install the FreeNX helper scripts that give clients graphical access.

    ``setupnx.sh`` is placed in the bin directory and ``bash_login`` is
    uploaded as ``~/configure_freenx.sh``, each only if not already present;
    the NX server is then configured for GNOME.
    """
    home_dir = env.safe_run_output("echo $HOME")
    bin_dir = shared._get_bin_dir(env)
    install_file_dir = os.path.join(env.config_dir, os.pardir, "installed_files")

    setup_script = "setupnx.sh"
    if not env.safe_exists(os.path.join(bin_dir, setup_script)):
        # Upload to the home directory first, then move into place with sudo.
        staged_setup = os.path.join(home_dir, setup_script)
        env.safe_put(os.path.join(install_file_dir, setup_script), staged_setup)
        env.safe_run("chmod 0777 %s" % staged_setup)
        env.safe_sudo("mv %s %s" % (staged_setup, bin_dir))

    remote_login = "configure_freenx.sh"
    login_path = os.path.join(home_dir, remote_login)
    if not env.safe_exists(login_path):
        env.safe_put(os.path.join(install_file_dir, 'bash_login'), login_path)
        env.safe_run("chmod 0777 %s" % login_path)
    _configure_gnome(env)
def _cleanup_space(env):
    """Recover disk space left over from builds and packages.

    Removes ``.cpanm`` and crash reports with sudo (failures only warn), then
    ``*.dot`` and ``*.log`` files from the home directory.
    """
    env.logger.info("Cleaning up space from package builds")
    with settings(warn_only=True):
        for sudo_cmd in ("rm -rf .cpanm", "rm -f /var/crash/*"):
            env.safe_sudo(sudo_cmd)
    for user_cmd in ("rm -f ~/*.dot", "rm -f ~/*.log"):
        env.safe_run(user_cmd)
def _configure_gnome(env):
    """Configure the NX server to start the classic (fallback) GNOME session.

    Appends the start command to ``/etc/nxserver/node.conf`` when
    ``/etc/nxserver/`` exists; otherwise does nothing.
    http://askubuntu.com/questions/50503/why-do-i-get-unity-instead-of-classic-when-using-nx
    http://notepad2.blogspot.com/2012/04/install-freenx-server-on-ubuntu-1110.html
    """
    if not env.safe_exists("/etc/nxserver/"):
        return
    start_line = 'COMMAND_START_GNOME="gnome-session --session gnome-fallback"'
    env.safe_append("/etc/nxserver/node.conf", start_line, use_sudo=True)
================================================
FILE: cloudbio/cloudman.py
================================================
"""Build instructions associated with CloudMan.
http://wiki.g2.bx.psu.edu/Admin/Cloud
Adapted from Enis Afgan's code: https://bitbucket.org/afgane/mi-deployment
"""
# Upstart job template for the CloudMan boot-time script. It is filled with
# two values: the script path to execute and the prefix of the ".log" file
# that receives the script's stderr.
cm_upstart = """
description "Start CloudMan contextualization script"
start on runlevel [2345]
task
exec python %s 2> %s.log
"""
import os
from fabric.api import sudo, cd, run, put
from fabric.contrib.files import exists, settings
from cloudbio.galaxy import _setup_users
from cloudbio.flavor.config import get_config_file
from cloudbio.package.shared import _yaml_to_packages
from cloudbio.custom.shared import (_make_tmp_dir, _write_to_file, _get_install,
_configure_make, _if_not_installed,
_setup_conf_file, _add_to_profiles,
_create_python_virtualenv,
_setup_simple_service,
_read_boolean)
from cloudbio.package.deb import (_apt_packages, _setup_apt_automation)
MI_REPO_ROOT_URL = "https://bitbucket.org/afgane/mi-deployment/raw/tip"
CM_REPO_ROOT_URL = "https://bitbucket.org/galaxy/cloudman/raw/tip"
def _configure_cloudman(env, use_repo_autorun=False):
    """
    Configure the machine to be capable of running CloudMan.

    Runs each configuration step in a fixed order; ``use_repo_autorun`` is
    forwarded to the ec2 autorun step only.

    ..Also see: ``custom/cloudman.py``
    """
    env.logger.debug("Configuring CloudMan")
    _setup_users(env)
    _setup_env(env)
    _configure_logrotate(env)
    _configure_ec2_autorun(env, use_repo_autorun)
    # The remaining steps all take just the environment.
    remaining_steps = (_configure_sge, _configure_hadoop, _configure_nfs,
                       _configure_novnc, _configure_desktop, install_s3fs)
    for step in remaining_steps:
        step(env)
def _configure_desktop(env):
    """
    Configure a desktop manager to work with VNC. Note that `xfce4` (or `jwm`)
    and `vnc4server` packages need to be installed for this to have effect.

    Does nothing unless the ``configure_desktop`` setting is true.
    """
    enabled = _read_boolean(env, "configure_desktop", False)
    if not enabled:
        return
    # Set nginx PAM module to allow logins for any system user
    if env.safe_exists("/etc/pam.d"):
        env.safe_sudo('echo "@include common-auth" > /etc/pam.d/nginx')
    env.safe_sudo('usermod -a -G shadow galaxy')
    # Create a start script for X
    _setup_conf_file(env, "/home/ubuntu/.vnc/xstartup", "xstartup", default_source="xstartup")
    # Create jwmrc config file (uncomment this if using jwm window manager)
    # _setup_conf_file(env, "/home/ubuntu/.jwmrc", "jwmrc.xml",
    #                  default_source="jwmrc.xml", mode="0644")
    env.logger.info("----- Done configuring desktop -----")
def _configure_novnc(env):
    """Install noVNC and set up the novnc and vncserver services.

    Does nothing unless the ``configure_novnc`` setting is true. VNC settings
    missing from ``env`` are given defaults before the VNC password file is
    written and noVNC is cloned into the install directory.
    """
    if not _read_boolean(env, "configure_novnc", False):
        # Longer term would like this enabled by default. -John
        return
    static_defaults = (
        ("novnc_install_dir", "/opt/novnc"),
        ("vnc_password", "cl0udbi0l1nux"),
        ("vnc_display", "1"),
        ("vnc_depth", "16"),
        ("vnc_geometry", "1024x768"),
    )
    for key, value in static_defaults:
        if key not in env:
            setattr(env, key, value)
    # env.user is only read when no VNC user was configured.
    if "vnc_user" not in env:
        env.vnc_user = env.user
    _configure_vncpasswd(env)

    novnc_dir = env.novnc_install_dir
    env.safe_sudo("mkdir -p '%s'" % novnc_dir)
    env.safe_sudo("chown %s '%s'" % (env.user, novnc_dir))
    clone_cmd = "NOVNC_DIR='%s'; rm -rf $NOVNC_DIR; git clone https://github.com/kanaka/noVNC.git $NOVNC_DIR" % novnc_dir
    run(clone_cmd)
    ## Move vnc_auto.html which takes vnc_password as query argument
    ## to index.html and rewrite it so that password is autoset, no
    ## need to specify via query parameter.
    run("sed s/password\\ =\\ /password\\ =\\ \\\'%s\\\'\\;\\\\\\\\/\\\\\\\\// '%s/vnc_auto.html' > '%s/index.html'" % (env.vnc_password, novnc_dir, novnc_dir))

    conf_files = (
        ("/etc/init.d/novnc", "novnc_init", "novnc_init"),
        ("/etc/default/novnc", "novnc_default", "novnc_default.template"),
        ("/etc/init.d/vncserver", "vncserver_init", "vncserver_init"),
        ("/etc/default/vncserver", "vncserver_default", "vncserver_default.template"),
    )
    for dest, conf_name, source in conf_files:
        _setup_conf_file(env, dest, conf_name, default_source=source)
    for service in ("novnc", "vncserver"):
        _setup_simple_service(service)
def _configure_vncpasswd(env):
    """Write ``~/.vnc/passwd`` from ``env.vnc_password``.

    Uses a temporary clone of the vncpasswd.py project, removed afterwards.
    """
    passwd_cmd = "python vncpasswd/vncpasswd.py '%s' -f ~/.vnc/passwd" % env.vnc_password
    commands = (
        "mkdir -p ~/.vnc",
        "rm -rf vncpasswd",
        "git clone https://github.com/trinitronx/vncpasswd.py vncpasswd",
        passwd_cmd,
        "chmod 600 ~/.vnc/passwd",
        "rm -rf vncpasswd",
    )
    with cd("~"):
        for command in commands:
            run(command)
def _setup_env(env):
    """
    Setup the system environment required to run CloudMan. This means
    installing required system-level packages (as defined in CBL's
    ``packages.yaml``, or a flavor thereof) and Python dependencies
    (i.e., libraries) as defined in CloudMan's ``requirements.txt`` file.

    Also installs a custom vimrc (when ``/etc/vim`` exists) and adds two
    ``ls`` aliases to ``/etc/bash.bashrc``.
    """
    # Get and install required system packages
    distribution = env.distribution
    if distribution in ["debian", "ubuntu"]:
        config_file = get_config_file(env, "packages.yaml")
        packages, _ = _yaml_to_packages(config_file.base, 'cloudman')
        # Allow flavors to modify the package list
        packages = env.flavor.rewrite_config_items("packages", packages)
        _setup_apt_automation()
        _apt_packages(pkg_list=packages)
    elif distribution in ["centos", "scientificlinux"]:
        env.logger.warn("No CloudMan system package dependencies for CentOS")
    # Get and install required Python libraries
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            reqs_url = os.path.join(CM_REPO_ROOT_URL, 'requirements.txt')
            _create_python_virtualenv(env, 'CM', reqs_url=reqs_url)
    # Add a custom vimrc
    vimrc_url = os.path.join(MI_REPO_ROOT_URL, 'conf_files', 'vimrc')
    remote_file = '/etc/vim/vimrc'
    if env.safe_exists("/etc/vim"):
        env.safe_sudo("wget --output-document=%s %s" % (remote_file, vimrc_url))
        env.logger.debug("Added a custom vimrc to {0}".format(remote_file))
    # Setup profile
    for alias in ('alias lt="ls -ltr"', 'alias ll="ls -l"'):
        _add_to_profiles(alias, ['/etc/bash.bashrc'])
    env.logger.info("Done setting up CloudMan's environment")
def _configure_logrotate(env):
    """
    Add logrotate config file, which will automatically rotate CloudMan's log

    The file is downloaded from the mi-deployment repository to
    ``/etc/logrotate.d/cloudman``.
    """
    remote = '/etc/logrotate.d/cloudman'
    url = os.path.join(MI_REPO_ROOT_URL, 'conf_files', "cloudman.logrotate")
    env.safe_sudo("wget --output-document=%s %s" % (remote, url))
    env.logger.info("----- Added logrotate file to {0} -----".format(remote))
def _configure_ec2_autorun(env, use_repo_autorun=False):
    """
    ec2autorun.py is a script that launches CloudMan on instance boot
    and is thus required on an instance. See the script itself for the
    details of what it does.

    This script also adds a cloudman service to ``/etc/init``, which
    actually runs ec2autorun.py as a system-level service at system boot.

    With ``use_repo_autorun`` the script is downloaded from the mi-deployment
    repository; otherwise the copy in ``installed_files`` is uploaded.
    """
    script = "ec2autorun.py"
    autorun_bin_dir = os.path.join(env.install_dir, "bin")
    remote = os.path.join(autorun_bin_dir, script)
    remote_parent = os.path.dirname(remote)
    if not env.safe_exists(remote_parent):
        env.safe_sudo('mkdir -p {0}'.format(remote_parent))
    if use_repo_autorun:
        # Is this used, can we eliminate use_repo_autorun?
        url = os.path.join(MI_REPO_ROOT_URL, script)
        env.safe_sudo("wget --output-document=%s %s" % (remote, url))
    else:
        install_file_dir = os.path.join(env.config_dir, os.pardir, "installed_files")
        # Upload to /tmp first, then move into place with sudo.
        tmp_remote = os.path.join("/tmp", os.path.basename(remote))
        env.safe_put(os.path.join(install_file_dir, script), tmp_remote)
        env.safe_sudo("mv %s %s" % (tmp_remote, remote))
    # NOTE(review): 0777 leaves the boot-time script world-writable --
    # confirm this is intended.
    env.safe_sudo("chmod 0777 %s" % remote)
    # Create upstart configuration file for boot-time script
    remote_file = '/etc/init/%s' % 'cloudman.conf'
    upstart_conf = cm_upstart % (remote, os.path.splitext(remote)[0])
    _write_to_file(upstart_conf, remote_file, mode="0644")
    # Setup default image user data (if configured by image_user_data_path or
    # image_user_data_template_path). This specifies defaults for CloudMan when
    # used with resulting image, normal userdata supplied by user will override
    # these defaults.
    image_user_data_path = os.path.join(env.install_dir, "bin", "IMAGE_USER_DATA")
    if "image_user_data_dict" in env:
        # Explicit YAML contents defined in env, just dump them as is.
        import yaml
        user_data_yaml = yaml.dump(env.get("image_user_data_dict"))
        _write_to_file(user_data_yaml, image_user_data_path, mode="0644")
    else:
        # Else use file or template file.
        _setup_conf_file(env, image_user_data_path, "image_user_data",
                         default_source="image_user_data")
    env.logger.info("Done configuring CloudMan's ec2_autorun")
def _configure_sge(env):
    """
    This method sets up the environment for SGE w/o
    actually setting up SGE; it basically makes sure system paths expected
    by CloudMan exist on the system.

    Creates ``/opt/sge`` owned by ``sgeadmin`` when missing, and links the
    SGE install under ``env.install_dir`` into ``/opt/galaxy/pkg``.

    TODO: Merge this with ``install_sge`` method in ``custom/cloudman.py``.
    """
    sge_root = '/opt/sge'
    if not env.safe_exists(sge_root):
        env.safe_sudo("mkdir -p %s" % sge_root)
        env.safe_sudo("chown sgeadmin:sgeadmin %s" % sge_root)
    # Link our installed SGE to CloudMan's expected directory
    sge_package_dir = "/opt/galaxy/pkg"
    sge_dir = "ge6.2u5"
    expected_link = os.path.join(sge_package_dir, sge_dir)
    if not env.safe_exists(expected_link):
        env.safe_sudo("mkdir -p %s" % sge_package_dir)
    # Checked again rather than reusing the result above, as before.
    if not env.safe_exists(expected_link):
        env.safe_sudo("ln --force -s %s/%s %s/%s" % (env.install_dir, sge_dir,
                                                      sge_package_dir, sge_dir))
    env.logger.info("Done configuring SGE for CloudMan")
def _configure_hadoop(env):
    """
    Download the archives CloudMan needs to set up a Hadoop cluster atop SGE.

    ``/opt/hadoop`` is wiped and recreated, both archives are fetched into it
    and the directory is handed over to ``env.user``.
    """
    hadoop_root = '/opt/hadoop'
    url_root = 'https://s3.amazonaws.com/cloudman'
    # Hadoop itself, followed by the SGE integration bundle.
    archives = ('hadoop.1.0.4__1.0.tar.gz', 'sge_integration.1.0.tar.gz')
    # Make sure we're working with a clean hadoop_home dir to avoid any version conflicts
    env.safe_sudo("rm -rf {0}".format(hadoop_root))
    env.safe_sudo("mkdir -p %s" % hadoop_root)
    with cd(hadoop_root):
        for archive in archives:
            env.safe_sudo("wget --output-document={0} {1}/{0}".format(archive, url_root))
    env.safe_sudo("chown -R {0} {1}".format(env.user, hadoop_root))
    env.logger.info("Done configuring Hadoop for CloudMan")
def _configure_nfs(env):
    """
    Edit ``/etc/exports`` to append paths that are shared over NFS by CloudMan.

    In addition to the hard coded paths listed here, additional paths
    can be included by setting ``extra_nfs_exports`` in ``fabricrc.txt`` as
    a comma-separated list of directories. Whitespace around each entry is
    ignored and empty entries are skipped.

    Also links ``/export/data`` to CloudMan's export directory and, for
    backward compatibility, links ``/opt/galaxy`` to the parent of
    ``env.install_dir`` when that link does not exist yet.
    """
    nfs_dir = "/export/data"
    cloudman_dir = "/mnt/galaxy/export"
    if not env.safe_exists(nfs_dir):
        # For the case of rerunning this script, ensure the nfs_dir does
        # not exist (exists() method does not recognize it as a file because
        # by default it points to a non-existing dir/file).
        with settings(warn_only=True):
            env.safe_sudo('rm -rf {0}'.format(nfs_dir))
        env.safe_sudo("mkdir -p %s" % os.path.dirname(nfs_dir))
        env.safe_sudo("ln -s %s %s" % (cloudman_dir, nfs_dir))
    # NOTE(review): chown is assumed to run unconditionally - confirm nesting.
    env.safe_sudo("chown -R %s %s" % (env.user, os.path.dirname(nfs_dir)))
    # Setup /etc/exports paths, to be used as NFS mount points
    # galaxy_data_mount = env.get("galaxy_data_mount", "/mnt/galaxyData")
    # galaxy_indices_mount = env.get("galaxy_indices_mount", "/mnt/galaxyIndices")
    # galaxy_tools_mount = env.get("galaxy_tools_mount", "/mnt/galaxyTools")
    exports = ['/opt/sge *(rw,sync,no_root_squash,no_subtree_check)',
               '/opt/hadoop *(rw,sync,no_root_squash,no_subtree_check)',
               # '%s *(rw,sync,no_root_squash,subtree_check,no_wdelay)' % galaxy_data_mount,
               # '%s *(rw,sync,no_root_squash,no_subtree_check)' % galaxy_indices_mount,
               # '%s *(rw,sync,no_root_squash,no_subtree_check)' % galaxy_tools_mount,
               # '%s *(rw,sync,no_root_squash,no_subtree_check)' % nfs_dir,
               # '%s/openmpi *(rw,sync,no_root_squash,no_subtree_check)' % env.install_dir
               ]
    extra_nfs_exports = env.get("extra_nfs_exports", "")
    if extra_nfs_exports:
        for extra_nfs_export in extra_nfs_exports.split(","):
            # Tolerate "a, b" and trailing commas: an export line must start
            # with the path, and an empty path is meaningless.
            extra_nfs_export = extra_nfs_export.strip()
            if extra_nfs_export:
                exports.append('%s *(rw,sync,no_root_squash,no_subtree_check)' % extra_nfs_export)
    env.safe_append('/etc/exports', exports, use_sudo=True)
    # Create a symlink for backward compatibility where all of CloudMan's
    # stuff is expected to be in /opt/galaxy
    old_dir = '/opt/galaxy'
    # Because stow is used, the equivalent to CloudMan's expected path
    # is actually the parent of the install_dir so use it for the symlink
    new_dir = os.path.dirname(env.install_dir)
    # Use env.safe_exists for both checks so they agree with the rest of
    # this function.
    if not env.safe_exists(old_dir) and env.safe_exists(new_dir):
        env.safe_sudo('ln -s {0} {1}'.format(new_dir, old_dir))
    env.logger.info("Done configuring NFS for CloudMan")
@_if_not_installed("s3fs")
def install_s3fs(env):
    """
    Install s3fs, allowing S3 buckets to be mounted as ~POSIX file systems.

    The version comes from ``tool_version`` in ``env`` and defaults to 1.61.
    """
    s3fs_version = env.get("tool_version", "1.61")
    tarball_url = "http://s3fs.googlecode.com/files/s3fs-%s.tar.gz" % s3fs_version
    _get_install(tarball_url, env, _configure_make)
def _cleanup_ec2(env):
    """
    Clean up any extra files after building. This method must be called
    on an instance after being built and before creating a new machine
    image. *Note* that after this method has run, key-based ssh access
    to the machine is no longer possible.

    Reads ``env.install_dir`` and ``env.config_dir`` and logs through
    ``env.logger``. All remote commands are issued with ``sudo``.
    """
    env.logger.info("Cleaning up for EC2 AMI creation")
    # Clean up log files and such
    fnames = [".bash_history", "/var/log/firstboot.done", ".nx_setup_done",
              "/var/crash/*", "%s/ec2autorun.py.log" % env.install_dir,
              "%s/ec2autorun.err" % env.install_dir, "%s/ec2autorun.log" % env.install_dir,
              "%s/bin/ec2autorun.log" % env.install_dir]
    for fname in fnames:
        sudo("rm -f %s" % fname)
    # Directories that are removed recursively from the image.
    rmdirs = ["/mnt/galaxyData", "/mnt/cm", "/tmp/cm"]
    for rmdir in rmdirs:
        sudo("rm -rf %s" % rmdir)
    # Seed the history with frequently used commands
    env.logger.debug("Setting bash history")
    local = os.path.join(env.config_dir, os.pardir, "installed_files", "bash_history")
    # The target home directory is hard-coded to the 'ubuntu' user.
    remote = os.path.join('/home', 'ubuntu', '.bash_history')
    put(local, remote, mode="0660", use_sudo=True)
    # Make sure the default config dir is owned by ubuntu
    sudo("chown ubuntu:ubuntu ~/.config")
    # Stop Apache from starting automatically at boot (it conflicts with Galaxy's nginx)
    sudo('/usr/sbin/update-rc.d -f apache2 remove')
    # NOTE(review): the extent of this warn_only block was reconstructed;
    # the service stops and package removals are assumed to be best-effort.
    with settings(warn_only=True):
        # RabbitMQ fails to start if its database is embedded into the image
        # because it saves the current IP address or host name so delete it now.
        # When starting up, RabbitMQ will recreate that directory.
        sudo('/etc/init.d/rabbitmq-server stop')
        sudo('service rabbitmq-server stop')
        # Clean up packages that are causing issues or are unnecessary
        pkgs_to_remove = ['tntnet', 'tntnet-runtime', 'libtntnet9', 'vsftpd']
        for ptr in pkgs_to_remove:
            sudo('apt-get -y --force-yes remove --purge {0}'.format(ptr))
    sudo('initctl reload-configuration')
    # Remove whichever RabbitMQ database locations are present.
    for db_location in ['/var/lib/rabbitmq/mnesia', '/mnesia']:
        if exists(db_location):
            sudo('rm -rf %s' % db_location)
    # remove existing ssh host key pairs
    # http://docs.amazonwebservices.com/AWSEC2/latest/UserGuide/AESDG-chapter-sharingamis.html
    sudo("rm -f /etc/ssh/ssh_host_*")
    # Removing the authorized keys is what disables key-based ssh access.
    sudo("rm -f ~/.ssh/authorized_keys*")
    sudo("rm -f /root/.ssh/authorized_keys*")
================================================
FILE: cloudbio/config_management/__init__.py
================================================
""" This module contains code related to integrating the configuration
management tools `chef` and `puppet` into CloudBioLinux. """
================================================
FILE: cloudbio/config_management/chef.py
================================================
import os
import json
from fabric.api import cd
from fabric.contrib import files
from fabric.state import _AttributeDict
from cloudbio.flavor.config import get_config_file
from utils import build_properties, upload_config, config_dir
# Code based heavily on fabric-provision. https://github.com/caffeinehit/fabric-provision
# Default chef-solo settings. ``path`` is the remote working directory;
# ``data_bags``, ``roles`` and ``cookbooks`` are local directories resolved
# through ``config_dir``; ``run_list`` and ``json`` feed the generated
# node.json.
DEFAULTS = dict(
    path='/var/chef',
    data_bags=config_dir(os.path.join('chef', 'data_bags')),
    roles=config_dir(os.path.join('chef', 'roles')),
    cookbooks=config_dir(os.path.join('chef', 'cookbooks')),
    log_level='info',
    recipes=[],
    run_list=[],
    json={},
)
# Template for solo.rb; it is filled with %-formatting from the ``chef``
# settings mapping (uses ``log_level`` and ``path``).
SOLO_RB = """
log_level :%(log_level)s
log_location STDOUT
file_cache_path "%(path)s"
data_bag_path "%(path)s/data_bags"
role_path [ "%(path)s/roles" ]
cookbook_path [ "%(path)s/cookbooks" ]
Chef::Log::Formatter.show_time = true
"""
class ChefDict(_AttributeDict):
    """Chef settings mapping with helpers for building the run list."""

    def add_recipe(self, recipe):
        """Append ``recipe[<recipe>]`` to the run list."""
        entry = 'recipe[{0}]'.format(recipe)
        self.run_list.append(entry)

    def add_role(self, role):
        """Append ``role[<role>]`` to the run list."""
        entry = 'role[{0}]'.format(role)
        self.run_list.append(entry)

    def _get_json(self):
        """Return a copy of the stored JSON attributes plus the run list."""
        node = self['json'].copy()
        node['run_list'] = self['run_list']
        return node

    # Reading ``chef.json`` yields the merged node definition.
    json = property(fget=_get_json)


# Module-wide chef settings, seeded from DEFAULTS.
chef = ChefDict(DEFAULTS)
def omnibus(env):
    """
    Install Chef from Opscode's Omnibus installer
    """
    installer = {
        'filename': '%(path)s/install-chef.sh' % chef,
        'url': 'http://opscode.com/chef/install.sh',
    }
    if files.exists(installer['filename']):
        # Installer script already downloaded.
        # NOTE(review): assumes the install step is nested under the
        # "not downloaded yet" check - confirm against the original layout.
        return
    env.safe_sudo('wget -O %(filename)s %(url)s' % installer)
    with cd(chef.path):
        env.safe_sudo('bash install-chef.sh')
def _chef_provision(env, _omnibus=True):
    """
    Upload the chef configuration and run ``chef-solo`` on the target.

    NOTE(review): ``_omnibus`` is accepted but never consulted; the omnibus
    installer is always invoked.
    """
    env.safe_sudo('mkdir -p %(path)s' % chef)
    omnibus(env)
    generated_files = {
        'node.json': json.dumps(chef.json),
        'solo.rb': SOLO_RB % chef,
    }
    upload_config(chef,
                  config_folder_names=['cookbooks', 'data_bags', 'roles'],
                  config_files=generated_files)
    with cd(chef.path):
        env.safe_sudo('chef-solo -c solo.rb -j node.json')
def _configure_chef(env, chef):
    """
    Populate ``chef`` with node properties and the omnibus installer flag.

    Node properties come from ``node_extra.json`` and the fabric environment;
    the flag is read from ``use_chef_omnibus_installer`` (default "false").
    """
    # Set node json properties
    extra_json_path = get_config_file(env, "node_extra.json").base
    chef.json = _build_chef_properties(env, extra_json_path)
    # Set whether to use the Opscode Omnibus Installer to load Chef.
    omnibus_flag = env.get("use_chef_omnibus_installer", "false")
    chef.use_omnibus_installer = omnibus_flag.upper() in ("TRUE", "YES")
def _build_chef_properties(env, config_file):
    """
    Build python object representation of the Chef-solo node.json file from
    node_extra.json in config dir and the fabric environment.
    """
    return build_properties(env, "chef", _parse_json(config_file))
def _parse_json(filename):
""" Parse a JSON file
First remove comments and then use the json module package
Comments look like :
// ...
"""
with open(filename) as f:
lines = f.readlines()
content = ''.join([line for line in lines if not line.startswith('//')])
return json.loads(content)
================================================
FILE: cloudbio/config_management/puppet.py
================================================
from fabric.state import _AttributeDict
from fabric.api import cd
from utils import upload_config, config_dir, build_properties
from cloudbio.package.deb import _apt_packages
import os
# Default puppet settings. ``path`` is the remote working directory and
# ``modules`` is the local modules directory resolved through ``config_dir``.
DEFAULTS = dict(
    path='/var/puppet',
    log_level='info',
    modules=config_dir(os.path.join('puppet', 'modules'))
)
# Module-wide puppet settings with attribute-style access, seeded from DEFAULTS.
puppet = _AttributeDict(DEFAULTS)
def _puppet_provision(env, classes):
    """
    Upload the puppet modules plus a generated manifest and apply it.

    The manifest is a single ``node default`` definition that includes
    every class named in ``classes``.
    """
    env.safe_sudo('mkdir -p %(path)s' % puppet)
    node_body = _build_node_def_body(env, classes)
    generated_files = {"manifest.pp": "node default {\n%s\n}\n" % node_body}
    upload_config(puppet,
                  config_folder_names=["modules"],
                  config_files=generated_files)
    # TODO: Allow yum based install
    _apt_packages(pkg_list=["puppet"])
    with cd(puppet.path):
        # NOTE(review): the command itself also starts with "sudo".
        env.safe_sudo("sudo puppet apply --modulepath=modules manifest.pp")
def _build_node_def_body(env, classes):
    """
    Build the body of the puppet ``node default`` definition.

    The body is one ``$name = 'value'`` assignment per property returned by
    ``build_properties`` (single quotes escaped), followed by one class
    include per entry of ``classes``.
    """
    properties = build_properties(env, "puppet")
    assignments = []
    for key, value in properties.iteritems():
        escaped = value.replace("'", r"\'")
        assignments.append("$%s = '%s'" % (key, escaped))
    includes = [_build_class_include(env, class_name) for class_name in classes]
    return "%s\n%s" % ("\n".join(assignments), "\n".join(includes))
def _build_class_include(env, class_name):
    """
    If parentns::classname is included and fabric
    properties such as puppet_parentns__classname_prop = val1
    are set, the class included in puppet will be something like

    class { 'parentns::classname':
        prop => 'val1',
    }

    Property values are rendered with ``%s`` and any single quote in them is
    backslash-escaped, matching how ``_build_node_def_body`` quotes its
    variables, so a value containing ``'`` cannot terminate the puppet string
    early.
    """
    include_def = "class { '%s': \n" % class_name
    property_prefix = _property_prefix(class_name)
    # items() works for both Python 2 and Python 3 mappings.
    for name, value in env.items():
        if not name.startswith(property_prefix):
            continue
        property_name = name[len(property_prefix):]
        if property_name.startswith("_"):
            # A further "__" means the property belongs to a subclass.
            continue
        escaped_value = ("%s" % (value,)).replace("'", r"\'")
        include_def += " %s => '%s',\n" % (property_name, escaped_value)
    include_def += "\n}"
    return include_def
def _property_prefix(class_name):
return "puppet_%s_" % class_name.replace("::", "__")
================================================
FILE: cloudbio/config_management/utils.py
================================================
from tempfile import mkdtemp
import os
from fabric.api import settings, local, put, sudo, cd
from fabric.contrib import files
def config_dir(relative_path):
    """Return the path of ``relative_path`` inside the ``config`` directory.

    The ``config`` directory is located two levels above this module's
    directory; the ``..`` components are left in the returned path.
    """
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, os.pardir, os.pardir, "config", relative_path)
def build_properties(env, prefix, overrides=None):
    """
    Merge string-valued fabric environment properties into a property dict.

    :param env: mapping of fabric environment properties.
    :param prefix: tool name, either ``chef`` or ``puppet``.
    :param overrides: optional mapping of explicitly configured properties.
        It is copied, never modified, and a key present in it is never
        replaced by an environment property of the same name.
    :returns: a new dict. Environment keys starting with ``<prefix>_`` are
        added without that prefix; every other string-valued key is added
        as ``cloudbiolinux_<key>``. Non-string values are skipped.
    """
    # Prefix will be either chef or puppet
    prefix = "%s_" % prefix
    # Clone fresh dictionary to modify
    properties = dict(overrides) if overrides else {}
    # Load fabric environment properties into properties.
    # items() is used so plain dicts work on Python 2 and Python 3 alike.
    for key, value in env.items():
        # Skip invalid properties.
        if key in properties or not isinstance(value, str):
            continue
        if key.startswith(prefix):
            # If a property starts with chef_ assume it is meant for chef and
            # add without this prefix. So chef_apache_dir would be available
            # as apache_dir.
            properties[key[len(prefix):]] = value
        else:
            # Otherwise, allow chef to access property anyway but prefix with
            # cloudbiolinux_ so it doesn't clash with anything explicitly
            # configured for chef.
            properties["cloudbiolinux_%s" % key] = value
    return properties
def upload_config(config, config_folder_names=None, config_files=None):
    """ Common code to upload puppet and chef config files
    to remote server.

    Heavily based on upload procedure from fabric-provision:
    https://github.com/caffeinehit/fabric-provision/blob/master/provision/__init__.py

    :param config: settings object with a ``path`` attribute (remote target
        directory) and one attribute per entry of ``config_folder_names``
        holding a local folder or a list of local folders.
    :param config_folder_names: names of the folders to bundle and upload.
    :param config_files: mapping of remote file name to the text that is
        appended to that file under ``config.path``.

    The local staging directory is removed once the archive has been
    uploaded, also when a step fails.
    """
    # Local import: the module's import block does not provide shutil.
    import shutil

    config_folder_names = list(config_folder_names or [])
    config_files = dict(config_files or {})
    names = config_folder_names + list(config_files)
    # Remote destination of every uploaded folder and generated file.
    ctx = dict((name, '%s/%s' % (config.path, name)) for name in names)

    tmpfolder = mkdtemp()
    try:
        # Normalize every folder setting to a list of local folders.
        for folder_name in config_folder_names:
            folders = getattr(config, folder_name)
            if not isinstance(folders, list):
                folders = [folders]
            setattr(config, folder_name, folders)
        for folder_name in config_folder_names:
            local('mkdir %s/%s' % (tmpfolder, folder_name))
        # Explicit loops instead of map(): the copies are side effects and
        # must run even where map() is lazy.
        for what in config_folder_names:
            for folder in getattr(config, what):
                if not os.path.exists(folder):
                    os.makedirs(folder)
                # cp fails on an empty source folder; that is not an error.
                with settings(warn_only=True):
                    local('cp -r %(folder)s/* %(tmpfolder)s/%(what)s' % dict(
                        folder=folder,
                        tmpfolder=tmpfolder,
                        what=what))
        folder_paths = " ".join("./%s" % folder_name
                                for folder_name in config_folder_names)
        local('cd %s && tar -f config_dir.tgz -cz %s' % (tmpfolder, folder_paths))
        # Get rid of old files
        with settings(warn_only=True):
            for name in ctx:
                sudo("rm -rf '%s'" % ctx[name])
        # Upload
        put('%s/config_dir.tgz' % tmpfolder, config.path, use_sudo=True)
    finally:
        # The staging directory is only needed until the upload is done.
        shutil.rmtree(tmpfolder, ignore_errors=True)
    with cd(config.path):
        sudo('tar -xf config_dir.tgz')
        for file_name, contents in config_files.items():
            files.append(ctx[file_name], contents, use_sudo=True)
================================================
FILE: cloudbio/custom/__init__.py
================================================
"""Fabric sub-modules providing custom installation for non-packaged programs.
"""
================================================
FILE: cloudbio/custom/bio_general.py
================================================
"""Custom installs for biological packages.
"""
import os
from fabric.api import *
from fabric.contrib.files import *
from cloudbio.custom import shared
from shared import (_if_not_installed, _get_install, _configure_make, _java_install,
_make_tmp_dir)
def install_anaconda(env):
    """Pre-packaged Anaconda Python installed from Continuum.
    http://docs.continuum.io/anaconda/index.html

    Installs into ``<env.system_install>/anaconda`` and does nothing when
    that directory already exists.

    Raises ValueError when ``env.distribution`` is not a known platform.
    """
    version = "2.0.0"
    outdir = os.path.join(env.system_install, "anaconda")
    # Map the distribution name onto the platform tag of the installer file.
    if env.distribution in ["ubuntu", "centos", "scientificlinux", "debian", "arch", "suse"]:
        platform = "Linux"
    elif env.distribution in ["macosx"]:
        platform = "MacOSX"
    else:
        raise ValueError("Unexpected distribution: %s" % env.distribution)
    url = "http://09c8d0b2229f813c1b93-c95ac804525aac4b6dba79b00b39d1d3.r79.cf1.rackcdn.com/" \
          "Anaconda-%s-%s-x86_64.sh" % (version, platform)
    # NOTE(review): the nesting below was reconstructed; every step is
    # assumed to run only on a fresh install - confirm.
    if not env.safe_exists(outdir):
        with _make_tmp_dir() as work_dir:
            with cd(work_dir):
                installer = shared._remote_fetch(env, url)
                # Rewrite the installer so it prints via ``cat`` instead of
                # ``more`` (presumably to avoid paging - confirm).
                env.safe_sed(os.path.basename(url), "more <<EOF", "cat <<EOF")
                # Answers piped to the installer prompts, including the
                # install location.
                env.safe_sudo("echo -e '\nyes\n%s\nyes\n' | bash %s" % (outdir, installer))
                env.safe_sudo("chown -R %s %s" % (env.user, outdir))
                # The marker comment keeps the PATH export from being added twice.
                comment_line = "# added by Ananconda %s installer" % version
                if not env.safe_contains(env.shell_config, comment_line):
                    env.safe_append(env.shell_config, comment_line)
                    env.safe_append(env.shell_config, "export PATH=%s/bin:$PATH" % outdir)
                # remove curl library with broken certificates
                env.safe_run("%s/bin/conda remove --yes curl" % outdir)
                env.safe_run("%s/bin/conda install --yes pip" % outdir)
@_if_not_installed("embossversion")
def install_emboss(env):
    """EMBOSS: A high-quality package of free, Open Source software for molecular biology.
    http://emboss.sourceforge.net/
    Emboss target for platforms without packages (CentOS -- rpm systems).
    """
    emboss_version = env.get("tool_version", "6.6.0")
    # Alternative mirror: ftp://emboss.open-bio.org/pub/EMBOSS/EMBOSS-<version>.tar.gz
    tarball_url = "https://science-annex.org/pub/emboss/EMBOSS-%s.tar.gz" % emboss_version
    _get_install(tarball_url, env, _configure_make)
def install_pgdspider(env):
    """PGDSpider format conversion for population genetics programs.
    http://www.cmpg.unibe.ch/software/PGDSpider/

    Does nothing when the ``PGDSpider2.sh`` launcher is already present in
    the bin directory of the target system.
    """
    bin_dir = shared._get_bin_dir(env)
    exe_file = "PGDSpider2.sh"
    # env.safe_exists checks the machine being provisioned; os.path.exists
    # would always inspect the machine running fabric.
    if env.safe_exists(os.path.join(bin_dir, exe_file)):
        return
    version = "2.0.2.0"
    url = "http://www.cmpg.unibe.ch/software/PGDSpider/PGDSpider_{v}.zip".format(
        v=version)

    def _install_fn(env, install_dir):
        """Move the jars into place and install the launcher script."""
        env.safe_sudo("mv *.jar %s" % install_dir)
        jar = "PGDSpider2.jar"
        # Point the launcher at the installed jar.
        env.safe_sed(exe_file, jar, "{dir}/{jar}".format(dir=install_dir, jar=jar))
        env.safe_run("chmod a+x {0}".format(exe_file))
        env.safe_sudo("mv {exe} {bin}".format(exe=exe_file, bin=bin_dir))

    _java_install("PGDSpider", version, url, env, install_fn=_install_fn)
def install_bio4j(env):
    """Bio4j graph based database built on Neo4j with UniProt, GO, RefSeq and more.
    http://www.bio4j.com/
    """
    release = "0.8"
    archive_url = "https://s3-eu-west-1.amazonaws.com/bio4j-public/releases/" \
                  "{v}/bio4j-{v}.zip".format(v=release)

    def _install_fn(env, install_dir):
        """Move the unpacked distribution pieces into the install directory."""
        for item in ("conf", "doc", "jars", "lib", "README"):
            env.safe_sudo("mv {0} {1}".format(item, install_dir))

    _java_install("bio4j", release, archive_url, env, install_fn=_install_fn)
================================================
FILE: cloudbio/custom/bio_nextgen.py
================================================
"""Install next gen sequencing analysis tools not currently packaged.
"""
from __future__ import print_function
import os
import re
from fabric.api import *
from fabric.contrib.files import *
import yaml
from shared import (_if_not_installed, _make_tmp_dir,
_get_install, _get_install_local, _make_copy, _configure_make,
_java_install, _python_cmd,
_symlinked_java_version_dir, _fetch_and_unpack, _python_make,
_get_lib_dir, _get_include_dir, _apply_patch)
from cloudbio.custom import shared, versioncheck
from cloudbio import libraries
from cloudbio.flavor.config import get_config_file
@_if_not_installed(["twoBitToFa", "gtfToGenePred"])
def install_ucsc_tools(env):
    """Useful executables from UCSC.

    todo: install from source to handle 32bit and get more programs
    http://hgdownload.cse.ucsc.edu/admin/jksrc.zip
    """
    download_root = "http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/"
    executables = [
        "liftOver", "faToTwoBit", "bedToBigBed",
        "bigBedInfo", "bigBedSummary", "bigBedToBed",
        "bedGraphToBigWig", "bigWigInfo", "bigWigSummary",
        "bigWigToBedGraph", "bigWigToWig",
        "fetchChromSizes", "wigToBigWig", "faSize", "twoBitInfo",
        "twoBitToFa", "faCount", "gtfToGenePred",
    ]
    _download_executables(env, download_root, executables)
@_if_not_installed("blat")
def install_kent_tools(env):
    """
    Please note that the Blat source and executables are freely available for
    academic, nonprofit and personal use. Commercial licensing information is
    available on the Kent Informatics website (http://www.kentinformatics.com/).
    """
    download_root = "http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat/"
    _download_executables(env, download_root, ["blat", "gfClient", "gfServer"])
def _download_executables(env, base_url, tools):
    """Fetch pre-built executables from ``base_url`` into the bin directory.

    A tool is skipped when it already exists in the bin directory or when
    ``shared._executable_not_on_path`` reports it as available.
    """
    bin_dir = shared._get_bin_dir(env)
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            for tool in tools:
                installed_path = os.path.join(bin_dir, tool)
                if env.safe_exists(installed_path) or not shared._executable_not_on_path(tool):
                    continue
                shared._remote_fetch(env, "%s%s" % (base_url, tool))
                env.safe_sudo("cp -f %s %s" % (tool, bin_dir))
                env.safe_sudo("chmod uga+rx %s" % installed_path)
# --- Alignment tools
def install_featurecounts(env):
    """
    featureCounts from the subread package for counting reads mapping to
    genomic features
    """
    version = env.get("tool_version", "1.4.4")
    if versioncheck.up_to_date(env, "featureCounts", version, stdout_flag="Version"):
        return
    if env.distribution == "macosx":
        platform = "MacOS"
    else:
        platform = "Linux"
    url = ("http://downloads.sourceforge.net/project/subread/"
           "subread-%s/subread-%s-%s-x86_64.tar.gz"
           % (version, version, platform))
    # Pre-built binaries: locate the executable and copy it, no make step.
    copy_binary = _make_copy("find . -type f -perm -100 -name 'featureCounts'",
                             do_make=False)
    _get_install(url, env, copy_binary)
@_if_not_installed("bowtie")
def install_bowtie(env):
    """The bowtie short read aligner.
    http://bowtie-bio.sourceforge.net/index.shtml
    """
    release = env.get("tool_version", "1.0.0")
    source_url = "http://downloads.sourceforge.net/project/bowtie-bio/bowtie/%s/" \
                 "bowtie-%s-src.zip" % (release, release)
    copy_binaries = _make_copy("find . -perm -100 -name 'bowtie*'")
    _get_install(source_url, env, copy_binaries)
@_if_not_installed("bowtie2")
def install_bowtie2(env):
    """bowtie2 short read aligner, with gap support.
    http://bowtie-bio.sourceforge.net/bowtie2/index.shtml
    """
    release = env.get("tool_version", "2.1.0")
    source_url = "http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/%s/" \
                 "bowtie2-%s-source.zip" % (release, release)
    copy_binaries = _make_copy("find . -perm -100 -name 'bowtie2*'")
    _get_install(source_url, env, copy_binaries)
@_if_not_installed("bfast")
def install_bfast(env):
    """BFAST: Blat-like Fast Accurate Search Tool.
    http://sourceforge.net/apps/mediawiki/bfast/index.php?title=Main_Page

    The download directory is named after the numeric ``X.Y.Z`` part of the
    version (e.g. ``0.7.0`` for ``0.7.0a``).

    Raises ValueError when the configured version has no ``X.Y.Z`` part.
    """
    default_version = "0.7.0a"
    version = env.get("tool_version", default_version)
    # Raw string so the backslashes reach the regex engine unchanged.
    major_match = re.search(r"\d+\.\d+\.\d+", version)
    if major_match is None:
        raise ValueError("Unexpected BFAST version, expected X.Y.Z[suffix]: %s" % version)
    major_version = major_match.group(0)
    url = "http://downloads.sourceforge.net/project/bfast/bfast/%s/bfast-%s.tar.gz"\
        % (major_version, version)
    _get_install(url, env, _configure_make)
@_if_not_installed("perm")
def install_perm(env):
    """Efficient mapping of short sequences accomplished with periodic full sensitive spaced seeds.
    https://code.google.com/p/perm/
    """
    release = env.get("tool_version", "4")
    source_url = "http://perm.googlecode.com/files/PerM%sSource.tar.gz" % release

    def gcc44_makefile_patch():
        """Switch the makefile to ``g++44`` when that compiler can be run."""
        compiler = "g++44"
        quiet = hide('warnings', 'running', 'stdout', 'stderr')
        with settings(quiet, warn_only=True):
            probe = env.safe_run("%s -v" % compiler)
        print(probe.return_code)
        if probe.return_code == 0:
            env.safe_sed("makefile", r"g\+\+", compiler)

    _get_install(source_url, env, _make_copy("ls -1 perm", gcc44_makefile_patch))
@_if_not_installed("snap")
def install_snap(env):
    """Scalable Nucleotide Alignment Program
    http://snap.cs.berkeley.edu/
    """
    release = "0.15"
    binary_url = "http://github.com/downloads/amplab/snap/" \
                 "snap-%s-linux.tar.gz" % release
    # Pre-built binaries: copy every executable file, no make step.
    copy_binaries = _make_copy("find . -perm -100 -type f", do_make=False)
    _get_install(binary_url, env, copy_binaries)
def install_stampy(env):
    """Stampy: mapping of short reads from illumina sequencing machines onto a reference genome.
    http://www.well.ox.ac.uk/project-stampy
    """
    version = "1.0.21"
    # Ugh -- Stampy now uses a 'Stampy-latest' download target, so the
    # version above only names the install directory. Versioned tarballs
    # used to live at .../Software/stampy-<version>.tgz.
    url = "http://www.well.ox.ac.uk/bioinformatics/Software/" \
          "Stampy-latest.tgz"

    def _clean_makefile(env):
        """Drop the `` -Wl`` linker flag from the makefile."""
        env.safe_sed("makefile", " -Wl", "")

    install_dir_name = "stampy-{0}".format(version)
    _get_install_local(url, env, _make_copy(),
                       dir_name=install_dir_name,
                       post_unpack_fn=_clean_makefile)
@_if_not_installed("gmap")
def install_gmap(env):
    """GMAP and GSNAP: A Genomic Mapping and Alignment Program for mRNA EST and short reads.
    http://research-pub.gene.com/gmap/
    """
    release_date = "2012-11-09"
    source_url = "http://research-pub.gene.com/gmap/src/gmap-gsnap-%s.tar.gz" % release_date
    _get_install(source_url, env, _configure_make)
def _wget_with_cookies(ref_url, dl_url):
    """Download ``dl_url`` with the session cookies obtained from ``ref_url``.

    The referring page is fetched first to fill ``cookie.txt``; the download
    then sends those cookies together with a matching referer header.
    """
    prime_cookies = ("wget --cookies=on --keep-session-cookies --save-cookies=cookie.txt %s"
                     % (ref_url))
    env.safe_run(prime_cookies)
    download = ("wget --referer=%s --cookies=on --load-cookies=cookie.txt "
                "--keep-session-cookies --save-cookies=cookie.txt %s" %
                (ref_url, dl_url))
    env.safe_run(download)
@_if_not_installed("novoalign")
def install_novoalign(env):
    """Novoalign short read aligner using Needleman-Wunsch algorithm with affine gap penalties.
    http://www.novocraft.com/main/index.php

    Installs the base novocraft tools and novoalignCS into the bin
    directory. Both archives are downloaded through the cookie-protected
    download page.
    """
    base_version = "V3.00.02"
    cs_version = "V1.03.02"
    # No trailing slash here: the archive URLs below add the separator, so
    # a slash on both sides would put "//" into the download path.
    _url = "http://www.novocraft.com/downloads/%s" % base_version
    ref_url = "http://www.novocraft.com/main/downloadpage.php"
    base_url = "%s/novocraft%s.gcc.tar.gz" % (_url, base_version)
    cs_url = "%s/novoalignCS%s.gcc.tar.gz" % (_url, cs_version)
    install_dir = shared._get_bin_dir(env)
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            _wget_with_cookies(ref_url, base_url)
            env.safe_run("tar -xzvpf novocraft%s.gcc.tar.gz" % base_version)
            with cd("novocraft"):
                for fname in ["isnovoindex", "novo2maq", "novo2paf",
                              "novo2sam.pl", "novoalign", "novobarcode",
                              "novoindex", "novope2bed.pl", "novorun.pl",
                              "novoutil"]:
                    env.safe_sudo("mv %s %s" % (fname, install_dir))
    # A fresh temporary directory is used for the colorspace variant.
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            _wget_with_cookies(ref_url, cs_url)
            env.safe_run("tar -xzvpf novoalignCS%s.gcc.tar.gz" % cs_version)
            with cd("novoalignCS"):
                for fname in ["novoalignCS"]:
                    env.safe_sudo("mv %s %s" % (fname, install_dir))
@_if_not_installed("novosort")
def install_novosort(env):
    """Multithreaded sort and merge for BAM files.
    http://www.novocraft.com/wiki/tiki-index.php?page=Novosort
    """
    base_version = "V3.00.02"
    version = "V1.00.02"
    archive_url = "http://www.novocraft.com/downloads/%s/novosort%s.gcc.tar.gz" % (base_version, version)
    referer_url = "http://www.novocraft.com/main/downloadpage.php"
    bin_dir = shared._get_bin_dir(env)
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            _wget_with_cookies(referer_url, archive_url)
            env.safe_run("tar -xzvpf novosort%s.gcc.tar.gz" % version)
            with cd("novosort"):
                # The archive ships a single executable.
                env.safe_sudo("mv %s %s" % ("novosort", bin_dir))
@_if_not_installed("lastz")
def install_lastz(env):
    """LASTZ sequence alignment program.
    http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.02.00/README.lastz-1.02.00a.html
    """
    release = env.get("tool_version", "1.02.00")
    source_url = "http://www.bx.psu.edu/miller_lab/dist/" \
                 "lastz-%s.tar.gz" % release

    def _remove_werror(env):
        """Strip `` -Werror`` from the source Makefile."""
        env.safe_sed("src/Makefile", " -Werror", "")

    copy_binary = _make_copy("find . -perm -100 -name 'lastz'")
    _get_install(source_url, env, copy_binary, post_unpack_fn=_remove_werror)
@_if_not_installed("MosaikAligner")
def install_mosaik(env):
    """MOSAIK: reference-guided aligner for next-generation sequencing technologies
    http://code.google.com/p/mosaik-aligner/
    """
    release = "2.2.3"
    binary_url = "https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/mosaik-aligner/" \
                 "MOSAIK-%s-Linux-x64.tar" % release
    # Pre-built binaries: copy every executable file, no make step.
    copy_binaries = _make_copy("find . -perm -100 -type f", do_make=False)
    _get_install(binary_url, env, copy_binaries)
# --- Utilities
def install_samtools(env):
    """SAM Tools provide various utilities for manipulating alignments in the SAM format.
    http://samtools.sourceforge.net/
    """
    version = env.get("tool_version", "0.1.19")
    if versioncheck.up_to_date(env, "samtools", version, stdout_flag="Version:"):
        env.logger.info("samtools version {0} is up to date; not installing"
                        .format(version))
        return
    url = "http://downloads.sourceforge.net/project/samtools/samtools/" \
          "%s/samtools-%s.tar.bz2" % (version, version)

    def _safe_ncurses_make(env):
        """Compile samtools, removing ncurses refs if not present on system.
        """
        with settings(warn_only=True):
            first_build = env.safe_run("make")
        # no ncurses, fix Makefile and rebuild
        if first_build.failed:
            env.safe_sed("Makefile", "-D_CURSES_LIB=1", "-D_CURSES_LIB=0")
            env.safe_sed("Makefile", "-lcurses", "# -lcurses")
            env.safe_run("make clean")
            env.safe_run("make")
        bin_dir = shared._get_bin_dir(env)
        built = env.safe_run_output(
            "ls -1 samtools bcftools/bcftools bcftools/vcfutils.pl misc/wgsim")
        for built_file in built.split("\n"):
            # Remote output may carry carriage returns at line ends.
            env.safe_sudo("cp -f %s %s" % (built_file.rstrip("\r"), bin_dir))

    _get_install(url, env, _safe_ncurses_make)
@_if_not_installed("vtools")
def install_varianttools(env):
    """Annotation, selection, and analysis of variants in the context of next-gen sequencing analysis.
    http://varianttools.sourceforge.net/
    """
    release = "1.0.6"
    source_url = "http://downloads.sourceforge.net/project/varianttools/" \
                 "{ver}/variant_tools-{ver}-src.tar.gz".format(ver=release)
    _get_install(source_url, env, _python_make)
@_if_not_installed("dwgsim")
def install_dwgsim(env):
    """DWGSIM: simulating NGS data and evaluating mappings and variant calling.
    http://sourceforge.net/apps/mediawiki/dnaa/index.php?title=Main_Page
    """
    version = "0.1.10"
    samtools_version = "0.1.18"
    url = "http://downloads.sourceforge.net/project/dnaa/dwgsim/" \
          "dwgsim-{0}.tar.gz".format(version)
    samtools_url = "http://downloads.sourceforge.net/project/samtools/samtools/" \
                   "{ver}/samtools-{ver}.tar.bz2".format(ver=samtools_version)

    def _get_samtools(env):
        """Unpack the samtools sources and expose them as ``samtools``."""
        shared._remote_fetch(env, samtools_url)
        env.safe_run("tar jxf samtools-{0}.tar.bz2".format(samtools_version))
        env.safe_run("ln -s samtools-{0} samtools".format(samtools_version))

    copy_outputs = _make_copy("ls -1 dwgsim dwgsim_eval scripts/dwgsim_pileup_eval.pl")
    _get_install(url, env, copy_outputs, post_unpack_fn=_get_samtools)
@_if_not_installed("fastq_screen")
def install_fastq_screen(env):
    """A screening application for high througput sequence data.
    http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/

    Unpacks the release into a shared directory and links the executable
    into ``<env.system_install>/bin``.
    """
    version = "0.4"
    url = "http://www.bioinformatics.babraham.ac.uk/projects/fastq_screen/" \
          "fastq_screen_v%s.tar.gz" % version
    # NOTE(review): the shared directory is named "fastqc_screen" while the
    # tool is "fastq_screen" - confirm whether that is intentional.
    install_dir = shared._symlinked_shared_dir("fastqc_screen", version, env)
    executable = "fastq_screen"
    # Nothing is downloaded when no install directory is returned.
    if install_dir:
        with _make_tmp_dir() as work_dir:
            with cd(work_dir):
                out_file = shared._remote_fetch(env, url)
                env.safe_run("tar -xzvpf %s" % out_file)
                with cd("fastq_screen_v%s" % version):
                    env.safe_sudo("mv * %s" % install_dir)
        # NOTE(review): nesting reconstructed; the link uses absolute paths.
        env.safe_sudo("ln -s %s/%s %s/bin/%s" % (install_dir, executable,
                                                 env.system_install, executable))
def install_bedtools(env):
    """A flexible suite of utilities for comparing genomic features.
    https://code.google.com/p/bedtools/
    """
    release = "2.17.0"
    already_current = versioncheck.up_to_date(env, "bedtools --version", release,
                                              stdout_flag="bedtools")
    if already_current:
        return
    source_url = "https://bedtools.googlecode.com/files/" \
                 "BEDTools.v%s.tar.gz" % release
    _get_install(source_url, env, _make_copy("ls -1 bin/*"))
# Perl launcher written out by install_shrec. It passes arguments starting
# with -X to java and every other argument to the Shrec class, with the
# script's own directory as the class path.
_shrec_run = """
#!/usr/bin/perl
use warnings;
use strict;
use FindBin qw($RealBin);
use Getopt::Long;
my @java_args;
my @args;
foreach (@ARGV) {
if (/^\-X/) {push @java_args,$_;}
else {push @args,$_;}}
system("java -cp $RealBin @java_args Shrec @args");
"""
@_if_not_installed("shrec")
def install_shrec(env):
    """Shrec is a bioinformatics tool for error correction of HTS read data.
    http://sourceforge.net/projects/shrec-ec/

    Installs the Java classes into a versioned directory, writes a perl
    launcher next to them and links it as ``<env.system_install>/bin/shrec``.
    """
    version = "2.2"
    # "%%20" is a literal "%20" (URL-encoded space) after %-formatting.
    url = "http://downloads.sourceforge.net/project/shrec-ec/SHREC%%20%s/bin.zip" % version
    install_dir = _symlinked_java_version_dir("shrec", version, env)
    # Nothing is downloaded when no install directory is returned.
    if install_dir:
        shrec_script = "%s/shrec" % install_dir
        # NOTE(review): nesting reconstructed; the steps after the unpack use
        # absolute paths, so their position inside the blocks is assumed.
        with _make_tmp_dir() as work_dir:
            with cd(work_dir):
                out_file = shared._remote_fetch(env, url)
                env.safe_run("unzip %s" % out_file)
                env.safe_sudo("mv *.class %s" % install_dir)
                # Write the launcher one non-blank line at a time.
                for line in _shrec_run.split("\n"):
                    if line.strip():
                        env.safe_append(shrec_script, line, use_sudo=env.use_sudo)
                env.safe_sudo("chmod a+rwx %s" % shrec_script)
                env.safe_sudo("ln -s %s %s/bin/shrec" % (shrec_script, env.system_install))
def install_echo(env):
    """ECHO: A reference-free short-read error correction algorithm
    http://uc-echo.sourceforge.net/
    """
    release = "1_12"
    source_url = "http://downloads.sourceforge.net/project/uc-echo/source%20release/" \
                 "echo_v{0}.tgz".format(release)
    _get_install_local(source_url, env, _make_copy())
# -- Analysis
def install_picard(env):
    """Command-line utilities that manipulate BAM files with a Java API.
    http://picard.sourceforge.net/
    """
    release = "1.96"
    archive_url = "http://downloads.sourceforge.net/project/picard/" \
                  "picard-tools/%s/picard-tools-%s.zip" % (release, release)
    _java_install("picard", release, archive_url, env)
def install_alientrimmer(env):
    """
    Adapter removal tool
    http://www.ncbi.nlm.nih.gov/pubmed/23912058
    """
    release = "0.3.2"
    archive_url = ("ftp://ftp.pasteur.fr/pub/gensoft/projects/AlienTrimmer/"
                   "AlienTrimmer_%s.tar.gz" % release)
    _java_install("AlienTrimmer", release, archive_url, env)
def install_rnaseqc(env):
    """Quality control metrics for RNA-seq data
    https://www.broadinstitute.org/cancer/cga/rna-seqc
    """
    release = "1.1.7"
    jar_url = ("https://github.com/chapmanb/RNA-SeQC/releases/download/"
               "v%s/RNA-SeQC_v%s.jar" % (release, release))
    target_dir = _symlinked_java_version_dir("RNA-SeQC", release, env)
    if not target_dir:
        # No install directory returned: nothing to do.
        return
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            jar_file = shared._remote_fetch(env, jar_url)
            env.safe_sudo("mv %s %s" % (jar_file, target_dir))
def install_varscan(env):
    """VarScan: variant detection in massively parallel sequencing data.

    http://varscan.sourceforge.net/
    """
    release = "2.3.7"
    url = "http://downloads.sourceforge.net/project/varscan/VarScan.v{0}.jar".format(release)
    target_dir = _symlinked_java_version_dir("varscan", release, env)
    # No directory handed back: nothing to install
    # (presumably this version is already present -- confirm in shared).
    if not target_dir:
        return
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            jar_file = shared._remote_fetch(env, url)
            env.safe_sudo("mv %s %s" % (jar_file, target_dir))
def install_mutect(env):
    """MuTect: install the binary release zip from the Broad Institute GitHub releases.

    https://github.com/broadinstitute/mutect
    """
    release = "1.1.5"
    url = ("https://github.com/broadinstitute/mutect/releases/download/"
           "{0}/muTect-{0}-bin.zip".format(release))
    target_dir = _symlinked_java_version_dir("mutect", release, env)
    if not target_dir:
        return
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            archive = shared._remote_fetch(env, url)
            env.safe_run("unzip %s" % archive)
            # Jars plus the bundled version and license files go to the install dir.
            env.safe_sudo("mv *.jar version.txt LICENSE* %s" % target_dir)
@_if_not_installed("bam")
def install_bamutil(env):
    """BamUtil: BAM file utilities from the U of M Center for Statistical Genetics.

    http://genome.sph.umich.edu/wiki/BamUtil
    """
    release = "1.0.7"
    url = "http://genome.sph.umich.edu/w/images/5/5d/BamUtilLibStatGen.{0}.tgz".format(release)
    copy_bam = _make_copy("ls -1 bamUtil/bin/bam")
    _get_install(url, env, copy_bam, dir_name="bamUtil_{0}".format(release))
@_if_not_installed("tabix")
def install_tabix(env):
    """Tabix: generic indexer for TAB-delimited genome position files.

    http://samtools.sourceforge.net/tabix.shtml
    """
    release = "0.2.6"
    url = "http://downloads.sourceforge.net/project/samtools/tabix/tabix-{0}.tar.bz2".format(release)
    copy_binaries = _make_copy("ls -1 tabix bgzip")
    _get_install(url, env, copy_binaries)
@_if_not_installed("disambiguate.py")
def install_disambiguate(env):
    """disambiguate: a tool for disambiguating reads aligning to multiple genomes.

    https://github.com/mjafin/disambiguate
    """
    clone_cmd = "git clone https://github.com/mjafin/disambiguate.git"
    _get_install(clone_cmd, env, _python_make)
def install_grabix(env):
    """grabix: a wee tool for random access into BGZF files.

    https://github.com/arq5x/grabix
    """
    release = "0.1.6"
    commit = "ba792bc872d38d3cb5a69b2de00e39a6ac367d69"
    try:
        current = versioncheck.up_to_date(env, "grabix", release, stdout_flag="version:")
    except IOError:
        # Old versions will not have any version information
        current = False
    if not current:
        _get_install("git clone https://github.com/arq5x/grabix.git", env,
                     _make_copy("ls -1 grabix"), revision=commit)
@_if_not_installed("pbgzip")
def install_pbgzip(env):
    """pbgzip: parallel blocked bgzip, compatible with bgzip but with thread support.

    https://github.com/nh13/samtools/tree/master/pbgzip
    """
    clone_cmd = "git clone https://github.com/chapmanb/samtools.git"
    commit = "2cce3ffa97"

    def _build(env):
        # Build in the pbgzip subdirectory and copy the binary from there.
        with cd("pbgzip"):
            env.safe_run("make")
            bin_dir = shared._get_bin_dir(env)
            env.safe_sudo("cp -f pbgzip %s" % bin_dir)

    _get_install(clone_cmd, env, _build, revision=commit)
@_if_not_installed("bamtools")
def install_bamtools(env):
    """bamtools: command-line toolkit for working with BAM data.

    https://github.com/pezmaster31/bamtools
    """
    commit = "3fe66b9"
    clone_cmd = "git clone --recursive https://github.com/pezmaster31/bamtools.git"

    def _cmake_bamtools(env):
        # Out-of-tree cmake build in ./build.
        env.safe_run("mkdir build")
        with cd("build"):
            env.safe_run("cmake ..")
            env.safe_run("make")
        # Copy the built binaries and libraries into the system directories.
        bin_dir = shared._get_bin_dir(env)
        env.safe_sudo("cp bin/* %s" % bin_dir)
        lib_dir = shared._get_lib_dir(env)
        env.safe_sudo("cp lib/* %s" % lib_dir)

    _get_install(clone_cmd, env, _cmake_bamtools, revision=commit)
@_if_not_installed("ogap")
def install_ogap(env):
    """ogap: gap opening realigner for BAM data streams.

    https://github.com/ekg/ogap
    """
    commit = "652c525"
    clone_cmd = "git clone --recursive https://github.com/ekg/ogap.git"
    copy_binary = _make_copy("ls ogap")
    _get_install(clone_cmd, env, copy_binary, revision=commit)
def install_tophat(env):
    """TopHat: a fast splice junction mapper for RNA-Seq reads.

    Installs the pre-built x86_64 binaries unless the requested version
    (``tool_version`` in env, otherwise the default) is already reported
    by ``tophat --version``.

    http://ccb.jhu.edu/software/tophat/index.shtml
    """
    release = env.get("tool_version", "2.0.9")
    already_current = versioncheck.is_version(env, "tophat", release,
                                              args="--version", stdout_flag="TopHat")
    if already_current:
        message = "tophat version {0} is up to date; not installing".format(release)
        env.logger.info(message)
        return
    if env.distribution == "macosx":
        platform = "OSX"
    else:
        platform = "Linux"
    url = ("http://ccb.jhu.edu/software/tophat/downloads/"
           "tophat-%s.%s_x86_64.tar.gz" % (release, platform))
    # Pre-built download: copy every owner-executable file, no make step.
    copy_executables = _make_copy("find . -perm -100 -type f", do_make=False)
    _get_install(url, env, copy_executables)


# Alias so the installer is also reachable under the tophat2 name.
install_tophat2 = install_tophat
# --- Assembly
@_if_not_installed("ABYSS")
def install_abyss(env):
    """ABySS: Assembly By Short Sequences, a de novo, parallel, paired-end sequence assembler.

    http://www.bcgsc.ca/platform/bioinfo/software/abyss
    """
    # XXX check for no sparsehash on non-ubuntu systems
    release = env.get("tool_version", "2.0.2")
    url = ("http://www.bcgsc.ca/platform/bioinfo/software/abyss/releases/%s/abyss-%s.tar.gz"
           % (release, release))

    def _remove_werror_get_boost(env):
        # Drop -Werror from configure, then unpack boost headers into the source tree.
        env.safe_sed("configure", " -Werror", "")
        # http://osdir.com/ml/abyss-users-science/2011-10/msg00108.html
        boost_url = "http://downloads.sourceforge.net/project/boost/boost/1.63.0/boost_1_63_0.tar.bz2"
        boost_archive = shared._remote_fetch(env, boost_url)
        env.safe_run("tar jxf %s" % boost_archive)
        env.safe_run("ln -s boost_1_63_0/boost boost")

    _get_install(url, env, _configure_make, post_unpack_fn=_remove_werror_get_boost)
def install_transabyss(env):
    """Trans-ABySS: analyze ABySS multi-k-assembled shotgun transcriptome data.

    http://www.bcgsc.ca/platform/bioinfo/software/trans-abyss
    """
    release = "1.4.4"
    url = ("http://www.bcgsc.ca/platform/bioinfo/software/trans-abyss/"
           "releases/{0}/trans-ABySS-v{0}.tar.gz".format(release))
    copy_without_make = _make_copy(do_make=False)
    _get_install_local(url, env, copy_without_make)
@_if_not_installed("velvetg")
def install_velvet(env):
    """Velvet: sequence assembler for very short reads.

    http://www.ebi.ac.uk/~zerbino/velvet/
    """
    release = env.get("tool_version", "1.2.10")
    url = "http://www.ebi.ac.uk/~zerbino/velvet/velvet_%s.tgz" % release

    def _fix_library_order(env):
        """Fix library order problem in recent gcc versions
        http://biostar.stackexchange.com/questions/13713/
        error-installing-velvet-assembler-1-1-06-on-ubuntu-server
        """
        env.safe_sed("Makefile", "Z_LIB_FILES=-lz", "Z_LIB_FILES=-lz -lm")

    copy_binaries = _make_copy("find . -perm -100 -name 'velvet*'")
    _get_install(url, env, copy_binaries, post_unpack_fn=_fix_library_order)
@_if_not_installed("Ray")
def install_ray(env):
    """Ray: parallel genome assemblies for parallel DNA sequencing.

    http://denovoassembler.sourceforge.net/
    """
    release = env.get("tool_version", "2.2.0")
    url = "http://downloads.sourceforge.net/project/denovoassembler/Ray-v%s.tar.bz2" % release

    def _ray_do_nothing(env):
        """Placeholder post-unpack hook; intentionally does nothing."""
        return None

    copy_binary = _make_copy("find . -name Ray")
    _get_install(url, env, copy_binary, post_unpack_fn=_ray_do_nothing)
def install_trinity(env):
    """Trinity: efficient and robust de novo reconstruction of transcriptomes from RNA-seq data.

    http://trinityrnaseq.github.io/
    """
    release = "2.3.2"
    url = ("https://github.com/trinityrnaseq/trinityrnaseq/archive/"
           "Trinity-v{0}.tar.gz".format(release))
    unpacked_dir = "trinityrnaseq-{0}".format(release)
    _get_install_local(url, env, _make_copy(), dir_name=unpacked_dir)
def install_cortex_var(env):
    """cortex_var: de novo genome assembly and variation analysis from sequence data.

    http://cortexassembler.sourceforge.net/index_cortex_var.html
    """
    release = "1.0.5.21"
    url = ("http://downloads.sourceforge.net/project/cortexassembler/cortex_var/"
           "latest/CORTEX_release_v%s.tgz" % release)

    def _cortex_build(env):
        prefix_dir = env.system_install
        # Point the Makefile's library and GSL include paths at the install
        # prefix plus the standard system locations.
        makefile_edits = [
            (r"\-L/full/path/\S*",
             "-L{0}/lib -L/usr/lib -L/usr/local/lib".format(prefix_dir)),
            ("^IDIR_GSL =.*$",
             "IDIR_GSL={0}/include -I/usr/include -I/usr/local/include".format(prefix_dir)),
            ("^IDIR_GSL_ALSO =.*$",
             "IDIR_GSL_ALSO={0}/include/gsl -I/usr/include/gsl -I/usr/local/include/gsl".format(
                 prefix_dir)),
        ]
        for pattern, replacement in makefile_edits:
            env.safe_sed("Makefile", pattern, replacement)
        with cd("libs/gsl-1.15"):
            env.safe_run("make clean")
        with cd("libs/htslib"):
            env.safe_run("make clean")
            env.safe_run("make")
        # One cortex_var build per (colour count, maximum kmer) combination.
        for cols in ("1", "2", "3", "4", "5"):
            for kmer in ("31", "63", "95"):
                env.safe_run("make MAXK={0} NUM_COLS={1} cortex_var".format(kmer, cols))
        with cd("scripts/analyse_variants/needleman_wunsch"):
            env.safe_sed("Makefile", "string_buffer.c", "string_buffer.c -lz")
            # Fix incompatibilities with gzfile struct in zlib 1.2.6+
            gz_sources = ["libs/string_buffer/string_buffer.c", "libs/bioinf/bioinf.c",
                          "libs/string_buffer/string_buffer.h", "libs/bioinf/bioinf.h"]
            for source_file in gz_sources:
                env.safe_sed(source_file, r"gzFile \*", "gzFile ")
                env.safe_sed(source_file, r"gzFile\*", "gzFile")
            env.safe_run("make")

    _get_install_local(url, env, _cortex_build)
def install_bcbio_variation(env):
    """bcbio.variation: analyze genomic variation data with comparison and ensemble approaches.

    https://github.com/chapmanb/bcbio.variation
    """
    release = "0.2.6"
    url = ("https://github.com/chapmanb/bcbio.variation/releases/download/"
           "v{0}/bcbio.variation-{0}-standalone.jar".format(release))
    target_dir = _symlinked_java_version_dir("bcbio_variation", release, env)
    # No directory handed back: nothing to install
    # (presumably this version is already present -- confirm in shared).
    if not target_dir:
        return
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            standalone_jar = shared._remote_fetch(env, url)
            env.safe_sudo("mv %s %s" % (standalone_jar, target_dir))
# --- ChIP-seq
@_if_not_installed("macs14")
def install_macs(env):
    """MACS: Model-based Analysis for ChIP-Seq.

    http://liulab.dfci.harvard.edu/MACS/
    """
    release = env.get("tool_version", "1.4.2")
    url = "https://github.com/downloads/taoliu/MACS/MACS-%s.tar.gz" % release
    _get_install(url, env, _python_make)
# --- Structural variation
@_if_not_installed("hydra")
def install_hydra(env):
    """Hydra: detects structural variation breakpoints in unique and duplicated genomic regions.

    https://code.google.com/p/hydra-sv/
    """
    release = "0.5.3"
    url = ("https://storage.googleapis.com/google-code-archive-downloads/v2/"
           "code.google.com/hydra-sv/Hydra.v%s.tar.gz" % release)

    def clean_libs(env):
        # Run "make clean" in the unpacked tree before the real build.
        env.safe_run("make clean")

    copy_outputs = _make_copy("ls -1 bin/* scripts/*")
    _get_install(url, env, copy_outputs, post_unpack_fn=clean_libs)
def install_freec(env):
    """Control-FREEC: a tool for detection of copy number changes and allelic imbalances.

    Only acts on ubuntu and debian distributions, choosing the 64 or 32 bit
    download based on ``env.is_64bit``.

    http://bioinfo-out.curie.fr/projects/freec/
    """
    release = "6.4"
    if env.distribution not in ["ubuntu", "debian"]:
        return
    url = ("http://bioinfo-out.curie.fr/projects/freec/src/FREEC_Linux64.tar.gz"
           if env.is_64bit
           else "http://bioinfo-out.curie.fr/projects/freec/src/FREEC_LINUX32.tar.gz")
    if versioncheck.up_to_date(env, "freec", release, stdout_index=1):
        return
    copy_binary = _make_copy("find . -name 'freec'")
    _get_install(url, env, copy_binary, dir_name=".")
@_if_not_installed("CRISP.py")
def install_crisp(env):
    """CRISP: detect SNPs and short indels from pooled sequencing data.

    https://sites.google.com/site/vibansal/software/crisp/
    """
    release = "5"
    url = ("https://sites.google.com/site/vibansal/software/crisp/"
           "CRISP-linux-v%s.tar.gz" % release)

    def _make_executable():
        # Takes no arguments; uses the enclosing env to mark the scripts executable.
        env.safe_run("chmod a+x *.py")

    copy_scripts = _make_copy("ls -1 CRISP.py crisp_to_vcf.py",
                              premake_cmd=_make_executable,
                              do_make=False)
    _get_install(url, env, copy_scripts)
@_if_not_installed("run_pipeline.pl")
def install_tassel(env):
    """TASSEL: evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

    http://www.maizegenetics.net/index.php?option=com_content&task=view&id=89&/Itemid=119
    """
    release = "5"
    build_id = "1140d3fceb75"
    url = "https://bitbucket.org/tasseladmin/tassel-%s-standalone/get/%s.zip" % (release, build_id)
    launchers = ["start_tassel.pl", "run_pipeline.pl"]
    target_dir = _symlinked_java_version_dir("tassel", release, env)
    if not target_dir:
        return
    unpacked_dir = "tasseladmin-tassel-%s-standalone-%s" % (release, build_id)
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            archive = shared._remote_fetch(env, url)
            env.safe_run("unzip %s" % archive)
            with cd(unpacked_dir):
                for script in launchers:
                    # Make each launcher locate its directory via FindBin.
                    env.safe_sed(script, r"^my \$top.*;",
                                 "use FindBin qw($RealBin); my $top = $RealBin;")
                    env.safe_sudo("chmod a+rwx %s" % script)
                env.safe_sudo("mv * %s" % target_dir)
            for script in launchers:
                link_cmd = "ln -s %s/%s %s/bin/%s" % (target_dir, script,
                                                      env.system_install, script)
                env.safe_sudo(link_cmd)
@_if_not_installed("ustacks")
def install_stacks(env):
    """Stacks: build loci out of a set of short-read sequenced samples.

    http://creskolab.uoregon.edu/stacks/
    """
    release = "0.9999"
    url = "http://creskolab.uoregon.edu/stacks/source/stacks-%s.tar.gz" % release
    _get_install(url, env, _configure_make)
@_if_not_installed("seqlogo")
def install_weblogo(env):
    """Weblogo: install the executables and their supporting Perl modules.

    The release is processed twice: once to copy every owner-executable
    file, and once to copy the Perl modules and EPS template into
    ``<system_install>/lib/perl5``.

    http://weblogo.berkeley.edu/
    """
    version = "2.8.2"
    url = "http://weblogo.berkeley.edu/release/weblogo.%s.tar.gz" % version
    _get_install(url, env, _make_copy("find . -perm -100 -type f", do_make=False))

    def _cp_pm(env):
        for perl_module in ["template.pm", "logo.pm", "template.eps"]:
            env.safe_sudo("cp %s %s/lib/perl5" % (perl_module, env.system_install))

    # Pass the callable itself, as every other _get_install call in this file
    # does. The previous code called _cp_pm(env) right here and handed its
    # None return value to _get_install instead of the build step.
    _get_install(url, env, _cp_pm)
================================================
FILE: cloudbio/custom/bio_proteomics.py
================================================
"""Install proteomics tools not currently packaged.
"""
import os
import re
from fabric.api import cd
from fabric.context_managers import prefix
from shared import (_if_not_installed, _make_tmp_dir,
_get_install, _make_copy,
_java_install, _symlinked_java_version_dir,
_get_bin_dir, _get_install_subdir,
_fetch_and_unpack,
_create_python_virtualenv,
_get_bitbucket_download_url,
_write_to_file)
from cloudbio.galaxy.utils import _chown_galaxy
# Tools from the Tabb lab are only available via TeamCity builds, and the
# artifacts are eventually deleted (I think), so versions are stored
# for CloudBioLinux at getgalaxyp.msi.umn.edu for safe keeping.
PROTEOMICS_APP_ARCHIVE_URL = "http://getgalaxyp.msi.umn.edu/downloads"
# TODO: Define TPP install root
@_if_not_installed("xinteract")
def install_transproteomic_pipeline(env):
    """Build and install the Trans-Proteomic Pipeline (TPP) from its SourceForge tarball.

    The version comes from ``tool_version`` in env, falling back to the
    default below, and must have the form ``X.X.X-codename``
    (e.g. ``4.6.1-occupy``). Its pieces are used to build the download URL.

    Raises:
        ValueError: if the version string does not have the expected form.
    """
    ## version should be of form X.X.X-codename
    default_version = "4.6.1-occupy"
    version = env.get("tool_version", default_version)
    version_parts = re.match(r"(\d\.\d)\.(\d)-(.*)", version)
    if version_parts is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError("TPP version should be of form X.X.X-codename, got %r" % (version,))
    major_version = version_parts.group(1)
    revision = version_parts.group(2)
    codename = version_parts.group(3)
    # Revision 0 releases carry no revision suffix in the tarball name.
    if revision == "0":
        download_rev = ""
    else:
        download_rev = ".%s" % revision
    download_version = ("%s%s" % (major_version, download_rev))
    url_pieces = (major_version, codename, revision, download_version)
    url = 'http://sourceforge.net/projects/sashimi/files/Trans-Proteomic Pipeline (TPP)/TPP v%s (%s) rev %s/TPP-%s.tgz' % url_pieces

    def _chdir_src(work_cmd):
        """Wrap work_cmd so it runs in the source dir after the Makefile config is written."""
        def do_work(env):
            # The 4.6.1-occupy tarball nests its sources one level deeper.
            src_dir = "trans_proteomic_pipeline/src" if version == "4.6.1-occupy" else "src"
            with cd(src_dir):
                env.safe_append("Makefile.config.incl", "TPP_ROOT=%s/" % env["system_install"])
                env.safe_append("Makefile.config.incl", "TPP_WEB=/tpp/")
                env.safe_append("Makefile.config.incl", "XSLT_PROC=/usr/bin/xsltproc")
                env.safe_append("Makefile.config.incl", "CGI_USERS_DIR=${TPP_ROOT}cgi-bin")
                work_cmd(env)
        return do_work

    def _make(env):
        env.safe_run("make")
        env.safe_sudo("make install")

    _get_install(url, env, _chdir_src(_make))
@_if_not_installed("omssacl")
def install_omssa(env):
    """OMSSA: install the pre-built Linux release from the NCBI FTP site."""
    release = env.get("tool_version", "2.1.9")
    url = 'ftp://ftp.ncbi.nih.gov/pub/lewisg/omssa/%s/omssa-%s.linux.tar.gz' % (release, release)
    env.safe_sudo("mkdir -p '%s'" % env["system_install"])
    ## OMSSA really wants mods.xml, usermods.xml, etc... in the same directory
    ## so just copying everything there.
    copy_everything = _make_copy(find_cmd="ls -1", do_make=False)
    _get_install(url, env, copy_everything)
@_if_not_installed("OpenMSInfo")
def install_openms(env):
    """OpenMS: build from the SourceForge source tarball with cmake.

    The download directory uses the version without its last dotted
    component (e.g. ``1.10`` for ``1.10.0``).
    """
    release = env.get("tool_version", "1.10.0")
    series = release[:release.rindex('.')]
    url = ('http://downloads.sourceforge.net/project/open-ms/OpenMS/OpenMS-%s/OpenMS-%s.tar.gz'
           % (series, release))

    def _make(env):
        # Build the bundled contrib tree first, then OpenMS itself.
        with cd("contrib"):
            env.safe_run("cmake -DINSTALL_PREFIX=%s ." % env.get('system_install'))
            env.safe_run("make")
        env.safe_run("cmake -DINSTALL_PREFIX=%s ." % env.get('system_install'))
        env.safe_run("make")
        env.safe_sudo("make install")

    _get_install(url, env, _make)
@_if_not_installed("LTQ-iQuant")
def install_tint_proteomics_scripts(env):
    """Install the tint-proteomics-scripts release zip and link its launchers into the bin dir."""
    release = env.get("tool_version", "1.19.19")
    url = ("http://artifactory.msi.umn.edu/simple/ext-release-local/msi/umn/edu/"
           "tint-proteomics-scripts/%s/tint-proteomics-scripts-%s.zip" % (release, release))
    launchers = ["ITraqScanSummarizer", "LTQ-iQuant", "LTQ-iQuant-cli", "MgfFormatter"]

    def install_fn(env, install_dir):
        env.safe_sudo("mv * '%s'" % install_dir)
        bin_dir = _get_bin_dir(env)
        for script in launchers:
            script_path = os.path.join(install_dir, script)
            env.safe_sudo("ln -s '%s' %s" % (script_path, bin_dir))
        env.safe_sudo("chmod +x '%s'/*" % bin_dir)

    _java_install("tint-proteomics-scripts", release, url, env, install_fn)
@_if_not_installed("ms2preproc")
def install_ms2preproc(env):
    """ms2preproc: download the zip and install the x86_64 binary as ``ms2preproc``."""
    default_version = "2009"
    # NOTE(review): looked up but not used; the download URL is unversioned.
    version = env.get("tool_version", default_version)
    download_cmd = 'wget "http://software.steenlab.org/ms2preproc/ms2preproc.zip" -O ms2preproc.zip'
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            env.safe_run(download_cmd)
            env.safe_run("unzip ms2preproc.zip")
            with cd("ms2preproc"):
                # Strip the build revision from the binary name before installing.
                env.safe_run("mv ms2preproc-r2821-x86_64 ms2preproc-x86_64")
                env.safe_run("chmod +x ms2preproc-x86_64")
                bin_dir = _get_bin_dir(env)
                env.safe_sudo("mv ms2preproc-x86_64 '%s'/ms2preproc" % bin_dir)
@_if_not_installed("MZmine")
def install_mzmine(env):
    """MZmine 2: install the release zip and link a startup script into the bin dir."""
    release = env.get("tool_version", "2.10")
    url = ("http://downloads.sourceforge.net/project/mzmine/mzmine2/%s/MZmine-%s.zip"
           % (release, release))

    def install_fn(env, install_dir):
        ## Enhanced MZmine startup script that works when used a symbolic link and tailored for CloudBioLinux.
        _get_gist_script(env, "https://gist.github.com/jmchilton/5474421/raw/15f3b817fa82d5f5e2143ee08bd248efee951d6a/MZmine")
        # Hack for multi-user environment.
        env.safe_sudo("chmod -R o+w conf")
        env.safe_sudo("mv * '%s'" % install_dir)
        bin_dir = os.path.join(env.get("system_install"), "bin")
        env.safe_sudo("mkdir -p '%s'" % bin_dir)
        launcher = os.path.join(install_dir, "MZmine")
        link = os.path.join(bin_dir, "MZmine")
        env.safe_sudo("ln -s '%s' %s" % (launcher, link))

    _java_install("mzmine2", release, url, env, install_fn)
@_if_not_installed("SearchGUI")
def install_searchgui(env):
    """SearchGUI: install the mac_and_linux release and link the GUI and CLI launchers."""
    release = env.get("tool_version", "1.13.1")
    url = "http://searchgui.googlecode.com/files/SearchGUI-%s_mac_and_linux.zip" % release

    def install_fn(env, install_dir):
        unpacked_dir = "SearchGUI-%s_mac_and_linux" % release
        env.safe_sudo("tar -xf %s.tar" % unpacked_dir)
        with cd(unpacked_dir):
            _get_gist_script(env, "https://gist.github.com/jmchilton/5002161/raw/dc9fa36dd0e6eddcdf43cd2b659e4ecee5ad29df/SearchGUI")
            _get_gist_script(env, "https://gist.github.com/jmchilton/5002161/raw/b97fb4d9fe9927de1cfc5433dd1702252e9c0348/SearchCLI")
            # Fix known bug with SearchGUI version 1.12.2
            env.safe_sudo('find -iname "*.exe" -exec rename s/.exe// {} \\;')
            # Hack for multi-user environment.
            env.safe_sudo("chmod -R o+w resources")
            env.safe_sudo("mv * '%s'" % install_dir)
            bin_dir = os.path.join(env.get("system_install"), "bin")
            env.safe_sudo("mkdir -p '%s'" % bin_dir)
            for launcher in ("SearchGUI", "SearchCLI"):
                source = os.path.join(install_dir, launcher)
                link = os.path.join(bin_dir, launcher)
                env.safe_sudo("ln -s '%s' %s" % (source, link))

    _unzip_install("SearchGUI", release, url, env, install_fn)
@_if_not_installed("psm_eval")
def install_psm_eval(env):
    """psm-eval: clone from GitHub, set up its virtualenv, and link ``psm_eval`` into the bin dir."""
    release = env.get("tool_version", "0.1.0")
    clone_cmd = "git clone https://github.com/jmchilton/psm-eval.git"

    def install_fn(env, install_dir):
        env.safe_sudo("cp -r psm-eval/* '%s'" % install_dir)
        requirements = "%s/requirements.txt" % install_dir
        _create_python_virtualenv(env, "psme", requirements)
        bin_dir = os.path.join(env.get("system_install"), "bin")
        env.safe_sudo("mkdir -p '%s'" % bin_dir)
        launcher = os.path.join(install_dir, "psm_eval")
        link = os.path.join(bin_dir, "psm_eval")
        env.safe_sudo("ln -s '%s' %s" % (launcher, link))

    _unzip_install("psm_eval", release, clone_cmd, env, install_fn)
@_if_not_installed("PeptideShaker")
def install_peptide_shaker(env):
    """PeptideShaker: install the release zip and link the GUI and CLI launchers."""
    release = env.get("tool_version", "0.20.1")
    url = "http://peptide-shaker.googlecode.com/files/PeptideShaker-%s.zip" % release

    def install_fn(env, install_dir):
        _get_gist_script(env, "https://gist.github.com/jmchilton/5002161/raw/f1fe76d6e6eed99a768ed0b9f41c2d0a6a4b24b7/PeptideShaker")
        _get_gist_script(env, "https://gist.github.com/jmchilton/5002161/raw/8a17d5fb589984365284e55a98a455c2b47da54f/PeptideShakerCLI")
        # Hack for multi-user environment.
        env.safe_sudo("chmod -R o+w resources")
        env.safe_sudo("mv * '%s'" % install_dir)
        bin_dir = os.path.join(env.get("system_install"), "bin")
        env.safe_sudo("mkdir -p '%s'" % bin_dir)
        for launcher in ("PeptideShaker", "PeptideShakerCLI"):
            source = os.path.join(install_dir, launcher)
            link = os.path.join(bin_dir, launcher)
            env.safe_sudo("ln -s '%s' %s" % (source, link))

    _java_install("PeptideShaker", release, url, env, install_fn)
def _get_gist_script(env, url):
name = url.split("/")[-1]
env.safe_sudo("wget '%s'" % url)
env.safe_sudo("chmod +x '%s'" % name)
@_if_not_installed("Mayu")
def install_mayu(env):
    """Mayu: unpack into the share dir and write a bash wrapper that runs Mayu.pl from there."""
    release = env.get("tool_version", "1.06")
    url = "http://proteomics.ethz.ch/muellelu/web/LukasReiter/Mayu/package/Mayu.zip"

    def install_fn(env, install_dir):
        share_dir = _get_install_subdir(env, "share")
        env.safe_sudo("mv Mayu '%s'" % share_dir)
        wrapper = "%s/Mayu" % _get_bin_dir(env)
        # The wrapper changes into the Mayu directory before invoking perl.
        write_wrapper = """echo '#!/bin/bash\ncd %s/Mayu; perl Mayu.pl \"$@\"' > %s """ % (share_dir, wrapper)
        env.safe_sudo(write_wrapper)
        env.safe_sudo("chmod +x '%s'" % wrapper)

    _unzip_install("mayu", release, url, env, install_fn)
def install_pride_inspector(env):
    """PRIDE Inspector: install the release zip and link a launcher script into the bin dir."""
    release = env.get("tool_version", "1.3.0")
    url = "http://pride-toolsuite.googlecode.com/files/pride-inspector-%s.zip" % release

    def install_fn(env, install_dir):
        _get_gist_script(env, "https://gist.github.com/jmchilton/5474788/raw/6bcffd8680ec0e0301af44961184529a1f76dd3b/pride-inspector")
        # Hack for multi-user environment.
        env.safe_sudo("chmod -R o+w log config")
        env.safe_sudo("mv * '%s'" % install_dir)
        bin_dir = os.path.join(env.get("system_install"), "bin")
        env.safe_sudo("mkdir -p '%s'" % bin_dir)
        launcher = os.path.join(install_dir, "pride-inspector")
        link = os.path.join(bin_dir, "pride-inspector")
        env.safe_sudo("ln -s '%s' %s" % (launcher, link))

    _unzip_install("pride_inspector", release, url, env, install_fn, "PRIDE_Inspector")
def install_pride_converter2(env):
    """PRIDE Converter 2: install the binary zip and link a launcher script into the bin dir."""
    release = env.get("tool_version", "2.0.17")
    url = "http://pride-converter-2.googlecode.com/files/pride-converter-%s-bin.zip" % release

    def install_fn(env, install_dir):
        _get_gist_script(env, "https://gist.github.com/jmchilton/5475119/raw/4e9135ada5114ba149f3ebc8965aee242bfc776f/pride-converter")
        # Hack for multi-user environment.
        env.safe_sudo("mkdir log; chmod o+w log")
        env.safe_sudo("mv * '%s'" % install_dir)
        bin_dir = os.path.join(env.get("system_install"), "bin")
        env.safe_sudo("mkdir -p '%s'" % bin_dir)
        launcher = os.path.join(install_dir, "pride-converter")
        link = os.path.join(bin_dir, "pride-converter")
        env.safe_sudo("ln -s '%s' %s" % (launcher, link))

    _unzip_install("pride_converter2", release, url, env, install_fn, ".")
def _unzip_install(pname, version, url, env, install_fn, dir_name="."):
    """Fetch and unpack url in a temporary directory, then run install_fn inside dir_name.

    install_fn is called as ``install_fn(env, install_dir)``. Nothing happens
    when ``_symlinked_java_version_dir`` returns a falsy install directory.
    """
    install_dir = _symlinked_java_version_dir(pname, version, env)
    if not install_dir:
        return
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            _fetch_and_unpack(url, need_dir=False)
            with cd(dir_name):
                install_fn(env, install_dir)
@_if_not_installed("SuperHirnv03")
def install_superhirn(env):
    """SuperHirn: build from the GitHub zipball and copy the resulting executables."""
    release = env.get("tool_version", "0.03")
    url = "https://github.com/jmchilton/SuperHirn/zipball/%s/SuperHirn.zip" % release

    def _chdir(work_cmd):
        """Wrap work_cmd so it runs inside the SuperHirnv03/make directory."""
        def do_work(env):
            with cd("SuperHirnv03/make"):
                work_cmd(env)
        return do_work

    copy_binaries = _make_copy(find_cmd="find -perm -100 -name 'SuperHirn*'")
    _get_install(url, env, _chdir(copy_binaries))
@_if_not_installed("percolator")
def install_percolator(env):
    """Percolator: build the full source release with cmake and install it under system_install."""
    release = env.get("tool_version", "2_04")
    url = ("https://github.com/downloads/percolator/percolator/percolator_%s_full_src.tar.gz"
           % release)

    def make(env):
        # The build is driven from the parent of the directory entered by _get_install.
        with cd(".."):
            env.safe_run("env")
            configure_cmd = "cmake -DCMAKE_INSTALL_PREFIX='%s' . " % env.system_install
            env.safe_run(configure_cmd)
            env.safe_run("make -j8")
            env.safe_sudo("make install")

    _get_install(url, env, make)
@_if_not_installed("PepNovo")
def install_pepnovo(env):
    """PepNovo: compile from the source zip, then install the binary and its Models directory."""
    release = env.get("tool_version", "20120423")
    url = "http://proteomics.ucsd.edu/Downloads/PepNovo.%s.zip" % release

    def install_fn(env, install_dir):
        prefix_dir = env.system_install
        with cd("src"):
            env.safe_run("make")
            env.safe_sudo("mkdir -p '%s/bin'" % prefix_dir)
            env.safe_sudo("mkdir -p '%s/share/pepnovo'" % prefix_dir)
            env.safe_sudo("mv PepNovo_bin '%s/bin/PepNovo'" % prefix_dir)
            # Models sits next to src in the unpacked archive.
            env.safe_sudo("cp -r '../Models' '%s/share/pepnovo'" % prefix_dir)

    _unzip_install("pepnovo", release, url, env, install_fn)
@_if_not_installed("crux")
def install_crux(env):
    """Crux: install the pre-built x86_64 Linux binaries into the bin dir."""
    release = env.get("tool_version", "1.39")
    url = "http://noble.gs.washington.edu/proj/crux/download/crux_%s-x86_64-Linux.zip" % release

    def _move(env):
        target = _get_bin_dir(env)
        env.safe_sudo("mv bin/* '%s'" % target)

    _get_install(url, env, _move)
@_if_not_installed("Fido")
def install_fido(env):
    """Fido: build from source with a rewritten makefile header and copy the executables."""
    version = "2011"
    url = 'http://noble.gs.washington.edu/proj/fido/fido.tar.gz'

    # Adapted from Jorrit Boekel's mi-deployment fork
    # https://bitbucket.org/glormph/mi-deployment-protoeimcs
    def _chdir_src(work_cmd):
        """Wrap work_cmd so it runs in src/cpp after the makefile has been patched."""
        def do_work(env):
            header_lines = [
                'SHELL=/bin/bash',
                'prefix=%s' % env.get("system_install"),
                'CPPFLAGS=-Wall -ffast-math -march=x86-64 -pipe -O4 -g',
            ]
            with cd("src/cpp"):
                # New header first, then the original makefile from BINPATH onwards.
                for line in header_lines:
                    env.safe_append('tmpmake', line)
                env.safe_run('cat makefile |grep BINPATH -A 9999 >> tmpmake')
                env.safe_run('cp tmpmake makefile')
                work_cmd(env)
        return do_work

    copy_binaries = _make_copy(find_cmd="find ../../bin -perm -100 -name 'Fido*'")
    _get_install(url, env, _chdir_src(copy_binaries))
def install_ipig(env):
    """iPiG: download and unzip the release into Galaxy's jars dir, owned by the Galaxy user."""
    # This galaxy specific download probably doesn't belong in this file.
    release = env.get("tool_version", "r5")
    url = 'http://downloads.sourceforge.net/project/ipig/ipig_%s.zip' % release
    archive = os.path.split(url)[-1]
    install_dir = os.path.join(env.galaxy_jars_dir, 'ipig')
    # Run commands with or without sudo depending on the environment setting.
    if env.use_sudo:
        install_cmd = env.safe_sudo
    else:
        install_cmd = env.safe_run
    install_cmd("mkdir -p %s" % install_dir)
    with cd(install_dir):
        install_cmd("wget %s -O %s" % (url, archive))
        install_cmd("unzip -u %s" % (archive))
        install_cmd("rm %s" % (archive))
    install_cmd('chown --recursive %s:%s %s' % (env.galaxy_user, env.galaxy_user, install_dir))
def install_peptide_to_gff(env):
    """peptide_to_gff: clone with hg, set up its virtualenv, and link the launcher into the bin dir."""
    release = env.get("tool_version", "master")
    clone_cmd = "hg clone https://jmchilton@bitbucket.org/galaxyp/peptide_to_gff"

    def install_fn(env, install_dir):
        env.safe_sudo("cp -r peptide_to_gff/* '%s'" % install_dir)
        requirements = "%s/requirements.txt" % install_dir
        _create_python_virtualenv(env, "peptide_to_gff", requirements)
        bin_dir = os.path.join(env.get("system_install"), "bin")
        env.safe_sudo("mkdir -p '%s'" % bin_dir)
        launcher = os.path.join(install_dir, "peptide_to_gff")
        link = os.path.join(bin_dir, "peptide_to_gff")
        env.safe_sudo("ln -s '%s' '%s'" % (launcher, link))

    _unzip_install("peptide_to_gff", release, clone_cmd, env, install_fn)
def install_galaxy_protk(env):
    """Install Ira Cooke's ProtK framework for use by Galaxy tools.

    Very Galaxy specific: it can only be installed in the context of a custom
    Galaxy tool. By default ProtK is installed from the rubygems server. When
    the requested version contains ``@`` (``<version>@<revision>``, e.g.
    1.1.5@https://bitbucket.org/iracooke/protk-working) the gem is cloned
    with hg and built from source instead.
    """
    if not env.get('galaxy_tool_install', False):
        from cloudbio.custom.galaxy import _prep_galaxy
        _prep_galaxy(env)
    requested = env.get("tool_version", "1.2.2")
    # e.g. protk_version = 1.1.5@https://bitbucket.org/iracooke/protk-working
    install_from_source = requested.find("@") > 0
    if install_from_source:
        version, revision = requested.split("@")
        url = _get_bitbucket_download_url(revision, "https://bitbucket.org/iracooke/protk")
    else:
        version = requested

    ruby_version = "1.9.3"
    force_rvm_install = False

    # Everything below runs with HOME pointing at the Galaxy user's home.
    with prefix("HOME=~%s" % env.galaxy_user):
        def rvm_exec(env, cmd="", rvm_cmd="use", with_gemset=False):
            """Run cmd as the Galaxy user after loading rvm and selecting the ruby (and gemset)."""
            if with_gemset:
                target = "%s@%s" % (ruby_version, "protk-%s" % version)
            else:
                target = ruby_version
            rvm_setup = ". $HOME/.rvm/scripts/rvm; rvm %s %s; " % (rvm_cmd, target)
            env.safe_sudo("%s %s" % (rvm_setup, cmd), user=env.galaxy_user)

        rvm_missing = not env.safe_exists("$HOME/.rvm")
        if rvm_missing or force_rvm_install:
            bootstrap_cmd = ("curl -L get.rvm.io | bash -s stable; source ~%s/.rvm/scripts/rvm"
                             % (env.galaxy_user))
            env.safe_sudo(bootstrap_cmd, user=env.galaxy_user)
            rvm_exec(env, rvm_cmd="install")
            rvm_exec(env, cmd="rvm gemset create protk-%s" % version)

        if install_from_source:
            with cd("~%s" % env.galaxy_user):
                clone_cmd = "rm -rf protk_source; hg clone '%s' protk_source" % url
                env.safe_sudo(clone_cmd, user=env.galaxy_user)
                rvm_exec(env, "cd protk_source; gem build protk.gemspec; gem install protk",
                         with_gemset=True)
        else:
            # Typical rubygem install
            rvm_exec(env, "gem install --no-ri --no-rdoc protk -v %s" % version, with_gemset=True)

        tools_dir = env.galaxy_tools_dir
        tpp_default = os.path.join(tools_dir, "transproteomic_pipeline", "default")
        ## ProtK can set these up itself, should make that an option.
        ### Assumes omssa, blast, and transproteomic_pipeline CBL galaxy installs.
        # Other properties: log_file, blast_root
        protk_properties = {
            "tpp_root": tpp_default,
            "openms_root": "/usr",  # os.path.join(env.galaxy_tools_dir, "openms", "default", "bin")
            "omssa_root": os.path.join(tools_dir, "omssa", "default", "bin"),
            "blast_root": os.path.join(tools_dir, "blast+", "default"),
            "pwiz_root": os.path.join(tools_dir, "transproteomic_pipeline", "default", "bin"),
        }

        for config_dir in ('"$HOME/.protk"', '"$HOME/.protk/Databases"'):
            env.safe_sudo("mkdir -p %s" % config_dir, user=env.galaxy_user)

        import yaml
        config_path = "/home/%s/.protk/config.yml" % env.galaxy_user
        _write_to_file(yaml.dump(protk_properties), config_path, "0755")

        rvm_exec(env, "protk_setup.rb galaxyenv", with_gemset=True)

        install_dir = os.path.join(env.galaxy_tools_dir, "galaxy_protk", version)
        env.safe_sudo("mkdir -p '%s'" % install_dir)
        _chown_galaxy(env, install_dir)
        env.safe_sudo('ln -s -f "$HOME/.protk/galaxy/env.sh" "%s/env.sh"' % install_dir,
                      user=env.galaxy_user)
        # Point the sibling "default" link at this version.
        with cd(install_dir):
            with cd(".."):
                env.safe_sudo("ln -s -f '%s' default" % version)
@_if_not_installed("myrimatch")
def install_myrimatch(env):
    """MyriMatch: install the pre-built binary via the shared Tabb tool installer."""
    _install_tabb_tool(env, "2.1.131",
                       download_name="myrimatch-bin-linux-x86_64-gcc41-release",
                       exec_names=["myrimatch"])
@_if_not_installed("pepitome")
def install_pepitome(env):
    """Pepitome: install the pre-built binary via the shared Tabb tool installer."""
    _install_tabb_tool(env, "1.0.45",
                       download_name="pepitome-bin-linux-x86_64-gcc41-release",
                       exec_names=["pepitome"])
@_if_not_installed("directag")
def install_directag(env):
    """DirecTag: install the pre-built binaries via the shared Tabb tool installer."""
    _install_tabb_tool(env, "1.3.62",
                       download_name="directag-bin-linux-x86_64-gcc41-release",
                       exec_names=["adjustScanRankerScoreByGroup", "directag"])
@_if_not_installed("tagrecon")
def install_tagrecon(env):
    """TagRecon: install the binary and its data files via the shared Tabb tool installer."""
    # TODO: Should consider a better way to handle the unimod xml and blosum matrix.
    installed_files = ["tagrecon", "unimod.xml", "blosum62.fas"]
    _install_tabb_tool(env, "1.4.63",
                       download_name="tagrecon-bin-linux-x86_64-gcc41-release",
                       exec_names=installed_files)
@_if_not_installed("idpQonvert")
def install_idpqonvert(env):
    """idpQonvert: download the binary from the proteomics archive and install it into bin."""
    release = env.get("tool_version", "3.0.475")
    url = "%s/idpQonvert_%s" % (PROTEOMICS_APP_ARCHIVE_URL, release)
    binary = "idpQonvert"
    env.safe_run("wget --no-check-certificate -O %s '%s'" % (binary, url))
    env.safe_run("chmod 755 idpQonvert")
    bin_dir = "%s/bin" % env["system_install"]
    env.safe_sudo("mkdir -p '%s'" % bin_dir)
    env.safe_sudo("mv %s '%s'" % (binary, bin_dir))
    env.safe_sudo("chmod +x '%s/idpQonvert'" % bin_dir)
def _install_tabb_tool(env, default_version, download_name, exec_names):
    """Fetch a ``.tar.bz2`` tool archive and move its files to ``<system_install>/bin``.

    env -- install environment; ``tool_version`` overrides `default_version`.
    default_version -- dotted version used when ``tool_version`` is not set.
    download_name -- archive name without the version suffix and extension.
    exec_names -- file names to move from the unpacked archive into ``bin``.
    """
    version = env.get("tool_version", default_version)
    # Archive names carry the version with underscores instead of dots.
    version_tag = version.replace(".", "_")
    url = "%s/%s-%s.tar.bz2" % (PROTEOMICS_APP_ARCHIVE_URL, download_name, version_tag)
    _fetch_and_unpack(url, False)
    system_install = env["system_install"]
    env.safe_sudo("mkdir -p '%s/bin'" % system_install)
    for name in exec_names:
        env.safe_sudo("mv %s '%s/bin'" % (name, system_install))
================================================
FILE: cloudbio/custom/bio_proteomics_wine.py
================================================
from fabric.api import cd
from shared import (_make_tmp_dir, _fetch_and_unpack, _write_to_file, _get_bin_dir)
import os
def install_proteomics_wine_env(env):
    """Upload the wine environment setup script unless it is already present.

    The source path comes from ``env["setup_proteomics_wine_env_script"]`` and
    the destination is ``<system_install>/bin/setup_proteomics_wine_env.sh``.
    """
    script_dest = "%s/bin/setup_proteomics_wine_env.sh" % env.get("system_install")
    script_src = env.get("setup_proteomics_wine_env_script")
    if env.safe_exists(script_dest):
        return
    env.safe_put(script_src, script_dest, mode="0755", use_sudo=True)
def install_multiplierz(env):
    r"""
    Assumes your wine environment contains an install Python 2.6
    in C:\Python26.

    Sets up the proteomics wine environment as the wine user, clones the
    multiplierz repository into a temporary directory and runs its
    ``setup.py install`` with the wine Python.
    """
    wine_user = _get_wine_user(env)
    install_proteomics_wine_env(env)
    env.safe_sudo("setup_proteomics_wine_env.sh", user=wine_user)
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            _fetch_and_unpack("hg clone http://multiplierz.hg.sourceforge.net:8000/hgroot/multiplierz/multiplierz")
            with cd("multiplierz"):
                wine_prefix = _get_wine_prefix(env)
                install_cmd = "%s; wine %s/drive_c/Python26/python.exe setup.py install" % (
                    _conf_wine(env), wine_prefix)
                env.safe_sudo(install_cmd, user=wine_user)
def install_proteowizard(env):
    """Install the Windows ProteoWizard build and create wine wrappers for it.

    The archive is unpacked in a temporary directory, copied to
    ``<install_dir>/share/proteowizard`` and each listed application gets a
    wrapper script via ``setup_wine_wrapper``.
    """
    build_id = "85131"
    version = "3_0_4624"
    url_template = "http://teamcity.labkey.org:8080/repository/download/bt36/%s:id/pwiz-bin-windows-x86-vc100-release-%s.tar.bz2?guest=1"
    url = url_template % (build_id, version)
    share_dir = "%s/share/proteowizard" % env.get("install_dir")
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            _fetch_and_unpack(url, need_dir=False)
            env.safe_sudo("cp -r . '%s'" % share_dir)
    for app in ("msconvert", "msaccess", "chainsaw", "msdiff", "mspicture", "mscat",
                "txt2mzml", "MSConvertGUI", "Skyline", "Topograph", "SeeMS"):
        setup_wine_wrapper(env, "%s/%s" % (share_dir, app))
def install_morpheus(env):
    """Install Morpheus and create wine wrappers for its executables.

    The zip is unpacked in a temporary directory, its ``Morpheus`` directory
    is copied to ``<install_dir>/share/morpheus`` and each executable gets a
    wrapper script via ``setup_wine_wrapper``.
    """
    url = "http://www.chem.wisc.edu/~coon/Downloads/Morpheus/latest/Morpheus.zip"  # TODO:
    share_dir = "%s/share/morpheus" % env.get("install_dir")
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            _fetch_and_unpack(url, need_dir=False)
            env.safe_sudo("cp -r Morpheus '%s'" % share_dir)
    for exe_name in ("morpheus_cl.exe", "Morpheus.exe"):
        setup_wine_wrapper(env, "%s/%s" % (share_dir, exe_name))
def setup_wine_wrapper(env, to):
    """Write a bash wrapper that runs the Windows program `to` under wine.

    The wrapper is named after the basename of `to`, is placed in the
    directory returned by ``_get_bin_dir(env)`` and is written with mode 0755.
    """
    # The script text must start at column 0 so the shebang is the first line.
    script_template = """#!/bin/bash
setup_proteomics_wine_env.sh
export WINEPREFIX=$HOME/.wine-proteomics
wine %s "$@"
"""
    wrapper_name = os.path.basename(to)
    contents = script_template % to
    dest = "%s/%s" % (_get_bin_dir(env), wrapper_name)
    _write_to_file(contents, dest, '0755')
def _conf_wine(env):
    """Return the shell statement exporting WINEPREFIX for the wine user."""
    prefix = _get_wine_prefix(env)
    return "export WINEPREFIX=%s" % prefix
def _get_wine_prefix(env):
    """Return ``~<wine user>/.wine-proteomics`` (tilde left for the shell to expand)."""
    return "~%s/.wine-proteomics" % _get_wine_user(env)
def _get_wine_user(env):
    """Return ``env["wine_user"]`` when set, otherwise ``env["user"]`` (or None)."""
    fallback_user = env.get("user")
    return env.get("wine_user", fallback_user)
================================================
FILE: cloudbio/custom/cloudman.py
================================================
"""Custom install scripts for CloudMan environment.
From Enis Afgan: https://bitbucket.org/afgane/mi-deployment
"""
import os
import contextlib
from fabric.api import cd
from fabric.contrib.files import settings, hide
from cloudbio.custom.shared import (_make_tmp_dir, _setup_conf_file)
from cloudbio.cloudman import (_configure_cloudman, _configure_novnc,
_configure_desktop, _configure_ec2_autorun)
from cloudbio.galaxy import _install_nginx
# Base URL the SGE tarball is downloaded from (see install_sge).
CDN_ROOT_URL = "http://linuxcourse.rutgers.edu/rate/Clusters/download"
# Base URL for raw files of the mi-deployment repository; install_proftpd
# fetches its init.d script and welcome message from conf_files/ below it.
REPO_ROOT_URL = "https://bitbucket.org/afgane/mi-deployment/raw/tip"
def install_cloudman(env):
    """ A meta method for installing all of CloudMan components.
    Allows CloudMan and all of its dependencies to be installed via:
    fab -f fabfile.py -i <key> -H ubuntu@<IP> install_custom:cloudman

    CloudMan itself is configured first, then nginx, ProFTPd, SGE and noVNC
    are installed in that order.
    """
    env.logger.debug("Installing CloudMan")
    _configure_cloudman(env, use_repo_autorun=False)
    component_installers = (install_nginx, install_proftpd, install_sge, install_novnc)
    for installer in component_installers:
        installer(env)
def install_ec2_autorun(env):
    """Custom-install entry point that delegates to ``_configure_ec2_autorun``."""
    _configure_ec2_autorun(env)
def install_novnc(env):
    """Configure noVNC and then the desktop, in that order."""
    for configure in (_configure_novnc, _configure_desktop):
        configure(env)
def install_nginx(env):
    """Custom-install entry point that delegates to ``cloudbio.galaxy._install_nginx``."""
    _install_nginx(env)
def install_proftpd(env):
    """Highly configurable GPL-licensed FTP server software.
    http://proftpd.org/

    Builds ProFTPd from source with the PostgreSQL SQL modules, installs the
    init.d script, configuration file and welcome message, then stows it.
    Nothing is done when ``<install_dir>/proftpd/etc`` already exists.
    """
    version = "1.3.4c"
    postgres_ver = "9.1"
    url = "ftp://ftp.tpnet.pl/pub/linux/proftpd/distrib/source/proftpd-%s.tar.gz" % version
    modules = "mod_sql:mod_sql_postgres:mod_sql_passwd"
    # Comma separated list of extra modules
    extra_modules = env.get("extra_proftp_modules", "")
    if extra_modules:
        # configure expects a colon separated module list
        modules = "%s:%s" % (modules, extra_modules.replace(",", ":"))
    install_dir = os.path.join(env.install_dir, 'proftpd')
    remote_conf_dir = os.path.join(install_dir, "etc")

    # Skip install if already available
    if env.safe_exists(remote_conf_dir):
        env.logger.debug("ProFTPd seems to already be installed in {0}".format(install_dir))
        return

    # Download, unpack and build inside a temporary working directory
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            env.safe_run("wget %s" % url)
            tarball = os.path.split(url)[1]
            with settings(hide('stdout')):
                env.safe_run("tar xvzf %s" % tarball)
            with cd("proftpd-%s" % version):
                configure_cmd = (
                    "CFLAGS='-I/usr/include/postgresql' ./configure --prefix=%s "
                    "--disable-auth-file --disable-ncurses --disable-ident --disable-shadow "
                    "--enable-openssl --with-modules=%s "
                    "--with-libraries=/usr/lib/postgresql/%s/lib" % (install_dir, modules, postgres_ver))
                env.safe_run(configure_cmd)
                for make_cmd in ("make", "make install", "make clean"):
                    env.safe_sudo(make_cmd)

    # Get the init.d startup script
    initd_script = 'proftpd.initd'
    initd_url = os.path.join(REPO_ROOT_URL, 'conf_files', initd_script)
    initd_file = "/etc/init.d/proftpd"
    env.safe_sudo("wget --output-document=%s %s" % (initd_file, initd_url))
    env.safe_sed(initd_file, 'REPLACE_THIS_WITH_CUSTOM_INSTALL_DIR', install_dir, use_sudo=True)
    env.safe_sudo("chmod 755 %s" % initd_file)

    # Set the configuration file
    conf_file = 'proftpd.conf'
    remote_conf_file = os.path.join(remote_conf_dir, conf_file)
    # Defaults are stored on env; presumably the configuration template reads
    # them from there -- verify against _setup_conf_file.
    if "postgres_port" not in env:
        env.postgres_port = '5910'
    if "galaxy_ftp_user_password" not in env:
        env.galaxy_ftp_user_password = 'fu5yOj2sn'
    galaxy_uid = env.safe_run('id -u galaxy')
    proftpd_conf = {
        'galaxy_uid': galaxy_uid,
        'galaxy_fs': '/mnt/galaxy',  # Should be a var but uncertain how to get it
        'install_dir': install_dir,
    }
    _setup_conf_file(env, remote_conf_file, conf_file, overrides=proftpd_conf,
                     default_source="proftpd.conf.template")

    # Get the custom welcome msg file
    welcome_msg_file = 'welcome_msg.txt'
    welcome_url = os.path.join(REPO_ROOT_URL, 'conf_files', welcome_msg_file)
    welcome_dest = os.path.join(remote_conf_dir, welcome_msg_file)
    env.safe_sudo("wget --output-document=%s %s" % (welcome_dest, welcome_url))

    # Stow
    env.safe_sudo("cd %s; stow proftpd" % env.install_dir)
    env.logger.debug("----- ProFTPd %s installed to %s -----" % (version, install_dir))
def install_sge(env):
    """Sun Grid Engine.

    Downloads the SGE tarball from ``CDN_ROOT_URL`` and unpacks it into
    ``env.install_dir``. Nothing is done when ``<install_dir>/ge6.2u5``
    already exists.
    """
    out_dir = "ge6.2u5"
    url = "%s/ge62u5_lx24-amd64.tar.gz" % CDN_ROOT_URL
    install_dir = env.install_dir
    if env.safe_exists(os.path.join(install_dir, out_dir)):
        return
    with _make_tmp_dir() as work_dir:
        # A multi-manager ``with`` replaces contextlib.nested, which is
        # deprecated in Python 2.7 and does not exist in Python 3.
        with cd(work_dir), settings(hide('stdout')):
            env.safe_run("wget %s" % url)
            # The tarball is unpacked without sudo, so the login user must
            # own the install directory first.
            env.safe_sudo("chown %s %s" % (env.user, install_dir))
            env.safe_run("tar -C %s -xvzf %s" % (install_dir, os.path.split(url)[1]))
    env.logger.debug("SGE setup")
================================================
FILE: cloudbio/custom/distributed.py
================================================
"""Install instructions for distributed MapReduce style programs.
"""
import os
from fabric.api import *
from fabric.contrib.files import *
from shared import (_if_not_python_lib, _pip_cmd, _is_anaconda)
@_if_not_python_lib("pydoop")
def install_pydoop(env):
    """pydoop; provides Hadoop access for Python.
    http://pydoop.sourceforge.net/docs/

    JAVA_HOME is taken from ``env.java_home`` when present, otherwise from the
    local ``JAVA_HOME`` environment variable (KeyError if that is unset too).
    """
    if "java_home" in env:
        java_home = env.java_home
    else:
        java_home = os.environ["JAVA_HOME"]
    export_str = "export JAVA_HOME=%s" % (java_home)
    # Anaconda installs run as the plain user; otherwise pip runs under sudo.
    if _is_anaconda(env):
        runner = env.safe_run
    else:
        runner = env.safe_sudo
    runner("%s && %s install pydoop" % (export_str, _pip_cmd(env)))
@_if_not_python_lib("bl.mr.seq.seqal")
def install_seal(env):
    """Install seal: process high-throughput sequencing with Hadoop.
    http://biodoop-seal.sf.net/

    pydoop is installed first. JAVA_HOME is taken from ``env.java_home`` when
    present, otherwise from the local ``JAVA_HOME`` environment variable.
    """
    install_pydoop(env)
    if "java_home" in env:
        java_home = env.java_home
    else:
        java_home = os.environ["JAVA_HOME"]
    export_str = "export JAVA_HOME=%s" % (java_home)
    # Anaconda installs run as the plain user; otherwise pip runs under sudo.
    if _is_anaconda(env):
        runner = env.safe_run
    else:
        runner = env.safe_sudo
    runner("%s && %s install --pre seal" % (export_str, _pip_cmd(env)))
================================================
FILE: cloudbio/custom/galaxy.py
================================================
"""
Install any components that fall under 'galaxy' directive in main.yaml
"""
from cloudbio.galaxy import _setup_users
from cloudbio.galaxy import _setup_galaxy_env_defaults
from cloudbio.galaxy import _install_galaxy
from cloudbio.galaxy import _configure_galaxy_options
def install_galaxy_webapp(env):
    """Prepare the Galaxy environment, install Galaxy, then configure its options."""
    for step in (_prep_galaxy, _install_galaxy, _configure_galaxy_options):
        step(env)
def _prep_galaxy(env):
    """Run the shared Galaxy preparation: user setup, then env defaults."""
    for step in (_setup_users, _setup_galaxy_env_defaults):
        step(env)
================================================
FILE: cloudbio/custom/galaxy_tools.py
================================================
"""
Install any components that fall under 'galaxy_tools' directive in main.yaml
"""
from cloudbio.galaxy.tools import _install_tools
from cloudbio.custom.galaxy import _prep_galaxy
def install_cbl_galaxy_tools(env):
    """Prepare the Galaxy environment and then install the Galaxy tools."""
    for step in (_prep_galaxy, _install_tools):
        step(env)
================================================
FILE: cloudbio/custom/galaxyp.py
================================================
"""
"""
from cloudbio.galaxy.utils import _chown_galaxy
from fabric.contrib.files import *
from shared import _write_to_file
def install_protvis(env):
    """ Installs Andrew Brock's proteomics visualize tool.
    https://bitbucket.org/Andrew_Brock/proteomics-visualise/

    Clones the ``lorikeet`` branch of the protvis fork, syncs it into
    ``env["protvis_home"]``, runs its ``setup.sh`` as the galaxy user, writes
    ``conf.py`` and registers the protvis service.
    """
    _setup_protvis_env(env)
    protvis_home = env["protvis_home"]
    env.safe_sudo("sudo apt-get -y --force-yes install libxml2-dev libxslt-dev")

    # Fresh checkout in the current remote directory
    run("rm -rf protvis")
    run("git clone -b lorikeet https://github.com/jmchilton/protvis.git")
    with cd("protvis"):
        for submodule_cmd in ("git submodule init", "git submodule update"):
            run(submodule_cmd)
        env.safe_sudo("rsync -avur --delete-after . %s" % (protvis_home))
        _chown_galaxy(env, protvis_home)
    with cd(protvis_home):
        env.safe_sudo("./setup.sh", user=env.get("galaxy_user", "galaxy"))

    # Settings file read by protvis
    galaxy_data_dir = env.get('galaxy_data_dir', "/mnt/galaxyData/")
    converted_dir = env.get('protvis_converted_files_dir')
    conf_text = '''GALAXY_ROOT = "%s"
PATH_WHITELIST = ["%s/files/", "%s"]
CONVERTED_FILES = "%s"
''' % (env.galaxy_home, galaxy_data_dir, converted_dir, converted_dir)
    _write_to_file(conf_text, "%s/conf.py" % protvis_home, "0755")
    _setup_protvis_service(env)
def _setup_protvis_env(env):
    """Fill in protvis settings on `env` that are not already present.

    Sets ``protvis_home``, ``protvis_user``, ``protvis_port`` and
    ``protvis_converted_files_dir``; existing values are left untouched.
    """
    if "protvis_home" not in env:
        env["protvis_home"] = "%s/%s" % (env.galaxy_tools_dir, "protvis")
    for key, fallback in (("protvis_user", "galaxy"), ("protvis_port", "8500")):
        if key not in env:
            env[key] = fallback
    if "protvis_converted_files_dir" not in env:
        data_root = env.get('galaxy_data_dir', "/mnt/galaxyData/")
        env['protvis_converted_files_dir'] = "%s/tmp/protvis" % data_root
def _setup_protvis_service(env):
    """Install the protvis init script and defaults file, then register the service.

    The paths are passed as plain literals: the previous single-argument
    ``os.path.join`` calls returned their argument unchanged.
    """
    # NOTE(review): _setup_conf_file and _setup_simple_service are not named in
    # this module's visible import lines -- confirm they are in scope here.
    _setup_conf_file(env, "/etc/init.d/protvis", "protvis_init", default_source="protvis_init")
    _setup_conf_file(env, "/etc/default/protvis", "protvis_default")
    _setup_simple_service("protvis")
================================================
FILE: cloudbio/custom/java.py
================================================
"""Install instructions for non-packaged java programs.
"""
import os
from fabric.api import *
from fabric.contrib.files import *
from shared import (_if_not_installed, _make_tmp_dir)
from cloudbio.custom import shared
@_if_not_installed("lein -v")
def install_leiningen(env):
    """Clojure tool for project configuration and automation.
    http://github.com/technomancy/leiningen

    Fetches the stable ``lein`` script, moves it into ``<system_install>/bin``
    and runs it once from there.
    """
    lein_url = "https://raw.github.com/technomancy/leiningen/stable/bin/lein"
    bin_dir = os.path.join(env.system_install, "bin")
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            shared._remote_fetch(env, lein_url)
            env.safe_run("chmod a+rwx lein")
            env.safe_sudo("mv lein %s" % bin_dir)
            env.safe_run("%s/lein" % bin_dir)
================================================
FILE: cloudbio/custom/millstone.py
================================================
"""Install instructions for non-packaged programs required by Millstone.
"""
from fabric.api import cd
from cloudbio.custom.shared import _make_tmp_dir
def install_unafold(env):
    """Required by optmage.

    Installs ``alien`` with apt-get, downloads the unafold 3.8-1 rpm into a
    temporary directory and installs it with ``alien -i``.
    """
    rpm_name = "unafold-3.8-1.x86_64.rpm"
    # Since unafold is distributed as an .rpm, we need the program alien to
    # convert it into a .deb that can be installed on this system.
    env.safe_sudo("apt-get install -y alien")
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            env.safe_run("wget http://dinamelt.rit.albany.edu/download/%s" % rpm_name)
            env.safe_sudo("alien -i %s" % rpm_name)
================================================
FILE: cloudbio/custom/phylogeny.py
================================================
"""Install instructions for non-packaged phylogeny programs.
"""
import os
from fabric.api import *
from fabric.contrib.files import *
from cloudbio.custom.shared import _if_not_installed, _make_tmp_dir
def install_tracer(env):
    """A program for analysing results from Bayesian MCMC programs such as BEAST & MrBayes.
    http://tree.bio.ed.ac.uk/software/tracer/

    Unpacks Tracer into ``<system_install>/bioinf/tracer`` and links its
    launcher to ``<system_install>/bin/tracer``. Nothing is done when that
    link target already exists.
    """
    version = "1.5"
    install_dir = os.path.join(env.system_install, "bioinf")
    final_exe = os.path.join(env.system_install, "bin", "tracer")
    # A single existence check is sufficient: past this guard the executable
    # is known to be missing, so it is not queried a second time.
    if env.safe_exists(final_exe):
        return
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            env.safe_run("wget -O Tracer_v{0}.tgz 'http://tree.bio.ed.ac.uk/download.php?id=80&num=3'".format(
                version))
            env.safe_run("tar xvzf Tracer_v{0}.tgz".format(version))
            env.safe_run("chmod a+x Tracer_v{0}/bin/tracer".format(version))
            env.safe_sudo("mkdir -p %s" % install_dir)
            # Replace any earlier unpacked copy before moving the new one in.
            env.safe_sudo("rm -rvf %s/tracer" % install_dir)
            env.safe_sudo("mv -f Tracer_v%s %s/tracer" % (version, install_dir))
            env.safe_sudo("ln -sf %s/tracer/bin/tracer %s" % (install_dir, final_exe))
@_if_not_installed("beast -help")
def install_beast(env):
    """BEAST: Bayesian MCMC analysis of molecular sequences.
    http://beast.bio.ed.ac.uk

    Unpacks BEAST into ``<system_install>/bioinf/beast`` and links its
    programs into ``<system_install>/bin``. Nothing is done when
    ``<system_install>/bin/beast`` already exists.
    """
    version = "1.7.4"
    install_dir = os.path.join(env.system_install, "bioinf")
    final_exe = os.path.join(env.system_install, "bin", "beast")
    if env.safe_exists(final_exe):
        return
    linked_programs = ["beast", "beauti", "loganalyser", "logcombiner", "treeannotator", "treestat"]
    with _make_tmp_dir() as work_dir:
        with cd(work_dir):
            env.safe_run("wget -c http://beast-mcmc.googlecode.com/files/BEASTv%s.tgz" % version)
            env.safe_run("tar xvzf BEASTv%s.tgz" % version)
            env.safe_sudo("mkdir -p %s" % install_dir)
            # Replace any earlier unpacked copy before moving the new one in.
            env.safe_sudo("rm -rvf %s/beast" % install_dir)
            env.safe_sudo("mv -f BEASTv%s %s/beast" % (version, install_dir))
            for prog in linked_programs:
                env.safe_sudo("ln -sf %s/beast/bin/%s %s/bin/%s" % (install_dir, prog,
                                                                    env.system_install, prog))
================================================
FILE: cloudbio/custom/python.py
================================================
"""Install instructions for python libraries not ready for easy_install.
"""
import os
from fabric.api import *
from fabric.contrib.files import *
from shared import (_if_not_python_lib, _get_install, _python_make, _pip_cmd,
_is_anaconda)
@_if_not_python_lib("bx")
def install_bx_python(env):
    """Tools for manipulating biological data, particularly multiple sequence alignments
    https://bitbucket.org/james_taylor/bx-python/wiki/Home

    Installs the repository tip with pip. Outside of Anaconda the commands
    run under sudo and ``distribute`` is upgraded first.
    """
    url = "https://bitbucket.org/james_taylor/bx-python/get/tip.tar.bz2"
    # Evaluate the Anaconda check once and reuse the answer for both decisions.
    use_anaconda = _is_anaconda(env)
    cmd = env.safe_run if use_anaconda else env.safe_sudo
    if not use_anaconda:
        cmd("%s install --upgrade distribute" % _pip_cmd(env))
    cmd("%s install --upgrade %s" % (_pip_cmd(env), url))
@_if_not_python_lib("rpy")
def install_rpy(env):
    """RPy is a very simple, yet robust, Python interface to the R Programming Language.
    http://rpy.sourceforge.net/

    Returns without installing when ``R --version`` fails on the target.
    """
    version = "1.0.3"
    ext = "a"
    url_template = "http://downloads.sourceforge.net/project/rpy/rpy/%s/rpy-%s%s.zip"
    url = url_template % (version, version, ext)

    def _fix_libraries(env):
        # Drop the 'Rlapack' entry from setup.py before building.
        env.safe_run("""sed -i.bak -r -e "s/,'Rlapack'//g" setup.py""")

    # Probe for R quietly; a failing command only warns instead of aborting.
    with settings(hide('warnings', 'running', 'stdout', 'stderr'),
                  warn_only=True):
        r_check = env.safe_run("R --version")
    if r_check.failed:
        return
    _get_install(url, env, _python_make, post_unpack_fn=_fix_libraries)
@_if_not_python_lib("netsa")
def install_netsa_python(env):
    """A suite of open source tools for monitoring large-scale networks using flow data.
    http://tools.netsa.cert.org/index.html

    Installs the 1.3 release tarball with pip, under sudo unless Anaconda is in use.
    """
    version = "1.3"
    url = "http://tools.netsa.cert.org/releases/netsa-python-%s.tar.gz" % version
    if _is_anaconda(env):
        runner = env.safe_run
    else:
        runner = env.safe_sudo
    runner("%s install %s" % (_pip_cmd(env), url))
================================================
FILE: cloudbio/custom/shared.py
================================================
"""Reusable decorators and functions for custom installations.
"""
from __future__ import print_function
from contextlib import contextmanager
import functools
import os
import socket
from string import Template
import sys
import tempfile
from tempfile import NamedTemporaryFile
import urllib
import uuid
import shutil
import subprocess
import time
# Optional fabric imports, for back compatibility
try:
from fabric.api import *
from fabric.contrib.files import *
from cloudbio.fabutils import quiet, warn_only
except ImportError:
pass
# Base URL for raw file access to the CloudBioLinux master branch on GitHub.
CBL_REPO_ROOT_URL = "https://raw.github.com/chapmanb/cloudbiolinux/master/"
# -- decorators and context managers
@contextmanager
def chdir(new_dir):
    """Context manager to temporarily change to a new directory.
    http://lucentbeing.com/blog/context-managers-and-the-with-statement-in-python/

    `new_dir` is created when missing. The previous working directory is
    restored on exit, including when the body raises.
    """
    # On busy filesystems can have issues accessing main directory. Allow retries
    max_tries = 5
    failures = 0
    while True:
        try:
            cur_dir = os.getcwd()
            break
        except OSError:
            if failures > max_tries:
                raise
            failures += 1
            time.sleep(2)
    safe_makedir(new_dir)
    os.chdir(new_dir)
    try:
        yield
    finally:
        os.chdir(cur_dir)
def safe_makedir(dname):
    """Make a directory if it doesn't exist, handling concurrent race conditions.

    Returns `dname` unchanged; a falsy `dname` is returned without touching
    the filesystem. Creation is retried with a two second pause, and the
    OSError is re-raised once the retry budget is used up.
    """
    if not dname:
        return dname
    max_tries = 5
    failures = 0
    while True:
        if os.path.exists(dname):
            return dname
        # we could get an error here if multiple processes are creating
        # the directory at the same time. Grr, concurrency.
        try:
            os.makedirs(dname)
        except OSError:
            if failures > max_tries:
                raise
            failures += 1
            time.sleep(2)
def which(program, env=None):
    """Return the path to an executable, or None if it can't be found.

    program -- a bare command name, or a path with a directory component.
    env -- optional object; when truthy and carrying a ``system_install``
           attribute, CloudBioLinux install locations are searched after the
           directories on the local PATH.

    A `program` that includes a directory component is only checked as given;
    the search directories are not consulted for it.
    """
    def is_exe(fpath):
        return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

    if os.path.dirname(program):
        return program if is_exe(program) else None

    paths = os.environ["PATH"].split(os.pathsep)
    if env and hasattr(env, "system_install"):
        anaconda_dir = os.path.join(env.system_install, "anaconda")
        # The two original locations stay first so lookups that already
        # resolved keep resolving to the same file. The bin directories are
        # added to match _all_cbl_paths(env, "bin"), the search path used
        # when checking remotely for installed executables.
        paths += [env.system_install, anaconda_dir,
                  os.path.join(env.system_install, "bin"),
                  os.path.join(anaconda_dir, "bin")]
    for path in paths:
        exe_file = os.path.join(path, program)
        if is_exe(exe_file):
            return exe_file
    return None
def _if_not_installed(pname):
    """Decorator that checks if a callable program is installed.

    pname -- a command line, or a list of command lines, used to probe for
             the program.

    The wrapped installer runs when:
    - a Galaxy tool install is requested and the tool is not present yet, or
    - `pname` (any entry, when it is a list) is not found on the path.
    Otherwise the wrapper returns None without calling the installer.
    """
    def argcatcher(func):
        # Applied as a decorator so the wrapper keeps the installer's
        # __name__ and __doc__; calling functools.wraps(func) on its own
        # line builds the helper and discards it.
        @functools.wraps(func)
        def decorator(*args, **kwargs):
            if _galaxy_tool_install(args):
                run_function = not _galaxy_tool_present(args)
            elif isinstance(pname, list):
                # Built as a list so every command is probed, as before.
                run_function = any([_executable_not_on_path(x) for x in pname])
            else:
                run_function = _executable_not_on_path(pname)
            if run_function:
                return func(*args, **kwargs)
        return decorator
    return argcatcher
def _all_cbl_paths(env, ext):
    """Add paths to other non-system directories installed by CloudBioLinux.

    Returns ``<system_install>/<ext>`` and ``<system_install>/anaconda/<ext>``
    joined with ``:``.
    """
    install_roots = (env.system_install, os.path.join(env.system_install, "anaconda"))
    entries = ["%s/%s" % (root, ext) for root in install_roots]
    return ":".join(entries)
def _executable_not_on_path(pname):
    """Return True when running `pname` exits with status 127.

    The command runs with the CloudBioLinux ``bin`` and ``lib`` directories
    prepended to PATH and LD_LIBRARY_PATH, with output hidden and failures
    downgraded to warnings.
    """
    probe_cmd = ("export PATH=%s:$PATH && "
                 "export LD_LIBRARY_PATH=%s:$LD_LIBRARY_PATH && %s" %
                 (_all_cbl_paths(env, "bin"), _all_cbl_paths(env, "lib"), pname))
    with settings(hide('warnings', 'running', 'stdout', 'stderr'),
                  warn_only=True):
        result = env.safe_run(probe_cmd)
    missing = result.return_code == 127
    return missing
def _galaxy_tool_install(args):
    """Return the ``galaxy_tool_install`` entry of the first positional argument.

    args -- the positional arguments passed to a decorated installer; the
            first one is expected to be the env mapping.

    Returns False when there is no first argument, when it cannot be
    subscripted, or when it has no ``galaxy_tool_install`` key.
    """
    try:
        return args[0]["galaxy_tool_install"]
    except (IndexError, KeyError, TypeError):
        # Only lookup failures mean "not a Galaxy tool install"; anything
        # else (e.g. KeyboardInterrupt) is no longer swallowed.
        return False
def _galaxy_tool_present(args):
    """Return whether ``<system_install>/env.sh`` exists for the first argument.

    args -- the positional arguments passed to a decorated installer; the
            first one must provide a ``system_install`` key.
    """
    install_env = args[0]
    env_script = os.path.join(install_env["system_install"], "env.sh")
    return env.safe_exists(env_script)
def _if_not_python_lib(library):
    """Decorator that checks if a python library is installed.

    library -- module name imported on the target to probe for the library.

    The wrapped installer runs (and its result is returned) when the import
    attempt prints at least one ``ImportError`` line; otherwise the wrapper
    returns 1 without calling it.
    """
    def argcatcher(func):
        # Applied as a decorator so the wrapper keeps the installer's
        # __name__ and __doc__; calling functools.wraps(func) on its own
        # line builds the helper and discards it.
        @functools.wraps(func)
        def decorator(*args, **kwargs):
            with settings(warn_only=True):
                # Count ImportError lines; the trailing `cat` keeps the
                # pipeline's exit status at zero when grep finds nothing.
                errcount = int(env.safe_run_output("%s -c 'import %s' 2>&1 | grep -c ImportError | cat" % (_python_cmd(env), library)))
                result = 0 if errcount >= 1 else 1
            if result == 0:
                return func(*args, **kwargs)
            else:
                return result
        return decorator
    return argcatcher
@contextmanager
def make_tmp_dir_local(ext, work_dir):
    """Create a local temporary directory, yield its path, then remove it.

    ext -- optional suffix appended to `work_dir` to form the directory name.
    work_dir -- base path of the directory.

    The directory is removed only when the ``with`` body finishes without
    raising.
    """
    tmp_path = work_dir + ext if ext else work_dir
    safe_makedir(tmp_path)
    yield tmp_path
    shutil.rmtree(tmp_path)
@contextmanager
def _make_tmp_dir(ext=None, work_dir=None):
    """
    Setup a temporary working directory for building custom software.

    When `work_dir` is not given it comes from ``__work_dir()``: the fabric
    environment's `work_dir` if set, else the remote $TMPDIR/cloudbiolinux if
    $TMPDIR is defined remotely, else the remote $HOME/tmp/cloudbiolinux.

    `ext` allows creation of tool specific temporary directories to avoid conflicts
    using CloudBioLinux inside of CloudBioLinux.

    The directory is removed after the ``with`` body finishes without raising.
    """
    target_dir = work_dir if work_dir else __work_dir()
    if ext:
        target_dir += ext
    use_sudo = False
    if not env.safe_exists(target_dir):
        with settings(warn_only=True):
            # Try to create this directory without using sudo, but
            # if needed fallback.
            result = env.safe_run("mkdir -p '%s'" % target_dir)
            use_sudo = result.return_code != 0
        if use_sudo:
            env.safe_sudo("mkdir -p '%s'" % target_dir)
            env.safe_sudo("chown -R %s '%s'" % (env.user, target_dir))
    yield target_dir
    if env.safe_exists(target_dir):
        # Remove with the same privilege level that was needed to create it.
        if use_sudo:
            run_func = env.safe_sudo
        else:
            run_func = env.safe_run
        run_func("rm -rf %s" % target_dir)
def __work_dir():
    """Return the remote working directory used for temporary builds.

    Uses ``env["work_dir"]`` when set. Otherwise the result is
    ``<tmp>/cloudbiolinux``, where ``<tmp>`` is the remote $TMPDIR, or
    ``$HOME/tmp`` when $TMPDIR cannot be read or is empty.
    """
    configured = env.get("work_dir", None)
    if configured:
        return configured
    with quiet():
        tmp_dir = env.safe_run_output("echo $TMPDIR")
    if tmp_dir.failed or not tmp_dir.strip():
        home_dir = env.safe_run_output("echo $HOME")
        tmp_dir = os.path.join(home_dir, "tmp")
    return os.path.join(tmp_dir.strip(), "cloudbiolinux")
# -- Standard build utility simplifiers
def _get_expected_file(url, dir_name=None, safe_tar=False, tar_file_name=None):
    """Work out the archive name, unpack directory and unpack command for a URL.

    url -- download URL; its last path component (query string removed) is
           the archive name unless `tar_file_name` is given.
    dir_name -- expected unpack directory; defaults to the archive name
                without its extension.
    safe_tar -- when true, tar commands get a ``--pax-option`` that deletes
                SCHILY.* and LIBARCHIVE.* extended headers.
    tar_file_name -- explicit archive name overriding the one from `url`.

    Returns a ``(tar_file, dir_name, tar_cmd)`` tuple.
    Raises ValueError when the archive extension is not recognised.
    """
    if tar_file_name:
        tar_file = tar_file_name
    else:
        tar_file = os.path.split(url.split("?")[0])[-1]
    safe_tar = "--pax-option='delete=SCHILY.*,delete=LIBARCHIVE.*'" if safe_tar else ""
    # A tuple of pairs rather than a dict: fixed lookup order and no
    # dependence on dict.iteritems(), which does not exist on Python 3.
    exts = (((".tar.gz", ".tgz"), "tar %s -xzpf" % safe_tar),
            ((".tar",), "tar %s -xpf" % safe_tar),
            ((".tar.bz2",), "tar %s -xjpf" % safe_tar),
            ((".zip",), "unzip"))
    for ext_choices, tar_cmd in exts:
        for ext in ext_choices:
            if tar_file.endswith(ext):
                if dir_name is None:
                    dir_name = tar_file[:-len(ext)]
                return tar_file, dir_name, tar_cmd
    raise ValueError("Did not find extract command for %s" % url)
def _safe_dir_name(dir_name, need_dir=True):
    """Find the directory an archive unpacked to, starting from its expected name.

    Tries, in order: `dir_name` as given and with ``-src`` / ``_core``
    removed; a single directory matching a fragment of `dir_name`; a single
    directory in the current remote directory.

    Returns the directory name found. When nothing matches, raises ValueError
    if `need_dir` is true and returns None otherwise.
    """
    for removable in ("", "-src", "_core"):
        candidate = dir_name.replace(removable, "")
        if env.safe_exists(candidate):
            return candidate
    # still couldn't find it, it's a nasty one
    name_fragments = (dir_name.split("-")[0].split("_")[0],
                      dir_name.split("-")[-1].split("_")[-1],
                      dir_name.split(".")[0],
                      dir_name.lower().split(".")[0])
    for fragment in name_fragments:
        with settings(hide('warnings', 'running', 'stdout', 'stderr'),
                      warn_only=True):
            listing = env.safe_run_output("ls -d1 *%s*/" % fragment).split("\n")
        # Drop the error lines ls prints when nothing matches.
        matches = [x for x in listing if "cannot access" not in x and "No such" not in x]
        if len(matches) == 1 and matches[0]:
            return matches[0]
    top_level_dirs = env.safe_run_output("find * -type d -maxdepth 0").split("\n")
    if len(top_level_dirs) == 1 and top_level_dirs[0]:
        return top_level_dirs[0]
    if need_dir:
        raise ValueError("Could not find directory %s" % dir_name)
def _remote_fetch(env, url, out_file=None, allow_fail=False, fix_fn=None, samedir=False):
"""Retrieve url using wget, performing download in a temporary directory.
Provides a central location to handle retrieval issues and avoid
using interrupted downloads.
"""
if out_file is None:
out_file = os.path.basename(url)
if not os.path.exists(out_file):
if samedir and os.path.isabs(out_file):
orig_dir = os.path.dirname(out_file)
out_file = os.path.basename(out_file)
else:
orig_dir = os.getcwd()
temp_ext = "/%s" % uuid.uuid3(uuid.NAMESPACE_URL,
str("file://%s/%s/%s" %
("localhost", socket.gethostname(), out_file)))
with make_tmp_dir_local(ext=temp_ext, work_dir=orig_dir) as tmp_dir:
with chdir(tmp_dir):
try:
subprocess.check_call("wget --continue --no-check-certificate -O %s '%s'"
% (out_file, url), shell=True)
if fix_fn:
out_file = fix_fn(env, out_file)
subprocess.check_call("mv %s %s" % (out_file, orig_di
gitextract_z4wsjush/
├── .gitignore
├── .gitmodules
├── LICENSE.txt
├── MANIFEST.in
├── README.rst
├── cloudbio/
│ ├── __init__.py
│ ├── biodata/
│ │ ├── __init__.py
│ │ ├── galaxy.py
│ │ ├── genomes.py
│ │ ├── ggd.py
│ │ └── rnaseq.py
│ ├── cloudbiolinux.py
│ ├── cloudman.py
│ ├── config_management/
│ │ ├── __init__.py
│ │ ├── chef.py
│ │ ├── puppet.py
│ │ └── utils.py
│ ├── custom/
│ │ ├── __init__.py
│ │ ├── bio_general.py
│ │ ├── bio_nextgen.py
│ │ ├── bio_proteomics.py
│ │ ├── bio_proteomics_wine.py
│ │ ├── cloudman.py
│ │ ├── distributed.py
│ │ ├── galaxy.py
│ │ ├── galaxy_tools.py
│ │ ├── galaxyp.py
│ │ ├── java.py
│ │ ├── millstone.py
│ │ ├── phylogeny.py
│ │ ├── python.py
│ │ ├── shared.py
│ │ ├── system.py
│ │ ├── vcr.py
│ │ └── versioncheck.py
│ ├── deploy/
│ │ ├── __init__.py
│ │ ├── config.py
│ │ ├── main.py
│ │ ├── plugins/
│ │ │ ├── __init__.py
│ │ │ ├── cloudman.py
│ │ │ ├── galaxy.py
│ │ │ └── gvl.py
│ │ ├── util.py
│ │ ├── vmlauncher/
│ │ │ ├── __init__.py
│ │ │ ├── config.md
│ │ │ └── transfer.py
│ │ └── volume.py
│ ├── distribution.py
│ ├── fabutils.py
│ ├── flavor/
│ │ ├── __init__.py
│ │ └── config.py
│ ├── galaxy/
│ │ ├── __init__.py
│ │ ├── applications.py
│ │ ├── r.py
│ │ ├── tools.py
│ │ └── utils.py
│ ├── libraries.py
│ ├── manifest.py
│ ├── package/
│ │ ├── __init__.py
│ │ ├── brew.py
│ │ ├── conda.py
│ │ ├── cpan.py
│ │ ├── deb.py
│ │ ├── nix.py
│ │ ├── rpm.py
│ │ └── shared.py
│ └── utils.py
├── config/
│ ├── README.md
│ ├── biodata.yaml
│ ├── chef/
│ │ └── cookbooks/
│ │ └── .gitkeep
│ ├── chef_recipes.yaml
│ ├── custom.yaml
│ ├── fabricrc.txt
│ ├── haskell-libs.yaml
│ ├── main.yaml
│ ├── node_extra.json
│ ├── packages-debian.yaml
│ ├── packages-homebrew.yaml
│ ├── packages-nix.yaml
│ ├── packages-scientificlinux.yaml
│ ├── packages-yum.yaml
│ ├── packages.yaml
│ ├── perl-libs.yaml
│ ├── puppet/
│ │ └── modules/
│ │ └── .gitkeep
│ ├── puppet_classes.yaml
│ ├── python-libs.yaml
│ ├── r-libs.yaml
│ └── ruby-libs.yaml
├── contrib/
│ ├── __init__.py
│ └── flavor/
│ ├── __init__.py
│ ├── biocloudcentral/
│ │ └── main.yaml
│ ├── biopython/
│ │ ├── custom.yaml
│ │ ├── fabricrc.txt
│ │ ├── main.yaml
│ │ ├── packages-homebrew.yaml
│ │ ├── packages.yaml
│ │ └── python-libs.yaml
│ ├── boinc/
│ │ ├── __init__.py
│ │ ├── boincflavor.py
│ │ ├── fabricrc_debian.txt
│ │ └── main.yaml
│ ├── cloudman/
│ │ ├── README.md
│ │ ├── cloudman/
│ │ │ └── main.yaml
│ │ ├── cloudman_and_galaxy/
│ │ │ └── main.yaml
│ │ ├── cloudman_and_galaxyp/
│ │ │ └── main.yaml
│ │ ├── cloudman_desktop_and_galaxyp/
│ │ │ ├── main.yaml
│ │ │ └── ruby-libs.yaml
│ │ ├── migration_checklist.md
│ │ └── tools.yaml
│ ├── cwl_dockers/
│ │ └── packages-bcbio-alignment.yaml
│ ├── demo/
│ │ ├── README.md
│ │ ├── custom.yaml
│ │ ├── fabricrc.txt
│ │ ├── main.yaml
│ │ └── packages-homebrew.yaml
│ ├── edx_course/
│ │ ├── custom.yaml
│ │ ├── edx_setup.sh
│ │ ├── fabricrc.txt
│ │ ├── main.yaml
│ │ ├── packages-homebrew.yaml
│ │ └── python-libs.yaml
│ ├── globus/
│ │ └── main.yaml
│ ├── millstone/
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── custom.yaml
│ │ ├── installer.py
│ │ ├── main.yaml
│ │ ├── millstoneflavor.py
│ │ └── python-libs.yaml
│ ├── minimal/
│ │ ├── fabricrc_debian.txt
│ │ └── main.yaml
│ ├── neuro/
│ │ ├── __init__.py
│ │ ├── custom.yaml
│ │ ├── fabricrc.txt
│ │ ├── main.yaml
│ │ ├── neuro.py
│ │ └── packages.yaml
│ ├── ngs_pipeline_minimal/
│ │ ├── custom.yaml
│ │ ├── main.yaml
│ │ ├── packages-conda.yaml
│ │ ├── packages-homebrew.yaml
│ │ ├── perl-libs.yaml
│ │ └── r-libs.yaml
│ ├── phylogeny/
│ │ ├── __init__.py
│ │ ├── fabricrc_debian.txt
│ │ ├── fabricrc_ubuntu.txt
│ │ ├── install_debian.sh
│ │ ├── install_ubuntu.sh
│ │ ├── main.yaml
│ │ ├── phylogenyflavor.py
│ │ └── virtualbox.md
│ ├── pjotrp/
│ │ ├── __init__.py
│ │ └── biotest/
│ │ ├── __init__.py
│ │ ├── biotestflavor.py
│ │ ├── fabricrc_debian.txt
│ │ └── main.yaml
│ ├── proteomics/
│ │ ├── galaxyp/
│ │ │ ├── README.md
│ │ │ ├── main.yaml
│ │ │ ├── settings-sample-galaxyp.yaml
│ │ │ └── tools.yaml
│ │ └── swift/
│ │ ├── custom.yaml
│ │ ├── main.yaml
│ │ └── r-libs.yaml
│ ├── seal/
│ │ ├── __init__.py
│ │ ├── fabricrc_sl.txt
│ │ ├── main.yaml
│ │ └── sealflavor.py
│ └── variantviz/
│ ├── custom.yaml
│ ├── fabricrc.txt
│ ├── main.yaml
│ └── packages-yum.yaml
├── contributors.mkd
├── data_fabfile.py
├── deploy/
│ ├── README.md
│ ├── TODO
│ ├── Vagrantfile
│ ├── cloudman.html
│ ├── cloudman.md
│ ├── config/
│ │ └── tool_data_table_conf.xml
│ ├── deploy.sh
│ ├── deploy_bourne.sh
│ ├── deploy_no_deps.sh
│ ├── requirements.txt
│ ├── settings-sample-cm.yaml
│ ├── settings-sample-minimal.yaml
│ ├── settings-sample-oldgalaxyvmlauncher.yaml
│ ├── test_install_galaxy_tool.py
│ └── update_dependencies.sh
├── doc/
│ ├── Makefile
│ ├── hacking.md
│ ├── intro/
│ │ ├── FAQ.tex
│ │ ├── README
│ │ ├── basicTerminology.aux
│ │ ├── basicTerminology.tex
│ │ ├── cloudbl_desktopIntro.aux
│ │ ├── cloudbl_desktopIntro.tex
│ │ ├── getReady.aux
│ │ ├── getReady.tex
│ │ ├── gettingStarted_CloudBioLinux.aux
│ │ ├── gettingStarted_CloudBioLinux.out
│ │ ├── gettingStarted_CloudBioLinux.tex
│ │ ├── gettingStarted_CloudBioLinux.toc
│ │ ├── images/
│ │ │ ├── createAndMountVol-1.odg
│ │ │ ├── nutshell.odg
│ │ │ └── unmountDetach-1.odg
│ │ ├── tips.tex
│ │ ├── usefulLinks.tex
│ │ ├── workingOnCloudBL.aux
│ │ ├── workingOnCloudBL.tex
│ │ ├── workingWithData.aux
│ │ └── workingWithData.tex
│ ├── linux_kvm.md
│ ├── private_cloud.md
│ ├── remote_gui.md
│ ├── source/
│ │ ├── conf.py
│ │ ├── framework.rst
│ │ └── index.rst
│ └── virtualbox.md
├── fabfile.py
├── ggd-recipes/
│ ├── BDGP6/
│ │ ├── gtf.yaml
│ │ ├── mirbase.yaml
│ │ ├── seq.yaml
│ │ └── transcripts.yaml
│ ├── GRCh37/
│ │ ├── 1000g.yaml
│ │ ├── 1000g_omni_snps.yaml
│ │ ├── 1000g_snps.yaml
│ │ ├── ACMG56_genes.yaml
│ │ ├── GA4GH_problem_regions.yaml
│ │ ├── GRCh37_NCBI2ensembl.txt
│ │ ├── MIG.yaml
│ │ ├── RADAR.yaml
│ │ ├── af_only_gnomad.yaml
│ │ ├── ancestral.yaml
│ │ ├── battenberg.yaml
│ │ ├── capture_regions.yaml
│ │ ├── clinvar.yaml
│ │ ├── cosmic.yaml
│ │ ├── dbnsfp.yaml
│ │ ├── dbscsnv.yaml
│ │ ├── dbsnp.yaml
│ │ ├── dream-syn3.yaml
│ │ ├── dream-syn4.yaml
│ │ ├── ericscript.yaml
│ │ ├── esp.yaml
│ │ ├── exac.yaml
│ │ ├── fusion-blacklist.yaml
│ │ ├── genesplicer.yaml
│ │ ├── giab-NA12878-NA24385-somatic.yaml
│ │ ├── giab-NA12878.yaml
│ │ ├── giab-NA24143.yaml
│ │ ├── giab-NA24149.yaml
│ │ ├── giab-NA24385.yaml
│ │ ├── giab-NA24631.yaml
│ │ ├── giab-NA24694.yaml
│ │ ├── giab-NA24695.yaml
│ │ ├── gnomad.yaml
│ │ ├── gnomad_exome.yaml
│ │ ├── gnomad_sv.yaml
│ │ ├── hapmap.yaml
│ │ ├── mills_indels.yaml
│ │ ├── prioritize.yaml
│ │ ├── qsignature.yaml
│ │ ├── seq.yaml
│ │ ├── topmed.yaml
│ │ ├── transcripts.yaml
│ │ ├── twobit.yaml
│ │ ├── varpon.yaml
│ │ ├── vcfanno.yaml
│ │ └── viral.yaml
│ ├── GRCz11/
│ │ ├── seq.yaml
│ │ ├── transcripts.yaml
│ │ └── twobit.yaml
│ ├── README.md
│ ├── Sscrofa11.1/
│ │ ├── seq.yaml
│ │ ├── transcripts.yaml
│ │ └── twobit.yaml
│ ├── TAIR10/
│ │ └── mirbase.yaml
│ ├── canFam3/
│ │ ├── dbsnp.yaml
│ │ ├── mirbase.yaml
│ │ ├── transcripts.yaml
│ │ └── twobit.yaml
│ ├── hg19/
│ │ ├── 1000g.yaml
│ │ ├── 1000g_omni_snps.yaml
│ │ ├── 1000g_snps.yaml
│ │ ├── ACMG56_genes.yaml
│ │ ├── GA4GH_problem_regions.yaml
│ │ ├── MIG.yaml
│ │ ├── RADAR.yaml
│ │ ├── af_only_gnomad.yaml
│ │ ├── battenberg.yaml
│ │ ├── capture_regions.yaml
│ │ ├── clinvar.yaml
│ │ ├── cosmic.yaml
│ │ ├── dbsnp.yaml
│ │ ├── effects_transcripts.yaml
│ │ ├── esp.yaml
│ │ ├── exac.yaml
│ │ ├── fusion-blacklist.yaml
│ │ ├── giab-NA12878.yaml
│ │ ├── giab-NA24143.yaml
│ │ ├── giab-NA24149.yaml
│ │ ├── giab-NA24385.yaml
│ │ ├── giab-NA24631.yaml
│ │ ├── gnomad.yaml
│ │ ├── gnomad_exome.yaml
│ │ ├── gnomad_genome.grch37_to_hg19.sh
│ │ ├── gtf.yaml
│ │ ├── hapmap.yaml
│ │ ├── mills_indels.yaml
│ │ ├── mirbase.yaml
│ │ ├── platinum-genome-NA12878.yaml
│ │ ├── prioritize.yaml
│ │ ├── purecn_mappability.yaml
│ │ ├── rmsk.yaml
│ │ ├── seq.yaml
│ │ ├── simple_repeat.yaml
│ │ ├── topmed.yaml
│ │ ├── transcripts.yaml
│ │ ├── twobit.yaml
│ │ ├── varpon.yaml
│ │ └── viral.yaml
│ ├── hg38/
│ │ ├── 1000g_indels.yaml
│ │ ├── 1000g_omni_snps.yaml
│ │ ├── 1000g_snps.yaml
│ │ ├── ACMG56_genes.yaml
│ │ ├── RADAR.yaml
│ │ ├── README.md
│ │ ├── af_only_gnomad.yaml
│ │ ├── bwa.yaml
│ │ ├── canonical_cancer_99.txt
│ │ ├── capture_regions.yaml
│ │ ├── ccds.yaml
│ │ ├── clinvar.yaml
│ │ ├── coverage.yaml
│ │ ├── dbnsfp.yaml
│ │ ├── dbscsnv.yaml
│ │ ├── dbsnp.yaml
│ │ ├── dream-syn3-crossmap.yaml
│ │ ├── dream-syn4-crossmap.yaml
│ │ ├── effects_transcripts.yaml
│ │ ├── ericscript.yaml
│ │ ├── esp.yaml
│ │ ├── exac.yaml
│ │ ├── fusion-blacklist.yaml
│ │ ├── genesplicer.yaml
│ │ ├── genotype2phenotype.yaml
│ │ ├── giab-NA12878-NA24385-somatic.yaml
│ │ ├── giab-NA12878-crossmap.yaml
│ │ ├── giab-NA12878-remap.yaml
│ │ ├── giab-NA12878.yaml
│ │ ├── giab-NA24143.yaml
│ │ ├── giab-NA24149.yaml
│ │ ├── giab-NA24385.yaml
│ │ ├── giab-NA24631.yaml
│ │ ├── giab-NA24694.yaml
│ │ ├── giab-NA24695.yaml
│ │ ├── gnomad.yaml
│ │ ├── gnomad_exome.yaml
│ │ ├── gnomad_fields_to_keep.txt
│ │ ├── gtf.yaml
│ │ ├── hapmap_snps.yaml
│ │ ├── hisat2.yaml
│ │ ├── mills_indels.yaml
│ │ ├── mirbase.yaml
│ │ ├── platinum-genome-NA12878.yaml
│ │ ├── prioritize.yaml
│ │ ├── purecn_mappability.yaml
│ │ ├── qsignature.yaml
│ │ ├── rmsk.yaml
│ │ ├── salmon-decoys.yaml
│ │ ├── seq.yaml
│ │ ├── simple_repeat.yaml
│ │ ├── topmed.yaml
│ │ ├── transcripts.yaml
│ │ ├── twobit.yaml
│ │ ├── varpon.yaml
│ │ ├── vcfanno.yaml
│ │ └── viral.yaml
│ ├── hg38-noalt/
│ │ ├── RADAR.yaml
│ │ ├── README.md
│ │ ├── bowtie2.yaml
│ │ ├── bwa.yaml
│ │ ├── gtf.yaml
│ │ ├── mirbase.yaml
│ │ ├── seq.yaml
│ │ └── transcripts.yaml
│ ├── mm10/
│ │ ├── dbsnp.yaml
│ │ ├── mirbase.yaml
│ │ ├── prioritize.yaml
│ │ ├── problem_regions.yaml
│ │ ├── rmsk.yaml
│ │ ├── seq.yaml
│ │ ├── transcripts.yaml
│ │ ├── twobit.yaml
│ │ └── vcfanno.yaml
│ ├── rn6/
│ │ ├── mirbase.yaml
│ │ ├── seq.yaml
│ │ ├── transcripts.yaml
│ │ └── twobit.yaml
│ └── sacCer3/
│ ├── seq.yaml
│ └── transcripts.yaml
├── installed_files/
│ ├── bash_history
│ ├── bash_login
│ ├── ec2autorun.py
│ ├── galaxy_default.template
│ ├── galaxy_init
│ ├── galaxyp_nginx.conf.template
│ ├── image_user_data
│ ├── ipython_config.py
│ ├── jwmrc.xml
│ ├── nginx.conf.template
│ ├── nginx_init
│ ├── novnc_default.template
│ ├── novnc_init
│ ├── pg_ctl
│ ├── proftpd.conf.template
│ ├── protvis_default.template
│ ├── protvis_init
│ ├── psql
│ ├── setupnx.sh
│ ├── tool_data_table_conf.xml
│ ├── vncserver_default.template
│ ├── vncserver_init
│ ├── xstartup
│ ├── xvfb_default
│ └── xvfb_init
├── manifest/
│ ├── custom-packages.yaml
│ ├── debian-packages.yaml
│ ├── python-packages.yaml
│ └── r-packages.yaml
├── setup.py
├── test/
│ ├── README
│ ├── test_biolinux
│ ├── test_vagrant
│ └── testlib/
│ ├── test_biolinux.rb
│ └── test_support.rb
└── utils/
├── bootstrap.sh
├── cbl_exome_setup.py
├── cbl_installed_software.py
├── convert_to_xz.py
├── cwl2yaml_packages.py
├── get_biolinux_packages.py
├── get_yum_packages.py
├── images_and_snapshots.py
├── prep_esp_hg38.py
├── prepare_cosmic.py
├── prepare_dbsnp.py
├── prepare_tx_gff.py
├── prioritize/
│ ├── AZ300.txt
│ ├── AZ300_with_known.txt
│ ├── az-cancer-panel.txt
│ ├── az300_to_bed.py
│ ├── prep_ccds_genes.py
│ └── prep_prioritize_downloads.sh
├── query_conda_deps.py
├── s3_multipart_upload.py
└── sv/
├── NA24385_crowd_dels.py
└── NA24385_giab_dels.py
SYMBOL INDEX (940 symbols across 83 files)
FILE: cloudbio/biodata/galaxy.py
class LocCols (line 37) | class LocCols(object):
method __init__ (line 40) | def __init__(self, config, dbkey, file_path):
function _get_tool_conf (line 51) | def _get_tool_conf(env, tool_name):
function _build_galaxy_loc_line (line 66) | def _build_galaxy_loc_line(env, dbkey, file_path, config, prefix, tool_n...
function update_loc_file (line 85) | def update_loc_file(env, ref_file, line_parts):
function prep_locs (line 108) | def prep_locs(env, gid, indexes, config):
function index_picard (line 129) | def index_picard(ref_file):
function _finalize_index_seq (line 138) | def _finalize_index_seq(fname):
function _finalize_index (line 149) | def _finalize_index(idx, fname):
function rsync_genomes (line 158) | def rsync_genomes(genome_dir, genomes, genome_indexes):
function _get_galaxy_genomes (line 169) | def _get_galaxy_genomes(gid, genome_dir, genomes, genome_indexes):
function _rsync_genome_index (line 187) | def _rsync_genome_index(gid, idx, org_dir):
FILE: cloudbio/biodata/genomes.py
class _DownloadHelper (line 37) | class _DownloadHelper:
method __init__ (line 38) | def __init__(self):
method ucsc_name (line 41) | def ucsc_name(self):
method _exists (line 44) | def _exists(self, fname, seq_dir):
class UCSCGenome (line 49) | class UCSCGenome(_DownloadHelper):
method __init__ (line 50) | def __init__(self, genome_name, dl_name=None):
method ucsc_name (line 58) | def ucsc_name(self):
method _karyotype_sort (line 61) | def _karyotype_sort(self, xs):
method _split_multifasta (line 88) | def _split_multifasta(self, fasta_file):
method download (line 106) | def download(self, seq_dir):
method _download_zip (line 142) | def _download_zip(self, seq_dir):
class NCBIRest (line 153) | class NCBIRest(_DownloadHelper):
method __init__ (line 156) | def __init__(self, name, refs, dl_name=None):
method download (line 164) | def download(self, seq_dir):
class VectorBase (line 178) | class VectorBase(_DownloadHelper):
method __init__ (line 181) | def __init__(self, name, genus, species, strain, release, assembly_typ...
method download (line 193) | def download(self, seq_dir):
class EnsemblGenome (line 202) | class EnsemblGenome(_DownloadHelper):
method __init__ (line 212) | def __init__(self, ensembl_section, release, organism, name, subsectio...
method download (line 231) | def download(self, seq_dir):
class BroadGenome (line 239) | class BroadGenome(_DownloadHelper):
method __init__ (line 244) | def __init__(self, name, target_fasta, dl_name=None):
method download (line 253) | def download(self, seq_dir):
class GGDGenome (line 261) | class GGDGenome:
method __init__ (line 264) | def __init__(self, name):
function _check_version (line 319) | def _check_version(env):
function install_data (line 324) | def install_data(config_source, approaches=None):
function install_data_local (line 332) | def install_data_local(config_source, system_installdir, data_filedir,
function install_data_s3 (line 357) | def install_data_s3(config_source):
function install_data_rsync (line 370) | def install_data_rsync(config_source):
function upload_s3 (line 383) | def upload_s3(config_source):
function _install_additional_data (line 398) | def _install_additional_data(env, genomes, genome_indexes, config):
function _get_genomes (line 407) | def _get_genomes(config_source):
function _if_installed (line 437) | def _if_installed(pname):
function _make_genome_dir (line 451) | def _make_genome_dir(data_filedir):
function _make_genome_directories (line 456) | def _make_genome_directories(genomes, data_filedir):
function _prep_genomes (line 463) | def _prep_genomes(env, genomes, genome_indexes, retrieve_fns, data_filed...
function _get_ref_seq (line 504) | def _get_ref_seq(manager):
function _prep_raw_index (line 514) | def _prep_raw_index(env, manager, gid, idx):
function _data_ngs_genomes (line 521) | def _data_ngs_genomes(env, genomes, genome_indexes):
function _index_to_galaxy (line 539) | def _index_to_galaxy(env, work_dir, ref_file, gid, genome_indexes, config):
class CustomMaskManager (line 550) | class CustomMaskManager:
method __init__ (line 553) | def __init__(self, custom, config):
method download (line 558) | def download(self, seq_dir):
function _prep_custom_genome (line 576) | def _prep_custom_genome(custom, genomes, genome_indexes, env):
function _clean_genome_directory (line 592) | def _clean_genome_directory():
function _move_seq_files (line 599) | def _move_seq_files(ref_file, base_zips, seq_dir):
function _index_w_command (line 612) | def _index_w_command(env, dir_name, command, ref_file, pre=None, post=No...
function _index_twobit (line 631) | def _index_twobit(env, ref_file):
function _index_bowtie (line 638) | def _index_bowtie(env, ref_file):
function _index_bowtie2 (line 643) | def _index_bowtie2(env, ref_file):
function _index_bwa (line 654) | def _index_bwa(env, ref_file):
function _index_bbmap (line 669) | def _index_bbmap(env, ref_file):
function _index_bismark (line 681) | def _index_bismark(env, ref_file):
function _index_maq (line 694) | def _index_maq(env, ref_file):
function _index_minimap2 (line 705) | def _index_minimap2(env, ref_file):
function _index_novoalign (line 716) | def _index_novoalign(env, ref_file):
function _index_novoalign_cs (line 722) | def _index_novoalign_cs(env, ref_file):
function _index_sam (line 727) | def _index_sam(env, ref_file):
function _index_star (line 736) | def _index_star(env, ref_file):
function _index_hisat2 (line 782) | def _index_hisat2(env, ref_file):
function _index_snap (line 818) | def _index_snap(env, ref_file):
function _get_path_export (line 829) | def _get_path_export(env):
function _index_rtg (line 839) | def _index_rtg(env, ref_file):
function _index_mosaik (line 854) | def _index_mosaik(env, ref_file):
function _install_with_ggd (line 870) | def _install_with_ggd(env, manager, gid, recipe):
function _download_s3_index (line 881) | def _download_s3_index(env, manager, gid, idx):
function _download_genomes (line 891) | def _download_genomes(env, genomes, genome_indexes):
function _upload_genomes (line 910) | def _upload_genomes(env, genomes, genome_indexes):
function _upload_to_s3 (line 925) | def _upload_to_s3(tarball, bucket):
function _tar_directory (line 937) | def _tar_directory(dir, tar_name):
function _clean_directory (line 948) | def _clean_directory(dir, gid):
function _data_liftover (line 967) | def _data_liftover(env, lift_over_genomes):
function _data_uniref (line 996) | def _data_uniref(env):
function _index_blast_db (line 1023) | def _index_blast_db(work_dir, base_file, db_type):
function get_index_fn (line 1033) | def get_index_fn(index):
function prepare_simple_reference (line 1042) | def prepare_simple_reference(ref_file, out_file):
function is_alt (line 1056) | def is_alt(chrom):
function is_decoy (line 1059) | def is_decoy(chrom):
function is_HLA (line 1062) | def is_HLA(chrom):
FILE: cloudbio/biodata/ggd.py
function install_recipe (line 17) | def install_recipe(base_dir, system_install, recipe_file, genome_build):
function _has_required_programs (line 34) | def _has_required_programs(programs):
function _run_recipe (line 54) | def _run_recipe(work_dir, recipe_cmds, recipe_type, system_install):
function _move_files (line 64) | def _move_files(tmp_dir, final_dir, targets):
function _read_recipe (line 82) | def _read_recipe(in_file):
function version_uptodate (line 90) | def version_uptodate(base_dir, recipe):
function add_version (line 98) | def add_version(base_dir, recipe):
function _get_versions (line 105) | def _get_versions(base_dir):
function _get_version_file (line 115) | def _get_version_file(base_dir):
function tx_tmpdir (line 121) | def tx_tmpdir(base_dir):
function chdir (line 131) | def chdir(new_dir):
FILE: cloudbio/biodata/rnaseq.py
function finalize (line 8) | def finalize(genomes, data_filedir):
function cleanup (line 23) | def cleanup(genomes, data_filedir):
FILE: cloudbio/cloudbiolinux.py
function _freenx_scripts (line 9) | def _freenx_scripts(env):
function _cleanup_space (line 27) | def _cleanup_space(env):
function _configure_gnome (line 37) | def _configure_gnome(env):
FILE: cloudbio/cloudman.py
function _configure_cloudman (line 36) | def _configure_cloudman(env, use_repo_autorun=False):
function _configure_desktop (line 55) | def _configure_desktop(env):
function _configure_novnc (line 74) | def _configure_novnc(env):
function _configure_vncpasswd (line 111) | def _configure_vncpasswd(env):
function _setup_env (line 121) | def _setup_env(env):
function _configure_logrotate (line 157) | def _configure_logrotate(env):
function _configure_ec2_autorun (line 168) | def _configure_ec2_autorun(env, use_repo_autorun=False):
function _configure_sge (line 210) | def _configure_sge(env):
function _configure_hadoop (line 232) | def _configure_hadoop(env):
function _configure_nfs (line 250) | def _configure_nfs(env):
function install_s3fs (line 298) | def install_s3fs(env):
function _cleanup_ec2 (line 308) | def _cleanup_ec2(env):
FILE: cloudbio/config_management/chef.py
class ChefDict (line 36) | class ChefDict(_AttributeDict):
method add_recipe (line 37) | def add_recipe(self, recipe):
method add_role (line 40) | def add_role(self, role):
method _get_json (line 43) | def _get_json(self):
function omnibus (line 53) | def omnibus(env):
function _chef_provision (line 67) | def _chef_provision(env, _omnibus=True):
function _configure_chef (line 80) | def _configure_chef(env, chef):
function _build_chef_properties (line 91) | def _build_chef_properties(env, config_file):
function _parse_json (line 101) | def _parse_json(filename):
FILE: cloudbio/config_management/puppet.py
function _puppet_provision (line 17) | def _puppet_provision(env, classes):
function _build_node_def_body (line 28) | def _build_node_def_body(env, classes):
function _build_class_include (line 37) | def _build_class_include(env, class_name):
function _property_prefix (line 58) | def _property_prefix(class_name):
FILE: cloudbio/config_management/utils.py
function config_dir (line 7) | def config_dir(relative_path):
function build_properties (line 12) | def build_properties(env, prefix, overrides={}):
function upload_config (line 37) | def upload_config(config, config_folder_names=[], config_files={}):
FILE: cloudbio/custom/bio_general.py
function install_anaconda (line 12) | def install_anaconda(env):
function install_emboss (line 42) | def install_emboss(env):
function install_pgdspider (line 53) | def install_pgdspider(env):
function install_bio4j (line 72) | def install_bio4j(env):
FILE: cloudbio/custom/bio_nextgen.py
function install_ucsc_tools (line 23) | def install_ucsc_tools(env):
function install_kent_tools (line 40) | def install_kent_tools(env):
function _download_executables (line 52) | def _download_executables(env, base_url, tools):
function install_featurecounts (line 65) | def install_featurecounts(env):
function install_bowtie (line 83) | def install_bowtie(env):
function install_bowtie2 (line 94) | def install_bowtie2(env):
function install_bfast (line 105) | def install_bfast(env):
function install_perm (line 118) | def install_perm(env):
function install_snap (line 136) | def install_snap(env):
function install_stampy (line 145) | def install_stampy(env):
function install_gmap (line 165) | def install_gmap(env):
function _wget_with_cookies (line 173) | def _wget_with_cookies(ref_url, dl_url):
function install_novoalign (line 181) | def install_novoalign(env):
function install_novosort (line 211) | def install_novosort(env):
function install_lastz (line 229) | def install_lastz(env):
function install_mosaik (line 243) | def install_mosaik(env):
function install_samtools (line 254) | def install_samtools(env):
function install_varianttools (line 283) | def install_varianttools(env):
function install_dwgsim (line 293) | def install_dwgsim(env):
function install_fastq_screen (line 311) | def install_fastq_screen(env):
function install_bedtools (line 330) | def install_bedtools(env):
function install_shrec (line 357) | def install_shrec(env):
function install_echo (line 377) | def install_echo(env):
function install_picard (line 388) | def install_picard(env):
function install_alientrimmer (line 397) | def install_alientrimmer(env):
function install_rnaseqc (line 407) | def install_rnaseqc(env):
function install_varscan (line 421) | def install_varscan(env):
function install_mutect (line 434) | def install_mutect(env):
function install_bamutil (line 447) | def install_bamutil(env):
function install_tabix (line 457) | def install_tabix(env):
function install_disambiguate (line 466) | def install_disambiguate(env):
function install_grabix (line 473) | def install_grabix(env):
function install_pbgzip (line 491) | def install_pbgzip(env):
function install_bamtools (line 505) | def install_bamtools(env):
function install_ogap (line 522) | def install_ogap(env):
function install_tophat (line 531) | def install_tophat(env):
function install_abyss (line 553) | def install_abyss(env):
function install_transabyss (line 570) | def install_transabyss(env):
function install_velvet (line 580) | def install_velvet(env):
function install_ray (line 597) | def install_ray(env):
function install_trinity (line 609) | def install_trinity(env):
function install_cortex_var (line 620) | def install_cortex_var(env):
function install_bcbio_variation (line 653) | def install_bcbio_variation(env):
function install_macs (line 670) | def install_macs(env):
function install_hydra (line 682) | def install_hydra(env):
function install_freec (line 693) | def install_freec(env):
function install_crisp (line 708) | def install_crisp(env):
function install_tassel (line 722) | def install_tassel(env):
function install_stacks (line 747) | def install_stacks(env):
function install_weblogo (line 757) | def install_weblogo(env):
FILE: cloudbio/custom/bio_proteomics.py
function install_transproteomic_pipeline (line 28) | def install_transproteomic_pipeline(env):
function install_omssa (line 64) | def install_omssa(env):
function install_openms (line 75) | def install_openms(env):
function install_tint_proteomics_scripts (line 96) | def install_tint_proteomics_scripts(env):
function install_ms2preproc (line 112) | def install_ms2preproc(env):
function install_mzmine (line 129) | def install_mzmine(env):
function install_searchgui (line 148) | def install_searchgui(env):
function install_psm_eval (line 173) | def install_psm_eval(env):
function install_peptide_shaker (line 189) | def install_peptide_shaker(env):
function _get_gist_script (line 208) | def _get_gist_script(env, url):
function install_mayu (line 215) | def install_mayu(env):
function install_pride_inspector (line 231) | def install_pride_inspector(env):
function install_pride_converter2 (line 248) | def install_pride_converter2(env):
function _unzip_install (line 265) | def _unzip_install(pname, version, url, env, install_fn, dir_name="."):
function install_superhirn (line 276) | def install_superhirn(env):
function install_percolator (line 291) | def install_percolator(env):
function install_pepnovo (line 307) | def install_pepnovo(env):
function install_crux (line 324) | def install_crux(env):
function install_fido (line 337) | def install_fido(env):
function install_ipig (line 357) | def install_ipig(env):
function install_peptide_to_gff (line 374) | def install_peptide_to_gff(env):
function install_galaxy_protk (line 389) | def install_galaxy_protk(env):
function install_myrimatch (line 458) | def install_myrimatch(env):
function install_pepitome (line 464) | def install_pepitome(env):
function install_directag (line 470) | def install_directag(env):
function install_tagrecon (line 476) | def install_tagrecon(env):
function install_idpqonvert (line 483) | def install_idpqonvert(env):
function _install_tabb_tool (line 494) | def _install_tabb_tool(env, default_version, download_name, exec_names):
FILE: cloudbio/custom/bio_proteomics_wine.py
function install_proteomics_wine_env (line 8) | def install_proteomics_wine_env(env):
function install_multiplierz (line 15) | def install_multiplierz(env):
function install_proteowizard (line 32) | def install_proteowizard(env):
function install_morpheus (line 47) | def install_morpheus(env):
function setup_wine_wrapper (line 60) | def setup_wine_wrapper(env, to):
function _conf_wine (line 72) | def _conf_wine(env):
function _get_wine_prefix (line 76) | def _get_wine_prefix(env):
function _get_wine_user (line 81) | def _get_wine_user(env):
FILE: cloudbio/custom/cloudman.py
function install_cloudman (line 20) | def install_cloudman(env):
function install_ec2_autorun (line 33) | def install_ec2_autorun(env):
function install_novnc (line 37) | def install_novnc(env):
function install_nginx (line 42) | def install_nginx(env):
function install_proftpd (line 46) | def install_proftpd(env):
function install_sge (line 105) | def install_sge(env):
FILE: cloudbio/custom/distributed.py
function install_pydoop (line 11) | def install_pydoop(env):
function install_seal (line 21) | def install_seal(env):
FILE: cloudbio/custom/galaxy.py
function install_galaxy_webapp (line 10) | def install_galaxy_webapp(env):
function _prep_galaxy (line 16) | def _prep_galaxy(env):
FILE: cloudbio/custom/galaxy_tools.py
function install_cbl_galaxy_tools (line 8) | def install_cbl_galaxy_tools(env):
FILE: cloudbio/custom/galaxyp.py
function install_protvis (line 11) | def install_protvis(env):
function _setup_protvis_env (line 45) | def _setup_protvis_env(env):
function _setup_protvis_service (line 57) | def _setup_protvis_service(env):
FILE: cloudbio/custom/java.py
function install_leiningen (line 12) | def install_leiningen(env):
FILE: cloudbio/custom/millstone.py
function install_unafold (line 9) | def install_unafold(env):
FILE: cloudbio/custom/phylogeny.py
function install_tracer (line 10) | def install_tracer(env):
function install_beast (line 32) | def install_beast(env):
FILE: cloudbio/custom/python.py
function install_bx_python (line 12) | def install_bx_python(env):
function install_rpy (line 24) | def install_rpy(env):
function install_netsa_python (line 42) | def install_netsa_python(env):
FILE: cloudbio/custom/shared.py
function chdir (line 31) | def chdir(new_dir):
function safe_makedir (line 55) | def safe_makedir(dname):
function which (line 74) | def which(program, env=None):
function _if_not_installed (line 94) | def _if_not_installed(pname):
function _all_cbl_paths (line 112) | def _all_cbl_paths(env, ext):
function _executable_not_on_path (line 117) | def _executable_not_on_path(pname):
function _galaxy_tool_install (line 126) | def _galaxy_tool_install(args):
function _galaxy_tool_present (line 133) | def _galaxy_tool_present(args):
function _if_not_python_lib (line 137) | def _if_not_python_lib(library):
function make_tmp_dir_local (line 155) | def make_tmp_dir_local(ext, work_dir):
function _make_tmp_dir (line 163) | def _make_tmp_dir(ext=None, work_dir=None):
function __work_dir (line 193) | def __work_dir():
function _get_expected_file (line 208) | def _get_expected_file(url, dir_name=None, safe_tar=False, tar_file_name...
function _safe_dir_name (line 227) | def _safe_dir_name(dir_name, need_dir=True):
function _remote_fetch (line 250) | def _remote_fetch(env, url, out_file=None, allow_fail=False, fix_fn=None...
function _fetch_and_unpack (line 284) | def _fetch_and_unpack(url, need_dir=True, dir_name=None, revision=None,
function _configure_make (line 305) | def _configure_make(env):
function _ac_configure_make (line 313) | def _ac_configure_make(env):
function _make_copy (line 317) | def _make_copy(find_cmd=None, premake_cmd=None, do_make=True):
function _get_install (line 330) | def _get_install(url, env, make_command, post_unpack_fn=None, revision=N...
function _apply_patch (line 343) | def _apply_patch(env, url):
function _get_install_local (line 348) | def _get_install_local(url, env, make_command, dir_name=None,
function _symlinked_install_dir (line 378) | def _symlinked_install_dir(pname, version, env, extra_dir=None):
function _symlinked_dir_exists (line 385) | def _symlinked_dir_exists(pname, version, env, extra_dir=None):
function _symlinked_shared_dir (line 395) | def _symlinked_shared_dir(pname, version, env, extra_dir=None):
function _symlinked_java_version_dir (line 419) | def _symlinked_java_version_dir(pname, version, env):
function _java_install (line 423) | def _java_install(pname, version, url, env, install_fn=None,
function _python_cmd (line 446) | def _python_cmd(env):
function _pip_cmd (line 462) | def _pip_cmd(env):
function _conda_cmd (line 483) | def _conda_cmd(env):
function _is_anaconda (line 497) | def _is_anaconda(env):
function _python_make (line 511) | def _python_make(env):
function _get_installed_file (line 521) | def _get_installed_file(env, local_file):
function _get_installed_file_contents (line 536) | def _get_installed_file_contents(env, local_file):
function _write_to_file (line 540) | def _write_to_file(contents, path, mode):
function _get_bin_dir (line 556) | def _get_bin_dir(env):
function _get_include_dir (line 564) | def _get_include_dir(env):
function _get_lib_dir (line 568) | def _get_lib_dir(env):
function _get_install_subdir (line 572) | def _get_install_subdir(env, subdir):
function _set_default_config (line 579) | def _set_default_config(env, install_dir, sym_dir_name="default"):
function _setup_simple_service (line 600) | def _setup_simple_service(service_name):
function _render_config_file_template (line 614) | def _render_config_file_template(env, name, defaults={}, overrides={}, d...
function _extend_env (line 657) | def _extend_env(env, defaults={}, overrides={}):
function _setup_conf_file (line 673) | def _setup_conf_file(env, dest, name, defaults={}, overrides={}, default...
function _add_to_profiles (line 678) | def _add_to_profiles(line, profiles=[], use_sudo=True):
function install_venvburrito (line 691) | def install_venvburrito():
function _create_python_virtualenv (line 703) | def _create_python_virtualenv(env, venv_name, reqs_file=None, reqs_url=N...
function _create_local_python_virtualenv (line 735) | def _create_local_python_virtualenv(env, venv_name, reqs_file, reqs_url):
function _create_global_python_virtualenv (line 749) | def _create_global_python_virtualenv(env, venv_name, reqs_file, reqs_url):
function _get_bitbucket_download_url (line 771) | def _get_bitbucket_download_url(revision, default_repo):
function _read_boolean (line 779) | def _read_boolean(env, name, default):
FILE: cloudbio/custom/system.py
function install_homebrew (line 12) | def install_homebrew(env):
function _linuxbrew_origin_problem (line 64) | def _linuxbrew_origin_problem(brewcmd):
function install_s3fs (line 75) | def install_s3fs(env):
FILE: cloudbio/custom/vcr.py
function install_galaxy_vcr (line 20) | def install_galaxy_vcr(env):
function install_viralassembly (line 62) | def install_viralassembly(env):
function install_viralassembly_cleanall (line 71) | def install_viralassembly_cleanall(env):
function _initialize_area_viral (line 81) | def _initialize_area_viral():
function _add_tools_viral (line 113) | def _add_tools_viral():
function _add_refs (line 133) | def _add_refs():
function _initialize_bio_linux (line 138) | def _initialize_bio_linux():
function install_viralvigor (line 157) | def install_viralvigor(env):
function install_viralvigor_test (line 166) | def install_viralvigor_test(env):
function install_viralvigor_validate (line 180) | def install_viralvigor_validate(env):
function install_viralvigor_cleanall (line 192) | def install_viralvigor_cleanall(env):
function _initialize_area_vigor (line 203) | def _initialize_area_vigor():
function _initialize_host (line 246) | def _initialize_host():
function _add_vigor (line 251) | def _add_vigor():
function _add_tools_vigor (line 263) | def _add_tools_vigor():
function _fix_etc_hosts (line 271) | def _fix_etc_hosts():
function _create_vigor_tempspace_dir (line 277) | def _create_vigor_tempspace_dir():
function _create_vigor_scratch_dir (line 283) | def _create_vigor_scratch_dir():
function _create_tools_dir (line 289) | def _create_tools_dir():
function _add_blast (line 294) | def _add_blast():
function _add_clustalw (line 304) | def _add_clustalw():
function install_vicvb (line 314) | def install_vicvb(env):
function install_vicvb_cleanall (line 337) | def install_vicvb_cleanall(env):
function _initialize_env (line 351) | def _initialize_env(pipeline):
function _add_package (line 369) | def _add_package(download_url, filename, install_dir, type):
function _remove_dir (line 388) | def _remove_dir(dirspec):
function _unlock_dir (line 395) | def _unlock_dir(dirspec):
function _apt_get_install (line 401) | def _apt_get_install(tool):
function _path_exists (line 404) | def _path_exists(path):
function _path_is_dir (line 411) | def _path_is_dir(path):
function _set_pre_VCR (line 418) | def _set_pre_VCR(filename,user,group):
function _get_file_string (line 422) | def _get_file_string(filename,directory):
FILE: cloudbio/custom/versioncheck.py
function _parse_from_stdoutflag (line 13) | def _parse_from_stdoutflag(out, flag, stdout_index=-1):
function _clean_version (line 27) | def _clean_version(x):
function up_to_date (line 38) | def up_to_date(env, cmd, version, args=None, stdout_flag=None,
function is_version (line 47) | def is_version(env, cmd, version, args=None, stdout_flag=None,
function get_installed_version (line 56) | def get_installed_version(env, cmd, version, args=None, stdout_flag=None,
FILE: cloudbio/deploy/__init__.py
function deploy (line 35) | def deploy(options):
class LocalVmLauncher (line 66) | class LocalVmLauncher:
method __init__ (line 69) | def __init__(self, options):
method get_ip (line 72) | def get_ip(self):
method get_key_file (line 77) | def get_key_file(self):
method boot_and_connect (line 80) | def boot_and_connect(self):
method destroy (line 83) | def destroy(self):
method get_user (line 86) | def get_user(self):
method list (line 89) | def list(self):
function _setup_vm (line 93) | def _setup_vm(options, vm_launcher, actions):
function _expand_actions (line 137) | def _expand_actions(actions):
function _possible_actions (line 150) | def _possible_actions():
function _do_perform_action (line 175) | def _do_perform_action(action, action_list):
function _setup_fabric (line 182) | def _setup_fabric(vm_launcher, ip, options):
function _setup_cloudbiolinux (line 189) | def _setup_cloudbiolinux(options):
function _setup_cloudbiolinux_fabric_properties (line 200) | def _setup_cloudbiolinux_fabric_properties(env, options):
function _setup_image_user_data (line 221) | def _setup_image_user_data(env, options):
function purge_genomes (line 226) | def purge_genomes():
function configure_ssh_key (line 230) | def configure_ssh_key(options):
function setup_biodata (line 242) | def setup_biodata(options):
function configure_instance (line 256) | def configure_instance(options, actions):
function install_custom (line 274) | def install_custom(options):
function install_biolinux (line 279) | def install_biolinux(options):
function _interactive_ssh (line 285) | def _interactive_ssh(vm_launcher):
function transfer_files (line 295) | def transfer_files(options):
function _build_transfer_options (line 300) | def _build_transfer_options(options, destination, user):
function _do_transfer (line 314) | def _do_transfer(transfer_options, files, compressed_files=[]):
function purge_tools (line 320) | def purge_tools():
function install_tools (line 324) | def install_tools(tools_conf):
function get_boolean_option (line 330) | def get_boolean_option(options, name, default=False):
function get_main_options_string (line 337) | def get_main_options_string(options, key, default=''):
function __invoke_plugin_actions (line 344) | def __invoke_plugin_actions(env, actions, action_type, provided_args):
function __invoke_plugin_action (line 352) | def __invoke_plugin_action(env, action_function, provided_args):
function __get_plugin_actions (line 358) | def __get_plugin_actions(env, action_type):
function __get_plugin_modules (line 367) | def __get_plugin_modules(env):
function __get_plugin_module_names (line 388) | def __get_plugin_module_names():
FILE: cloudbio/deploy/config.py
function parse_settings (line 6) | def parse_settings(name="deploy/settings.yaml"):
function _path_from_root (line 10) | def _path_from_root(name):
function _read_yaml (line 16) | def _read_yaml(yaml_file):
FILE: cloudbio/deploy/main.py
function main (line 34) | def main():
function _copy_arg_to_options (line 49) | def _copy_arg_to_options(options, args, property):
function parse_args (line 55) | def parse_args():
function parse_settings (line 87) | def parse_settings(name):
function _read_yaml (line 97) | def _read_yaml(yaml_file):
FILE: cloudbio/deploy/plugins/cloudman.py
function bundle_cloudman (line 22) | def bundle_cloudman(vm_launcher, options):
function cloudman_launch (line 53) | def cloudman_launch(vm_launcher, options):
function sync_cloudman_bucket (line 84) | def sync_cloudman_bucket(vm_launcher, options):
function _save_file_to_bucket (line 97) | def _save_file_to_bucket(conn, bucket_name, remote_filename, local_file,...
function __get_bucket_default (line 127) | def __get_bucket_default(options):
function _prepare_user_data (line 134) | def _prepare_user_data(vm_launcher, cloudman_options):
function _set_property_if_needed (line 151) | def _set_property_if_needed(user_data, property, value):
function _get_bucket (line 156) | def _get_bucket(s3_conn, bucket_name):
FILE: cloudbio/deploy/plugins/galaxy.py
function install_tool (line 4) | def install_tool(options):
FILE: cloudbio/deploy/plugins/gvl.py
function setup_genomes (line 26) | def setup_genomes(options):
function setup_taxonomy_data (line 44) | def setup_taxonomy_data():
function stash_genomes (line 69) | def stash_genomes(where):
function upload_genomes (line 84) | def upload_genomes(options):
function purge_genomes (line 94) | def purge_genomes():
function _cd_indices_parent (line 98) | def _cd_indices_parent():
function _indices_parent (line 102) | def _indices_parent():
function _indices_dir_name (line 107) | def _indices_dir_name():
function galaxy_transfer (line 115) | def galaxy_transfer(vm_launcher, options):
function create_data_library_for_uploads (line 128) | def create_data_library_for_uploads(options):
function _seed_at_configure_time (line 140) | def _seed_at_configure_time(options):
function copy_runtime_properties (line 147) | def copy_runtime_properties(vm_launcher, options):
function _transfer_genomes (line 162) | def _transfer_genomes(options):
function wait_for_galaxy (line 170) | def wait_for_galaxy():
function purge_galaxy (line 178) | def purge_galaxy():
function setup_galaxy (line 190) | def setup_galaxy(options):
function _setup_galaxy (line 197) | def _setup_galaxy(options, seed=True):
function _migrate_galaxy_database (line 211) | def _migrate_galaxy_database():
function seed_database (line 216) | def seed_database(galaxy_data):
function seed_workflows (line 223) | def seed_workflows(options):
function _setup_database_seed_file (line 238) | def _setup_database_seed_file(galaxy_data):
function _import_histories (line 298) | def _import_histories(user_object, histories):
function _import_history (line 305) | def _import_history(user_object, history_name):
function _seed_append (line 311) | def _seed_append(text):
function _start_galaxy (line 315) | def _start_galaxy():
function refresh_galaxy (line 321) | def refresh_galaxy(target_galaxy_repo):
function _setup_galaxy_log_dir (line 326) | def _setup_galaxy_log_dir():
function _setup_shed_tools_dir (line 330) | def _setup_shed_tools_dir():
function _make_dir_for_galaxy (line 334) | def _make_dir_for_galaxy(path):
function _update_galaxy (line 339) | def _update_galaxy(target_galaxy_repo):
function refresh_galaxy_action (line 346) | def refresh_galaxy_action(vm_launcher, options):
function setup_image (line 350) | def setup_image(options):
function _configure_package_holds (line 357) | def _configure_package_holds(options):
function configure_smtp (line 365) | def configure_smtp(options):
function configure_sudoers (line 384) | def configure_sudoers(options):
function configure_MI (line 390) | def configure_MI(env):
function _required_programs (line 400) | def _required_programs(env):
function _ensure_export (line 421) | def _ensure_export(command):
function _start_nginx (line 426) | def _start_nginx(env):
function _deploy_setup_postgresql (line 433) | def _deploy_setup_postgresql(env):
FILE: cloudbio/deploy/util.py
function setup_install_dir (line 9) | def setup_install_dir():
function eval_template (line 19) | def eval_template(env, template_str):
function ensure_can_sudo_into (line 28) | def ensure_can_sudo_into(user):
function sudoers_append (line 32) | def sudoers_append(line):
function start_service (line 36) | def start_service(service_name):
function wget (line 42) | def wget(url, install_command=sudo, file_name=None):
FILE: cloudbio/deploy/vmlauncher/__init__.py
class VmLauncher (line 19) | class VmLauncher:
method __init__ (line 21) | def __init__(self, driver_options_key, options):
method __set_and_verify_key (line 26) | def __set_and_verify_key(self):
method _get_driver_options (line 35) | def _get_driver_options(self, driver_option_keys):
method _driver_options (line 42) | def _driver_options(self):
method get_key_file (line 45) | def get_key_file(self):
method boot_and_connect (line 48) | def boot_and_connect(self):
method _connect_driver (line 56) | def _connect_driver(self):
method _wait_for_node_info (line 61) | def _wait_for_node_info(self, f):
method _parse_node_info (line 72) | def _parse_node_info(self, value):
method _find_node (line 78) | def _find_node(self):
method destroy (line 85) | def destroy(self, node=None):
method __get_ssh_client (line 91) | def __get_ssh_client(self):
method get_user (line 100) | def get_user(self):
method get_ssh_port (line 103) | def get_ssh_port(self):
method connect (line 106) | def connect(self, conn, tries=5):
method list (line 116) | def list(self):
method _boot (line 120) | def _boot(self):
method _image_from_id (line 146) | def _image_from_id(self, image_id=None):
method _get_image_id (line 150) | def _get_image_id(self, image_id=None):
method _default_image_id (line 158) | def _default_image_id(self):
method _get_default_size_id (line 161) | def _get_default_size_id(self):
method _get_size_id_option (line 164) | def _get_size_id_option(self):
method _size_from_id (line 167) | def _size_from_id(self, size_id):
method _get_size_id (line 171) | def _get_size_id(self, size_id=None):
method _boot_new (line 180) | def _boot_new(self, conn):
method access_id (line 185) | def access_id(self):
method secret_key (line 188) | def secret_key(self):
method package_image_name (line 191) | def package_image_name(self):
method package_image_description (line 195) | def package_image_description(self, default=""):
class VagrantConnection (line 200) | class VagrantConnection:
method _ssh_client_connect (line 203) | def _ssh_client_connect(self, ssh_client):
method destroy_node (line 206) | def destroy_node(self, node=None):
method list_nodes (line 209) | def list_nodes(self):
class VagrantNode (line 213) | class VagrantNode:
method __init__ (line 215) | def __init__(self):
class VagrantVmLauncher (line 220) | class VagrantVmLauncher(VmLauncher):
method _get_connection (line 223) | def _get_connection():
method __init__ (line 226) | def __init__(self, driver_options_key, options):
method _boot (line 232) | def _boot(self):
method get_ip (line 236) | def get_ip(self):
method get_user (line 239) | def get_user(self):
method package (line 242) | def package(self, **kwds):
class OpenstackVmLauncher (line 246) | class OpenstackVmLauncher(VmLauncher):
method get_ip (line 249) | def get_ip(self):
method _get_size_id_option (line 252) | def _get_size_id_option(self):
method create_node (line 255) | def create_node(self, hostname, image_id=None, size_id=None, **kwds):
method _get_connection (line 269) | def _get_connection(self):
method package (line 288) | def package(self, **kwds):
method attach_public_ip (line 292) | def attach_public_ip(self, public_ip=None):
class EucalyptusVmLauncher (line 298) | class EucalyptusVmLauncher(VmLauncher):
method get_ip (line 300) | def get_ip(self):
method _get_connection (line 303) | def _get_connection(self):
method create_node (line 316) | def create_node(self, hostname, image_id=None, size_id=None, **kwds):
class Ec2VmLauncher (line 330) | class Ec2VmLauncher(VmLauncher):
method get_ip (line 332) | def get_ip(self):
method boto_connection (line 335) | def boto_connection(self):
method boto_s3_connection (line 345) | def boto_s3_connection(self):
method _default_image_id (line 351) | def _default_image_id(self):
method package (line 354) | def package(self, **kwds):
method _create_image (line 361) | def _create_image(self, **kwds):
method _default_package (line 379) | def _default_package(self, **kwds):
method _install_ec2_tools (line 386) | def _install_ec2_tools(self):
method _install_packaging_scripts (line 391) | def _install_packaging_scripts(self):
method _write_script (line 408) | def _write_script(self, path, contents):
method _copy_keys (line 413) | def _copy_keys(self):
method _availability_zone (line 419) | def _availability_zone(self):
method _get_default_size_id (line 426) | def _get_default_size_id(self):
method _get_location (line 429) | def _get_location(self):
method create_node (line 437) | def create_node(self, hostname, image_id=None, size_id=None, location=...
method attach_public_ip (line 459) | def attach_public_ip(self, public_ip=None):
method _get_connection (line 464) | def _get_connection(self):
function build_vm_launcher (line 472) | def build_vm_launcher(options):
FILE: cloudbio/deploy/vmlauncher/transfer.py
class FileSplitter (line 16) | class FileSplitter:
method __init__ (line 24) | def __init__(self, chunk_size, destination_directory, callback):
method split_file (line 29) | def split_file(self, path, compress, transfer_target):
class TransferTarget (line 59) | class TransferTarget:
method __init__ (line 61) | def __init__(self, file, precompressed, transfer_manager):
method should_compress (line 73) | def should_compress(self):
method split_up (line 76) | def split_up(self):
method clean (line 79) | def clean(self):
method compressed_basename (line 83) | def compressed_basename(self):
method decompressed_basename (line 90) | def decompressed_basename(self):
method compressed_file (line 98) | def compressed_file(self):
method build_simple_chunk (line 102) | def build_simple_chunk(self):
class TransferChunk (line 111) | class TransferChunk:
method __init__ (line 113) | def __init__(self, chunk_path, transfer_target):
method clean_up (line 117) | def clean_up(self):
class FileTransferManager (line 124) | class FileTransferManager:
method __init__ (line 126) | def __init__(self,
method handle_chunk (line 152) | def handle_chunk(self, chunk, transfer_target):
method transfer_files (line 155) | def transfer_files(self, files=[], compressed_files=[]):
method _setup_workers (line 167) | def _setup_workers(self):
method _setup_destination_directory (line 172) | def _setup_destination_directory(self):
method _setup_compress_threads (line 176) | def _setup_compress_threads(self):
method _setup_decompress_threads (line 180) | def _setup_decompress_threads(self):
method _setup_transfer_threads (line 184) | def _setup_transfer_threads(self):
method _launch_threads (line 188) | def _launch_threads(self, num_threads, func):
method _enqueue_files (line 194) | def _enqueue_files(self, files, compressed_files):
method _sort_transfer_targets (line 209) | def _sort_transfer_targets(self, transfer_targets):
method _wait_for_completion (line 216) | def _wait_for_completion(self):
method _compress_files (line 225) | def _compress_files(self):
method _decompress_files (line 243) | def _decompress_files(self):
method _put_files (line 274) | def _put_files(self):
method _chown (line 291) | def _chown(self, destination):
method _put_as_user (line 294) | def _put_as_user(self, source, destination):
method _enqueue_chunk (line 314) | def _enqueue_chunk(self, transfer_chunk):
FILE: cloudbio/deploy/volume.py
function attach_volumes (line 8) | def attach_volumes(vm_launcher, options, format=False):
function _mount (line 53) | def _mount(device_id, path):
function _format_device (line 57) | def _format_device(device_id):
function detach_volumes (line 61) | def detach_volumes(vm_launcher, options):
function make_snapshots (line 75) | def make_snapshots(vm_launcher, options):
function _get_attached (line 88) | def _get_attached(conn, instance_id, device_id, valid_states=['attached']):
function _make_snapshot (line 99) | def _make_snapshot(vm_launcher, fs_path, desc):
function _find_mounted_device_id (line 143) | def _find_mounted_device_id(path):
function _attach (line 149) | def _attach(ec2_conn, instance_id, volume_id, device):
function _detach (line 175) | def _detach(ec2_conn, instance_id, volume_id):
function _delete_volume (line 198) | def _delete_volume(ec2_conn, vol_id):
function _create_snapshot (line 206) | def _create_snapshot(ec2_conn, volume_id, description=None):
FILE: cloudbio/distribution.py
function _setup_distribution_environment (line 15) | def _setup_distribution_environment(ignore_distcheck=False):
function _setup_fullpaths (line 66) | def _setup_fullpaths(env):
function _cloudman_compatibility (line 76) | def _cloudman_compatibility(env):
function _validate_target_distribution (line 82) | def _validate_target_distribution(dist, dist_name=None):
function _setup_ubuntu (line 108) | def _setup_ubuntu():
function _setup_debian (line 126) | def _setup_debian():
function _setup_deb_general (line 139) | def _setup_deb_general():
function _setup_centos (line 166) | def _setup_centos():
function _setup_scientificlinux (line 179) | def _setup_scientificlinux():
function _setup_macosx (line 187) | def _setup_macosx(env):
function _setup_nixpkgs (line 195) | def _setup_nixpkgs():
function _setup_local_environment (line 214) | def _setup_local_environment():
function _setup_vagrant_environment (line 222) | def _setup_vagrant_environment():
function _add_source_versions (line 239) | def _add_source_versions(version, sources):
function _ubuntu_dist_name (line 252) | def _ubuntu_dist_name(env):
function _debian_dist_name (line 259) | def _debian_dist_name(env):
function _determine_distribution (line 266) | def _determine_distribution(env):
FILE: cloudbio/fabutils.py
function local_exists (line 30) | def local_exists(path, use_sudo=False):
function run_local (line 39) | def run_local(use_sudo=False, capture=False):
function local_put (line 53) | def local_put(orig_file, new_file):
function local_sed (line 56) | def local_sed(filename, before, after, limit='', use_sudo=False, backup=...
function local_comment (line 99) | def local_comment(filename, regex, use_sudo=False, char='#', backup='.ba...
function _escape_for_regex (line 117) | def _escape_for_regex(text):
function _expand_path (line 128) | def _expand_path(path):
function local_contains (line 131) | def local_contains(filename, text, exact=False, use_sudo=False, escape=T...
function local_append (line 142) | def local_append(filename, text, use_sudo=False, partial=False, escape=T...
function run_output (line 156) | def run_output(*args, **kwargs):
function configure_runsudo (line 161) | def configure_runsudo(env):
function find_cmd (line 198) | def find_cmd(env, cmd, args):
function quiet (line 212) | def quiet():
function warn_only (line 218) | def warn_only():
FILE: cloudbio/flavor/__init__.py
class Flavor (line 11) | class Flavor:
method __init__ (line 14) | def __init__(self, env):
method rewrite_config_items (line 20) | def rewrite_config_items(self, name, items):
method check_distribution (line 28) | def check_distribution(self):
method check_packages_source (line 35) | def check_packages_source(self):
method rewrite_apt_sources_list (line 40) | def rewrite_apt_sources_list(self, sources):
method rewrite_apt_preferences (line 45) | def rewrite_apt_preferences(self, preferences):
method rewrite_apt_automation (line 50) | def rewrite_apt_automation(self, package_info):
method rewrite_apt_keys (line 55) | def rewrite_apt_keys(self, standalone, keyserver):
method apt_upgrade_system (line 59) | def apt_upgrade_system(self, env=None):
method post_install (line 64) | def post_install(self):
class Minimal (line 69) | class Minimal(Flavor):
method __init__ (line 71) | def __init__(self, env):
method rewrite_config_items (line 76) | def rewrite_config_items(self, name, items):
method post_install (line 84) | def post_install(self, pkg_install=None):
FILE: cloudbio/flavor/config.py
function _find_fname (line 7) | def _find_fname(env, fname):
function get_config_file (line 15) | def get_config_file(env, fname):
FILE: cloudbio/galaxy/__init__.py
function _setup_users (line 25) | def _setup_users(env):
function _setup_galaxy_env_defaults (line 39) | def _setup_galaxy_env_defaults(env):
function _install_galaxy (line 67) | def _install_galaxy(env):
function _clone_galaxy_repo (line 94) | def _clone_galaxy_repo(env):
function _setup_galaxy_db (line 128) | def _setup_galaxy_db(env):
function _get_galaxy_db_configs (line 138) | def _get_galaxy_db_configs(env):
function _galaxy_db_exists (line 161) | def _galaxy_db_exists(env):
function _create_galaxy_db (line 194) | def _create_galaxy_db(env):
function _init_galaxy_db (line 234) | def _init_galaxy_db(env):
function _configure_galaxy_options (line 255) | def _configure_galaxy_options(env, option_dict=None, prefix="galaxy_univ...
function _setup_shed_tools_dir (line 281) | def _setup_shed_tools_dir(env):
function _setup_trackster (line 288) | def _setup_trackster(env):
function _configure_galaxy_repository (line 305) | def _configure_galaxy_repository(env):
function _setup_service (line 347) | def _setup_service(env):
function _setup_xvfb (line 353) | def _setup_xvfb(env):
function _setup_nginx_service (line 362) | def _setup_nginx_service(env):
function _install_nginx_standalone (line 369) | def _install_nginx_standalone(env):
function _install_nginx_package (line 374) | def _install_nginx_package(env):
function _install_nginx (line 394) | def _install_nginx(env):
function _get_nginx_modules (line 452) | def _get_nginx_modules(env):
function _get_nginx_module_upload (line 471) | def _get_nginx_module_upload(env):
function _get_nginx_module_chunk (line 481) | def _get_nginx_module_chunk(env):
function _get_nginx_module_ldap (line 492) | def _get_nginx_module_ldap(env):
function _setup_postgresql (line 498) | def _setup_postgresql(env):
function _configure_postgresql (line 505) | def _configure_postgresql(env, delete_main_dbcluster=False):
function _init_postgresql_data (line 532) | def _init_postgresql_data(env):
function _postgres_running (line 538) | def _postgres_running(env):
function _make_dir_for_galaxy (line 548) | def _make_dir_for_galaxy(env, path):
FILE: cloudbio/galaxy/applications.py
function install_fastx_toolkit (line 26) | def install_fastx_toolkit(env):
function install_maq (line 53) | def install_maq(env):
function install_macs (line 61) | def install_macs(env):
function install_megablast (line 71) | def install_megablast(env):
function install_blast (line 87) | def install_blast(env):
function install_sputnik (line 104) | def install_sputnik(env):
function install_taxonomy (line 118) | def install_taxonomy(env):
function install_add_scores (line 134) | def install_add_scores(env):
function install_hyphy (line 148) | def install_hyphy(env):
function install_gatk (line 176) | def install_gatk(env):
function install_srma (line 207) | def install_srma(env):
function install_beam (line 226) | def install_beam(env):
function install_pass (line 242) | def install_pass(env):
function install_lps_tool (line 258) | def install_lps_tool(env):
function install_plink (line 277) | def install_plink(env):
function install_fbat (line 294) | def install_fbat(env):
function install_haploview (line 311) | def install_haploview(env):
function install_eigenstrat (line 326) | def install_eigenstrat(env):
function install_augustus (line 343) | def install_augustus(env):
function install_picard (line 356) | def install_picard(env):
function install_fastqc (line 381) | def install_fastqc(env):
function _update_default (line 399) | def _update_default(env, install_dir):
FILE: cloudbio/galaxy/r.py
function _install_r_packages (line 18) | def _install_r_packages(tools_conf):
function _concat_strings (line 34) | def _concat_strings(strings):
FILE: cloudbio/galaxy/tools.py
function _install_tools (line 19) | def _install_tools(env, tools_conf=None):
function _tools_conf_path (line 39) | def _tools_conf_path(env):
function _load_tools_conf (line 51) | def _load_tools_conf(env):
function _setup_install_dir (line 57) | def _setup_install_dir(env):
function _install_configured_applications (line 75) | def _install_configured_applications(env, tools_conf):
function _install_application (line 106) | def _install_application(name, versions, tool_install_dir=None):
function _install_tool (line 151) | def _install_tool(env, name, version, requirement_name, bin_dirs=["bin"]...
function _build_tool_env (line 159) | def _build_tool_env(env, name, version, tool_install_dir):
function __check_conditional (line 173) | def __check_conditional(conf_dict):
class AttributeDict (line 188) | class AttributeDict(dict):
function _install_galaxy_config (line 201) | def _install_galaxy_config(tool_env, bin_dirs, env_vars):
FILE: cloudbio/galaxy/utils.py
function _read_boolean (line 5) | def _read_boolean(env, name, default):
function _chown_galaxy (line 11) | def _chown_galaxy(env, path):
function _dir_is_empty (line 21) | def _dir_is_empty(path):
FILE: cloudbio/libraries.py
function r_library_installer (line 9) | def r_library_installer(config):
function _make_install_script (line 36) | def _make_install_script(out_file, config):
FILE: cloudbio/manifest.py
function create (line 24) | def create(out_dir, tooldir="/usr/local", fetch_remote=False):
function _get_custom_pkg_info (line 37) | def _get_custom_pkg_info(name, fn):
function write_custom_pkg_info (line 64) | def write_custom_pkg_info(out_dir, tooldir):
function write_brew_pkg_info (line 93) | def write_brew_pkg_info(out_dir, tooldir):
function get_r_pkg_info (line 120) | def get_r_pkg_info():
function write_r_pkg_info (line 136) | def write_r_pkg_info(out_dir):
function _get_conda_envs (line 148) | def _get_conda_envs(conda_bin):
function get_python_pkg_info (line 153) | def get_python_pkg_info():
function _resolve_latest_pkg (line 180) | def _resolve_latest_pkg(pkgs):
function write_python_pkg_info (line 187) | def write_python_pkg_info(out_dir):
function _get_pkg_popcon (line 202) | def _get_pkg_popcon():
function get_debian_pkg_info (line 212) | def get_debian_pkg_info(fetch_remote=False):
function write_debian_pkg_info (line 227) | def write_debian_pkg_info(out_dir, fetch_remote=False):
FILE: cloudbio/package/__init__.py
function _configure_and_install_native_packages (line 7) | def _configure_and_install_native_packages(env, pkg_install):
function _connect_native_packages (line 40) | def _connect_native_packages(env, pkg_install, lib_install):
function _get_shell_exports (line 59) | def _get_shell_exports(env):
function _print_shell_exports (line 63) | def _print_shell_exports(env):
function _create_local_virtualenv (line 71) | def _create_local_virtualenv(target_dir):
FILE: cloudbio/package/brew.py
function install_packages (line 24) | def install_packages(env, to_install=None, packages=None):
function _remove_old (line 64) | def _remove_old(env, config_file):
function _safe_update (line 75) | def _safe_update(env, brew_cmd, formula_repos, cur_taps):
function _git_stash (line 94) | def _git_stash(env, brew_cmd):
function _get_current_pkgs (line 122) | def _get_current_pkgs(env, brew_cmd):
function _safe_unlink_pkg (line 136) | def _safe_unlink_pkg(env, pkg_str, brew_cmd):
function _safe_link_pkg (line 143) | def _safe_link_pkg(env, pkg_str, brew_cmd):
function _safe_uninstall_pkg (line 150) | def _safe_uninstall_pkg(env, pkg_str, brew_cmd):
function _install_pkg (line 157) | def _install_pkg(env, pkg_str, brew_cmd, ipkgs):
function _install_pkg_version (line 170) | def _install_pkg_version(env, pkg, args, version, brew_cmd, ipkgs):
function _git_pkg_version (line 200) | def _git_pkg_version(env, brew_cmd, pkg, version):
function _git_cmd_for_pkg_version (line 222) | def _git_cmd_for_pkg_version(env, brew_cmd, pkg, version):
function _latest_pkg_version (line 234) | def _latest_pkg_version(env, brew_cmd, pkg, devel=False):
function _get_brew_install_cmd (line 255) | def _get_brew_install_cmd(brew_cmd, env, pkg):
function _install_pkg_latest (line 267) | def _install_pkg_latest(env, pkg, args, brew_cmd, ipkgs):
function _custom_unlink (line 302) | def _custom_unlink(env, brew_cmd, pkg):
function _get_pkg_version_args (line 318) | def _get_pkg_version_args(pkg_str):
function _install_bottle (line 335) | def _install_bottle(env, brew_cmd, pkg, ipkgs):
function _install_brew_baseline (line 366) | def _install_brew_baseline(env, brew_cmd, ipkgs, packages):
function _brew_cmd (line 404) | def _brew_cmd(env):
FILE: cloudbio/package/conda.py
function install_packages (line 29) | def install_packages(env, to_install=None, packages=None):
function _install_env_pkgs (line 44) | def _install_env_pkgs(env_name, env_packages, conda_bin, conda_envs, cha...
function install_in (line 83) | def install_in(conda_bin, system_installdir, config_file=None, packages=...
function _initial_base_install (line 148) | def _initial_base_install(conda_bin, env_packages, check_channels):
function _link_bin (line 186) | def _link_bin(package, system_installdir, conda_info, conda_bin, conda_p...
function _do_link (line 224) | def _do_link(orig_file, final_file):
function _split_by_condaenv (line 244) | def _split_by_condaenv(packages):
function _get_conda_envs (line 262) | def _get_conda_envs(conda_bin):
function _create_environments (line 266) | def _create_environments(conda_bin, packages):
function _clean_environment (line 300) | def _clean_environment(env_dir):
FILE: cloudbio/package/cpan.py
function install_packages (line 12) | def install_packages(env):
function _install_from_cpan (line 22) | def _install_from_cpan(env, cpanm_cmd, package):
function _install_from_url (line 48) | def _install_from_url(env, cpanm_cmd, package):
FILE: cloudbio/package/deb.py
function _apt_packages (line 11) | def _apt_packages(to_install=None, pkg_list=None):
function _add_apt_gpg_keys (line 51) | def _add_apt_gpg_keys():
function _setup_apt_automation (line 77) | def _setup_apt_automation():
function _setup_apt_sources (line 125) | def _setup_apt_sources():
FILE: cloudbio/package/nix.py
function _setup_nix_sources (line 9) | def _setup_nix_sources():
function _nix_packages (line 40) | def _nix_packages(to_install):
FILE: cloudbio/package/rpm.py
function _yum_packages (line 10) | def _yum_packages(to_install):
function _partition_all (line 30) | def _partition_all(n, iterable):
function _setup_yum_bashrc (line 40) | def _setup_yum_bashrc():
function _setup_yum_sources (line 52) | def _setup_yum_sources():
FILE: cloudbio/package/shared.py
function _yaml_to_packages (line 6) | def _yaml_to_packages(yaml_file, to_install=None, subs_yaml_file=None, n...
function _filter_subs_packages (line 47) | def _filter_subs_packages(initial, subs, namesort=True):
FILE: cloudbio/utils.py
class ColorFormatter (line 17) | class ColorFormatter(logging.Formatter):
method __init__ (line 27) | def __init__(self, fmt="%(name)s %(levelname)s: %(msg)s"):
method format (line 30) | def format(self, record):
function _setup_logging (line 49) | def _setup_logging(env):
function _update_biolinux_log (line 59) | def _update_biolinux_log(env, target, flavor):
function _configure_fabric_environment (line 85) | def _configure_fabric_environment(env, flavor=None, fabricrc_loader=None,
function _setup_flavor (line 96) | def _setup_flavor(env, flavor):
function _parse_fabricrc (line 132) | def _parse_fabricrc(env):
function _create_local_paths (line 147) | def _create_local_paths(env):
FILE: contrib/flavor/boinc/boincflavor.py
class BoincFlavor (line 8) | class BoincFlavor(Flavor):
method __init__ (line 11) | def __init__(self, env):
method rewrite_config_items (line 15) | def rewrite_config_items(self, name, packages):
method post_install (line 22) | def post_install(self):
FILE: contrib/flavor/millstone/installer.py
function install_millstone (line 274) | def install_millstone(env):
FILE: contrib/flavor/millstone/millstoneflavor.py
class MillstoneFlavor (line 14) | class MillstoneFlavor(Flavor):
method __init__ (line 15) | def __init__(self, env):
method rewrite_config_items (line 19) | def rewrite_config_items(self, name, packages):
method post_install (line 22) | def post_install(self):
FILE: contrib/flavor/neuro/neuro.py
class NeuroFlavor (line 12) | class NeuroFlavor(Flavor):
method __init__ (line 17) | def __init__(self, env):
method rewrite_config_items (line 20) | def rewrite_config_items(self, name, packages):
method rewrite_apt_sources_list (line 28) | def rewrite_apt_sources_list(self, name, sources):
FILE: contrib/flavor/phylogeny/phylogenyflavor.py
class PhylogenyFlavor (line 9) | class PhylogenyFlavor(Flavor):
method __init__ (line 12) | def __init__(self, env):
method rewrite_config_items (line 16) | def rewrite_config_items(self, name, packages):
method post_install (line 28) | def post_install(self):
FILE: contrib/flavor/pjotrp/biotest/biotestflavor.py
class BioTestFlavor (line 8) | class BioTestFlavor(Flavor):
method __init__ (line 11) | def __init__(self, env):
method rewrite_config_items (line 15) | def rewrite_config_items(self, name, items):
method post_install (line 31) | def post_install(self):
FILE: contrib/flavor/seal/sealflavor.py
class SealFlavor (line 44) | class SealFlavor(Flavor):
method __init__ (line 47) | def __init__(self, env):
method rewrite_config_items (line 51) | def rewrite_config_items(self, name, packages):
method post_install (line 63) | def post_install(self):
FILE: data_fabfile.py
function setup_environment (line 35) | def setup_environment():
function _add_defaults (line 42) | def _add_defaults():
function install_data (line 56) | def install_data(config_source=CONFIG_FILE):
function install_data_raw (line 62) | def install_data_raw(config_source=CONFIG_FILE):
function install_data_s3 (line 68) | def install_data_s3(config_source=CONFIG_FILE, do_setup_environment=True):
function install_data_rsync (line 74) | def install_data_rsync(config_source=CONFIG_FILE):
function install_data_ggd (line 80) | def install_data_ggd(recipe, organism):
function upload_s3 (line 89) | def upload_s3(config_source=CONFIG_FILE):
FILE: deploy/test_install_galaxy_tool.py
function __clone_cloudbiolinux (line 28) | def __clone_cloudbiolinux(cbl_config):
function install_cbl_tool (line 44) | def install_cbl_tool(tool_name, tool_version, install_dir, cbl_config={}):
FILE: fabfile.py
function install_biolinux (line 48) | def install_biolinux(target=None, flavor=None):
function _perform_install (line 82) | def _perform_install(target=None, flavor=None, more_custom_add=None):
function _print_time_stats (line 135) | def _print_time_stats(action, event, prev_time=None):
function _check_fabric_version (line 158) | def _check_fabric_version():
function _custom_installs (line 165) | def _custom_installs(to_install, ignore=None, add=None):
function _provision_chef_recipes (line 180) | def _provision_chef_recipes(to_install, ignore=None):
function _provision_puppet_classes (line 193) | def _provision_puppet_classes(to_install, ignore=None):
function install_chef_recipe (line 206) | def install_chef_recipe(recipe, automated=False, flavor=None):
function install_puppet_class (line 230) | def install_puppet_class(classes, automated=False, flavor=None):
function install_custom (line 251) | def install_custom(p, automated=False, pkg_to_group=None, flavor=None):
function _install_custom (line 291) | def _install_custom(p, pkg_to_group=None):
function install_brew (line 298) | def install_brew(p=None, version=None, flavor=None, automated=False):
function install_conda (line 313) | def install_conda(p=None, flavor=None, automated=False):
function _custom_install_function (line 323) | def _custom_install_function(env, p, pkg_to_group):
function _read_main_config (line 352) | def _read_main_config():
function _python_library_installer (line 374) | def _python_library_installer(config):
function _ruby_library_installer (line 395) | def _ruby_library_installer(config):
function _perl_library_installer (line 414) | def _perl_library_installer(config):
function _haskell_library_installer (line 430) | def _haskell_library_installer(config):
function install_libraries (line 446) | def install_libraries(language):
function _do_library_installs (line 454) | def _do_library_installs(to_install):
FILE: installed_files/ec2autorun.py
function _setup_logging (line 41) | def _setup_logging():
function _get_user_data (line 56) | def _get_user_data():
function _get_bucket_name (line 83) | def _get_bucket_name(cluster_name, access_key):
function _isurl (line 89) | def _isurl(path):
function _get_s3_conn (line 98) | def _get_s3_conn(ud):
function _bucket_exists (line 125) | def _bucket_exists(s3_conn, bucket_name):
function _remote_file_exists (line 143) | def _remote_file_exists(s3_conn, bucket_name, remote_filename):
function _save_file_to_bucket (line 159) | def _save_file_to_bucket(s3_conn, bucket_name, remote_filename, local_fi...
function _get_file_from_bucket (line 190) | def _get_file_from_bucket(s3_conn, bucket_name, remote_filename, local_f...
function _get_file_from_url (line 212) | def _get_file_from_url(url):
function _get_boot_script (line 228) | def _get_boot_script(ud):
function _run_boot_script (line 282) | def _run_boot_script(boot_script_name):
function _create_basic_user_data_file (line 295) | def _create_basic_user_data_file():
function _get_default_bucket_url (line 310) | def _get_default_bucket_url(ud=None):
function _user_exists (line 326) | def _user_exists(username):
function _allow_password_logins (line 333) | def _allow_password_logins(passwd):
function _handle_freenx (line 347) | def _handle_freenx(passwd):
function _handle_empty (line 367) | def _handle_empty():
function _handle_url (line 376) | def _handle_url(url):
function _merge (line 384) | def _merge(specific, default):
function _load_user_data (line 398) | def _load_user_data(user_data):
function _handle_yaml (line 417) | def _handle_yaml(user_data):
function _parse_user_data (line 501) | def _parse_user_data(ud):
function main (line 509) | def main():
FILE: test/testlib/test_biolinux.rb
function test_for_completed_install (line 3) | def test_for_completed_install(ssh, hostname, box_name)
function test_for_match (line 13) | def test_for_match(ssh, cmd, regex)
function test_in_path (line 20) | def test_in_path(ssh, program)
function test_phylogeny_flavor (line 24) | def test_phylogeny_flavor(vagrant)
FILE: test/testlib/test_support.rb
function run (line 2) | def run cmd
function error (line 12) | def error msg
FILE: utils/cbl_exome_setup.py
function main (line 22) | def main():
function setup_custom_galaxy (line 37) | def setup_custom_galaxy():
function install_latest_pipeline (line 50) | def install_latest_pipeline():
function run_nextgen_analysis_server (line 73) | def run_nextgen_analysis_server(pp_config, work_dir, work_user):
function setup_rabbitmq (line 83) | def setup_rabbitmq(vhost, user, passwd):
function read_pp_config (line 92) | def read_pp_config(fname):
function read_ampq_config (line 99) | def read_ampq_config(fname):
function update_amqp_config (line 107) | def update_amqp_config(fname, hostname):
function wait_until_mounted (line 128) | def wait_until_mounted(fname):
function chdir (line 144) | def chdir(new_dir):
FILE: utils/cbl_installed_software.py
function main (line 11) | def main():
FILE: utils/convert_to_xz.py
function main (line 17) | def main(bucket_name):
function download_parallel (line 33) | def download_parallel(url):
function swap_s3_item (line 43) | def swap_s3_item(xz_file, bucket, orig_s3_item):
function gzip_to_xz (line 52) | def gzip_to_xz(local_file):
FILE: utils/get_biolinux_packages.py
function main (line 16) | def main():
function get_package (line 37) | def get_package(pname):
FILE: utils/get_yum_packages.py
function main (line 14) | def main(orig_file):
function get_yum_package (line 27) | def get_yum_package(pname):
FILE: utils/images_and_snapshots.py
function images_and_snapshots (line 6) | def images_and_snapshots(owner):
function _data_libraries (line 18) | def _data_libraries(conn, owner):
function _sorted_images (line 33) | def _sorted_images(images, start_name):
FILE: utils/prep_esp_hg38.py
function main (line 16) | def main():
function _add_contigs (line 49) | def _add_contigs(out_handle, ref_file):
FILE: utils/prepare_cosmic.py
function main (line 37) | def main(cosmic_version, bcbio_genome_dir, overwrite=False, clean=False):
function remove_installed (line 99) | def remove_installed(installed_file, installed_link):
function make_links (line 116) | def make_links(installed_file, installed_link):
function map_coords_to_ucsc (line 124) | def map_coords_to_ucsc(grc_cosmic, ref_file, out_file):
function _rename_to_ucsc (line 138) | def _rename_to_ucsc(line):
function combine_cosmic (line 147) | def combine_cosmic(fnames, ref_file, out_file):
function sort_to_ref (line 157) | def sort_to_ref(fname, ref_file, add_chr):
function get_cosmic_vcf_files (line 178) | def get_cosmic_vcf_files(genome_build, cosmic_version, clean):
function remove_cosmic_directory (line 213) | def remove_cosmic_directory(installed_directory):
function update_version_file (line 217) | def update_version_file(bcbio_base, version):
FILE: utils/prepare_dbsnp.py
function main (line 17) | def main(org):
function fix_info (line 48) | def fix_info(parts):
function fix_chrom (line 54) | def fix_chrom(parts):
function get_file (line 65) | def get_file(x, ftp_dir, conn):
function karyotype_sort (line 74) | def karyotype_sort(xs):
FILE: utils/prepare_tx_gff.py
function which (line 55) | def which(program):
function manual_ucsc_ensembl_map (line 71) | def manual_ucsc_ensembl_map(org_build):
function ucsc_ensembl_map_via_download (line 84) | def ucsc_ensembl_map_via_download(org_build):
function ensembl_to_ucsc (line 93) | def ensembl_to_ucsc(ensembl_dict, ucsc_dict, org_build):
function ucsc_ensembl_map_via_query (line 108) | def ucsc_ensembl_map_via_query(org_build):
function parse_sequence_dict (line 190) | def parse_sequence_dict(fasta_dict):
class SequenceDictParser (line 202) | class SequenceDictParser(object):
method __init__ (line 204) | def __init__(self, fname):
method _get_sequences_in_genome_dict (line 207) | def _get_sequences_in_genome_dict(self):
method _sequence_from_line (line 212) | def _sequence_from_line(self, line):
function get_ensembl_dict (line 217) | def get_ensembl_dict(org_build):
function get_ucsc_dict (line 227) | def get_ucsc_dict(org_build):
function make_fasta_dict (line 234) | def make_fasta_dict(fasta_file):
function _download_ensembl_genome (line 241) | def _download_ensembl_genome(org_build):
function write_version (line 254) | def write_version(build=None, gtf_file=None, build_version=None):
function main (line 265) | def main(org_build, gtf_file, genome_fasta, genome_dir, cores, args):
function make_hisat2_splicesites (line 326) | def make_hisat2_splicesites(gtf_file):
function make_transcriptome_fasta (line 340) | def make_transcriptome_fasta(gtf_file, genome_fasta):
function clean_gtf (line 346) | def clean_gtf(gtf_file, genome_fasta):
function get_genome_fasta (line 372) | def get_genome_fasta(org_build):
function get_fasta_names (line 377) | def get_fasta_names(genome_fasta):
function cleanup (line 384) | def cleanup(work_dir, out_dir, org_build):
function create_tarball (line 395) | def create_tarball(tar_dirs, org_build):
function upload_to_s3 (line 403) | def upload_to_s3(tarball):
function genepred_to_UCSC_table (line 409) | def genepred_to_UCSC_table(genepred):
function gtf_to_genepred (line 430) | def gtf_to_genepred(gtf):
function gtf_to_refflat (line 438) | def gtf_to_refflat(gtf):
function gtf_to_bed (line 451) | def gtf_to_bed(gtf):
function _is_selenocysteine (line 469) | def _is_selenocysteine(feature):
function db_to_gtf (line 474) | def db_to_gtf(db, out_file):
function make_miso_events (line 485) | def make_miso_events(gtf, org_build):
function prepare_bowtie_index (line 503) | def prepare_bowtie_index(genome_fasta, bowtie_dir):
function prepare_tophat_index (line 511) | def prepare_tophat_index(gtf, org_build, genome_fasta):
function prepare_kallisto_index (line 526) | def prepare_kallisto_index(transcriptome_fasta, org_build):
function prepare_sailfish_index (line 539) | def prepare_sailfish_index(transcriptome_fasta, org_build):
function _create_dummy_fastq (line 551) | def _create_dummy_fastq():
function gtf_to_interval (line 561) | def gtf_to_interval(gtf, genome_fasta):
function prepare_mask_gtf (line 580) | def prepare_mask_gtf(gtf):
function prepare_rrna_gtf (line 603) | def prepare_rrna_gtf(gtf):
function prepare_tx2gene (line 625) | def prepare_tx2gene(gtf):
function _biotype_lookup_fn (line 640) | def _biotype_lookup_fn(gtf):
function prepare_tx_gff (line 661) | def prepare_tx_gff(build, org_name):
function _remap_gff (line 675) | def _remap_gff(base_gff, name_map):
function _download_ensembl_gff (line 693) | def _download_ensembl_gff(build, org_name):
function _create_tiny_gffutils_db (line 705) | def _create_tiny_gffutils_db(gtf_file):
function subfeature_handler (line 725) | def subfeature_handler(f):
function guess_disable_infer_extent (line 743) | def guess_disable_infer_extent(gtf_file):
function guess_id_spec (line 755) | def guess_id_spec(gtf_file):
function _get_gtf_db (line 773) | def _get_gtf_db(gtf):
function _dexseq_preparation_path (line 791) | def _dexseq_preparation_path():
function prepare_dexseq (line 806) | def prepare_dexseq(gtf):
FILE: utils/prioritize/az300_to_bed.py
function read_targets (line 20) | def read_targets(in_file):
function get_gene_info (line 28) | def get_gene_info(cur_symbol):
function find_missing_targets (line 44) | def find_missing_targets(missing, in_file, genome):
function write_from_transcript_file (line 61) | def write_from_transcript_file(targets, ref_dir, genome, out_handle):
function write_from_remap_names (line 72) | def write_from_remap_names(targets, ref_dir, genome, out_handle, in_file):
function write_from_gene_info (line 84) | def write_from_gene_info(targets, genome, out_handle):
FILE: utils/prioritize/prep_ccds_genes.py
function main (line 12) | def main(in_file, out_name, fai_file):
function get_chrom (line 24) | def get_chrom(chrom):
function split_coords (line 28) | def split_coords(coords):
FILE: utils/query_conda_deps.py
function main (line 11) | def main(config_file):
function get_dependencies (line 28) | def get_dependencies(channel, package):
FILE: utils/s3_multipart_upload.py
function main (line 40) | def main(transfer_file, bucket_name, s3_key_name=None, use_rr=True,
function s3_has_uptodate_file (line 65) | def s3_has_uptodate_file(bucket, transfer_file, s3_key_name):
function upload_cb (line 77) | def upload_cb(complete, total):
function _standard_transfer (line 81) | def _standard_transfer(bucket, s3_key_name, transfer_file, use_rr):
function map_wrap (line 88) | def map_wrap(f):
function mp_from_ids (line 94) | def mp_from_ids(mp_id, mp_keyname, mp_bucketname, profile=None):
function transfer_part (line 111) | def transfer_part(mp_id, mp_keyname, mp_bucketname, i, part, profile):
function _multipart_upload (line 120) | def _multipart_upload(bucket, s3_key_name, tarball, mb_size, use_rr=True,
function multimap (line 143) | def multimap(cores=None):
Condensed preview — 458 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,909K chars).
[
{
"path": ".gitignore",
"chars": 195,
"preview": "*.pyc\ntags\ncloudbiolinux.egg-info/\nbuild/\ndoc/build\ndist/\n*.pem\ndpkg.lst\n*.log\nvenv/\nvenv_cbl/\ndeploy/build\ndeploy/keys\n"
},
{
"path": ".gitmodules",
"chars": 1238,
"preview": "[submodule \"config/puppet/modules/apache\"]\n\tpath = config/puppet/modules/apache\n\turl = git://github.com/puppetlabs/puppe"
},
{
"path": "LICENSE.txt",
"chars": 1070,
"preview": "Copyright (c) 2013 CloudBioLinux contributors\n\nPermission is hereby granted, free of charge, to any person obtaining\na c"
},
{
"path": "MANIFEST.in",
"chars": 181,
"preview": "include *fabfile.py\ninclude *.md\ninclude *.mkd\ninclude config/*.yaml\ninclude config/fabricrc.txt\ninclude config/README.m"
},
{
"path": "README.rst",
"chars": 16451,
"preview": "CloudBioLinux is a build and deployment system which installs an easily\ncustomizable selection of bioinformatics and mac"
},
{
"path": "cloudbio/__init__.py",
"chars": 228,
"preview": "\"\"\"Module level code supporting CloudBioLinux installations.\n\nThis provides a reusable architecture allowing definitions"
},
{
"path": "cloudbio/biodata/__init__.py",
"chars": 68,
"preview": "\"\"\"Download, installation and configuration of biological data.\n\"\"\"\n"
},
{
"path": "cloudbio/biodata/galaxy.py",
"chars": 9239,
"preview": "\"\"\"Retrieve indexed genomes using Galaxy's rsync server resources.\n\nhttp://wiki.galaxyproject.org/Admin/Data%20Integrati"
},
{
"path": "cloudbio/biodata/genomes.py",
"chars": 48177,
"preview": "\"\"\"Download and install structured genome data and aligner index files.\n\nDownloads prepared FASTA, indexes for aligners "
},
{
"path": "cloudbio/biodata/ggd.py",
"chars": 5256,
"preview": "\"\"\"Process GGD (Get Genomics Data) configurations for installation in biodata directories.\n\nBuilds off work done by Aaro"
},
{
"path": "cloudbio/biodata/rnaseq.py",
"chars": 1394,
"preview": "\"\"\"Infrastructure for RNA-seq supporting files.\n\"\"\"\nimport os\nimport subprocess\n\nfrom cloudbio.custom import shared\n\ndef"
},
{
"path": "cloudbio/cloudbiolinux.py",
"chars": 1916,
"preview": "\"\"\"CloudBioLinux specific scripts\n\"\"\"\nimport os\nfrom fabric.api import *\nfrom fabric.contrib.files import *\n\nfrom cloudb"
},
{
"path": "cloudbio/cloudman.py",
"chars": 16115,
"preview": "\"\"\"Build instructions associated with CloudMan.\n\nhttp://wiki.g2.bx.psu.edu/Admin/Cloud\n\nAdapted from Enis Afgan's code: "
},
{
"path": "cloudbio/config_management/__init__.py",
"chars": 132,
"preview": "\"\"\" This module contains code related to integrating the configuration\nmanagement tools `chef` and `puppet` into CloudBi"
},
{
"path": "cloudbio/config_management/chef.py",
"chars": 3194,
"preview": "import os\nimport json\n\nfrom fabric.api import cd\nfrom fabric.contrib import files\nfrom fabric.state import _AttributeDic"
},
{
"path": "cloudbio/config_management/puppet.py",
"chars": 2026,
"preview": "from fabric.state import _AttributeDict\nfrom fabric.api import cd\n\nfrom utils import upload_config, config_dir, build_pr"
},
{
"path": "cloudbio/config_management/utils.py",
"chars": 3021,
"preview": "from tempfile import mkdtemp\nimport os\nfrom fabric.api import settings, local, put, sudo, cd\nfrom fabric.contrib import "
},
{
"path": "cloudbio/custom/__init__.py",
"chars": 83,
"preview": "\"\"\"Fabric sub-modules providing custom installation for non-packaged programs.\n\"\"\"\n"
},
{
"path": "cloudbio/custom/bio_general.py",
"chars": 3811,
"preview": "\"\"\"Custom installs for biological packages.\n\"\"\"\nimport os\n\nfrom fabric.api import *\nfrom fabric.contrib.files import *\n\n"
},
{
"path": "cloudbio/custom/bio_nextgen.py",
"chars": 32360,
"preview": "\"\"\"Install next gen sequencing analysis tools not currently packaged.\n\"\"\"\nfrom __future__ import print_function\nimport o"
},
{
"path": "cloudbio/custom/bio_proteomics.py",
"chars": 22414,
"preview": "\"\"\"Install proteomics tools not currently packaged.\n\"\"\"\n\nimport os\nimport re\n\nfrom fabric.api import cd\nfrom fabric.cont"
},
{
"path": "cloudbio/custom/bio_proteomics_wine.py",
"chars": 2960,
"preview": "from fabric.api import cd\n\nfrom shared import (_make_tmp_dir, _fetch_and_unpack, _write_to_file, _get_bin_dir)\n\nimport o"
},
{
"path": "cloudbio/custom/cloudman.py",
"chars": 4819,
"preview": "\"\"\"Custom install scripts for CloudMan environment.\n\nFrom Enis Afgan: https://bitbucket.org/afgane/mi-deployment\n\"\"\"\nimp"
},
{
"path": "cloudbio/custom/distributed.py",
"chars": 1086,
"preview": "\"\"\"Install instructions for distributed MapReduce style programs.\n\"\"\"\nimport os\n\nfrom fabric.api import *\nfrom fabric.co"
},
{
"path": "cloudbio/custom/galaxy.py",
"chars": 472,
"preview": "\"\"\"\nInstall any components that fall under 'galaxy' directive in main.yaml\n\"\"\"\nfrom cloudbio.galaxy import _setup_users\n"
},
{
"path": "cloudbio/custom/galaxy_tools.py",
"chars": 265,
"preview": "\"\"\"\nInstall any components that fall under 'galaxy_tools' directive in main.yaml\n\"\"\"\nfrom cloudbio.galaxy.tools import _"
},
{
"path": "cloudbio/custom/galaxyp.py",
"chars": 2287,
"preview": "\"\"\"\n\"\"\"\n\nfrom cloudbio.galaxy.utils import _chown_galaxy\n\nfrom fabric.contrib.files import *\n\nfrom shared import _write_"
},
{
"path": "cloudbio/custom/java.py",
"chars": 756,
"preview": "\"\"\"Install instructions for non-packaged java programs.\n\"\"\"\nimport os\n\nfrom fabric.api import *\nfrom fabric.contrib.file"
},
{
"path": "cloudbio/custom/millstone.py",
"chars": 633,
"preview": "\"\"\"Install instructions for non-packaged programs required by Millstone.\n\"\"\"\n\nfrom fabric.api import cd\n\nfrom cloudbio.c"
},
{
"path": "cloudbio/custom/phylogeny.py",
"chars": 2360,
"preview": "\"\"\"Install instructions for non-packaged phyologeny programs.\n\"\"\"\nimport os\n\nfrom fabric.api import *\nfrom fabric.contri"
},
{
"path": "cloudbio/custom/python.py",
"chars": 1897,
"preview": "\"\"\"Install instructions for python libraries not ready for easy_install.\n\"\"\"\nimport os\n\nfrom fabric.api import *\nfrom fa"
},
{
"path": "cloudbio/custom/shared.py",
"chars": 30622,
"preview": "\"\"\"Reusable decorators and functions for custom installations.\n\"\"\"\nfrom __future__ import print_function\nfrom contextlib"
},
{
"path": "cloudbio/custom/system.py",
"chars": 4364,
"preview": "\"\"\"\nInstall system programs not available from packages.\n\"\"\"\nimport os\n\nfrom fabric.api import cd\n\nfrom cloudbio.custom "
},
{
"path": "cloudbio/custom/vcr.py",
"chars": 16259,
"preview": "#\n# vcr.py\n# - Configures the environment for running the Viral Assembly (viral_assembly_pipeline.py) and VIGOR (VIGOR3"
},
{
"path": "cloudbio/custom/versioncheck.py",
"chars": 2864,
"preview": "\"\"\"Tool specific version checking to identify out of date dependencies.\n\nThis provides infrastructure to check version s"
},
{
"path": "cloudbio/deploy/__init__.py",
"chars": 14127,
"preview": "#!/usr/bin/env python\nfrom __future__ import print_function\nimport os\n\nfrom tempfile import tempdir\nfrom subprocess impo"
},
{
"path": "cloudbio/deploy/config.py",
"chars": 437,
"preview": "import inspect\nimport os\nimport yaml\n\n\ndef parse_settings(name=\"deploy/settings.yaml\"):\n return _read_yaml(_path_from"
},
{
"path": "cloudbio/deploy/main.py",
"chars": 3801,
"preview": "from argparse import ArgumentParser\nimport yaml\n\nfrom cloudbio.deploy import deploy\n\nDESC = \"Creates an on-demand cloud "
},
{
"path": "cloudbio/deploy/plugins/__init__.py",
"chars": 525,
"preview": "\"\"\"\nDeploy plugins.\n\nlocal_actions:\n These actions occur before a VM has been created, if only local actions are specif"
},
{
"path": "cloudbio/deploy/plugins/cloudman.py",
"chars": 7052,
"preview": "from __future__ import print_function\nfrom datetime import datetime\nfrom os.path import exists, join\nfrom os import list"
},
{
"path": "cloudbio/deploy/plugins/galaxy.py",
"chars": 368,
"preview": "from cloudbio.galaxy.tools import _install_application\n\n\ndef install_tool(options):\n version = options.get(\"galaxy_to"
},
{
"path": "cloudbio/deploy/plugins/gvl.py",
"chars": 15787,
"preview": "\"\"\"\nDeployer plugin containing actions related to older galaxy-vm-launcher functionality.\n\"\"\"\nfrom __future__ import pri"
},
{
"path": "cloudbio/deploy/util.py",
"chars": 1853,
"preview": "from string import Template\nfrom time import strftime\nimport os\n\nfrom fabric.api import local, sudo, env, put, get\nfrom "
},
{
"path": "cloudbio/deploy/vmlauncher/__init__.py",
"chars": 17511,
"preview": "from __future__ import print_function\nimport os\nimport time\n\nfrom libcloud.compute.ssh import SSHClient\nfrom libcloud.co"
},
{
"path": "cloudbio/deploy/vmlauncher/config.md",
"chars": 2462,
"preview": "# Configuring Cloud Parameters\n\nCurrently four different virtual machine providers are implemented: `aws`\n(default), `op"
},
{
"path": "cloudbio/deploy/vmlauncher/transfer.py",
"chars": 11794,
"preview": "from __future__ import print_function\nimport os\nimport gzip\n\nfrom operator import itemgetter\nfrom sys import exit\nfrom t"
},
{
"path": "cloudbio/deploy/volume.py",
"chars": 8821,
"preview": "from __future__ import print_function\nfrom fabric.api import run, env\nfrom time import sleep\nfrom boto.exception import "
},
{
"path": "cloudbio/distribution.py",
"chars": 11419,
"preview": "\"\"\"Configuration details for specific server types.\n\nThis module contains functions that help with initializing a Fabric"
},
{
"path": "cloudbio/fabutils.py",
"chars": 7832,
"preview": "\"\"\"Utilities to generalize usage of fabric for local and remote builds.\n\nHandles:\n - Providing a local equivalent of st"
},
{
"path": "cloudbio/flavor/__init__.py",
"chars": 2712,
"preview": "\"\"\"A Flavor reflects a specialization of a base install, the default being BioLinux.\n\nIf you want to create a new specia"
},
{
"path": "cloudbio/flavor/config.py",
"chars": 969,
"preview": "\"\"\"\nHandle alternative configuration file locations for flavor customizations.\n\"\"\"\nimport os\nimport collections\n\ndef _fi"
},
{
"path": "cloudbio/galaxy/__init__.py",
"chars": 25492,
"preview": "\"\"\"\nAdapted from Enis Afgan's mi-deployment code:\nhttps://bitbucket.org/afgane/mi-deployment\n\"\"\"\nfrom __future__ import "
},
{
"path": "cloudbio/galaxy/applications.py",
"chars": 19563,
"preview": "\"\"\"\nThis file is largely derived from a similar file in mi-deployment written Dr.\nEnis Afgan.\n\nhttps://bitbucket.org/afg"
},
{
"path": "cloudbio/galaxy/r.py",
"chars": 1089,
"preview": "import os\nimport tempfile\n\n\nfrom cloudbio.custom.shared import _make_tmp_dir\nfrom fabric.api import sudo, put, cd\n\nr_pac"
},
{
"path": "cloudbio/galaxy/tools.py",
"chars": 10037,
"preview": "import os\nfrom string import Template\n\nimport six\nimport yaml\n\nfrom cloudbio.custom.bio_general import *\nfrom cloudbio.c"
},
{
"path": "cloudbio/galaxy/utils.py",
"chars": 878,
"preview": "from fabric.api import sudo\nfrom fabric.contrib.files import exists\n\n\ndef _read_boolean(env, name, default):\n ## TODO"
},
{
"path": "cloudbio/libraries.py",
"chars": 4578,
"preview": "\"\"\"Installers for programming language specific libraries.\n\"\"\"\nimport os\n\nfrom fabric.api import env, cd, settings\nfrom "
},
{
"path": "cloudbio/manifest.py",
"chars": 10851,
"preview": "\"\"\"Provide dump of software and libraries installed on CloudBioLinux image.\n\nThis provides an output YAML file with pack"
},
{
"path": "cloudbio/package/__init__.py",
"chars": 3366,
"preview": "\"\"\"Install software and configure package managers.\n\"\"\"\nfrom __future__ import print_function\nimport os\n\n\ndef _configure"
},
{
"path": "cloudbio/package/brew.py",
"chars": 18385,
"preview": "\"\"\"Install packages via the MacOSX Homebrew and Linux Linuxbrew package manager.\nhttps://github.com/mxcl/homebrew\nhttps:"
},
{
"path": "cloudbio/package/conda.py",
"chars": 16250,
"preview": "\"\"\"Install packages via the Conda package manager: http://conda.pydata.org/\n\"\"\"\nfrom __future__ import print_function\nim"
},
{
"path": "cloudbio/package/cpan.py",
"chars": 3266,
"preview": "\"\"\"Install perl packages using CPAN and cpanminus (cpanm).\n\"\"\"\nimport os\n\nfrom fabric.api import cd, settings\n\nfrom clou"
},
{
"path": "cloudbio/package/deb.py",
"chars": 8239,
"preview": "\"\"\"\nAutomated installation on debian package systems with apt.\n\"\"\"\nfrom fabric.api import *\nfrom fabric.contrib.files im"
},
{
"path": "cloudbio/package/nix.py",
"chars": 2189,
"preview": "\"\"\"Install software with the Nix package manager.\n\"\"\"\nfrom fabric.api import *\nfrom fabric.contrib.files import *\n\nfrom "
},
{
"path": "cloudbio/package/rpm.py",
"chars": 2343,
"preview": "\"\"\"Automated installation on RPM systems with the yum package manager.\n\"\"\"\nimport itertools\nfrom fabric.api import *\nfro"
},
{
"path": "cloudbio/package/shared.py",
"chars": 2250,
"preview": "\"\"\"Shared functionality useful for multiple package managers.\n\"\"\"\nfrom __future__ import print_function\nimport yaml\n\ndef"
},
{
"path": "cloudbio/utils.py",
"chars": 7086,
"preview": "\"\"\"Utilities for logging and progress tracking.\n\"\"\"\nimport logging\nimport os\nimport sys\n\nfrom fabric.main import load_se"
},
{
"path": "config/README.md",
"chars": 1791,
"preview": "This directory contains details of the software installed with\n[CloudBioLinux][1]. This is the right place to dig around"
},
{
"path": "config/biodata.yaml",
"chars": 5896,
"preview": "---\n# Configuration file defining biological data to retrieve and install.\n# These are stored in an Amazon S3 buckets:\n#"
},
{
"path": "config/chef/cookbooks/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "config/chef_recipes.yaml",
"chars": 90,
"preview": "minimal:\nglobus:\n - globus::client-tools\n - globus::myproxy\n - globus::gridftp-default\n"
},
{
"path": "config/custom.yaml",
"chars": 1970,
"preview": "---\n# List of custom programs to install that are not packaged. These each require\n# a install_name function in the appr"
},
{
"path": "config/fabricrc.txt",
"chars": 5059,
"preview": "# Configuration file for Fabric specifying filesystem information\n# This can be adjusted and passed to fab on the comman"
},
{
"path": "config/haskell-libs.yaml",
"chars": 419,
"preview": "---\n# Define haskell libraries to be installed via cabal.\ncabal:\n # base\n - network\n - cabal-install\n - cabal-dev\n "
},
{
"path": "config/main.yaml",
"chars": 876,
"preview": "---\n# Top level configuration file that specifies which groups of programs\n# should be installed. New sections that are "
},
{
"path": "config/node_extra.json",
"chars": 285,
"preview": "// Properties used by chef to configure node. Merged with fabric\n// properties to produce final node.json file used by C"
},
{
"path": "config/packages-debian.yaml",
"chars": 502,
"preview": "---\n# Debian packages use the standard packages.yaml list with naming substitutions\n# specified in this file.\nclojure: \""
},
{
"path": "config/packages-homebrew.yaml",
"chars": 815,
"preview": "# Packages available in the Homebrew and Linuxbrew package manager\n---\nbio_nextgen:\n alignment:\n - bwa\n - bowtie2"
},
{
"path": "config/packages-nix.yaml",
"chars": 294,
"preview": "# These are packages defined in Nix packages. Nix is an independent and\n# distribution agnostic packaging system, with m"
},
{
"path": "config/packages-scientificlinux.yaml",
"chars": 3061,
"preview": "minimal:\n version_control:\n - git-core\n - subversion\n - mercurial\n build:\n - cmake\n - gcc\n - gcc-c++"
},
{
"path": "config/packages-yum.yaml",
"chars": 4611,
"preview": "---\n# List of packages to be installed via the yum package manager.\nminimal:\n version_control:\n - git-core\n - sub"
},
{
"path": "config/packages.yaml",
"chars": 11210,
"preview": "---\n# List of packages to be installed via the package manager.\n#\n# This is derived from the Infochimps machetEC2 packag"
},
{
"path": "config/perl-libs.yaml",
"chars": 4505,
"preview": "---\n# Configuration file defining perl libraries to install via CPAN\ncpan:\n# build\n - 'Locale::Maketext::Lexicon'\n - '"
},
{
"path": "config/puppet/modules/.gitkeep",
"chars": 0,
"preview": ""
},
{
"path": "config/puppet_classes.yaml",
"chars": 428,
"preview": "minimal:\nbiocloudcentral:\n # Idea is still being developed, I wonder if actually defining a puppet class\n # like cloud"
},
{
"path": "config/python-libs.yaml",
"chars": 1179,
"preview": "---\n# Configuration file defining python specific libraries on pypi to install\n# using easy install\npypi:\n # infrastruc"
},
{
"path": "config/r-libs.yaml",
"chars": 2764,
"preview": "---\n# Configuration file defining R specific libraries that are installed \n# via CRAN and Bioconductor.\ncranrepo: http:/"
},
{
"path": "config/ruby-libs.yaml",
"chars": 1125,
"preview": "---\n# Configuration file defining ruby libraries to install\n# using gem install (avoiding the distribution package manag"
},
{
"path": "contrib/__init__.py",
"chars": 44,
"preview": "\"\"\"Module containing user contributions\n\"\"\"\n"
},
{
"path": "contrib/flavor/__init__.py",
"chars": 23,
"preview": "\"\"\"BioLinux flavors\"\"\"\n"
},
{
"path": "contrib/flavor/biocloudcentral/main.yaml",
"chars": 40,
"preview": "packages:\n - minimal\n - biocloudcentral\n"
},
{
"path": "contrib/flavor/biopython/custom.yaml",
"chars": 43,
"preview": "---\nbio_general:\n - anaconda\nbio_nextgen:\n"
},
{
"path": "contrib/flavor/biopython/fabricrc.txt",
"chars": 1210,
"preview": "# Configuration file for Fabric specifying filesystem information\n# This can be adjusted and passed to fab on the comman"
},
{
"path": "contrib/flavor/biopython/main.yaml",
"chars": 266,
"preview": "---\n# Flavor containing with minimal instructions to install tools for\n# running next-generation sequencing pipelines.\np"
},
{
"path": "contrib/flavor/biopython/packages-homebrew.yaml",
"chars": 195,
"preview": "# Packages available in the Homebrew and Linuxbrew package manager\n---\nbio_alignment:\n - clustal-w\n - clustal-omega\n "
},
{
"path": "contrib/flavor/biopython/packages.yaml",
"chars": 678,
"preview": "---\nminimal:\n build:\n - cmake\n - gcc\n - g++\n - gfortran\n - libtool\n - make\n - patch\n - pkg-conf"
},
{
"path": "contrib/flavor/biopython/python-libs.yaml",
"chars": 119,
"preview": "---\npypi:\n - MySQL-python\n - psycopg2\n - rdflib\n - reportlab\nconda:\n - ipython\n - matplotlib\n - numpy\n - pandas"
},
{
"path": "contrib/flavor/boinc/__init__.py",
"chars": 123,
"preview": "\"\"\"Boinc flavor\n\n Copyright (C) 2011 Pjotr Prins <pjotr.prins@thebird.nl> and Steffen Moeller <moeller@debian.org>\n\n\"\""
},
{
"path": "contrib/flavor/boinc/boincflavor.py",
"chars": 710,
"preview": "from fabric.api import *\nfrom fabric.contrib.files import *\n\nfrom cloudbio.flavor import Flavor\n\nfrom cloudbio.custom.sh"
},
{
"path": "contrib/flavor/boinc/fabricrc_debian.txt",
"chars": 406,
"preview": "edition = minimal\nedition_version = 0.10\nflavor_path = boinc.boincflavor\ndistribution = debian\ndist_name = squeeze\n\n# Up"
},
{
"path": "contrib/flavor/boinc/main.yaml",
"chars": 109,
"preview": "---\npackages:\n # - minimal : skip, as we don't need the build system\n - boinc\n - desktop-cloud\nlibraries:\n"
},
{
"path": "contrib/flavor/cloudman/README.md",
"chars": 4619,
"preview": "# CloudMan Flavors\n\nThis document briefly describes the CloudMan/[Galaxy][10] flavors of\nCloudBioLinux available and how"
},
{
"path": "contrib/flavor/cloudman/cloudman/main.yaml",
"chars": 50,
"preview": "---\npackages:\n - minimal\n - cloudman\nlibraries:\n"
},
{
"path": "contrib/flavor/cloudman/cloudman_and_galaxy/main.yaml",
"chars": 61,
"preview": "---\npackages:\n - minimal\n - cloudman\n - galaxy\nlibraries:\n"
},
{
"path": "contrib/flavor/cloudman/cloudman_and_galaxyp/main.yaml",
"chars": 97,
"preview": "---\npackages:\n - minimal\n - cloudman\n - galaxy\n - galaxyp\n - bio_proteomics_wine\nlibraries:\n"
},
{
"path": "contrib/flavor/cloudman/cloudman_desktop_and_galaxyp/main.yaml",
"chars": 154,
"preview": "---\npackages:\n - minimal\n - ruby\n# - python\n# - databases\n - cloudman\n - galaxy\n - galaxyp\n - java\n - desktop\nl"
},
{
"path": "contrib/flavor/cloudman/cloudman_desktop_and_galaxyp/ruby-libs.yaml",
"chars": 230,
"preview": "---\n# Configuration file defining ruby libraries to install\n# using gem install (avoiding the distribution package manag"
},
{
"path": "contrib/flavor/cloudman/migration_checklist.md",
"chars": 10167,
"preview": "This document is meant to layout work that is to be done and/or that\nhas been done in merging mi-deployment and galaxy-v"
},
{
"path": "contrib/flavor/cloudman/tools.yaml",
"chars": 2125,
"preview": "---\napplications:\n ## Coming from bio_nextgen.py\n # bwa: \"0.5.9\" # ToolShed installed\n ucsc_tools: \"default\"\n # bowt"
},
{
"path": "contrib/flavor/cwl_dockers/packages-bcbio-alignment.yaml",
"chars": 152,
"preview": "bcbio-process_alignment:\n- bwa\n- bwakit\n- novoalign\n- snap-aligner\n- samtools\n- sambamba\n- fgbio\n- umis\n- biobambam\nchan"
},
{
"path": "contrib/flavor/demo/README.md",
"chars": 75,
"preview": "Small example flavor to demonstrate ability to provide local installations\n"
},
{
"path": "contrib/flavor/demo/custom.yaml",
"chars": 16,
"preview": "---\nbio_nextgen:"
},
{
"path": "contrib/flavor/demo/fabricrc.txt",
"chars": 667,
"preview": "# -- Installation details\n\n# Global installation directory for packages and standard programs\nsystem_install = ~/tmp/cbl"
},
{
"path": "contrib/flavor/demo/main.yaml",
"chars": 124,
"preview": "---\n# Demonstration flavor for learning use\npackages:\n - minimal\n - libraries\n - python\n - java\n - ruby\n - bio_nex"
},
{
"path": "contrib/flavor/demo/packages-homebrew.yaml",
"chars": 193,
"preview": "# Packages available in the Homebrew and Linuxbrew package manager\n---\nbio_nextgen:\n alignment:\n - bwa\n utilities:\n"
},
{
"path": "contrib/flavor/edx_course/custom.yaml",
"chars": 27,
"preview": "---\nbio_nextgen:\n - picard"
},
{
"path": "contrib/flavor/edx_course/edx_setup.sh",
"chars": 1565,
"preview": "#!/bin/bash\nset -eu -o pipefail\n\n# Vagrant installation script for creating VM to use in the\n# edX variant analysis cour"
},
{
"path": "contrib/flavor/edx_course/fabricrc.txt",
"chars": 1124,
"preview": "# Configuration file for Fabric specifying filesystem information\n# This can be adjusted and passed to fab on the comman"
},
{
"path": "contrib/flavor/edx_course/main.yaml",
"chars": 241,
"preview": "---\n# Flavor containing with minimal instructions to install tools for\n# running next-generation sequencing pipelines.\np"
},
{
"path": "contrib/flavor/edx_course/packages-homebrew.yaml",
"chars": 399,
"preview": "# Packages available in the Homebrew and Linuxbrew package manager\n---\nminimal:\n - cmake\nperl:\nbio_nextgen:\n alignment"
},
{
"path": "contrib/flavor/edx_course/python-libs.yaml",
"chars": 22,
"preview": "---\npypi:\n - cutadapt"
},
{
"path": "contrib/flavor/globus/main.yaml",
"chars": 47,
"preview": "---\npackages:\n - minimal\n - globus\nlibraries:"
},
{
"path": "contrib/flavor/millstone/README.md",
"chars": 5858,
"preview": "The Millstone flavor of cloudbiolinux eases deploying the Church Lab's\n[Millstone](http://churchlab.github.io/millstone/"
},
{
"path": "contrib/flavor/millstone/__init__.py",
"chars": 0,
"preview": ""
},
{
"path": "contrib/flavor/millstone/custom.yaml",
"chars": 49,
"preview": "---\njava: []\n\npython: []\n\nmillstone:\n - unafold\n"
},
{
"path": "contrib/flavor/millstone/installer.py",
"chars": 10662,
"preview": "from fabric.api import *\nfrom fabric.contrib.files import *\nimport os.path\nimport os\n\nINSTALLATION_PATH = \"$HOME/millsto"
},
{
"path": "contrib/flavor/millstone/main.yaml",
"chars": 222,
"preview": "---\n# NOTE: rabbitmq and postgres installed manually.\npackages:\n - minimal\n - libraries\n - amazon\n - python\n - ruby"
},
{
"path": "contrib/flavor/millstone/millstoneflavor.py",
"chars": 1340,
"preview": "\"\"\"\nMillstone flavor.\n\"\"\"\n\nfrom fabric.api import *\nfrom fabric.contrib.files import *\n\nfrom cloudbio.custom import shar"
},
{
"path": "contrib/flavor/millstone/python-libs.yaml",
"chars": 85,
"preview": "---\n# Millstone libaries installed from millstone's requirements/deploy.txt\npypi: []\n"
},
{
"path": "contrib/flavor/minimal/fabricrc_debian.txt",
"chars": 841,
"preview": "# BioNode configuration file for Fabric specifying filesystem information\n#\n# For example, to configure a minimal instal"
},
{
"path": "contrib/flavor/minimal/main.yaml",
"chars": 404,
"preview": "---\n# Top level configuration file that specifies which groups of programs\n# should be installed. New sections that are "
},
{
"path": "contrib/flavor/neuro/__init__.py",
"chars": 31,
"preview": "\"\"\"Neuroinformatics flavor\n\"\"\"\n"
},
{
"path": "contrib/flavor/neuro/custom.yaml",
"chars": 17,
"preview": "---\nbio_nextgen:\n"
},
{
"path": "contrib/flavor/neuro/fabricrc.txt",
"chars": 281,
"preview": "edition = minimal\nflavor_path = neuro.neuro\ndistribution = ubuntu\nuser = vagrant \n\n# Global installation directory for p"
},
{
"path": "contrib/flavor/neuro/main.yaml",
"chars": 37,
"preview": "---\npackages:\n - neuro\nlibraries:\n"
},
{
"path": "contrib/flavor/neuro/neuro.py",
"chars": 1276,
"preview": "from fabric.api import *\nfrom fabric.contrib.files import *\n\nfrom cloudbio.flavor import Flavor\n\n# This flavour installs"
},
{
"path": "contrib/flavor/neuro/packages.yaml",
"chars": 9241,
"preview": "---\nneuro:\n - afni\n - afni-atlases\n - afni-common\n - afni-dbg\n - afni-dev\n - aghermann\n - ants\n - arno-iptables-"
},
{
"path": "contrib/flavor/ngs_pipeline_minimal/custom.yaml",
"chars": 66,
"preview": "---\nbio_nextgen:\n alignment:\n utilities:\n analysis:\n variant:\n"
},
{
"path": "contrib/flavor/ngs_pipeline_minimal/main.yaml",
"chars": 236,
"preview": "---\n# Flavor containing with minimal instructions to install tools for\n# running next-generation sequencing pipelines.\np"
},
{
"path": "contrib/flavor/ngs_pipeline_minimal/packages-conda.yaml",
"chars": 9638,
"preview": "---\n# some ideas how to debug solving issues\n# not pinning is best - easier to solve, assuming latest possible versions\n"
},
{
"path": "contrib/flavor/ngs_pipeline_minimal/packages-homebrew.yaml",
"chars": 1179,
"preview": "# Packages available in the Homebrew and Linuxbrew package manager\n---\n# Migrated to use conda/bioconda builds\nto_remove"
},
{
"path": "contrib/flavor/ngs_pipeline_minimal/perl-libs.yaml",
"chars": 1193,
"preview": "---\ncpan:\n # mirdeep2\n # - PDF::API2\n # ## VEP dependencies\n # don't need with conda perl\n # - parent\n # - Archive"
},
{
"path": "contrib/flavor/ngs_pipeline_minimal/r-libs.yaml",
"chars": 184,
"preview": "---\ncranrepo: http://cran.fhcrc.org/\ncran:\ncran-after-bioc:\n#biocrepo: http://bioconductor.org/biocLite.R\nbioc:\ngithub:\n"
},
{
"path": "contrib/flavor/phylogeny/__init__.py",
"chars": 86,
"preview": "\"\"\"Phylogeny flavor\n\n Copyright (C) 2011 Pjotr Prins <pjotr.prins@thebird.nl>\n\n\"\"\"\n\n"
},
{
"path": "contrib/flavor/phylogeny/fabricrc_debian.txt",
"chars": 678,
"preview": "# Flavor targetting runningy Phylogeny programs in the Cloud,\n# such as recent edititions of mrbayes-mpi, raxml, BEAST a"
},
{
"path": "contrib/flavor/phylogeny/fabricrc_ubuntu.txt",
"chars": 680,
"preview": "# Flavor targetting runningy Phylogeny programs in the Cloud,\n# such as recent edititions of mrbayes-mpi, raxml, BEAST a"
},
{
"path": "contrib/flavor/phylogeny/install_debian.sh",
"chars": 671,
"preview": "#! /bin/sh \n#\n# Install the biolinux-phylogeny on a host\n#\n# Usage:\n#\n# ./contrib/flavor/phylogeny/install_host.sh use"
},
{
"path": "contrib/flavor/phylogeny/install_ubuntu.sh",
"chars": 671,
"preview": "#! /bin/sh \n#\n# Install the biolinux-phylogeny on a host\n#\n# Usage:\n#\n# ./contrib/flavor/phylogeny/install_host.sh use"
},
{
"path": "contrib/flavor/phylogeny/main.yaml",
"chars": 125,
"preview": "---\npackages:\n - minimal\n - desktop-cloud\n - editors\n - phylogeny\n # - phylogeny_testing - Debian only\n - r\nlibrar"
},
{
"path": "contrib/flavor/phylogeny/phylogenyflavor.py",
"chars": 1031,
"preview": "from fabric.api import *\nfrom fabric.contrib.files import *\nfrom fabfile import _freenx_scripts\n\nfrom cloudbio.flavor im"
},
{
"path": "contrib/flavor/phylogeny/virtualbox.md",
"chars": 1806,
"preview": "= Running the Phylogeny VM in Virtualbox\n\n== Install Virtualbox\n\nDownload and install Virtualbox from https://www.virtua"
},
{
"path": "contrib/flavor/pjotrp/__init__.py",
"chars": 22,
"preview": "\"\"\"Pjotr's flavors\"\"\"\n"
},
{
"path": "contrib/flavor/pjotrp/biotest/__init__.py",
"chars": 82,
"preview": "\"\"\"BioTest flavor\n\n Copyright (C) 2011 Pjotr Prins <pjotr.prins@thebird.nl>\n\"\"\"\n"
},
{
"path": "contrib/flavor/pjotrp/biotest/biotestflavor.py",
"chars": 1800,
"preview": "from fabric.api import *\nfrom fabric.contrib.files import *\n\nfrom cloudbio.flavor import Flavor\n\nfrom cloudbio.custom.sh"
},
{
"path": "contrib/flavor/pjotrp/biotest/fabricrc_debian.txt",
"chars": 416,
"preview": "edition = minimal\nedition_version = 0.10\nflavor_path = pjotrp.biotest.biotestflavor\ndistribution = debian\ndist_name = sq"
},
{
"path": "contrib/flavor/pjotrp/biotest/main.yaml",
"chars": 435,
"preview": "---\n# Top level configuration file that specifies which groups of programs\n# should be installed. New sections that are "
},
{
"path": "contrib/flavor/proteomics/galaxyp/README.md",
"chars": 373,
"preview": "This directory contains a stand-alone (no CloudMan) Galaxy-P flavor\nfor installing Galaxy-P in desktop or cluster enviro"
},
{
"path": "contrib/flavor/proteomics/galaxyp/main.yaml",
"chars": 60,
"preview": "---\npackages:\n - minimal\n - galaxy\n - galaxyp\nlibraries:\n"
},
{
"path": "contrib/flavor/proteomics/galaxyp/settings-sample-galaxyp.yaml",
"chars": 766,
"preview": "--- "
},
{
"path": "contrib/flavor/proteomics/galaxyp/tools.yaml",
"chars": 2710,
"preview": "--- \n\n# This file contains the install procedures for Galaxy tools used by\n# the stock Galaxy-P configuration (hosted on"
},
{
"path": "contrib/flavor/proteomics/swift/custom.yaml",
"chars": 221,
"preview": "---\nbio_proteomics_wine:\n - proteomics_wine_env\n - proteowizard\nbio_proteomics:\n - idpqonvert\n - myrimatch\n# - tran"
},
{
"path": "contrib/flavor/proteomics/swift/main.yaml",
"chars": 305,
"preview": "---\npackages:\n # Editors, utils, build systems, system administration, etc..\n - minimal\n # Brings in JDK and maven\n "
},
{
"path": "contrib/flavor/proteomics/swift/r-libs.yaml",
"chars": 203,
"preview": "---\n# Configuration file defining R specific libraries that are installed \n# via CRAN and Bioconductor.\ncranrepo: http:/"
},
{
"path": "contrib/flavor/seal/__init__.py",
"chars": 19,
"preview": "\"\"\"Seal flavor\n\"\"\"\n"
},
{
"path": "contrib/flavor/seal/fabricrc_sl.txt",
"chars": 347,
"preview": "edition = minimal\n#edition_version = 0.10\nflavor_path = seal.sealflavor\ndistribution = scientificlinux\n#dist_name = \n\n# "
},
{
"path": "contrib/flavor/seal/main.yaml",
"chars": 77,
"preview": "---\npackages:\n - distributed\n - java\n - programming\n - python\nlibraries:\n"
},
{
"path": "contrib/flavor/seal/sealflavor.py",
"chars": 1798,
"preview": "from fabric.api import *\nfrom fabric.contrib.files import *\n\nfrom cloudbio.flavor import Flavor\n\nfrom cloudbio.custom.sh"
},
{
"path": "contrib/flavor/variantviz/custom.yaml",
"chars": 279,
"preview": "---\n# List of custom programs to install that are not packaged. These each require \n# a install_name function in the app"
},
{
"path": "contrib/flavor/variantviz/fabricrc.txt",
"chars": 1319,
"preview": "# Configuration file for Fabric specifying filesystem information\n# This can be adjusted and passed to fab on the comman"
},
{
"path": "contrib/flavor/variantviz/main.yaml",
"chars": 330,
"preview": "---\n# Top level configuration file that specifies which groups of programs\n# should be installed. New sections that are "
},
{
"path": "contrib/flavor/variantviz/packages-yum.yaml",
"chars": 502,
"preview": "---\n# List of packages to be installed via the yum package manager.\nprogramming:\n build:\n - gcc\n - gcc-c++\n - "
},
{
"path": "contributors.mkd",
"chars": 423,
"preview": "## Contributors\n\nAn open source project is nothing without the great people who generously\ncontribute time and expertise"
},
{
"path": "data_fabfile.py",
"chars": 3179,
"preview": "\"\"\"Fabric deployment file to install genomic data on remote instances.\n\nDesigned to automatically download and manage bi"
},
{
"path": "deploy/README.md",
"chars": 4193,
"preview": "# CloudBioLinux Deployer\n\nThis CloudBioLinux deployer has grown out of the galaxy-vm-launcher and\ncan be used to launch "
},
{
"path": "deploy/TODO",
"chars": 282,
"preview": "TODO: Refactor use_existing_instance out of cloud specific config, make command-line option\nTODO: Allow specification of"
},
{
"path": "deploy/Vagrantfile",
"chars": 4811,
"preview": "# -*- mode: ruby -*-\n# vi: set ft=ruby :\n\n# Vagrantfile API/syntax version. Don't touch unless you know what you're doin"
},
{
"path": "deploy/cloudman.html",
"chars": 3878,
"preview": "<h1>CloudBioLinux Deployer CloudMan QuickStart</h1>\n\n<p>As far as I can determine there is no current documentation on h"
},
{
"path": "deploy/cloudman.md",
"chars": 6363,
"preview": "# CloudBioLinux Deployer CloudMan QuickStart\n\nAs far as I can determine there is no current documentation on how to buil"
},
{
"path": "deploy/config/tool_data_table_conf.xml",
"chars": 5792,
"preview": "<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision"
},
{
"path": "deploy/deploy.sh",
"chars": 834,
"preview": "#!/bin/bash\n\n# Name of virtualenv to create using virtualenvwrapper\nVIRTUALENV_NAME=cbl_deploy\n\n# Ensure working directo"
},
{
"path": "deploy/deploy_bourne.sh",
"chars": 1294,
"preview": "#!/bin/sh\n\nVIRTUALENV_VERSION=${VIRTUALENV_VERSION:-1.10.1}\n\ncd `dirname $0`\nPROJECT_DIRECTORY=${PROJECT_DIRECTORY:-`pwd"
},
{
"path": "deploy/deploy_no_deps.sh",
"chars": 187,
"preview": "#!/bin/sh\n\nexport PROJECT_DIRECTORY=\".\"\n\n# Add cloudbiolinux to python path and run deployment.\nexport PYTHONPATH=..:$PY"
},
{
"path": "deploy/requirements.txt",
"chars": 69,
"preview": "fabric\nparamiko==2.10.1\nargparse\npyyaml\napache-libcloud==0.11.3\nboto\n"
},
{
"path": "deploy/settings-sample-cm.yaml",
"chars": 4962,
"preview": "--- "
},
{
"path": "deploy/settings-sample-minimal.yaml",
"chars": 1826,
"preview": " --- "
},
{
"path": "deploy/settings-sample-oldgalaxyvmlauncher.yaml",
"chars": 10699,
"preview": "---\n\n## Set VM provider, default is aws. Other options include\n## vagrant, openstack, and eucalyptus. Specify connection"
},
{
"path": "deploy/test_install_galaxy_tool.py",
"chars": 2616,
"preview": "\"\"\"\nTest script for building Python API for installing Galaxy tools using\nCBL without any dependencies (i.e. it clones d"
},
{
"path": "deploy/update_dependencies.sh",
"chars": 274,
"preview": "#!/bin/bash\n\n# Name of virtualenv to create using virtualenvwrapper \nVIRTUALENV_NAME=cbl_deploy\n\n# Configure virtualenv "
},
{
"path": "doc/Makefile",
"chars": 5601,
"preview": "# Makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line.\nSPHINXOPTS =\nSPHINXBUILD "
},
{
"path": "doc/hacking.md",
"chars": 10114,
"preview": "# Hacking BioLinux tips and tricks\n\nThe BioLinux tools allow building a full environment for Bioinformatics. The\ndesign "
},
{
"path": "doc/intro/FAQ.tex",
"chars": 167,
"preview": "\n\n\n\nAre there online or local user groups for Amazon Web Services?\n\n\n\n\nUser groups are listed on the \\href{https://aws.a"
},
{
"path": "doc/intro/README",
"chars": 309,
"preview": "These files make up an introductory tutorial about running CloudBioLinux on Amazon EC2. \n\nThe controlling file is called"
},
{
"path": "doc/intro/basicTerminology.aux",
"chars": 2030,
"preview": "\\relax \n\\@writefile{toc}{\\contentsline {section}{\\numberline {A.2}Basic terminology}{29}{section.A.2}}\n\\@writefile{toc}{"
},
{
"path": "doc/intro/basicTerminology.tex",
"chars": 4445,
"preview": "\\section{Basic terminology}\n\\paragraph{}One of the most daunting parts of starting to work on the cloud is the abundance"
},
{
"path": "doc/intro/cloudbl_desktopIntro.aux",
"chars": 2062,
"preview": "\\relax \n\\@writefile{toc}{\\contentsline {section}{\\numberline {A.1}The CloudBioLinux Desktop}{27}{section.A.1}}\n\\newlabel"
},
{
"path": "doc/intro/cloudbl_desktopIntro.tex",
"chars": 2699,
"preview": "\\section{The CloudBioLinux Desktop}\n\\label{section:cloudblDesktop}\n\\paragraph{}This section provides only a few tips abo"
},
{
"path": "doc/intro/getReady.aux",
"chars": 2845,
"preview": "\\relax \n\\newlabel{SC@1}{{1.1}{3}{\\relax }{section*.2}{}}\n\\@writefile{lof}{\\contentsline {figure}{\\numberline {1.1}{\\igno"
},
{
"path": "doc/intro/getReady.tex",
"chars": 6105,
"preview": "\\section{Get an Amazon AWS account}\n\\label{section:getaccount}\n\\paragraph{}Anyone can set up an account with Amazon to a"
},
{
"path": "doc/intro/gettingStarted_CloudBioLinux.aux",
"chars": 1244,
"preview": "\\relax \n\\ifx\\hyper@anchor\\@undefined\n\\global \\let \\oldcontentsline\\contentsline\n\\gdef \\contentsline#1#2#3#4{\\oldcontents"
},
{
"path": "doc/intro/gettingStarted_CloudBioLinux.out",
"chars": 1985,
"preview": "\\BOOKMARK [0][-]{chapter.1}{Preparing to work on the Amazon Cloud}{}\n\\BOOKMARK [1][-]{section.1.1}{Get an Amazon AWS acc"
}
]
// ... and 258 more files (download for full content)
About this extraction
This page contains the full source code of the chapmanb/cloudbiolinux GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 458 files (1.7 MB), approximately 538.7k tokens, and a symbol index with 940 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.