[
  {
    "path": ".Rbuildignore",
    "content": ".travis.yml\n.svnignore\n.gitignore\n^.*\\.DS_Store\nMakefile\nREADME.Rmd\nappveyor.yml\nGEODATA\ndocs\nmkdocs\n.github\n^CONDUCT\\.md$\n"
  },
  {
    "path": ".github/issue_template.md",
    "content": "### Prerequisites\n\n+ [ ] Have you read [Feedback](https://guangchuangyu.github.io/chipseeker/#feedback) and follow the [guide](https://guangchuangyu.github.io/2016/07/how-to-bug-author/)?\n\t* [ ] make sure your are using the latest release version\n\t* [ ] read the [documents](https://guangchuangyu.github.io/chipseeker/documentation/)\n\t* [ ] google your quesion/issue\n\n### Describe you issue\n\n* [ ] Make a reproducible example (*e.g.* [1](https://gist.github.com/talonsensei/e1fad082657054207f249ec98f0920eb))\n* [ ] your code should contain comments to describe the problem (*e.g.* what expected and actually happened?)\n\n\n### Ask in right place\n\n* [ ] for bugs or feature requests, post here (github issue)\n* [ ] for questions, please post to [Bioconductor](https://support.bioconductor.org/) or [Biostars](https://www.biostars.org/) with tag `ChIPseeker`\n"
  },
  {
    "path": ".gitignore",
    "content": ".DS_Store\ndata/.DS_Store\ninst/.DS_Store\ninst/extdata/.DS_Store\ninst/extdata/GEO_sample_data/.DS_Store\n.svn\n*~\ndocs/__init__.py\ndocs/__pycache__\n__init__.pyc\n"
  },
  {
    "path": ".svnignore",
    "content": ".git\n*.Rhistory\n.travis.yml\nappveyor.yml\ndocs\nmkdocs\n.github\n"
  },
  {
    "path": ".travis.yml",
    "content": "## reference: http://docs.travis-ci.com/user/languages/r/\n\nlanguage: r\nr: bioc-devel\n  \ncache: packages\nbioc_required: true\nbioc_use_devel: true\n\nos:\n  - linux\n  - osx\n\n\nenv:\n  global:\n    - _R_CHECK_FORCE_SUGGESTS_=False\n    - R_LIBS=\"http://cran.rstudio.com\"\n\nr_packages:\n    - knitr\n    - rmarkdown\n\nbioc_packages:\n    - DO.db\n    - DOSE\n    - graphite\n    - ReactomePA\n    - org.Hs.eg.db\n    - TxDb.Hsapiens.UCSC.hg19.knownGene\n    - GenomicRanges\n    - GenomicFeatures\n    \nafter_failure:\n  - ./travis-tool.sh dump_logs\n\nr_github_packages:\n  - jimhester/covr\n\nafter_success:\n  - Rscript -e 'library(covr); codecov()'\n\nnotifications:\n  email:\n    recipients: gcyu@connect.hku.hk\n    on_success: never\n    on_failure: always\n\n"
  },
  {
    "path": "CONDUCT.md",
    "content": "# Contributor Code of Conduct\n\nAs contributors and maintainers of this project, we pledge to respect all people who \ncontribute through reporting issues, posting feature requests, updating documentation,\nsubmitting pull requests or patches, and other activities.\n\nWe are committed to making participation in this project a harassment-free experience for\neveryone, regardless of level of experience, gender, gender identity and expression,\nsexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.\n\nExamples of unacceptable behavior by participants include the use of sexual language or\nimagery, derogatory comments or personal attacks, trolling, public or private harassment,\ninsults, or other unprofessional conduct.\n\nProject maintainers have the right and responsibility to remove, edit, or reject comments,\ncommits, code, wiki edits, issues, and other contributions that are not aligned to this \nCode of Conduct. Project maintainers who do not follow the Code of Conduct may be removed \nfrom the project team.\n\nInstances of abusive, harassing, or otherwise unacceptable behavior may be reported by \nopening an issue or contacting one or more of the project maintainers.\n\nThis Code of Conduct is adapted from the Contributor Covenant \n(http:contributor-covenant.org), version 1.0.0, available at \nhttp://contributor-covenant.org/version/1/0/0/\n"
  },
  {
    "path": "DESCRIPTION",
    "content": "Package: ChIPseeker\nType: Package\nTitle: ChIPseeker for ChIP peak Annotation, Comparison, and Visualization\nVersion: 1.49.0\nAuthors@R: c( \n    person(given = \"Guangchuang\", family = \"Yu\",       email = \"guangchuangyu@gmail.com\",      role  = c(\"aut\", \"cre\"), comment = c(ORCID = \"0000-0002-6485-8781\")),\n    person(given = \"Ming\",        family = \"Li\",       email = \"limiang929@gmail.com\",         role = \"ctb\"),\n    person(given = \"Qianwen\",     family = \"Wang\",     email = \"treywea@gmail.com\",            role = \"ctb\"),\n    person(given = \"Yun\",         family = \"Yan\",      email = \"youryanyun@gmail.com\",         role = \"ctb\"),\n    person(given = \"Hervé\",       family = \"Pagès\",    email = \"hpages.on.github@gmail.com\",   role  = \"ctb\"),\n    person(given = \"Michael\",     family = \"Kluge\",    email = \"michael.kluge@bio.ifi.lmu.de\", role  = \"ctb\"),\n    person(given = \"Thomas\",      family = \"Schwarzl\", email = \"schwarzl@embl.de\",             role  = \"ctb\"),\n    person(given = \"Zhougeng\",    family = \"Xu\",       email = \"xuzhougeng@163.com\",           role  = \"ctb\"),\n\tperson(given = \"Chun-Hui\",    family = \"Gao\",      email=\"gaospecial@gmail.com\",           role  = \"ctb\", comment=c(ORCID = \"0000-0002-1445-7939\"))\n\t)\nMaintainer: Guangchuang Yu <guangchuangyu@gmail.com>\nDescription: This package implements functions to retrieve the nearest genes\n    around the peak, annotate genomic region of the peak, statstical methods\n    for estimate the significance of overlap among ChIP peak data sets, and\n    incorporate GEO database for user to compare the own dataset with those\n    deposited in database. The comparison can be used to infer cooperative\n    regulation and thus can be used to generate hypotheses. 
Several\n    visualization functions are implemented to summarize the coverage of the\n    peak experiment, average profile and heatmap of peaks binding to TSS\n    regions, genomic annotation, distance to TSS, and overlap of peaks or\n    genes.\nDepends:\n    R (>= 3.5.0)\nImports:\n    AnnotationDbi,\n    aplot,\n    BiocGenerics,\n    boot,\n    dplyr,\n    enrichplot,\n    IRanges,\n    GenomeInfoDb,\n    GenomicRanges,\n    GenomicFeatures,\n    ggplot2,\n    gplots,\n    graphics,\n    grDevices,\n    gtools,\n    magrittr,\n    methods,\n    plotrix,\n    parallel,\n    RColorBrewer,\n    rlang,\n    rtracklayer,\n    S4Vectors,\n    scales,\n    stats,\n    tibble,\n    TxDb.Hsapiens.UCSC.hg19.knownGene,\n    utils,\n    yulab.utils (>= 0.2.0)\nSuggests:\n    clusterProfiler,\n    ggimage,\n    ggplotify,\n    ggupset,\n    ggVennDiagram,\n    knitr,\n    org.Hs.eg.db,\n    prettydoc,\n    ReactomePA,\n    rmarkdown,\n    testthat,\n    TxDb.Hsapiens.UCSC.hg38.knownGene\nRemotes:\n    GuangchuangYu/enrichplot\nURL: https://yulab-smu.top/contribution-knowledge-mining/\nBugReports: https://github.com/YuLab-SMU/ChIPseeker/issues\nEncoding: UTF-8\nVignetteBuilder: knitr\nByteCompile: true\nLicense: Artistic-2.0\nbiocViews: Annotation, ChIPSeq, Software, Visualization, MultipleComparison\nRoxygenNote: 7.3.3\n"
  },
  {
    "path": "GEODATA",
    "content": "UPDATE OF GEO DATA\n + 20947 bed file information in ChIPseeker (version >=1.9.8) <2016-09-20, Tue>\n + 19348 bed file information in ChIPseeker (version >= 1.7.15) <2016-03-21, Mon>\n + 18813 bed file information in ChIPseeker (version >= 1.5.11, BioC 3.2 devel) <2015-09-24, Thu>\n + >17,726 bed file information in ChIPseeker (version>=1.4.0, BioC 3.1)\n + >15,000 bed file information ChIPseeker (version>=1.2.0, BioC 3.0)\n"
  },
  {
    "path": "Makefile",
    "content": "PKGNAME := $(shell sed -n \"s/Package: *\\([^ ]*\\)/\\1/p\" DESCRIPTION)\nPKGVERS := $(shell sed -n \"s/Version: *\\([^ ]*\\)/\\1/p\" DESCRIPTION)\nPKGSRC  := $(shell basename `pwd`)\nBIOCVER := RELEASE_3_23\n\n\nall: rd check clean\n\nalldocs: rd readme\n\nrd:\n\tRscript -e 'roxygen2::roxygenise(\".\")'\n\nreadme:\n\tRscript -e 'rmarkdown::render(\"README.Rmd\", encoding=\"UTF-8\")'\n\nbuild:\n\t#cd ..;\\\n\t#R CMD build $(PKGSRC)\n\tRscript -e 'devtools::build()'\n\t\nbuild2:\n\tcd ..;\\\n\tR CMD build --no-build-vignettes $(PKGSRC)\n\ninstall:\n\tcd ..;\\\n\tR CMD INSTALL $(PKGNAME)_$(PKGVERS).tar.gz\n\ncheck: \n\tRscript -e 'devtools::check()'\n\t## cd ..;\\\n\t## Rscript -e 'rcmdcheck::rcmdcheck(\"$(PKGNAME)_$(PKGVERS).tar.gz\")'\n\ncheck2: build\n\tcd ..;\\\n\tR CMD check $(PKGNAME)_$(PKGVERS).tar.gz\n\nbioccheck:\n\tcd ..;\\\n\tRscript -e 'BiocCheck::BiocCheck(\"$(PKGNAME)_$(PKGVERS).tar.gz\")'\n\nclean:\n\tcd ..;\\\n\t$(RM) -r $(PKGNAME).Rcheck/\n\n\ngitmaintain:\n\tgit gc --auto;\\\n\tgit prune -v;\\\n\tgit fsck --full\n\nrmrelease:\n\tgit branch -D $(BIOCVER)\n\nrelease:\n\tgit checkout $(BIOCVER);\\\n\tgit fetch --all\n\n\nupdate:\n\tgit fetch --all;\\\n\tgit checkout devel;\\\n\tgit merge upstream/devel;\\\n\tgit merge origin/devel\n\nbiocinit:\n\tgit remote add upstream git@git.bioconductor.org:packages/$(PKGNAME).git;\\\n\tgit fetch --all\n\npush:\n\tgit push upstream devel;\\\n\tgit push origin devel\n\n\n"
  },
  {
    "path": "NAMESPACE",
    "content": "# Generated by roxygen2: do not edit by hand\n\nS3method(arrange,GRanges)\nS3method(as.data.frame,csAnno)\nS3method(filter,GRanges)\nS3method(mutate,GRanges)\nS3method(rename,GRanges)\nS3method(subset,csAnno)\nexport(.)\nexport(GRangesList)\nexport(annotatePeak)\nexport(as.GRanges)\nexport(combine_csAnno)\nexport(covplot)\nexport(downloadGEObedFiles)\nexport(downloadGSMbedFiles)\nexport(dropAnno)\nexport(enrichAnnoOverlap)\nexport(enrichPeakOverlap)\nexport(getAnnoStat)\nexport(getBioRegion)\nexport(getGEOInfo)\nexport(getGEOgenomeVersion)\nexport(getGEOspecies)\nexport(getPromoters)\nexport(getSampleFiles)\nexport(getTagMatrix)\nexport(makeBioRegionFromGranges)\nexport(overlap)\nexport(peakHeatmap)\nexport(peakHeatmap_multiple_Sets)\nexport(peak_Profile_Heatmap)\nexport(plotAnnoBar)\nexport(plotAnnoPie)\nexport(plotAnnoPie.csAnno)\nexport(plotAvgProf)\nexport(plotAvgProf2)\nexport(plotDistToTSS)\nexport(plotPeakProf)\nexport(plotPeakProf2)\nexport(readPeakFile)\nexport(rel)\nexport(seq2gene)\nexport(shuffle)\nexport(tagHeatmap)\nexport(vennpie)\nexport(vennplot)\nexport(vennplot.peakfile)\nexportClasses(csAnno)\nexportMethods(plotAnnoBar)\nexportMethods(plotAnnoPie)\nexportMethods(plotDistToTSS)\nexportMethods(show)\nexportMethods(upsetplot)\nexportMethods(vennpie)\nimport(BiocGenerics)\nimport(GenomeInfoDb)\nimport(GenomicRanges)\nimport(IRanges)\nimport(S4Vectors)\nimportFrom(AnnotationDbi,get)\nimportFrom(AnnotationDbi,select)\nimportFrom(BiocGenerics,end)\nimportFrom(BiocGenerics,start)\nimportFrom(GenomicFeatures,exonsBy)\nimportFrom(GenomicFeatures,fiveUTRsByTranscript)\nimportFrom(GenomicFeatures,genes)\nimportFrom(GenomicFeatures,intronsByTranscript)\nimportFrom(GenomicFeatures,threeUTRsByTranscript)\nimportFrom(GenomicFeatures,transcripts)\nimportFrom(GenomicFeatures,transcriptsBy)\nimportFrom(GenomicRanges,GRangesList)\nimportFrom(S4Vectors,metadata)\nimportFrom(S4Vectors,subset)\nimportFrom(TxDb.Hsapiens.UCSC.hg19.knownGene,TxDb.Hsapiens.
UCSC.hg19.knownGene)\nimportFrom(aplot,insert_bottom)\nimportFrom(aplot,plot_list)\nimportFrom(boot,boot)\nimportFrom(boot,boot.ci)\nimportFrom(dplyr,arrange)\nimportFrom(dplyr,filter)\nimportFrom(dplyr,group_by)\nimportFrom(dplyr,mutate)\nimportFrom(dplyr,summarise)\nimportFrom(enrichplot,upsetplot)\nimportFrom(ggplot2,aes)\nimportFrom(ggplot2,aes_)\nimportFrom(ggplot2,aes_string)\nimportFrom(ggplot2,coord_fixed)\nimportFrom(ggplot2,coord_flip)\nimportFrom(ggplot2,element_blank)\nimportFrom(ggplot2,element_text)\nimportFrom(ggplot2,facet_grid)\nimportFrom(ggplot2,geom_bar)\nimportFrom(ggplot2,geom_blank)\nimportFrom(ggplot2,geom_hline)\nimportFrom(ggplot2,geom_line)\nimportFrom(ggplot2,geom_rect)\nimportFrom(ggplot2,geom_ribbon)\nimportFrom(ggplot2,geom_segment)\nimportFrom(ggplot2,geom_text)\nimportFrom(ggplot2,geom_tile)\nimportFrom(ggplot2,geom_vline)\nimportFrom(ggplot2,ggplot)\nimportFrom(ggplot2,ggtitle)\nimportFrom(ggplot2,guide_legend)\nimportFrom(ggplot2,labs)\nimportFrom(ggplot2,rel)\nimportFrom(ggplot2,scale_color_manual)\nimportFrom(ggplot2,scale_fill_brewer)\nimportFrom(ggplot2,scale_fill_distiller)\nimportFrom(ggplot2,scale_fill_hue)\nimportFrom(ggplot2,scale_fill_manual)\nimportFrom(ggplot2,scale_x_continuous)\nimportFrom(ggplot2,scale_y_continuous)\nimportFrom(ggplot2,theme)\nimportFrom(ggplot2,theme_bw)\nimportFrom(ggplot2,theme_classic)\nimportFrom(ggplot2,theme_minimal)\nimportFrom(ggplot2,xlab)\nimportFrom(ggplot2,xlim)\nimportFrom(ggplot2,ylab)\nimportFrom(gplots,plot.venn)\nimportFrom(grDevices,colorRampPalette)\nimportFrom(graphics,layout)\nimportFrom(graphics,legend)\nimportFrom(graphics,par)\nimportFrom(graphics,pie)\nimportFrom(graphics,plot.new)\nimportFrom(gtools,permutations)\nimportFrom(magrittr,\"%<>%\")\nimportFrom(magrittr,\"%>%\")\nimportFrom(methods,as)\nimportFrom(methods,is)\nimportFrom(methods,missingArg)\nimportFrom(methods,new)\nimportFrom(methods,show)\nimportFrom(parallel,detectCores)\nimportFrom(parallel,mclapply)\nimportF
rom(plotrix,floating.pie)\nimportFrom(rlang,.data)\nimportFrom(rlang,quos)\nimportFrom(rtracklayer,import.chain)\nimportFrom(rtracklayer,liftOver)\nimportFrom(stats,p.adjust)\nimportFrom(stats,phyper)\nimportFrom(utils,data)\nimportFrom(utils,download.file)\nimportFrom(utils,read.delim)\nimportFrom(utils,setTxtProgressBar)\nimportFrom(utils,txtProgressBar)\nimportFrom(yulab.utils,get_cache_element)\nimportFrom(yulab.utils,get_cache_item)\nimportFrom(yulab.utils,initial_cache_item)\nimportFrom(yulab.utils,mat2df)\nimportFrom(yulab.utils,rm_cache_item)\nimportFrom(yulab.utils,update_cache_item)\nimportFrom(yulab.utils,yulab_msg)\n"
  },
  {
    "path": "NEWS",
    "content": "CHANGES IN VERSION 1.15.2\n------------------------\n o bug fixed for 'overlap = \"all\"' to consider strand information <2017-12-12, Tue>\n\nCHANGES IN VERSION 1.15.1\n------------------------\n o define downstream distance via options(ChIPseeker.downstreamDistance = 3000)\n   + https://support.bioconductor.org/p/103135/\n\nCHANGES IN VERSION 1.13.1\n------------------------\n o fixed issue of naming intronList <2017-07-06, Thu>\n   + https://github.com/GuangchuangYu/ChIPseeker/issues/57#issuecomment-313342399\n\nCHANGES IN VERSION 1.12.0\n------------------------\n o BioC 3.5 release <2017-04-26, Wed>\n\nCHANGES IN VERSION 1.11.4\n------------------------\n o bug fixed of intron rank <2017-04-19, Wed>\n   + https://github.com/GuangchuangYu/ChIPseeker/issues/54\n\nCHANGES IN VERSION 1.11.3\n------------------------\n o bug fixed of dropAnno <2017-04-10, Mon>\n o bug fixed of peak width generated by shuffle <2017-03-31, Fri>\n   + <https://github.com/GuangchuangYu/ChIPseeker/issues/51>\n\nCHANGES IN VERSION 1.11.2\n------------------------\n o optimize getGeneAnno <2016-12-21, Wed>\n o change plotAnnoBar and plotDistToTSS according to stacking bar order change in ggplot2 (v2.2.0) <2016-12-16, Fri>\n   + https://github.com/GuangchuangYu/ChIPseeker/issues/47\n   + https://blog.rstudio.org/2016/11/14/ggplot2-2-2-0/\n\nCHANGES IN VERSION 1.11.1\n------------------------\n o update startup message <2016-11-09, Wed>\n\nCHANGES IN VERSION 1.10.0\n------------------------\n o BioC 3.4 released <2016-10-18, Tue>\n\nCHANGES IN VERSION 1.9.8\n------------------------\n o plotAvgProf/plotAvgProf2 order of panel by names of input tagMatrix List <2016-09-25, Sun>\n o test ENSEMBL ID using '^ENS' instead of '^ENSG' <2016-09-20, Tue>\n   + https://github.com/GuangchuangYu/ChIPseeker/issues/41\n\nCHANGES IN VERSION 1.9.7\n------------------------\n o unit test <2016-08-16, Tue>\n\nCHANGES IN VERSION 1.9.6\n------------------------\n o update vignette <2016-08-16, 
Tue>\n\nCHANGES IN VERSION 1.9.5\n------------------------\n o when TxDb doesn't have gene_id information, converting gene ID (ensembl/entrez and symbol) will be omitted instead of throw error. <2016-08-02, Tue>\n   + https://www.biostars.org/p/204142\n o bug fixed if testing targetPeak is a list of GRanges objects in enrichPeakOverlap function <2016-07-20, Wed>\n   + https://github.com/GuangchuangYu/ChIPseeker/issues/37\n   + https://github.com/GuangchuangYu/ChIPseeker/issues/36\n o fixed typo in determine gene ID type <2016-06-21, Tue>\n   + https://github.com/GuangchuangYu/ChIPseeker/issues/28#issuecomment-227212519\n o move upsetplot generics to DOSE and import from DOSE to prevent function name conflict <2016-06-14, Tue>\n\nCHANGES IN VERSION 1.9.4\n------------------------\n o bug fixed <2016-06-08, Wed>\n   + https://github.com/GuangchuangYu/ChIPseeker/issues/17#issuecomment-224407402\n   + https://github.com/GuangchuangYu/ChIPseeker/pull/24/files\n\nCHANGES IN VERSION 1.9.3\n------------------------\n o use byte compiler <2016-05-18, Wed>\n o https://github.com/Bioconductor-mirror/ChIPseeker/commit/f1ada57b9c66a1a44355bbbbdaf5b0a88e10cf7d\n\nCHANGES IN VERSION 1.9.2\n------------------------\n o name tagMatrix in plotAvgProf automatically if missing <2016-05-12, Thu>\n o https://github.com/Bioconductor-mirror/ChIPseeker/commit/d5f16b2bc01725e30282c3acb33007ef521a514c\n\nCHANGES IN VERSION 1.9.1\n------------------------\n o bug fixed in getNearestFeatureIndicesAndDistances <2016-05-11, Wed>\n   + correct metadata in dummy NA feature\n\nCHANGES IN VERSION 1.8.0\n------------------------\n o BioC 3.3 released <2016-05-05, Thu>\n\nCHANGES IN VERSION 1.7.15\n------------------------\n o update GEO data <2016-03-21, Mon>\n\nCHANGES IN VERSION 1.7.14\n------------------------\n o list_to_dataframe works with data frames that have different colnames <2016-03-10, Thu>\n\nCHANGES IN VERSION 1.7.13\n------------------------\n o support annotate peaks with custom 
regions via passing TxDb=user_defined_GRanges to annotatePeak <2016-03-06, Sun>\n\nCHANGES IN VERSION 1.7.12\n------------------------\n o fixed R check <2016-03-05, Sat>\n o implement list_to_dataframe that mimic ldply and remove ldply dependency <2016-03-05, Sat>\n\nCHANGES IN VERSION 1.7.11\n------------------------\n o fixed issue in testing list in covplot introduced in 1.7.9 <2016-03-02, Wed>\n\nCHANGES IN VERSION 1.7.10\n------------------------\n o determined gene ID type if TxDb doesn't contain corresponding metadata <2016-03-01, Tue>\n   + fixed https://github.com/GuangchuangYu/ChIPseeker/issues/28\n\nCHANGES IN VERSION 1.7.9\n------------------------\n o covplot support GRangesList <2016-02-24, Wed>\n o update ReactomePA citation info <2016-02-17, Wed>\n\nCHANGES IN VERSION 1.7.8\n------------------------\n o fixed BUG of Peaks upstream first or downstream last gene not annotated <2016-01-20, Wed>\n   + contributed by Michael Kluge\n   + see https://github.com/GuangchuangYu/ChIPseeker/pull/24\n\nCHANGES IN VERSION 1.7.7\n------------------------\n o bug fixed in newly introduced parameter 'overlap'. solve NA issue. <2016-01-13, Wed>\n\nCHANGES IN VERSION 1.7.6\n------------------------\n o introduce 'overlap' parameter in annotatePeak, by default overlap=\"TSS\" and only overlap with TSS will be reported as the nearest gene.\n   if overlap=\"all\", then gene overlap with peak will be reported as nearest gene, no matter the overlap is at TSS region or not. <2016-01-12, Tue>\n o bug fixed in find overlap with peaks have strand info. 
<2016-01-12, Tue>\n   + see https://github.com/GuangchuangYu/ChIPseeker/issues/23\n\nCHANGES IN VERSION 1.7.5\n------------------------\n o add paramters, sameStrand,ignoreOverlap, ignoreUpstream and ignoreDownstream in annotatePeak <2016-01-10, Sun>\n   + see https://github.com/GuangchuangYu/ChIPseeker/issues/17\n o bug fixed in peak orientation <2016-01-10, Sun>\n   + see https://github.com/GuangchuangYu/ChIPseeker/issues/22\n\nCHANGES IN VERSION 1.7.4\n------------------------\n o stop if input list of csAnno object has no name attribute\n   + see https://github.com/GuangchuangYu/ChIPseeker/issues/21\n   + plotAnnoBar\n   + plotDistToTSS\n o [covplot] xlim now not only restrict the window of data but also set the limit of the graphic object <2015-12-30, Wed>\n   + see https://github.com/GuangchuangYu/ChIPseeker/issues/20\n\nCHANGES IN VERSION 1.7.3\n------------------------\n o fixed R check <2015-12-29, Tue>\n\nCHANGES IN VERSION 1.7.2\n------------------------\n o use geom_rect instead of geom_segment in covplot <2015-11-30, Mon>\n o open lower parameter (by default =1) to specific lower cutoff of coverage signal <2015-11-29, Sun>\n o fixed covplot to work with None RleViews of specific chromosome <2015-11-29, Sun>\n o addFlankGeneInfo now works with level=\"gene\" <2015-11-19, Thu>\n   + see https://github.com/GuangchuangYu/ChIPseeker/issues/18\n\nCHANGES IN VERSION 1.7.1\n------------------------\n o fixed extracting ID type from TxDb object, since the change of metadata(TxDb). now using grep to extract. 
<2015-10-27, Tue>\n o add vp parameter to set viewport of vennpie on top of upsetplot by user request <2015-10-26, Mon>\n   + see http://ygc.name/2015/07/28/upsetplot-in-chipseeker/#comment-19470\n o getBioRegion function <2015-10-20, Tue>\n   + see https://github.com/GuangchuangYu/ChIPseeker/issues/16\n\nCHANGES IN VERSION 1.7.0\n------------------------\n o BioC 3.3 branch\n\nCHANGES IN VERSION 1.5.11\n------------------------\n o remove ellipsis parameter in enrichPeakOverlap function and extend it to support GRanges objects <2015-10-08, Thu>\n    + see https://support.bioconductor.org/p/73069/\n o fixed the issue, https://github.com/GuangchuangYu/ChIPseeker/issues/13 <2015-10-05, Mon>\n o update GEO info, now contains >18,000 bed file information <2015-09-24, Thu>\n\nCHANGES IN VERSION 1.5.10\n------------------------\n o dropAnno function, eg. drop nearest gene annotation that far from TSS (>10k). <2015-09-17, Thu>\n   + see https://github.com/GuangchuangYu/ChIPseeker/issues/9\n   + add parameter distanceToTSS_cutoff to enrichAnnoOverlap\n o use base::subset in plotDistToTSS instead of subsetting data within geom_bar <2015-09-17, Thu>\n   + see https://github.com/hadley/ggplot2/issues/1295\n   + subset parameter in layer will be removed in next release of ggplot2.\n\nCHANGES IN VERSION 1.5.9\n------------------------\n o bug fixed of enrichAnnoOverlap <2015-08-26, Wed>\n o change parameter order.matrix to order.by in upsetplot to meet the change of UpSetR pkg <2015-08-26, Wed>\n\nCHANGES IN VERSION 1.5.8\n------------------------\n o better implementation of getFirstHitIndex.  
<2015-07-29, Wed>\n   + contributed by Herve Pages.\n   + see https://support.bioconductor.org/p/70432/#70545.\n\nCHANGES IN VERSION 1.5.7\n------------------------\n o add vennpie parameter in upsetplot <2015-07-20, Mon>\n o upsetplot function for csAnno object <2015-07-20, Mon>\n\nCHANGES IN VERSION 1.5.6\n------------------------\n o update citation info <2015-07-09, Thu>\n o BED file +1 shift for BED coordinate system start at 0 <2015-07-07, Tue>\n\nCHANGES IN VERSION 1.5.5\n------------------------\n o seq2gene for linking genomic regions to genes by many-to-many mapping. <2015-06-29, Mon>\n\nCHANGES IN VERSION 1.5.4\n------------------------\n o add pseudocount in enrichPeakOverlap to prevent 0 pvalue <2015-05-22, Fri>\n\nCHANGES IN VERSION 1.5.3\n------------------------\n o convert the vignette from Rnw to Rmd format <2015-05-17, Sun>\n\nCHANGES IN VERSION 1.5.1\n------------------------\n o minor bug fixed in getChrCov <2015-04-27, Mon>\n\nCHANGES IN VERSION 1.3.15\n------------------------\n o update vignette <2015-03-31, Tue>\n\nCHANGES IN VERSION 1.3.14\n------------------------\n o add pool parameter in enrichPeakOverlap <2015-03-30, Mon>\n\nCHANGES IN VERSION 1.3.13\n------------------------\n o update enrichPeakOverlap to support nShuffle = 0, which now will report only overlay with pvalue = NA <2015-03-29, Sun>\n o add facet and free_y parameter for plotAvgProf and plotAvgProf2 <2015-03-29, Sun>\n o update docs <2015-03-29, Sun>\n o update plotAvgProf and plotAvgProf2 to fully supporting confidence interval,\n   see https://github.com/GuangchuangYu/ChIPseeker/pull/6 <2015-03-29, Sun>\n\nCHANGES IN VERSION 1.3.12\n------------------------\n o add confidence interval for plotAvgProf, see https://github.com/GuangchuangYu/ChIPseeker/issues/3 <2015-03-26, Thu>\n\nCHANGES IN VERSION 1.3.11\n------------------------\n o add citation <2015-03-16, Mon>\n\nCHANGES IN VERSION 1.3.10\n------------------------\n o update GEO data <2015-03-03, Tue>\n\nCHANGES IN 
VERSION 1.3.9\n------------------------\n o add parameter *genomicAnnotationPriority* for annotatePeak function <2015-02-27, Fri>\n\nCHANGES IN VERSION 1.3.8\n------------------------\n o add DOSE citation <2015-02-13, Fri>\n\nCHANGES IN VERSION 1.3.7\n------------------------\n o bug fixed in plotDistToTSS <2015-02-06, Fri>\n\nCHANGES IN VERSION 1.3.6\n------------------------\n o when peak is exactly located at gene end and near the end of chromosome,\n   NA will be generated and throw error when assigning downstream of gene end.\n   This bug has been fixed <2015-02-03, Tue>\n\nCHANGES IN VERSION 1.3.5\n------------------------\n o bug fixed in getNearestFeatureIndicesAndDistances when peak in the\n   very begining or end of the chromosome <2015-01-30, Fri>\n\nCHANGES IN VERSION 1.3.4\n------------------------\n o bug fixed for introducing dplyr in plotDistToTSS <2015-01-28, Wed>\n\nCHANGES IN VERSION 1.3.3\n------------------------\n o update vignette to use BiocStyle::latex() <2015-01-26, Mon>\n\nCHANGES IN VERSION 1.3.2\n------------------------\n o fixed import issue to meet the changes of AnnotationDbi and S4Vectors <2015-01-22, Thu>\n\nCHANGES IN VERSION 1.1.21\n------------------------\n o use data.table instead of data.frame to optimize covplot <2014-10-06, Mon>\n\nCHANGES IN VERSION 1.1.20\n------------------------\n o update annotatePeak to store the seqinfo information <2014-09-30, Tue>\n o modified runValue(x) to sapply(x, runValue) <2014-09-30, Tue>\n\nCHANGES IN VERSION 1.1.19\n------------------------\n o implement csAnno S4 object <2014-09-28, Sun>\n o modify plot function for csAnno instance <2014-09-28, Sun>\n o implement vennpie function <2014-09-28, Sun>\n\nCHANGES IN VERSION 1.1.17\n------------------------\n o deprecate plotChrCov to new function covplot <2014-08-18, Mon>\n o add new paramter chrs and xlim to covplot <2014-08-18, Mon>\n\nCHANGES IN VERSION 1.1.16\n------------------------\n o optimize plotChrCov, running time reduce 
drastically <2014-08-15, Fri>\n\nCHANGES IN VERSION 1.1.15\n------------------------\n o remove un-mappable peak to prevent fail in peak annotation <2014-08-14, Thu>\n\nCHANGES IN VERSION 1.1.14\n------------------------\n o bug fixed in plotDistToTSS <2014-08-14, Thu>\n\nCHANGES IN VERSION 1.1.13\n------------------------\n o change TranscriptDb to TxDb according to GenomicFeatures <2014-07-29, Tue>\n\nCHANGES IN VERSION 1.1.12\n------------------------\n o bug fixed in plotChrCov <2014-07-21, Mon>\n\nCHANGES IN VERSION 1.1.10\n------------------------\n o bug fixed in calculating distances from peak end <2014-06-18, Wed>\n\nCHANGES IN VERSION 1.1.9\n------------------------\n o add level parameter to annotatePeak, and set it to \"transcript\" by default.\n   Now annotatePeak will annotate peaks in transcript level\n   except user specify level = \"gene\" <2014-06-16, Mon>\n o add addFlankGeneInfo parameter to annotatePeak.\n   If it set to true, all features within the flankDistance will be annotated. 
<2014-06-16, Mon>\n\nCHANGES IN VERSION 1.1.8\n------------------------\n o bug fixed when peak overlap with feature <2014-06-11, Wed>\n o optimize for getting overlap features of peaks <2014-06-11, Wed>\n o update plotAnnoPie, separate the pie and legend to prevent label overlap <2014-06-12, Thu>\n\nCHANGES IN VERSION 1.1.7\n------------------------\n o bug fixed in calculating distanceToTSS <2014-06-03, Tue>\n\nCHANGES IN VERSION 1.1.6\n------------------------\n o add chainFile parameter in enrichAnnoOverlap and enrichPeakOverlap to support different genome version comparision <2014-06-01, Sun>\n o fixed color bug in peakHeatmap.internal2 and plotAnnoBar <2014-06-02, Mon>\n o update vignettes <2014-06-02, Mon>\n\nCHANGES IN VERSION 1.1.5\n------------------------\n o export getPromoters and getTagMatrix <2014-05-31, Sat>\n o rename plotAvgProf to plotAvgProf2 and implement plotAvgProf based on tagMatrix  <2014-05-31, Sat>\n o implement tagHeatmap for visualize heatmap of the tagMatrix or a list of tagMatrix <2014-05-31, Sat>\n o implement shuffle function to generate a random ChIP data based on a real one <2014-05-31, Sat>\n o implement enrichPeakOverlap to calcuate significant of ChIP experiments based on the genome coordinations <2014-05-31, Sat>\n o implement enrichAnnoOverlap to calculate significant of ChIP experiments based on their nearest gene annotation <2014-05-31, Sat>\n o incorporate GEO database for mining significant overlap of ChIP data <2014-05-31, Sat>\n   + getGEOspecies summarize the collected data by species\n   + getGEOgenomeVersion summarize the colleted data by genome version\n   + getGEOInfo extract the information by genome version query\n   + downloadGEObedFiles download all bed files of a particular genome version\n   + downloadGSMbedFiles download the bed files of the input GSM list.\n\nCHANGES IN VERSION 1.1.4\n------------------------\n o in the annotation column of output of annotatePeak function,\n\tif Exon/Intron, the output 
change to 'Transcript_Name/GeneID, Exon no. of total_no.' <2014-05-14, Wed>\n\nCHANGES IN VERSION 1.1.3\n------------------------\n o bug fixed when metadata(TranscriptDb) contained NA <2014-04-30, Wed>\n o support ID type of Ensembl in annotatePeak (Entrez was supported) <2014-04-30, Wed>\n\nCHANGES IN VERSION 1.1.2\n------------------------\n o implemented plotChrCov <2014-04-25, Fri>\n o implemented plotAvgProf and peakHeatmap <2014-04-24, Thu>\n\nCHANGES IN VERSION 1.1.1\n------------------------\n o output of annotatePeak now contain chromosome length information <2014-04-22, Tue>\n o re-implement plotAnnoPie to use ordinary pie plot instead of pie3D <2014-04-21, Mon>\n\nCHANGES IN VERSION 1.0.0\n------------------------\n o initial version with the following functions implemented:\n   + annotatePeak\n   + overlap\n   + plotAnnoBar\n   + plotAnnoPie\n   + plotDistToTSS\n   + readPeakFile\n   + vennplot\n   + vennplot.peakfile\n"
  },
  {
    "path": "NEWS.md",
    "content": "# ChIPseeker 1.48.0\n\n+ Bioconductor RELEASE_3_23 (2026-04-29, Wed)\n\n# ChIPseeker 1.47.1\n\n+ fixed issue in 'test-txdb.R' as 'TxDb.Hsapiens.UCSC.hg19.knownGene' changes its transcript ID from UCSC (e.g., uc002qsd.4) to Ensembl (e.g., ENST00000487630.1_3) (2025-11-04, Tue)\n\n# ChIPseeker 1.46.0\n\n+ Bioconductor RELEASE_3_22 (2025-11-01, Sat)\n\n# ChIPseeker 1.45.2\n\n+ new cache mechanism from 'yulab.utils' (2025-10-15, Wed)\n\n# ChIPseeker 1.44.0\n\n+ Bioconductor RELEASE_3_21 (2025-04-17, Thu)\n\n# ChIPseeker 1.42.0\n\n+ Bioconductor RELEASE_3_20 (2024-10-30, Wed)\n\n# ChIPseeker 1.41.3\n\n+ Better `covplot()`. Support universal chromosome names, and keep the default order of multiple peaks when plot a list of `GRanges` object.\n+ Robust `generate_colors()`. Edit the logical of decision, and can validate color code automatically.\n+ Extend dplyr verbs (`filter()`, `mutate()`, `arrange()`, `rename()`) to peak (`GRanges` object or `data.frame`), see #242.\n\n# ChIPseeker 1.41.2\n\n+ Enhancement of `plotDistToTSS()`, see #241.\n\n# ChIPseeker 1.41.1\n\n+ use `yulab.utils::yulab_msg()` for startup message (2024-07-26, Fri)\n\n# ChIPseeker 1.40.0\n\n+ Bioconductor RELEASE_3_19 (2024-05-15, Wed)\n\n# ChIPseeker 1.38.0\n\n+ Bioconductor RELEASE_3_18 (2023-10-25, Wed)\n\n# ChIPseeker 1.36.0\n\n+ Bioconductor RELEASE_3_17 (2023-05-03, Wed)\n\n# ChIPseeker 1.35.3\n\n+ fixed R check by removing calling `BiocStyle::Biocpkg()` in vignette, instead we use `yulab.utils::Biocpkg()` (2023-04-11, Tue)\n\n# ChIPseeker 1.35.2\n\n+ fixed R check by adding 'prettydoc' to Suggests (2023-04-04, Tue)\n\n# ChIPseeker 1.35.1\n\n+ use `ggplot` to plot heatmap (2022-12-30, Fri, #203)\n+ update startup message to display the 'Current Protocols (2022)' paper. \n\n# ChIPseeker 1.34.0\n\n+ Bioconductor RELEASE_3_16 (2022-11-02, Wed)\n\n\n# ChIPseeker 1.33.4\n\n+ add citation Q. 
Wang (2022) (2022-10-29, Sat)\n\n# ChIPseeker 1.33.3\n\n+ allows passing user defined color to `vennpie()` (2022-10-20, Thu, #202, #207)\n+ add `columns` parameter to `annotatePeak()` to better support passing `EnsDb` to `annoDb` (#193, #205)\n+ export `getAnnoStat()` (#200, #204)\n\n# ChIPseeker 1.33.2\n\n+ supports `by = \"ggVennDiagram\"` in `vennplot` function (2022-09-13, Tue)\n\n# ChIPseeker 1.33.1\n\n+ `plotPeakProf()` allows passing GRanges object or a list of GRanges objects to TxDb parameter (2022-06-04, Sat)\n+ add test files for `getTagMatrix()` and `plotTagMatrix()`\n+ `getBioRegion()` supports UTR regions (3'UTR + 5'UTR)\n+ `makeBioRegionFromGranges()` supports generating windows from self-made GRanges object\n+ allow specifying colors in `covplot()` (2022-05-09, Mon, #185, #188)\n\n# ChIPseeker 1.32.0\n\n+ Bioconductor 3.15 release\n\n# ChIPseeker 1.31.4\n\n+ `readPeakFile` now supports `.broadPeak` and `.gappedPeak` files (2021-12-17, Fri, #173) \n\n# ChIPseeker 1.31.3\n\n+ bug fixed of determining promoter region in minus strand (2021-12-16, Thu, #172)\n\n# ChIPseeker 1.31.2\n\n+ update vignette\n\n# ChIPseeker 1.31.1\n\n+ bug fixed to take strand information (2021-11-10, Wed, #167)\n\n# ChIPseeker 1.30.0\n\n+ Bioconductor 3.14 release\n\n# ChIPseeker 1.29.2\n\n+ extend functions for plotting peak profiles to support other types of bioregions (2021-10-15, Fri, @MingLi-929, #156, #160, #162, #163)\n\n# ChIPseeker 1.29.1\n\n+ add example for `seq2gene` function (2021-05-21, Fri)\n\n# ChIPseeker 1.28.0\n\n+ Bioconductor 3.13 release (2021-05-20, Thu)\n\n# ChIPseeker 1.27.5\n\n+ update GEO data (103398/1973025 GSM) (2021-05-14, Fri)\n\n# ChIPseeker 1.27.4\n\n+ bug fixed in determining downstream gene (2021-04-27, Thu)\n  - <https://github.com/YuLab-SMU/ChIPseeker/pull/148>\n+ `getBioRegion` now supports '3UTR' and '5UTR' (2021-03-30, Tue)\n  - <https://github.com/YuLab-SMU/ChIPseeker/pull/146>\n\n# ChIPseeker 1.27.3\n\n+ add two parameters, cex and radius, to 
`plotAnnoPie` (2021-03-12, Fri)\n  - <https://github.com/YuLab-SMU/ChIPseeker/pull/144>\n\n# ChIPseeker 1.27.2\n\n+ bug fixed of `getGenomicAnnotation` (2021-03-03, Wed)\n  - <https://github.com/YuLab-SMU/ChIPseeker/issues/142>\n\n# ChIPseeker 1.27.1\n\n+ Add support for `EnsDb` annotation databases in `annotatePeak`. \n  - <https://github.com/YuLab-SMU/ChIPseeker/pull/120>\n\n# ChIPseeker 1.26.0\n\n+ Bioconductor 3.12 release (2020-10-28, Wed)\n\n\n# ChIPseeker 1.23.1\n\n+ update GEO data (51079/762820 GSM) (2019-12-20, Fri)\n\n# ChIPseeker 1.22.0\n\n+ Bioconductor 3.10 release\n \n# ChIPseeker 1.21.1\n\n+ new implementation of `upsetplot` (2019-08-29, Thu)\n  - use `ggupset`, `ggimage` and `ggplotify`\n+ `subset` method for `csAnno` object (2019-08-27, Tue)\n\n# ChIPseeker 1.20.0\n\n+ Bioconductor 3.9 release\n\n# ChIPseeker 1.19.1\n\n+ add `origin_label = \"TSS\"` parameter to `plotAvgProf` (2018-12-12, Wed)\n  - <https://github.com/GuangchuangYu/ChIPseeker/issues/91>\n  \n# ChIPseeker 1.18.0\n\n+ Bioconductor 3.8 release\n\n# ChIPseeker 1.17.2\n\n+ add `flip_minor_strand` parameter in `getTagMatrix` (2018-08-10, Fri)\n  - should be set to FALSE if windows are not symmetric\n  \n# ChIPseeker 1.17.1\n\n+ fixed issue of `vennpie` by adding pseudo-count +1 (2018-07-21, Sat)\n  - <https://www.biostars.org/p/326456/>\n\n# ChIPseeker 1.16.0\n\n+ Bioconductor 3.7 release\n\n# ChIPseeker 1.15.4\n\n+ If the required input is a named list and user input a list without name,\n  set the name automatically and throw warning msg instead of error <2018-03-14,\n  Wed>\n    - <https://support.bioconductor.org/p/106903/#106936>\n+ change `plotAvgProf`'s default y label <2018-03-14, Wed>\n    - <https://github.com/GuangchuangYu/ChIPseeker/issues/76>\n+ plotAnnoBar now visualizes barplot according to the order of input list\n  (y-axis) (2018-02-27, Tue)\n    - <https://github.com/GuangchuangYu/ChIPseeker/issues/73>\n+ follow renaming of RangesList class -> IntegerRangesList in IRanges 
v2.13.12\n    - <https://github.com/GuangchuangYu/ChIPseeker/commit/b62d7922fb61e58620bbb685e4def4fb863c8e81>\n\n# ChIPseeker 1.15.3\n\n+ options to ignore '1st exon', '1st intron', 'downstream' and promoter\n  subcategory when summarizing result and visualization (2018-01-09, Tue)\n    - <https://support.bioconductor.org/p/104676/#104689>\n+ throw msg of 'file not found and skip' when requested url is not available\n  when downloading BED file from GEO (2017-12-28, Thu)\n    - <https://support.bioconductor.org/p/104491/#104507>\n+ bug fixed of getGene (2017-12-27, Wed)\n"
  },
  {
    "path": "R/AllGenerics.R",
    "content": "##' vennpie method generics\n##'\n##'\n##' @docType methods\n##' @name vennpie\n##' @rdname vennpie-methods\n##' @export\nsetGeneric(\"vennpie\", \n  function(x, r = 0.2, cex = 1.2, ...) \n  standardGeneric(\"vennpie\")\n)\n\n\n##' plotDistToTSS method generics\n##'\n##'\n##' @docType methods\n##' @name plotDistToTSS\n##' @rdname plotDistToTSS-methods\n##' @export\nsetGeneric(\"plotDistToTSS\", \n  function(x, \n    distanceColumn=\"distanceToTSS\",\n    xlab=\"\", ylab=\"Binding sites (%) (5'->3')\",\n    title=\"Distribution of transcription factor-binding loci relative to TSS\", \n    ...)\n  standardGeneric(\"plotDistToTSS\")\n)\n\n##' plotAnnoBar method generics\n##'\n##'\n##' @docType methods\n##' @name plotAnnoBar\n##' @rdname plotAnnoBar-methods\n##' @export\nsetGeneric(\"plotAnnoBar\", \n  function(x,\n    xlab=\"\",\n    ylab=\"Percentage(%)\",\n    title=\"Feature Distribution\",\n    ...)\n  standardGeneric(\"plotAnnoBar\")\n)\n\n\n##' plotAnnoPie method generics\n##'\n##'\n##' @docType methods\n##' @name plotAnnoPie\n##' @rdname plotAnnoPie-methods\n##' @export\nsetGeneric(\"plotAnnoPie\", \n  function(x, \n    ndigit=2,\n    cex=0.9,\n    col=NA,\n    legend.position=\"rightside\",\n    pie3D=FALSE,\n    radius=0.8,\n    ...)\n  standardGeneric(\"plotAnnoPie\")\n)\n"
  },
  {
    "path": "R/ChIPseeker-package.R",
    "content": "#' @keywords internal\n\"_PACKAGE\"\n\n\n\n##' Information Datasets\n##' \n##' ucsc genome version, precalcuated data and gsm information\n##' \n##' @name info\n##' @aliases ucsc_release\n##' gsminfo\n##' tagMatrixList\n##' @docType data\n##' @keywords datasets\nNULL\n\n##' Name of the ChIPseeker cache environment (internal static variable)\n##' @format character vector \nChIPseekerCache <- \"ChIPseekerEnv\"\n\n"
  },
  {
    "path": "R/GEO.R",
    "content": "########################################\n##                                    ##\n## data last update: Mar 03, 2015     ##\n##                                    ##\n########################################\n\n\n\n##' accessing species statistics collecting from GEO database\n##'\n##'\n##' @title getGEOspecies\n##' @return data.frame\n##' @author G Yu\n##' @export\ngetGEOspecies <- function() {\n    gsminfo <- get_gsminfo()\n    species <- gsminfo$organism\n    res <- as.data.frame(table(species))\n    return(res)\n}\n\n##' get genome version statistics collecting from GEO ChIPseq data\n##'\n##'\n##' @title getGEOgenomeVersion\n##' @return data.frame\n##' @author G Yu\n##' @export\ngetGEOgenomeVersion <- function() {\n    gsminfo <- get_gsminfo()\n    gv <- gsminfo[, c(\"organism\",\n                      \"genomeVersion\")]\n    genomeVersion <- gv$genomeVersion\n    res <- as.data.frame(table(genomeVersion))\n    gv <- unique(gv)\n\n    res <- merge(gv, res, by.x=\"genomeVersion\", by.y=\"genomeVersion\", all.y=TRUE)\n    res <- res[, c(\"organism\", \"genomeVersion\", \"Freq\")]\n    return(res)\n}\n\n##' get subset of GEO information by genome version keyword\n##'\n##'\n##' @title getGEOInfo\n##' @param genome genome version\n##' @param simplify simplify result or not\n##' @return data.frame\n##' @author G Yu\n##' @export\ngetGEOInfo <- function(genome, simplify =TRUE) {\n    gsminfo <- get_gsminfo()\n    genomeVersion <- NULL ## to satisfy codetools\n    res <- subset(gsminfo, subset = genomeVersion == genome)\n    if (simplify) {\n        res <- res[,c(\"series_id\", \"gsm\", \"organism\", \"title\", \"supplementary_file\", \"genomeVersion\", \"pubmed_id\")]\n    }\n    return(res)\n}\n\n##' download all BED files of a particular genome version\n##'\n##'\n##' @title downloadGEObedFiles\n##' @param genome genome version\n##' @param destDir destination folder\n##' @return NULL\n##' @author G Yu\n##' @export\ndownloadGEObedFiles <- 
function(genome, destDir=getwd()) {\n    info <- getGEOInfo(genome)\n    downloadGEO.internal(info, destDir)\n}\n\n##' download BED supplementary files of a list of GSM accession numbers\n##'\n##'\n##' @title downloadGSMbedFiles\n##' @param GSM GSM accession numbers\n##' @param destDir destination folder\n##' @return NULL\n##' @author G Yu\n##' @export\ndownloadGSMbedFiles <- function(GSM, destDir=getwd()) {\n    gsminfo <- get_gsminfo()\n    info <- gsminfo[gsminfo$gsm %in% GSM,]\n    downloadGEO.internal(info, destDir)\n}\n\n##' @importFrom utils download.file\ndownloadGEO.internal <- function(info, destDir) {\n    fnames <- as.character(info$supplementary_file)\n    destfiles <- sub(\".*\\\\/\", paste(destDir, \"/\", sep=\"\"), fnames)\n    names(destfiles) <- NULL\n\n    for (i in seq_along(fnames)) {\n        if ( ! file.exists(destfiles[i]) )\n            tryCatch(download.file(fnames[i],\n                          destfile=destfiles[i],\n                          mode=\"wb\"),\n                     error = function(e) message(fnames[i], ': file not found and skip'))\n    }\n}\n\n##' @importFrom utils data\n## @importFrom GEOmetadb\n## @importFrom RSQLite dbConnect\n## @importFrom RSQLite dbGetQuery\nprepareGSMInfo <- function() {\n    pkg <- \"GEOmetadb\"\n    require(pkg, character.only=TRUE)\n    getSQLiteFile <- eval(parse(text=\"getSQLiteFile\"))\n    ## get the latest version of sql file\n    is.dl <- tryCatch(getSQLiteFile(), error = function(e) NULL)\n\n    if (is.null(is.dl)) {\n        url <- 'http://starbuck1.s3.amazonaws.com/sradb/GEOmetadb.sqlite.gz'\n        HEAD <- eval(parse(text = \"httr::HEAD\"))\n        hh <- HEAD(url)\n        size <- hh$headers[[\"content-length\"]]\n        cmd <- paste('wget -c', url)\n        while(file.info(\"GEOmetadb.sqlite.gz\")$size < size) {\n            system(cmd)\n        }\n        if (file.exists('GEOmetadb.sqlite') && file.exists('GEOmetadb.sqlite.gz')) {\n            file.remove(\"GEOmetadb.sqlite\")\n    
    }\n        system('gunzip GEOmetadb.sqlite.gz')\n    }\n\n    GEOmetadbFile=\"GEOmetadb.sqlite\"\n    file.info(GEOmetadbFile)\n\n    sqlpkg <- \"RSQLite\"\n    require(sqlpkg, character.only=TRUE)\n    dbConnect <- eval(parse(text=\"dbConnect\"))\n    dbGetQuery <- eval(parse(text=\"dbGetQuery\"))\n    SQLite <- eval(parse(text=\"SQLite\"))\n\n    con <- dbConnect(SQLite(),GEOmetadbFile)\n    ## dbListTables(con)\n\n\n    pkg <- \"GEOquery\"\n    require(pkg, character.only=TRUE)\n    getGEO <- eval(parse(text=\"getGEO\"))\n    Meta <- eval(parse(text=\"Meta\"))\n\n    ## get all GPL IDs\n    ## download soft using gpl = getGEO(\"GPLXXX\")\n    ## using Meta(gpl) find the technology match sequencing\n    ## get all gsm IDs\n    ## parse it\n\n    gpl <- dbGetQuery(con, 'select gpl, technology from gpl')\n    gpl <- gpl[gpl[,2] == \"high-throughput sequencing\",1]\n    gpl <- gpl[!is.na(gpl)]\n\n    ## save the processedGSM vector that contain all the GSM that have been processed.\n    ## next time when preparing GSMInfo, filter those have been processed before.\n    load(system.file(\"extdata/processedGSM.rda\", package=\"ChIPseeker\"))\n    processedGSM <- get(\"processedGSM\")\n    newGSM <- c()\n\n    gpldir <- \"GPL\"\n    if (!file.exists(gpldir)) {\n        dir.create(gpldir)\n    }\n\n    for (gid in gpl) {\n        gg <- tryCatch(getGEO(gid, destdir=gpldir), error=function(e) NULL)\n        if (is.null(gg)) {\n            next\n        }\n        gsm <- Meta(gg)$sample_id\n        gsm <- gsm[! 
(gsm %in% processedGSM) ]\n        if (length(gsm) == 0) {\n            next\n        }\n        newGSM <- c(newGSM, gsm)\n\n        sf <- batchGetGSMsuppFile(gsm)\n        if (!is.null(sf)) {\n            save(sf, file=paste(gid, \"_sf.rda\", sep=\"\"))\n        }\n    }\n\n    processedGSM <- c(processedGSM, newGSM)\n    processedGSM <- unique(processedGSM)\n    save(processedGSM, file=\"../processedGSM.rda\", compress=\"xz\")\n\n\n    sfiles <- list.files(pattern=\"_sf.rda\")\n    res <- data.frame(gsm=NULL, remoteFile=NULL)\n    for (ff in sfiles) {\n        load(ff)\n        if (!is.null(sf)) {\n            res <- rbind(res, sf)\n        }\n    }\n    colnames(res)[2] <- \"supplementary_file\"\n\n\n    GSMInfo <- lapply(unique(as.character(res$gsm)), function(i) {\n        dbGetQuery(con,paste(\"select gsm,series_id,gpl,organism_ch1,title,characteristics_ch1,source_name_ch1,extract_protocol_ch1,description,data_processing,submission_date \",\n                             \"from gsm where gsm='\", i, \"'\", sep=\"\"))\n    })\n\n    GSMInfo <- do.call(\"rbind\", GSMInfo)\n\n    colnames(GSMInfo) <- sub(\"_ch1\", \"\", colnames(GSMInfo))\n\n    gsminfo <- merge(GSMInfo, res, by.x=\"gsm\", by.y=\"gsm\")\n\n    tryCatch(utils::data(\"ucsc_release\", package=\"ChIPseeker\"))\n    ucsc_release <- get(\"ucsc_release\")\n\n    genVer <- lapply(1:nrow(gsminfo), function(i)\n                     getGenomicVersion(ucsc_release,\n                                       gsminfo[i, \"data_processing\"],\n                                       gsminfo[i, \"organism\"],\n                                       gsminfo[i, \"supplementary_file\"])\n                     )\n\n    gsminfo$genomeVersion <- unlist(genVer)\n\n    gse <- as.character(gsminfo$series_id)\n    pubmed <- lapply(gse, function(i) {\n        dbGetQuery(con,paste(\"select gse,pubmed_id \",\n                             \"from gse where gse='\", i, \"'\", sep=\"\"))\n    })\n    pm <- do.call(rbind, pubmed)\n    
pm <- unique(pm)\n    gsminfo <- merge(gsminfo, pm, by.x=\"series_id\", by.y=\"gse\", all.x=TRUE)\n\n    ## remove non-ASCII characters\n    for(i in 1:ncol(gsminfo)) {\n        gsminfo[,i] = iconv(gsminfo[,i], \"latin1\", \"ASCII\", sub=\"\")\n    }\n    gsminfo2 <- gsminfo\n    rm(gsminfo)\n\n    utils::data(\"gsminfo\", package=\"ChIPseeker\")\n    gsminfo <- get(\"gsminfo\")\n    gsminfo <- rbind(gsminfo, gsminfo2)\n    gsminfo <- unique(gsminfo)\n\n    save(gsminfo, file=\"../gsminfo.rda\", compress=\"xz\")\n}\n\n\ngetGenomicVersion <- function(ucsc_release, data_processing, organism, supplementary_file) {\n    data_processing <- as.character(data_processing)\n    organism <- as.character(organism)\n    supplementary_file <- as.character(supplementary_file)\n\n    species <- NULL\n    gs <- subset(ucsc_release, subset = species == organism)\n    if (nrow(gs) == 0) return(NA)\n\n    genMatch <- unlist(sapply(gs$ucsc_version, grep, data_processing))\n    if (length(genMatch) == 0) {\n        genMatch <- unlist(sapply(gs$ucsc_version, grep, supplementary_file))\n        if (length(genMatch) == 0) {\n            return(NA)\n        }\n    }\n\n    genVer <- names(genMatch)\n    if (length(genVer) > 1) {\n        genVer <- genVer[1]\n    }\n\n    return(genVer)\n}\n\n## getGSE_ENCODE <- function() {\n##     encode=readLines(\"http://www.ncbi.nlm.nih.gov/geo/info/ENCODE.html\")\n##     encode.chipseq <- encode[grep(\"ChIP-Seq\", encode)]\n##     ## require(gsubfn)\n##     ## gse <- sapply(encode.chipseq, function(i) {\n##     ##     res <- strapply(i, \"(GSE\\\\d+)\")\n##     ##     unique(unlist(res))\n##     ## })\n##     gse <- sapply(encode.chipseq, gsub, pattern='.*(GSE\\\\d+).*', replacement='\\\\1')\n##     names(gse) <- NULL\n##     return(gse)\n## }\n\n## GSE2GSM <- function(GSE) {\n##     info <- getGEO(GSE, GSEMatrix=FALSE)\n##     metaInfo <- Meta(info)\n##     gsm <- metaInfo$sample_id\n##     return(gsm)\n## }\n\n##' @importFrom parallel mclapply\n##' 
@importFrom parallel detectCores\nbatchGetGSMsuppFile <- function(gsm) {\n    suppfiles <- mclapply(seq_along(gsm), function(i) {\n        cat(\"processing \", gsm[i], \"\\t\",  i , \" of \", length(gsm), \"\\n\")\n        tryCatch(getGSMsuppFile(gsm[i]), error=function(e) NULL)\n    }, mc.cores=detectCores())\n\n    suppfiles <- suppfiles[!unlist(lapply(suppfiles, is.null))]\n\n    sf <- do.call(\"rbind\", suppfiles)\n    return(sf)\n}\n\ngetGSMsuppFile <- function(GSM) {\n    pkg <- \"GEOquery\"\n    require(pkg, character.only=TRUE)\n    getGEO <- eval(parse(text=\"getGEO\"))\n    Meta <- eval(parse(text=\"Meta\"))\n\n    destdir=\"geo_soft\"\n    if (!file.exists(destdir)) {\n        dir.create(destdir)\n    }\n    info <- getGEO(GSM, GSEMatrix=FALSE, destdir=destdir)\n    ## http://www.ncbi.nlm.nih.gov/geo/info/soft2.html\n    metaInfo <- Meta(info)\n\n    ## supplementary file names\n    fnames <- unlist(metaInfo[grep(\"supplementary\", names(metaInfo))])\n    names(fnames) <- NULL\n    i <- c(grep(\"bed.gz\", fnames), grep(\"Peak.gz\", fnames), grep(\"bedGraph.gz\", fnames))\n\n    if (length(i) == 0) {\n        message(\"No bed files found\")\n        return(NULL)\n    }\n    fnames <- fnames[i]\n    res <- data.frame(gsm=GSM, remoteFile = fnames)\n    return(res)\n}\n\nget_gsminfo <- function() {\n    tryCatch(utils::data(\"gsminfo\", package=\"ChIPseeker\"))\n    gsminfo <- get(\"gsminfo\")\n    return(gsminfo)\n}\n"
  },
  {
    "path": "R/addGeneAnno.R",
    "content": "##' get gene annotation, symbol, gene name etc.\n##'\n##'\n##' @title getGeneAnno\n##' @param annoDb annotation package\n##' @param geneID query geneID\n##' @param type gene ID type\n##' @param columns names of columns to be obtained from database\n##' @return data.frame\n##' @importFrom AnnotationDbi select\n##' @author G Yu\ngetGeneAnno <- function(annoDb, geneID, type, columns){\n    kk <- unlist(geneID)\n    require(annoDb, character.only = TRUE)\n    annoDb <- eval(parse(text=annoDb))\n\n    if (type == \"Entrez Gene ID\") {\n        kt <- \"ENTREZID\"\n    } else if (type ==\"Ensembl gene ID\" || type == \"Ensembl Gene ID\") {\n        kt <- \"ENSEMBL\"\n    } else {\n        message(\"geneID type is not supported...\\tPlease report it to developer...\\n\")\n        return(NA)\n    }\n\n    i <- which(!is.na(kk))\n    kk <- gsub(\"\\\\.\\\\d+$\", \"\", kk)\n    ann <- tryCatch(\n        suppressWarnings(select(annoDb,\n                                keys=unique(kk[i]),\n                                keytype=kt,\n                                columns=columns)),\n        error = function(e) NULL)\n\n    if (is.null(ann)) {\n        warning(\"ID type not matched, gene annotation will not be added...\")\n        return(NA)\n    }\n    idx <- getFirstHitIndex(ann[,kt])\n    ann <- ann[idx,]\n\n    ## idx <- unlist(sapply(kk, function(x) which(x==ann[,kt])))\n    ## res <- matrix(NA, ncol=ncol(ann), nrow=length(kk)) %>% as.data.frame\n    ## colnames(res) <- colnames(ann)\n    ## res[i,] <- ann[idx,]\n\n    rownames(ann) <- ann[, kt]\n    res <- ann[as.character(kk),]\n\n    return(res)\n}\n\n\naddGeneAnno <- function(peak.gr, annoDb, type, columns) {\n    geneAnno <- getGeneAnno(annoDb, peak.gr$geneId, type, columns)\n    if (! all(is.na(geneAnno))) {\n        for(cn in colnames(geneAnno)[-1]) {\n            mcols(peak.gr)[[cn]] <- geneAnno[, cn]\n        }\n    }\n    return(peak.gr)\n}\n\n"
  },
  {
    "path": "R/annotatePeak.R",
    "content": "##' Annotate peaks\n##'\n##'\n##' @title annotatePeak\n##' @param peak peak file or GRanges object\n##' @param tssRegion Region Range of TSS\n##' @param TxDb TxDb or EnsDb annotation object\n##' @param level one of transcript and gene\n##' @param assignGenomicAnnotation logical, assign peak genomic annotation or not\n##' @param genomicAnnotationPriority genomic annotation priority\n##' @param annoDb annotation package\n##' @param addFlankGeneInfo logical, add flanking gene information from the peaks\n##' @param flankDistance distance of flanking sequence\n##' @param sameStrand logical, whether find nearest/overlap gene in the same strand\n##' @param ignoreOverlap logical, whether ignore overlap of TSS with peak\n##' @param ignoreUpstream logical, if True only annotate gene at the 3' of the peak.\n##' @param ignoreDownstream logical, if True only annotate gene at the 5' of the peak.\n##' @param overlap one of 'TSS' or 'all', if overlap=\"all\", then gene overlap with peak will be reported as nearest gene, no matter the overlap is at TSS region or not.\n##' @param verbose print message or not\n##' @param columns names of columns to be obtained from database\n##' @return data.frame or GRanges object with columns of:\n##'\n##' all columns provided by input.\n##'\n##' annotation: genomic feature of the peak, for instance if the peak is\n##' located in 5'UTR, it will annotated by 5'UTR. 
Possible annotation is\n##' Promoter-TSS, Exon, 5' UTR, 3' UTR, Intron, and Intergenic.\n##'\n##' geneChr: Chromosome of the nearest gene\n##'\n##' geneStart: gene start\n##'\n##' geneEnd: gene end\n##'\n##' geneLength: gene length\n##'\n##' geneStrand: gene strand\n##'\n##' geneId: entrezgene ID\n##'\n##' distanceToTSS: distance from peak to gene TSS\n##'\n##' if annoDb is provided, extra column will be included:\n##'\n##' ENSEMBL: ensembl ID of the nearest gene\n##'\n##' SYMBOL: gene symbol\n##'\n##' GENENAME: full gene name\n##' @import BiocGenerics S4Vectors GenomeInfoDb\n##' @examples\n##' \\dontrun{\n##' require(TxDb.Hsapiens.UCSC.hg19.knownGene)\n##' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n##' peakfile <- system.file(\"extdata\", \"sample_peaks.txt\", package=\"ChIPseeker\")\n##' peakAnno <- annotatePeak(peakfile, tssRegion=c(-3000, 3000), TxDb=txdb)\n##' peakAnno\n##' }\n##' @seealso \\code{\\link{plotAnnoBar}} \\code{\\link{plotAnnoPie}} \\code{\\link{plotDistToTSS}}\n##' @export\n##' @author G Yu\nannotatePeak <- function(peak,\n                         tssRegion=c(-3000, 3000),\n                         TxDb=NULL,\n                         level = \"transcript\",\n                         assignGenomicAnnotation=TRUE,\n                         genomicAnnotationPriority = c(\"Promoter\", \"5UTR\", \"3UTR\", \"Exon\", \"Intron\", \"Downstream\", \"Intergenic\"),\n                         annoDb=NULL,\n                         addFlankGeneInfo=FALSE,\n                         flankDistance=5000,\n                         sameStrand = FALSE,\n                         ignoreOverlap=FALSE,\n                         ignoreUpstream=FALSE,\n                         ignoreDownstream=FALSE,\n                         overlap = \"TSS\",\n                         verbose=TRUE,\n                         columns=c(\"ENTREZID\", \"ENSEMBL\", \"SYMBOL\", \"GENENAME\")) {\n\n    is_GRanges_of_TxDb <- FALSE\n    if (is(TxDb, \"GRanges\")) {\n        is_GRanges_of_TxDb <- 
TRUE\n        assignGenomicAnnotation <- FALSE\n        annoDb <- NULL\n        addFlankGeneInfo <- FALSE\n        message(\"#\\n#.. 'TxDb' is a self-defined 'GRanges' object...\\n#\")\n        message(\"#.. Some parameters of 'annotatePeak' will be disable,\")\n        message(\"#.. including:\")\n        message(\"#..\\tlevel, assignGenomicAnnotation, genomicAnnotationPriority,\")\n        message(\"#..\\tannoDb, addFlankGeneInfo and flankDistance.\")\n        message(\"#\\n#.. Some plotting functions are designed for visualizing genomic annotation\")\n        message(\"#.. and will not be available for the output object.\\n#\")\n    }\n\n    if (is_GRanges_of_TxDb) {\n        level <- \"USER_DEFINED\"\n    } else {\n        level <- match.arg(level, c(\"transcript\", \"gene\"))\n    }\n\n    if (assignGenomicAnnotation && all(genomicAnnotationPriority %in% c(\"Promoter\", \"5UTR\", \"3UTR\", \"Exon\", \"Intron\", \"Downstream\", \"Intergenic\")) == FALSE) {\n        stop('genomicAnnotationPriority should be any order of c(\"Promoter\", \"5UTR\", \"3UTR\", \"Exon\", \"Intron\", \"Downstream\", \"Intergenic\")')\n    }\n\n    if ( is(peak, \"GRanges\") ){\n        ## this test will be TRUE\n        ## when peak is an instance of class/subclass of \"GRanges\"\n        input <- \"gr\"\n        peak.gr <- peak\n    } else {\n        input <- \"file\"\n        peak.gr <- loadPeak(peak, verbose)\n    }\n\n    peakNum <- length(peak.gr)\n\n    if (verbose)\n        cat(\">> preparing features information...\\t\\t\",\n            format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n\n    if (is_GRanges_of_TxDb) {\n        features <- TxDb\n    } else {\n        TxDb <- loadTxDb(TxDb)\n\n        if (level==\"transcript\") {\n            features <- getGene(TxDb, by=\"transcript\")\n        } else {\n            features <- getGene(TxDb, by=\"gene\")\n        }\n    }\n    if (verbose)\n        cat(\">> identifying nearest features...\\t\\t\",\n            format(Sys.time(), 
\"%Y-%m-%d %X\"), \"\\n\")\n\n    ## nearest features\n    idx.dist <- getNearestFeatureIndicesAndDistances(peak.gr, features,\n                                                     sameStrand, ignoreOverlap,\n                                                     ignoreUpstream,ignoreDownstream,\n                                                     overlap=overlap)\n\n    if (verbose)\n        cat(\">> calculating distance from peak to TSS...\\t\",\n            format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n    ## distance\n    distance <- idx.dist$distance\n\n    ## update peak, remove un-map peak if exists.\n    peak.gr <- idx.dist$peak\n\n    ## annotation\n    if (assignGenomicAnnotation == TRUE) {\n        if (verbose)\n            cat(\">> assigning genomic annotation...\\t\\t\",\n                format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n\n        anno <- getGenomicAnnotation(peak.gr, distance, tssRegion, TxDb, level, genomicAnnotationPriority, sameStrand=sameStrand)\n        annotation <- anno[[\"annotation\"]]\n        detailGenomicAnnotation <- anno[[\"detailGenomicAnnotation\"]]\n    } else {\n        annotation <- NULL\n        detailGenomicAnnotation <- NULL\n    }\n\n    ## append annotation to peak.gr\n    if (!is.null(annotation))\n        mcols(peak.gr)[[\"annotation\"]] <- annotation\n\n\n    has_nearest_idx <- which(idx.dist$index <= length(features))\n    nearestFeatures <- features[idx.dist$index[has_nearest_idx]]\n\n    ## duplicated names since more than 1 peak may annotated by only 1 gene\n    names(nearestFeatures) <- NULL\n    nearestFeatures.df <- as.data.frame(nearestFeatures)\n    if (is_GRanges_of_TxDb) {\n        colnames(nearestFeatures.df)[1:5] <- c(\"geneChr\", \"geneStart\", \"geneEnd\",\n                                          \"geneLength\", \"geneStrand\")\n    } else if (level == \"transcript\") {\n        if (is(TxDb, \"EnsDb\")) {\n            nearestFeatures.df <- nearestFeatures.df[, c(\"seqnames\", \"start\",\n        
                                                 \"end\", \"width\",\n                                                         \"strand\", \"gene_id\",\n                                                         \"tx_id\", \"tx_biotype\"),\n                                                     drop = FALSE]\n            colnames(nearestFeatures.df) <- c(\n                \"geneChr\", \"geneStart\", \"geneEnd\", \"geneLength\", \"geneStrand\",\n                \"geneId\", \"transcriptId\", \"transcriptBiotype\")\n        } else {\n            colnames(nearestFeatures.df) <- c(\"geneChr\", \"geneStart\", \"geneEnd\",\n                                              \"geneLength\", \"geneStrand\",\n                                              \"geneId\", \"transcriptId\")\n            nearestFeatures.df$geneId <- TXID2EG(\n                as.character(nearestFeatures.df$geneId), geneIdOnly=TRUE)\n        }\n    } else {\n        if (is(TxDb, \"EnsDb\")) {\n            nearestFeatures.df <- nearestFeatures.df[, c(\"seqnames\", \"start\",\n                                                         \"end\", \"width\",\n                                                         \"strand\", \"gene_id\",\n                                                         \"gene_biotype\"),\n                                                     drop = FALSE]\n            colnames(nearestFeatures.df) <- c(\"geneChr\", \"geneStart\", \"geneEnd\",\n                                              \"geneLength\", \"geneStrand\",\n                                              \"geneId\", \"geneBiotype\")\n        } else\n            colnames(nearestFeatures.df) <- c(\"geneChr\", \"geneStart\", \"geneEnd\",\n                                              \"geneLength\", \"geneStrand\",\n                                              \"geneId\")\n    }\n\n    for(cn in colnames(nearestFeatures.df)) {\n        mcols(peak.gr)[[cn]][has_nearest_idx] <- unlist(nearestFeatures.df[, cn])\n    }\n\n    
mcols(peak.gr)[[\"distanceToTSS\"]] <- distance\n\n    if (!is.null(annoDb)) {\n        if (verbose)\n            cat(\">> adding gene annotation...\\t\\t\\t\",\n                format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n        .idtype <- IDType(TxDb)\n        if (length(.idtype) == 0 || is.na(.idtype) || is.null(.idtype)) {\n            n <- length(peak.gr)\n            if (n > 100)\n                n <- 100\n            sampleID <- peak.gr$geneId[1:n]\n\n            if (all(grepl('^ENS', sampleID))) {\n                .idtype <- \"Ensembl Gene ID\"\n            } else if (all(grepl('^\\\\d+$', sampleID))) {\n                .idtype <- \"Entrez Gene ID\"\n            } else {\n                warning(\"Unknown ID type, gene annotation will not be added...\")\n                .idtype <- NA\n            }\n        }\n\n        if (!is.na(.idtype)) {\n            peak.gr %<>% addGeneAnno(annoDb, .idtype, columns)\n        }\n    }\n\n    if (addFlankGeneInfo == TRUE) {\n        if (verbose)\n            cat(\">> adding flank feature information from peaks...\\t\",\n                format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n\n        flankInfo <- getAllFlankingGene(peak.gr, features, level, flankDistance)\n\n        if (level == \"transcript\") {\n            mcols(peak.gr)[[\"flank_txIds\"]] <- NA\n            mcols(peak.gr)[[\"flank_txIds\"]][flankInfo$peakIdx] <- flankInfo$flank_txIds\n        }\n\n        mcols(peak.gr)[[\"flank_geneIds\"]] <- NA\n        mcols(peak.gr)[[\"flank_gene_distances\"]] <- NA\n\n        mcols(peak.gr)[[\"flank_geneIds\"]][flankInfo$peakIdx] <- flankInfo$flank_geneIds\n        mcols(peak.gr)[[\"flank_gene_distances\"]][flankInfo$peakIdx] <- flankInfo$flank_gene_distances\n\n    }\n\n    if (!is_GRanges_of_TxDb) {\n        if(verbose)\n            cat(\">> assigning chromosome lengths\\t\\t\\t\",\n                format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n\n        peak.gr@seqinfo <- 
seqinfo(TxDb)[names(seqlengths(peak.gr))]\n    }\n\n    if(verbose)\n        cat(\">> done...\\t\\t\\t\\t\\t\",\n            format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n\n    if (assignGenomicAnnotation) {\n        res <- new(\"csAnno\",\n                   anno = peak.gr,\n                   tssRegion = tssRegion,\n                   level=level,\n                   hasGenomicAnnotation = TRUE,\n                   detailGenomicAnnotation=detailGenomicAnnotation,\n                   annoStat=getGenomicAnnoStat(peak.gr),\n                   peakNum=peakNum\n                   )\n    } else {\n        res <- new(\"csAnno\",\n                   anno = peak.gr,\n                   tssRegion = tssRegion,\n                   level=level,\n                   hasGenomicAnnotation = FALSE,\n                   peakNum=peakNum\n                   )\n    }\n\n    return(res)\n}\n\n\n##' dropAnno\n##'\n##' drop annotation exceeding distanceToTSS_cutoff\n##' @title dropAnno\n##' @param csAnno output of annotatePeak\n##' @param distanceToTSS_cutoff distance to TSS cutoff\n##' @return csAnno object\n##' @export\n##' @author Guangchuang Yu\ndropAnno <- function(csAnno, distanceToTSS_cutoff=10000) {\n    idx <- which(abs(mcols(csAnno@anno)[[\"distanceToTSS\"]]) < distanceToTSS_cutoff)\n    csAnno@anno <- csAnno@anno[idx]\n    csAnno@peakNum <- length(idx)\n    if (csAnno@hasGenomicAnnotation) {\n        csAnno@annoStat <- getGenomicAnnoStat(csAnno@anno)\n        csAnno@detailGenomicAnnotation = csAnno@detailGenomicAnnotation[idx,]\n    }\n    csAnno\n}\n"
  },
  {
    "path": "R/covplot.R",
    "content": "\n##' plot peak coverage\n##'\n##' \n##' @title covplot\n##' @param peak peak file or GRanges object\n##' @param weightCol weight column of peak\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param title title\n##' @param chrs selected chromosomes to plot, all chromosomes by default\n##' @param xlim ranges to plot, default is whole chromosome\n##' @param lower lower cutoff of coverage signal\n##' @param fill_color specify the color/palette for the plot. Order matters\n##' @return ggplot2 object\n##' @import GenomeInfoDb\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 geom_segment\n##' @importFrom ggplot2 geom_blank\n##' @importFrom ggplot2 geom_rect\n##' @importFrom ggplot2 facet_grid\n##' @importFrom ggplot2 theme\n##' @importFrom ggplot2 theme_classic\n##' @importFrom ggplot2 element_text\n##' @importFrom ggplot2 xlab\n##' @importFrom ggplot2 ylab\n##' @importFrom ggplot2 xlim\n##' @importFrom ggplot2 ggtitle\n##' @export\n##' @author G Yu\ncovplot <- function(peak, weightCol=NULL,\n                    xlab  = \"Chromosome Size (bp)\",\n                    ylab  = \"\",\n                    title = \"ChIP Peaks over Chromosomes\",\n                    chrs  = NULL,\n                    xlim  = NULL,\n                    lower = 1,\n                    fill_color = \"black\") {\n    isList <- is.list(peak)\n    if(!isList) {  # Note: don't support data.frame\n        tm <- getChrCov(peak = peak, weightCol = weightCol, chrs = chrs, xlim = xlim, lower = lower)\n    } else {\n        ltm <- lapply(peak, getChrCov, weightCol = weightCol, chrs = chrs, xlim = xlim, lower = lower)\n        if (is.null(names(ltm))) {\n            nn <- paste0(\"peak\", seq_along(ltm))\n            warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse = ' '))\n            names(ltm) <- nn\n        }\n        tm <- dplyr::bind_rows(ltm, .id = \".id\")\n        chr.sorted <- sortChrName(as.character(unique(tm$chr)))\n        
tm$chr <- factor(tm$chr, levels = chr.sorted)\n    }\n    \n    chr <- start <- end <- value <- .id <- NULL\n    \n    if(length(tm$chr) == 0){\n        p <- ggplot(data.frame(x = 1)) + geom_blank()\n    } else {\n        p <- ggplot(tm, aes(start, value))\n        \n        ## p <- p + geom_segment(aes(x=start, y=0, xend=end, yend= value))\n        if (isList) {\n            if (length(fill_color) == length(peak) && all(is_valid_color(fill_color))){\n                cols = fill_color\n            } else {\n                cols = generate_colors(fill_color, n = length(peak))\n            }\n            p <- p + geom_rect(aes(xmin = start, ymin = 0, xmax = end, ymax = value, fill = .id, color = .id)) +\n                scale_color_manual(values = cols) +\n                scale_fill_manual(values = cols)\n        } else {\n            p <- p + geom_rect(aes(xmin = start, ymin = 0, xmax = end, ymax = value), fill = fill_color, color = fill_color)\n        }\n        \n        if(length(unique(tm$chr)) > 1) {\n            p <- p + facet_grid(chr ~., scales=\"free\")\n        }\n        \n    }\n    \n    p <- p + theme_classic()\n    p <- p + labs(x = xlab, y = ylab, title = title, fill = NULL, color = NULL)\n    p <- p + scale_y_continuous(expand = c(0,0))\n    p <- p + theme(strip.text.y=element_text(angle=360))\n    p <- p + scale_x_continuous(labels = scales::label_number(scale_cut = scales::cut_si(\"\")))\n    \n    if (!is.null(xlim) && !all(is.na(xlim)) && is.numeric(xlim) && length(xlim) == 2) {\n        p <- p + xlim(xlim)\n    }\n    \n    return(p)\n}\n\n##' @import S4Vectors IRanges\n##' @importFrom dplyr group_by\n##' @importFrom dplyr summarise\n##' @importFrom magrittr %>%\ngetChrCov <- function(peak, weightCol, chrs, xlim, lower=1) {\n    if (is(peak, \"GRanges\")) {\n        peak.gr <- peak\n    } else if (file.exists(peak)) {\n        peak.gr <- readPeakFile(peak, as=\"GRanges\")\n    } else {\n        stop(\"peak should be a GRanges object or a peak 
file...\")\n    }\n\n    if ( is.null(weightCol)) {\n        peak.cov <- coverage(peak.gr)\n    } else {\n        weight <- mcols(peak.gr)[[weightCol]]\n        peak.cov <- coverage(peak.gr, weight=weight)\n    }\n\n    cov <- lapply(peak.cov, IRanges::slice, lower=lower)\n\n    get.runValue <- function(x) {\n        y <- runValue(x)\n        sapply(y@listData, mean)\n        ## value <- x@subject@values\n        ## value[value != 0]\n    }\n\n    chr <- start <- end <- cnt <- NULL\n    \n    ldf <- lapply(1:length(cov), function(i) {\n        x <- cov[[i]]\n        if (length(x@ranges) == 0) {\n            msg <- paste0(names(cov[i]),\n                          \" dosen't contain signal higher than \",\n                          lower)\n            message(msg)\n            return(NA)\n        }\n        data.frame(chr   = names(cov[i]),\n                   start = start(x),\n                   end   = end(x),\n                   cnt   = get.runValue(x)\n                                        # the following versions are slower\n                                        # unlist(runValue(x)) \n                                        # sapply(x, runValue)\n                   )\n    })\n\n    ldf <- ldf[!is.na(ldf)]\n    df <- do.call(\"rbind\", ldf)\n    \n    chr.sorted <- sortChrName(as.character(unique(df$chr)))\n    df$chr <- factor(df$chr, levels=chr.sorted)\n    if (!is.null(chrs) && !all(is.na(chrs)) && all(chrs %in% chr.sorted)) {\n        df <- df[df$chr %in% chrs, ]\n    }\n    if (!is.null(xlim) && !all(is.na(xlim)) && is.numeric(xlim) && length(xlim) == 2) {\n        df <- df[df$start >= xlim[1] & df$end <= xlim[2],]\n    }\n\n    df2 <- group_by(df, chr, start, end) %>% summarise(value=sum(cnt), .groups = \"drop\")\n    return(df2)\n}\n\n# a simple `stringr::str_sort(numeric=TRUE)` implementation\nsortChrName <- function(chr.name, decreasing = FALSE) {\n    ## universal sort function, support organisms other than human\n    chr_part <- 
sub(\"^(\\\\D*)(\\\\d*)$\", \"\\\\1\", chr.name)\n    num_part <- as.numeric(sub(\"^(\\\\D*)(\\\\d*)$\", \"\\\\2\", chr.name))\n    chr.name[order(chr_part, num_part, decreasing = decreasing)]\n}\n\n\n"
  },
  {
    "path": "R/csAnno.R",
    "content": "##' Class \"csAnno\"\r\n##' This class represents the output of ChIPseeker Annotation\r\n##'\r\n##'\r\n##' @name csAnno-class\r\n##' @aliases csAnno-class\r\n##' show,csAnno-method vennpie,csAnno-method\r\n##' plotDistToTSS,csAnno-method plotAnnoBar,csAnno-method\r\n##' plotAnnoPie,csAnno-method upsetplot,csAnno-method\r\n##' subset,csAnno-method\r\n##'\r\n##' @docType class\r\n##' @slot anno annotation\r\n##' @slot tssRegion TSS region\r\n##' @slot level transcript or gene\r\n##' @slot hasGenomicAnnotation logical\r\n##' @slot detailGenomicAnnotation Genomic Annotation in detail\r\n##' @slot annoStat annotation statistics\r\n##' @slot peakNum number of peaks\r\n##' @exportClass csAnno\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\n##' @seealso \\code{\\link{annotatePeak}}\r\n##' @keywords classes\r\nsetClass(\"csAnno\",\r\n         representation=representation(\r\n             anno = \"GRanges\",\r\n             tssRegion = \"numeric\",\r\n             level = \"character\",\r\n             hasGenomicAnnotation = \"logical\",\r\n             detailGenomicAnnotation=\"data.frame\",\r\n             annoStat=\"data.frame\",\r\n             peakNum=\"numeric\"\r\n             ))\r\n\r\n\r\n##' convert csAnno object to GRanges\r\n##'\r\n##'\r\n##' @title as.GRanges\r\n##' @param x csAnno object\r\n##' @return GRanges object\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\n##' @export\r\nas.GRanges <- function(x) {\r\n    if (!is(x, \"csAnno\"))\r\n        stop(\"not supported...\")\r\n    return(x@anno)\r\n}\r\n\r\n##' getting status of annotation\r\n##' \r\n##' \r\n##' @title getAnnoStat\r\n##' @param x csAnno object\r\n##' @export\r\ngetAnnoStat <- function(x) {\r\n    if (!is(x, \"csAnno\"))\r\n        stop(\"not supported...\")\r\n    return(x@annoStat)\r\n}\r\n\r\n\r\n\r\n##' Combine csAnno Object\r\n##'\r\n##'\r\n##' https://github.com/YuLab-SMU/ChIPseeker/issues/157\r\n##' @title 
combine_csAnno\r\n##' @param x csAnno object\r\n##' @param ... csAnno objects\r\n##' @return csAnno object\r\n##' @export\r\ncombine_csAnno <- function(x, ...){\r\n    z <- list(x, ...)\r\n    \r\n    if(sum(vapply(z, function(x) !is(x, \"csAnno\"), FUN.VALUE = logical(1))) != 0){\r\n        stop(\"not supported...\")\r\n    }\r\n    \r\n    if(length(z)<2){\r\n        stop(\"need two or more csAnno object...\")\r\n    }\r\n    \r\n    \r\n    if(sum(!duplicated(lapply(z, function(x) x@tssRegion[1]))) != 1 \r\n       && sum(!duplicated(lapply(z, function(x) x@tssRegion[2]))) != 1){\r\n        stop(\"the tss regions of different csAnno objects should be the same...\")\r\n    }\r\n    \r\n    if(sum(!duplicated(lapply(z, function(x) x@level))) != 1){\r\n        stop(\"the level of different csAnno object should be the same...\")\r\n    }\r\n    \r\n    if(sum(!duplicated(lapply(z, function(x) x@hasGenomicAnnotation))) != 1){\r\n        stop(\"the status of GenomicAnnotation should be the same...\")\r\n    }\r\n    \r\n    combine_tssRegion <- x@tssRegion\r\n    combine_level <- x@level\r\n    combine_hasGenomicAnnotation <- x@hasGenomicAnnotation\r\n    \r\n    combine_anno <- x@anno\r\n    for(i in 2:length(z)){\r\n        combine_anno <- c(combine_anno,z[[i]]@anno)\r\n    }\r\n    \r\n    combine_detailGenomicAnnotation <- lapply(z, function(x) x@detailGenomicAnnotation)\r\n    combine_detailGenomicAnnotation <- do.call(\"rbind\",combine_detailGenomicAnnotation)\r\n    \r\n    combine_peakNum <- x@peakNum\r\n    for(i in 2:length(z)){\r\n        combine_peakNum <- combine_peakNum+z[[i]]@peakNum\r\n    }\r\n    \r\n    feature <- x@annoStat$Feature\r\n    for(i in 2:length(z)){\r\n        if(length(feature)<length(z[[i]]@annoStat$Feature)){\r\n            feature_levels <- levels(z[[i]]@annoStat$Feature)\r\n            feature <- c(as.vector(feature),as.vector(z[[i]]@annoStat$Feature))\r\n            feature <- feature[!duplicated(feature)]\r\n            feature <- 
factor(feature, \r\n                              levels = feature_levels)\r\n            feature <- sort(feature)\r\n        }else{\r\n            feature_levels <- levels(feature)\r\n            feature <- c(as.vector(feature),as.vector(z[[i]]@annoStat$Feature))\r\n            feature <- feature[!duplicated(feature)]\r\n            feature <- factor(feature, \r\n                              levels = feature_levels)\r\n            feature <- sort(feature)\r\n        }\r\n    }\r\n    \r\n    combine_annoStat <- data.frame(Feature=feature)\r\n    \r\n    for(i in 1:length(z)){\r\n        combine_annoStat <- merge(combine_annoStat, z[[i]]@annoStat, \r\n                                  by = \"Feature\", all = T, sort = F)\r\n        combine_annoStat[is.na(combine_annoStat)] <- 0\r\n        combine_annoStat <- combine_annoStat[order(combine_annoStat$Feature),]\r\n    }\r\n    \r\n    total <- (ncol(combine_annoStat)-1)*100\r\n    combine_annoStat$sum <- rowSums(combine_annoStat[, 2:ncol(combine_annoStat)])\r\n    \r\n    \r\n    for (i in 1:length(combine_annoStat$sum)) {\r\n        combine_annoStat$result[i] <- (combine_annoStat$sum[i]/total)*100\r\n    }\r\n    \r\n    annoStat_result <- data.frame(Feature=combine_annoStat[,1],Frequency=combine_annoStat[,ncol(combine_annoStat)])\r\n    \r\n    res <- new(\"csAnno\",\r\n               anno = combine_anno,\r\n               tssRegion = combine_tssRegion,\r\n               level = combine_level,\r\n               hasGenomicAnnotation = combine_hasGenomicAnnotation,\r\n               detailGenomicAnnotation = combine_detailGenomicAnnotation,\r\n               annoStat = annoStat_result,\r\n               peakNum = combine_peakNum\r\n    )\r\n    \r\n    return(res)\r\n}\r\n\r\n##' vennpie method generics\r\n##'\r\n##' @name vennpie\r\n##' @docType methods\r\n##' @rdname vennpie-methods\r\n##' \r\n##' @title vennpie method\r\n##' @param x A \\code{csAnno} instance\r\n##' @param r initial radius\r\n##' @param cex value 
to adjust legend\r\n##' @param ... additional parameter\r\n##' @return plot\r\n##' @usage vennpie(x, r = 0.2, cex=1.2, ...)\r\n##' @exportMethod vennpie\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\nsetMethod(\"vennpie\", signature(x=\"csAnno\"),\r\n          function(x, \r\n                   r = 0.2, \r\n                   cex = 1.2, \r\n                   ...) {\r\n            vennpie.csAnno(x, r, cex, ...)\r\n          }\r\n          )\r\n\r\n\r\n##' upsetplot method generics\r\n##'\r\n##' @name upsetplot\r\n##' @docType methods\r\n##' @rdname upsetplot-methods\r\n##'\r\n##' @title upsetplot method\r\n##' @param x A \\code{csAnno} instance\r\n##' @param ... additional parameter\r\n##' @return plot\r\n##' @usage upsetplot(x, ...)\r\n##' @importFrom enrichplot upsetplot\r\n##' @exportMethod upsetplot\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\nsetMethod(\"upsetplot\", signature(x=\"csAnno\"),\r\n          function(x, ...) {\r\n              upsetplot.csAnno(x, ...)\r\n          }\r\n          )\r\n\r\n##' convert csAnno object to data.frame\r\n##'\r\n##'\r\n##' @title as.data.frame.csAnno\r\n##' @param x csAnno object\r\n##' @param row.names row names\r\n##' @param optional should be omitted.\r\n##' @param ... additional parameters\r\n##' @return data.frame\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\n##' @method as.data.frame csAnno\r\n##' @export\r\nas.data.frame.csAnno <- function(x, row.names=NULL, optional=FALSE, ...) 
{\r\n    y <- as.GRanges(x)\r\n    if (!(is.null(row.names) || is.character(row.names)))\r\n        stop(\"'row.names' must be NULL or a character vector\")\r\n    df <- as.data.frame(y)\r\n    rownames(df) <- row.names\r\n    return(df)\r\n}\r\n\r\n##' show method for \\code{csAnno} instance\r\n##'\r\n##' @name show\r\n##' @docType methods\r\n##' @rdname show-methods\r\n##' @aliases show,csAnno,ANY-method\r\n##' @title show method\r\n##' @param object A \\code{csAnno} instance\r\n##' @return message\r\n##' @importFrom methods show\r\n##' @exportMethod show\r\n##' @usage show(object)\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\nsetMethod(\"show\", signature(object=\"csAnno\"),\r\n          function(object) {\r\n              cat(\"Annotated peaks generated by ChIPseeker\\n\")\r\n              cat(paste(length(object@anno), object@peakNum, sep=\"/\"),\r\n                  \" peaks were annotated\\n\")\r\n              if (object@hasGenomicAnnotation) {\r\n                  cat(\"Genomic Annotation Summary:\\n\")\r\n                  print(object@annoStat)\r\n              }\r\n          }\r\n          )\r\n\r\n##' plotAnnoBar method for list of \\code{csAnno} instances\r\n##'\r\n##' @name plotAnnoBar\r\n##' @docType methods\r\n##' @rdname plotAnnoBar-methods\r\n##' @aliases plotAnnoBar,list-method\r\n##' @exportMethod plotAnnoBar\r\nsetMethod(\"plotAnnoBar\", signature(x=\"list\"),\r\n          function(x,\r\n                   xlab=\"\",\r\n                   ylab='Percentage(%)',\r\n                   title=\"Feature Distribution\",\r\n                   ...) 
{\r\n              if (is.null(names(x))) {\r\n                  nn <- paste0(\"Peak\", seq_along(x))\r\n                  warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse = \" \"))\r\n                  names(x) <- nn\r\n                  ## stop(\"input object should be a named list...\")\r\n              }\r\n              anno <- lapply(x, getAnnoStat)\r\n              ## anno.df <- ldply(anno)\r\n              anno.df <- list_to_dataframe(anno)\r\n              categoryColumn <- \".id\"\r\n              plotAnnoBar.data.frame(anno.df, xlab, ylab, title, categoryColumn)\r\n          })\r\n\r\n##' plotAnnoBar method for \\code{csAnno} instance\r\n##'\r\n##' @name plotAnnoBar\r\n##' @docType methods\r\n##' @rdname plotAnnoBar-methods\r\n##' @aliases plotAnnoBar,csAnno,ANY-method\r\n##' @title plotAnnoBar method\r\n##' @param x \\code{csAnno} instance\r\n##' @param xlab xlab\r\n##' @param ylab ylab\r\n##' @param title title\r\n##' @param ... additional parameter\r\n##' @return plot\r\n##' @exportMethod plotAnnoBar\r\n##' @usage plotAnnoBar(x, xlab=\"\", ylab='Percentage(\\%)',title=\"Feature Distribution\", ...)\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\nsetMethod(\"plotAnnoBar\", signature(x=\"csAnno\"),\r\n          function(x,\r\n                   xlab=\"\",\r\n                   ylab=\"Percentage(%)\",\r\n                   title=\"Feature Distribution\",\r\n                   ...) 
{\r\n              anno.df <- getAnnoStat(x)\r\n              categoryColumn <- 1\r\n              plotAnnoBar.data.frame(anno.df, xlab, ylab, title, categoryColumn)\r\n          })\r\n\r\n\r\n\r\n##' plotAnnoPie method for \\code{csAnno} instance\r\n##'\r\n##' @name plotAnnoPie\r\n##' @docType methods\r\n##' @rdname plotAnnoPie-methods\r\n##' @aliases plotAnnoPie,csAnno,ANY-method\r\n##' @title plotAnnoPie method\r\n##' @param x \\code{csAnno} instance\r\n##' @param ndigit number of digit to round\r\n##' @param cex label cex\r\n##' @param col color\r\n##' @param legend.position topright or other.\r\n##' @param pie3D plot in 3D or not\r\n##' @param radius radius of the pie\r\n##' @param ... extra parameter\r\n##' @return plot\r\n##' @exportMethod plotAnnoPie\r\n##' @usage plotAnnoPie(x,ndigit=2,cex=0.9,col=NA,legend.position=\"rightside\",pie3D=FALSE,radius=0.8,...)\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\nsetMethod(\"plotAnnoPie\", signature(x=\"csAnno\"),\r\n          function(x,\r\n                   ndigit=2,\r\n                   cex=0.9,\r\n                   col=NA,\r\n                   legend.position=\"rightside\",\r\n                   pie3D=FALSE,\r\n                   radius=0.8,\r\n                   ...){\r\n              plotAnnoPie.csAnno(x, ndigit, cex, col, legend.position, pie3D, radius, ...)\r\n          })\r\n\r\n\r\n\r\n##' plotDistToTSS method for list of \\code{csAnno} instances\r\n##'\r\n##' @name plotDistToTSS\r\n##' @docType methods\r\n##' @rdname plotDistToTSS-methods\r\n##' @aliases plotDistToTSS,list-method\r\n##' @exportMethod plotDistToTSS\r\nsetMethod(\"plotDistToTSS\", signature(x=\"list\"),\r\n          function(x, distanceColumn=\"distanceToTSS\",\r\n                                     xlab=\"\", ylab=\"Binding sites (%) (5'->3')\",\r\n                                     title=\"Distribution of transcription factor-binding loci relative to TSS\",                       \r\n                     
distanceBreaks=c(0, 1000, 3000, 5000, 10000, 100000),\r\n                     palette = NULL, ...) {\r\n              if (is.null(names(x))) {\r\n                  nn <- paste0(\"Peak\", seq_along(x))\r\n                  warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse = \" \"))\r\n                  names(x) <- nn\r\n                  ## stop(\"input object should be a named list...\")\r\n              }\r\n\r\n              peakAnno <- lapply(x, as.data.frame)\r\n              ## peakDist <- ldply(peakAnno)\r\n              peakDist <- list_to_dataframe(peakAnno)\r\n              categoryColumn <- \".id\"\r\n              plotDistToTSS.data.frame(peakDist, distanceColumn = distanceColumn,\r\n                                       distanceBreaks = distanceBreaks, palette = palette,\r\n                                       xlab = xlab, ylab = ylab, title = title, categoryColumn = categoryColumn)\r\n          })\r\n\r\n\r\n##' plotDistToTSS method for \\code{csAnno} instance\r\n##'\r\n##' @name plotDistToTSS\r\n##' @docType methods\r\n##' @rdname plotDistToTSS-methods\r\n##' @aliases plotDistToTSS,csAnno,ANY-method\r\n##' @title plotDistToTSS method\r\n##' @param distanceColumn distance column name\r\n##' @param distanceBreaks breaks of distance, default is 'c(0, 1000, 3000, 5000, 10000, 100000)'\r\n##' @param palette palette name for coloring different distances. Run `RColorBrewer::display.brewer.all()` to see all applicable values.\r\n##' @param x \\code{csAnno} instance\r\n##' @param xlab xlab\r\n##' @param ylab ylab\r\n##' @param title title\r\n##' @param ... 
additional parameter\r\n##' @return plot\r\n##' @exportMethod plotDistToTSS\r\n##' @usage plotDistToTSS(x,distanceColumn=\"distanceToTSS\", xlab=\"\",\r\n##' ylab=\"Binding sites (\\%) (5'->3')\",\r\n##' title=\"Distribution of transcription factor-binding loci relative to TSS\",...)\r\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\r\nsetMethod(\"plotDistToTSS\", signature(x=\"csAnno\"),\r\n          function(x, distanceColumn=\"distanceToTSS\",\r\n                                     xlab=\"\", ylab=\"Binding sites (%) (5'->3')\",\r\n                                     title=\"Distribution of transcription factor-binding loci relative to TSS\", \r\n                                     distanceBreaks=c(0, 1000, 3000, 5000, 10000, 100000),\r\n                                     palette = NULL,...) {\r\n              peakDist <- as.data.frame(x)\r\n              categoryColumn <- 1\r\n              plotDistToTSS.data.frame(peakDist, distanceColumn = distanceColumn, distanceBreaks = distanceBreaks, palette = palette,\r\n                                       xlab = xlab, ylab = ylab, title = title, categoryColumn = categoryColumn)\r\n          })\r\n\r\n"
  },
  {
    "path": "R/dplyr-verb.R",
    "content": "# extend filter to Peak (GRanges class object)\n#' @method filter GRanges\n#' @importFrom dplyr filter\n#' @export\nfilter.GRanges = function(.data, ..., .by = NULL, .preserve = FALSE) {\n  dots = rlang::quos(...)\n  as.data.frame(.data) |> \n    dplyr::filter(!!!dots, .by = .by, .preserve = .preserve) |> \n    droplevels() |> \n    GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = TRUE)\n}\n\n# extend mutate to Peak (GRanges class object)\n#' @method mutate GRanges\n#' @importFrom dplyr mutate\n#' @export\nmutate.GRanges = function(.data, ..., .by = NULL, \n                           .keep = c(\"all\", \"used\", \"unused\", \"none\"),\n                           .before = NULL,\n                           .after = NULL) {\n  dots = rlang::quos(...)\n  df = as.data.frame(.data)\n  \n  if (!is.null(.before) && !is.null(.after)) {\n    stop(\"You can't supply both `.before` and `.after`.\")\n  }\n  \n  if (!is.null(.before)) {\n    df = df |> \n      dplyr::mutate(!!!dots, .by = .by, .keep = .keep, .before = .before)\n  } else if (!is.null(.after)) {\n    df = df |> \n      dplyr::mutate(!!!dots, .by = .by, .keep = .keep, .after = .after)\n  } else {\n    df = df |> dplyr::mutate(!!!dots, .by = .by, .keep = .keep)\n  }\n  \n  df |> \n    GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = TRUE)\n}\n\n# S4Vectors::rename\n#' @method rename GRanges\n#' @importFrom rlang quos\n#' @export\nrename.GRanges = function(x, ...){\n  dots = rlang::quos(...)\n  as.data.frame(x) |> \n    dplyr::rename(!!!dots) |> \n    GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = TRUE)\n}\n\n#' @method arrange GRanges\n#' @importFrom dplyr arrange\n#' @export\narrange.GRanges = function(.data, ..., .by_group = FALSE){\n  dots = rlang::quos(...)\n  as.data.frame(.data) |> \n    dplyr::arrange(!!!dots, .by_group = .by_group) |> \n    GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = TRUE)\n}\n\n\n"
  },
  {
    "path": "R/enrichOverlap.R",
    "content": "##' calcuate overlap significant of ChIP experiments based on their nearest gene annotation\n##'\n##'\n##' @title enrichAnnoOverlap\n##' @param queryPeak query bed file\n##' @param targetPeak target bed file(s) or folder containing bed files\n##' @param TxDb TxDb\n##' @param pAdjustMethod pvalue adjustment method\n##' @param chainFile chain file for liftOver\n##' @param distanceToTSS_cutoff restrict nearest gene annotation by distance cutoff\n##' @return data.frame\n##' @importFrom stats p.adjust\n##' @importFrom stats phyper\n##' @export\n##' @importFrom rtracklayer import.chain\n##' @importFrom rtracklayer liftOver\n##' @importFrom yulab.utils get_cache_element\n##' @importFrom yulab.utils update_cache_item\n##' @author G Yu\nenrichAnnoOverlap <- function(queryPeak, targetPeak, TxDb=NULL, pAdjustMethod=\"BH\", chainFile=NULL, distanceToTSS_cutoff=NULL) {\n\n    TxDb <- loadTxDb(TxDb)\n\n    query.anno <- annotatePeak(queryPeak, TxDb=TxDb,\n                               assignGenomicAnnotation=FALSE, annoDb=NULL, verbose=FALSE)\n\n\n    if (is(targetPeak[1], \"GRanges\") || is(targetPeak[[1]], \"GRanges\")) {\n        target.gr <- targetPeak\n        targetFiles <- NULL\n    } else {\n        targetFiles <- parse_targetPeak_Param(targetPeak)\n        target.gr <- lapply(targetFiles, loadPeak)\n    }\n\n    if (!is.null(chainFile)) {\n        chain <- import.chain(chainFile)\n        target.gr <- lapply(target.gr, liftOver, chain=chain)\n    }\n\n    target.anno <- lapply(target.gr, annotatePeak, TxDb=TxDb,\n                          assignGenomicAnnotation=FALSE, annoDb=NULL, verbose=FALSE)\n\n\n    if (!is.null(distanceToTSS_cutoff)) {\n        query.anno <- dropAnno(query.anno, distanceToTSS_cutoff)\n        target.anno <- lapply(target.anno, dropAnno, distanceToTSS_cutoff = distanceToTSS_cutoff)\n    }\n\n    # ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n    features <- get_cache_element(item = ChIPseekerCache, elements = 
\"Transcripts\")\n\n    if(is.null(features)){\n        features <- transcriptsBy(TxDb)\n        features <- unlist(features)\n        update_cache_item(item = ChIPseekerCache, list(\"Transcripts\" = features))\n    }\n\n    # if ( exists(\"Transcripts\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n    #     features <- get(\"Transcripts\", envir=ChIPseekerEnv)\n    # } else {\n    #     features <- transcriptsBy(TxDb)\n    #     features <- unlist(features)\n    #     assign(\"Transcripts\", features, envir=ChIPseekerEnv)\n    # }\n\n    ol <- lapply(target.anno, function(i) unique(intersect(as.GRanges(query.anno)$geneId, as.GRanges(i)$geneId)))\n    oln <- unlist(lapply(ol, length))\n    N <- length(features)\n    ## white ball\n    m <- length(unique(as.GRanges(query.anno)$geneId))\n    ## black ball\n    n <- N - m\n    ## drawn\n    k <- unlist(lapply(target.anno, function(i) length(unique(as.GRanges(i)$geneId))))\n    p <- phyper(oln, m, n, k, lower.tail=FALSE)\n\n\n    if (is(queryPeak, \"GRanges\")) {\n        qSample <- \"queryPeak\"\n    } else {\n        qSample <- basename(queryPeak)\n    }\n\n    if (is.null(targetFiles)) {\n        tSample <- names(target.gr)\n        if(is.null(tSample)) {\n            tSample <- paste0(\"targetPeak\", seq_along(target.gr))\n        }\n    } else {\n        tSample <- basename(targetFiles)\n    }\n\n    padj <- p.adjust(p, method=pAdjustMethod)\n    res <- data.frame(qSample=qSample,\n                      tSample=tSample,\n                      qLen=length(unique(as.GRanges(query.anno)$geneId)),\n                      tLen=unlist(lapply(target.anno, function(i) length(unique(as.GRanges(i)$geneId)))),\n                      N_OL=oln,\n                      pvalue=p,\n                      p.adjust=padj)\n    return(res)\n}\n\n##' calculate overlap significant of ChIP experiments based on the genome coordinations\n##'\n##'\n##' @title enrichPeakOverlap\n##' @param queryPeak query bed file or GRanges object\n##' @param 
targetPeak target bed file(s) or folder containing bed files or a list of GRanges objects\n##' @param TxDb TxDb\n##' @param pAdjustMethod p-value adjustment method\n##' @param nShuffle number of shuffles\n##' @param chainFile chain file for liftOver\n##' @param pool logical, whether to pool target peaks\n##' @param mc.cores number of cores, see \\link[parallel]{mclapply}\n##' @param verbose logical\n##' @return data.frame\n##' @export\n##' @importFrom rtracklayer import.chain\n##' @importFrom rtracklayer liftOver\n##' @author G Yu\nenrichPeakOverlap <- function(queryPeak, targetPeak, TxDb=NULL, pAdjustMethod=\"BH\", nShuffle=1000,\n                              chainFile=NULL, pool=TRUE, mc.cores=detectCores()-1, verbose=TRUE) {\n    TxDb <- loadTxDb(TxDb)\n    query.gr <- loadPeak(queryPeak)\n    if (is(targetPeak[1], \"GRanges\") || is(targetPeak[[1]], \"GRanges\")) {\n        target.gr <- targetPeak\n        targetFiles <- NULL\n    } else {\n        targetFiles <- parse_targetPeak_Param(targetPeak)\n        target.gr <- lapply(targetFiles, loadPeak)\n    }\n\n    if (!is.null(chainFile)) {\n        chain <- import.chain(chainFile)\n        target.gr <- lapply(target.gr, liftOver, chain=chain)\n    }\n\n    if (pool) {\n        p.ol <- enrichOverlap.peak.internal(query.gr, target.gr, TxDb, nShuffle,\n                                            mc.cores=mc.cores,verbose=verbose)\n    } else {\n        res_list <- lapply(1:length(target.gr), function(i) {\n            enrichPeakOverlap(queryPeak = queryPeak,\n                              targetPeak = target.gr[i],\n                              TxDb = TxDb,\n                              pAdjustMethod = pAdjustMethod,\n                              nShuffle = nShuffle,\n                              chainFile = chainFile,\n                              mc.cores = mc.cores,\n                              verbose = verbose)\n        })\n        res <- do.call(\"rbind\", res_list)\n        return(res)\n    }\n\n    if 
(is.null(p.ol$pvalue)) {\n        p <- padj <- NA\n    } else {\n        p <- p.ol$pvalue\n        padj <- p.adjust(p, method=pAdjustMethod)\n    }\n\n    ol <- p.ol$overlap\n\n\n    if (is(queryPeak, \"GRanges\")) {\n        qSample <- \"queryPeak\"\n    } else {\n        ## remove path, only keep file name\n        qSample <- basename(queryPeak)\n    }\n\n    if (is.null(targetFiles)) {\n        tSample <- names(target.gr)\n        if(is.null(tSample)) {\n            tSample <- paste0(\"targetPeak\", seq_along(target.gr))\n        }\n    } else {\n        tSample <- basename(targetFiles)\n    }\n\n    res <- data.frame(qSample=qSample,\n                      tSample=tSample,\n                      qLen=length(query.gr),\n                      tLen=unlist(lapply(target.gr, length)),\n                      N_OL=ol,\n                      pvalue=p,\n                      p.adjust=padj)\n\n    return(res)\n}\n\n\n\n##' shuffle the position of peak\n##'\n##'\n##' @title shuffle\n##' @param peak.gr GRanges object\n##' @param TxDb TxDb\n##' @return GRanges object\n##' @export\n##' @author G Yu\nshuffle <- function(peak.gr, TxDb) {\n    chrLens <- seqlengths(TxDb)[names(seqlengths(peak.gr))]\n    nn <- as.vector(seqnames(peak.gr))\n    ii <- order(nn)\n    w <- width(peak.gr)\n    nnt <- table(nn)\n    jj <- order(names(nnt))\n    nnt <- nnt[jj]\n    chrLens <- chrLens[jj]\n    ss <- unlist(sapply(1:length(nnt), function(i) sample(chrLens[i],nnt[i])))\n\n    res <- GRanges(seqnames=nn[ii], ranges=IRanges(ss, width=w[ii]), strand=\"*\")\n    return(res)\n}\n\n\n\n\n##' @import GenomeInfoDb\n##' @importFrom utils txtProgressBar\n##' @importFrom utils setTxtProgressBar\n##' @importFrom parallel mclapply\n##' @importFrom parallel detectCores\nenrichOverlap.peak.internal <- function(query.gr, target.gr, TxDb, nShuffle=1000, mc.cores=detectCores()-1, verbose=TRUE) {\n    if (verbose) {\n        cat(\">> permutation test of peak overlap...\\t\\t\",\n            
format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n    }\n\n    idx <- sample(1:length(target.gr), nShuffle, replace=TRUE)\n    len <- unlist(lapply(target.gr, length))\n\n    if(Sys.info()[1] == \"Windows\") {\n        qLen <- lapply(target.gr, function(tt) {\n            length(intersect(query.gr, tt))\n        })\n    } else {\n        qLen <- mclapply(target.gr, function(tt) {\n            length(intersect(query.gr, tt))\n        }, mc.cores=mc.cores\n                         )\n    }\n    qLen <- unlist(qLen)\n    ## query ratio\n    qr <- qLen/len\n\n    if (nShuffle < 1) {\n        res <- list(pvalue=NULL, overlap=qLen)\n        return(res)\n    }\n\n    if (verbose) {\n        pb <- txtProgressBar(min=0, max=nShuffle, style=3)\n    }\n    if(Sys.info()[1] == \"Windows\") {\n        rr <- lapply(seq_along(idx), function(j) {\n            if (verbose) {\n                setTxtProgressBar(pb, j)\n            }\n            i <- idx[j]\n            tarShuffle <- shuffle(target.gr[[i]], TxDb)\n            length(intersect(query.gr, tarShuffle))/len[i]\n        })\n    } else {\n        rr <- mclapply(seq_along(idx), function(j) {\n            if (verbose) {\n                setTxtProgressBar(pb, j)\n            }\n            i <- idx[j]\n            tarShuffle <- shuffle(target.gr[[i]], TxDb)\n            length(intersect(query.gr, tarShuffle))/len[i]\n        }, mc.cores=mc.cores\n                       )\n    }\n\n    if (verbose) {\n        close(pb)\n    }\n\n    rr <- unlist(rr) ## random ratio\n\n    ## p <- lapply(qr, function(q) mean(rr>q))\n    p <- lapply(qr, function(q) (sum(rr>q)+1)/(length(rr)+1))\n    res <- list(pvalue=unlist(p), overlap=qLen)\n    return(res)\n}\n\n"
  },
  {
    "path": "R/getFlankingGene.R",
    "content": "\n##' @import IRanges\n##' @importFrom dplyr mutate\n##' @importFrom dplyr group_by\ngetAllFlankingGene <- function(peak.gr, features, level=\"transcript\", distance=5000) {\n    peak.gr2 <- peak.gr\n    start(ranges(peak.gr)) = start(ranges(peak.gr)) - distance\n    end(ranges(peak.gr)) = end(ranges(peak.gr)) + distance\n    hit <- findOverlaps(peak.gr, unstrand(features))\n    qh <- queryHits(hit)\n    sh <- subjectHits(hit)\n    \n    featureHit <- features[sh]\n    names(featureHit)=NULL\n    hitInfo <- as.data.frame(featureHit)\n\n    if (level == \"transcript\") {\n        eg <- TXID2EG(featureHit$tx_id, geneIdOnly=TRUE)\n        hitInfo$geneId <- eg\n    } else {\n        cn <- colnames(hitInfo)\n        colnames(hitInfo)[cn == \"gene_id\"] <- \"geneId\"\n    }\n\n\n    hitInfo$peakIdx <- qh\n\n    overlapHit <- findOverlaps(peak.gr2, unstrand(featureHit))\n    hitInfo$distance <- NA\n    hitInfo$distance[subjectHits(overlapHit)] <- 0\n\n    psD <- ifelse(strand(featureHit) == \"+\",\n                  start(peak.gr2[qh]) - start(featureHit),\n                  end(featureHit)-end(peak.gr2[qh]))\n    \n    peD <- ifelse(strand(featureHit) == \"+\",\n                  end(peak.gr2[qh]) - start(featureHit),\n                  end(featureHit)-start(peak.gr2[qh]))\n\n    idx <- abs(psD) > abs(peD)\n    dd <- psD\n    dd[idx] <- peD[idx]\n\n    ii <- is.na(hitInfo$distance)\n    hitInfo$distance[ii] <- dd[ii]\n\n    peakIdx <- tx_name <- geneId <- distance <- NULL\n\n    if (level == \"transcript\") {\n        hitInfo2 <- group_by(hitInfo, peakIdx) %>%\n            mutate(flank_txIds=paste(tx_name, collapse=\";\"),\n                   flank_geneIds=paste(geneId, collapse=\";\"),\n                   flank_gene_distances=paste(distance, collapse=\";\"))\n        res <- hitInfo2[,c(\"peakIdx\", \"flank_txIds\", \"flank_geneIds\", \"flank_gene_distances\")]\n        res$flank_txIds <- as.character(res$flank_txIds)\n    } else {\n        hitInfo2 <- 
group_by(hitInfo, peakIdx) %>%\n            mutate(flank_geneIds=paste(geneId, collapse=\";\"),\n                   flank_gene_distances=paste(distance, collapse=\";\"))\n        res <- hitInfo2[,c(\"peakIdx\", \"flank_geneIds\", \"flank_gene_distances\")]\n    }\n\n    res <- unique(res)\n    res$flank_geneIds <- as.character(res$flank_geneIds)\n    res$flank_gene_distances <- as.character(res$flank_gene_distances)\n    \n    return(res)    \n}\n"
  },
  {
    "path": "R/getGenomicAnnotation.R",
    "content": "updateGenomicAnnotation <- function(peaks, genomicRegion, type, anno, sameStrand=FALSE) {\n    hits <- getGenomicAnnotation.internal(peaks, genomicRegion, type, sameStrand=sameStrand)\n    if (length(hits) > 1) {\n        hitIndex <- hits$queryIndex\n        anno[[\"annotation\"]][hitIndex] <- hits$annotation\n        anno[[\"detailGenomicAnnotation\"]][hitIndex, type] <- TRUE\n    }\n    return(anno)\n}\n\n\n##' get Genomic Annotation of peaks\n##'\n##'\n##' @title getGenomicAnnotation\n##' @param peaks peaks in GRanges object\n##' @param distance distance of peak to TSS\n##' @param tssRegion tssRegion, default is -3kb to +3kb\n##' @param TxDb TxDb object\n##' @param level one of gene or transcript\n##' @param genomicAnnotationPriority genomic Annotation Priority\n##' @param sameStrand whether annotate gene in same strand\n##' @importFrom GenomicFeatures threeUTRsByTranscript\n##' @importFrom GenomicFeatures fiveUTRsByTranscript\n##' @importFrom yulab.utils get_cache_element\n##' @importFrom yulab.utils update_cache_item\n##' @return character vector\n##' @author G Yu\ngetGenomicAnnotation <- function(peaks,\n                                 distance,\n                                 tssRegion=c(-3000, 3000),\n                                 TxDb,\n                                 level,\n                                 genomicAnnotationPriority,\n                                 sameStrand = FALSE\n                                 ) {\n\n    ##\n    ## since some annotation overlap,\n    ## a priority is assign based on *genomicAnnotationPriority*\n    ## use the following priority by default:\n    ##\n    ## 1. Promoter\n    ## 2. 5' UTR\n    ## 3. 3' UTR\n    ## 4. Exon\n    ## 5. Intron\n    ## 6. Downstream\n    ## 7. 
Intergenic\n    ##\n\n\n\n    .ChIPseekerEnv(TxDb, item = ChIPseekerCache)\n    # ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n\n    annotation <- rep(NA, length(distance))\n\n    flag <- rep(FALSE, length(distance))\n    detailGenomicAnnotation <- data.frame(\n        genic=flag,\n        Intergenic=flag,\n        Promoter=flag,\n        fiveUTR=flag,\n        threeUTR=flag,\n        Exon=flag,\n        Intron=flag,\n        downstream=flag,\n        distal_intergenic=flag)\n\n    anno <- list(annotation=annotation,\n                 detailGenomicAnnotation=detailGenomicAnnotation)\n\n    genomicAnnotationPriority <- rev(genomicAnnotationPriority)\n    for (AP in genomicAnnotationPriority) {\n        if (AP == \"Intron\") {\n            ## Introns\n            # intronList <- get_intronList(ChIPseekerEnv)\n            intronList <- get_intronList(item = ChIPseekerCache)\n            anno <- updateGenomicAnnotation(peaks, intronList, \"Intron\", anno, sameStrand=sameStrand)\n        } else if (AP == \"Exon\") {\n            ## Exons\n            # exonList <- get_exonList(ChIPseekerEnv)\n            exonList <- get_exonList(item = ChIPseekerCache)\n            anno <- updateGenomicAnnotation(peaks, exonList, \"Exon\", anno, sameStrand=sameStrand)\n        } else if (AP == \"3UTR\") {\n            ## 3' UTR Exons\n            threeUTRList <- get_cache_element(item = ChIPseekerCache, elements = \"threeUTRList\")\n\n            if(is.null(threeUTRList)){\n                threeUTRList <- threeUTRsByTranscript(TxDb)\n                update_cache_item(item = ChIPseekerCache, list(\"threeUTRList\" = threeUTRList))\n            }\n\n            # if ( exists(\"threeUTRList\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n            #     threeUTRList <- get(\"threeUTRList\", envir=ChIPseekerEnv)\n            # } else {\n            #     threeUTRList <- threeUTRsByTranscript(TxDb)\n            #     assign(\"threeUTRList\", threeUTRList, envir=ChIPseekerEnv)\n   
         # }\n            anno <- updateGenomicAnnotation(peaks, threeUTRList, \"threeUTR\", anno, sameStrand=sameStrand)\n        } else if (AP == \"5UTR\") {\n            ## 5' UTR Exons\n            fiveUTRList <- get_cache_element(item = ChIPseekerCache, elements = \"fiveUTRList\")\n\n            if(is.null(fiveUTRList)){\n                fiveUTRList <- fiveUTRsByTranscript(TxDb)\n                update_cache_item(item = ChIPseekerCache, list(\"fiveUTRList\" = fiveUTRList))\n            }\n\n            # if ( exists(\"fiveUTRList\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n            #     fiveUTRList <- get(\"fiveUTRList\", envir=ChIPseekerEnv)\n            # } else {\n            #     fiveUTRList <- fiveUTRsByTranscript(TxDb)\n            #     assign(\"fiveUTRList\", fiveUTRList, envir=ChIPseekerEnv)\n            # }\n            anno <- updateGenomicAnnotation(peaks, fiveUTRList, \"fiveUTR\", anno, sameStrand=sameStrand)\n        } else if (AP == \"Promoter\") {\n            annotation <- anno[[\"annotation\"]]\n            ## detailGenomicAnnotation <- anno[[\"detailGenomicAnnotation\"]]\n\n            ## TSS\n            tssIndex <- distance >= tssRegion[1] & distance <= tssRegion[2]\n            annotation[tssIndex] <- \"Promoter\"\n            anno$detailGenomicAnnotation[tssIndex, \"Promoter\"] <- TRUE\n\n            pm <- max(abs(tssRegion))\n            if (pm/1000 >= 2) {\n                dd <- seq(1:ceiling(pm/1000))*1000\n                for (i in 1:length(dd)) {\n                    if (i == 1) {\n                        lbs <- paste(\"Promoter\", \" (<=\", dd[i]/1000, \"kb)\", sep=\"\")\n                        annotation[abs(distance) <= dd[i] &\n                                   annotation == \"Promoter\"] <- lbs\n                    } else {\n                        lbs <- paste(\"Promoter\", \" (\", dd[i-1]/1000, \"-\", dd[i]/1000, \"kb)\", sep=\"\")\n                        annotation[abs(distance) <= dd[i] &\n                          
         abs(distance) > dd[i-1] &\n                                   annotation == \"Promoter\"] <- lbs\n                    }\n                }\n            }\n            anno[[\"annotation\"]] <- annotation\n        } else {\n            ## Intergenic\n            annotation[is.na(annotation)] <- \"Intergenic\"\n            anno[[\"annotation\"]] <- annotation\n        }\n    }\n\n    annotation <- anno[[\"annotation\"]]\n    detailGenomicAnnotation <- anno[[\"detailGenomicAnnotation\"]]\n    genicIndex <- which(apply(detailGenomicAnnotation[, c(\"Exon\", \"Intron\")], 1, any))\n    detailGenomicAnnotation[-genicIndex, \"Intergenic\"] <- TRUE\n    detailGenomicAnnotation[genicIndex, \"genic\"] <- TRUE\n\n    ## intergenicIndex <- anno[[\"annotation\"]] == \"Intergenic\"\n    ## anno[[\"detailGenomicAnnotation\"]][intergenicIndex, \"Intergenic\"] <- TRUE\n    ## anno[[\"detailGenomicAnnotation\"]][!intergenicIndex, \"genic\"] <- TRUE\n\n\n    features <- getGene(TxDb, by=level)\n\n    ## nearest from gene end\n    if (sameStrand) {\n        idx <- follow(peaks, features)\n    } else {\n        idx <- follow(peaks, unstrand(features))\n    }\n    \n    na.idx <- which(is.na(idx))\n    if (length(na.idx)) {\n        idx <- idx[-na.idx]\n        peaks <- peaks[-na.idx]\n    }\n    \n    peF <- features[idx]\n    dd <- ifelse(strand(peF) == \"+\",\n\t\t start(peaks) - end(peF),\n\t\t end(peaks) - start(peF))\n    \n    if (length(na.idx)) {\n        dd2 <- numeric(length(idx) + length(na.idx))\n        dd2[-na.idx] <- dd\n    } else {\n        dd2 <- dd\n    }\n\n    dsd <- getOption(\"ChIPseeker.downstreamDistance\")\n    if (is.null(dsd))\n\t    dsd <- 3000 ## downstream 3k by default\n\n    ## downstream within dsd\n    if(dsd/1000<=1){\n        j <- which(annotation == \"Intergenic\" & abs(dd2) <= dsd & dd2 != 0)\n        if(length(j)>0){\n            lbs <- paste(\"Downstream (<=\", dsd, \"bp)\", sep=\"\")\n            annotation[j] <- lbs\n        }\n    
}else{\n        \n        ## downstream within 0-dsd/1000 kb\n        for(i in 1:(dsd/1000)){\n            j <- which(annotation == \"Intergenic\" & abs(dd2) <= i*1000 & dd2 != 0)\n            if (length(j) > 0){\n                if (i == 1){\n                    lbs <- \"Downstream (<1kb)\"\n                }else{\n                    lbs <- paste(\"Downstream (\", i-1, \"-\", i, \"kb)\", sep=\"\")\n                }\n\t\tannotation[j] <- lbs\n            }\n        }\n        \n        ## downstream (dsd/1000) kb - dsd bp\n        z <- which(annotation == \"Intergenic\" & abs(dd2) <= dsd & dd2 != 0)\n        if(length(z)>0){\n            lbs <- paste(\"Downstream (\",dsd/1000,\"kb-\", dsd, \"bp)\", sep=\"\")\n            annotation[z] <- lbs\n        }\n    }\n    annotation[which(annotation == \"Intergenic\")] = \"Distal Intergenic\"\n\n    downstreamIndex <- dd2 > 0 & dd2 < dsd\n    detailGenomicAnnotation[downstreamIndex, \"downstream\"] <- TRUE\n    detailGenomicAnnotation[which(annotation == \"Distal Intergenic\"), \"distal_intergenic\"] <- TRUE\n    return(list(annotation=annotation, detailGenomicAnnotation=detailGenomicAnnotation))\n}\n\n\n##' @import BiocGenerics S4Vectors IRanges\ngetGenomicAnnotation.internal <- function(peaks, genomicRegion, type, sameStrand=FALSE){\n    GRegion <- unlist(genomicRegion)\n    GRegionLen <- elementNROWS(genomicRegion)\n\n    names(GRegionLen) <- names(genomicRegion)\n    GRegion$gene_id <- rep(names(genomicRegion), times=GRegionLen)\n\n\n    if (type == \"Intron\") {\n        gr2 <- GRegion[!duplicated(GRegion$gene_id)]\n        strd <- as.character(strand(gr2))\n        len <- GRegionLen[GRegionLen != 0]\n\n        GRegion$intron_rank <- lapply(seq_along(strd), function(i) {\n            rank <- seq(1, len[i])\n            if (strd[i] == '-')\n                rank <- rev(rank)\n            return(rank)\n        }) %>% unlist\n    }\n\n    if (type == \"Intron\" || type ==\"Exon\") {\n        nn <- 
TXID2EG(names(genomicRegion))\n        names(GRegionLen) <- nn\n        GRegion$gene_id <- rep(nn, times=GRegionLen)\n    }\n\n    ## find overlap\n    if (sameStrand) {\n        GRegionHit <- findOverlaps(peaks, GRegion)\n    } else {\n        GRegionHit <- findOverlaps(peaks, unstrand(GRegion))\n    }\n\n    if (length(GRegionHit) == 0) {\n        return(NA)\n    }\n    qh <- queryHits(GRegionHit)\n    hit.idx <- getFirstHitIndex(qh)\n    GRegionHit <- GRegionHit[hit.idx]\n    queryIndex <- queryHits(GRegionHit)\n    subjectIndex <- subjectHits(GRegionHit)\n\n    hits <- GRegion[subjectIndex]\n    geneID <- hits$gene_id\n\n    if (type == \"Intron\") {\n        anno <- paste(type, \" (\", geneID, \", intron \", hits$intron_rank,\n                      \" of \", GRegionLen[geneID], \")\", sep=\"\")\n    } else if (type == \"Exon\") {\n        anno <- paste(type, \" (\", geneID, \", exon \", hits$exon_rank,\n                      \" of \", GRegionLen[geneID], \")\", sep=\"\")\n    } else if (type == \"fiveUTR\") {\n        anno <- \"5' UTR\"\n    } else if (type == \"threeUTR\") {\n        anno <- \"3' UTR\"\n    } else {\n        anno <- type\n    }\n    res <- list(queryIndex=queryIndex, annotation=anno, gene=geneID)\n    return(res)\n}\n"
  },
  {
    "path": "R/getNearestFeatureIndicesAndDistances.R",
    "content": "##' get index of features that are closest to peak and calculate distance\n##'\n##'\n##' @title getNearestFeatureIndicesAndDistances\n##' @param peaks peak in GRanges\n##' @param features features in GRanges\n##' @param sameStrand logical, whether find nearest gene in the same strand\n##' @param ignoreOverlap logical, whether ignore overlap of TSS with peak\n##' @param ignoreUpstream logical, if True only annotate gene at the 3' of the peak.\n##' @param ignoreDownstream logical, if True only annotate gene at the 5' of the peak.\n##' @param overlap one of \"TSS\" or \"all\"\n##' @return list\n##' @import BiocGenerics IRanges GenomicRanges\n##' @author G Yu\ngetNearestFeatureIndicesAndDistances <- function(peaks, features,\n                                                 sameStrand = FALSE,\n                                                 ignoreOverlap=FALSE,\n                                                 ignoreUpstream=FALSE,\n                                                 ignoreDownstream=FALSE,\n                                                 overlap = \"TSS\") {\n\n    overlap <- match.arg(overlap, c(\"TSS\", \"all\"))\n\n    if (!ignoreOverlap && overlap == \"all\") {\n        overlap_hit <- findOverlaps(peaks, unstrand(features))\n    }\n\n    ## peaks only contain all peak records, in GRanges object\n    ## feature is the annotation in GRanges object\n\n    ## only keep start position based on strand\n    ## start(features) <- end(features) <- ifelse(strand(features) == \"+\", start(features), end(features))\n    features <- resize(features, width=1) # faster\n\n    ## add dummy NA feature for peaks that are at the last or first feature\n    ## suggested by Michael Kluge\n    features.bak <- features\n    seqlevels(features) <- c(seqlevels(features), \"chrNA\")\n    dummy <- GRanges(\"chrNA\", IRanges(1,1))\n\n    ## dummy$tx_id <- -1\n    ## dummy$tx_name <- \"NA\"\n\n    cns <- names(mcols(features))\n    for (cn in cns) {\n        if 
(grepl('id', cn)) {\n            mcols(dummy)[[cn]] <- -1\n        } else {\n            mcols(dummy)[[cn]] <- NA\n        }\n    }\n\n    features <- append(features, dummy)\n    dummyID <- length(features)\n\n    if (sameStrand) {\n        ## nearest from peak start\n        ps.idx <- follow(peaks, features)\n\n        ## nearest from peak end\n        pe.idx <- precede(peaks, features)\n    } else {\n        ps.idx <- follow(peaks, unstrand(features))\n        pe.idx <- precede(peaks, unstrand(features))\n    }\n\n    na.idx <- is.na(ps.idx) & is.na(pe.idx)\n    if (sum(na.idx) > 0) { ## suggested by Thomas Schwarzl\n        ps.idx <- ps.idx[!na.idx]\n        pe.idx <- pe.idx[!na.idx]\n        ##peaks <- peaks[!na.idx]\n    }\n\n    # set NA values to dummy value if only one entry is affected\n    ps.idx[is.na(ps.idx)] <- dummyID\n    pe.idx[is.na(pe.idx)] <- dummyID\n\n    ## features from nearest peak start\n    psF <- features[ps.idx]\n\n    ## feature distances from peak start\n    psD <- ifelse(strand(psF) == \"+\", 1, -1) *\n        (start(peaks[!na.idx]) - start(psF))\n    psD[ps.idx == dummyID] <- Inf # ensure that there is even no match if a seq with name \"chrNA\" exists\n\n    ## features from nearest peak end\n    peF <- features[pe.idx]\n    ## feature distances from peak end\n    peD <- ifelse(strand(peF) == \"+\", 1, -1) *\n        (end(peaks[!na.idx]) - start(peF))\n    peD[pe.idx == dummyID] <- Inf # ensure that there is even no match if a seq with name \"chrNA\" exists\n\n    ## restore the old feature object\n    features <- features.bak\n\n    pse <- data.frame(ps=psD, pe=peD)\n    if (ignoreUpstream) {\n        j <- rep(2, nrow(pse))\n    } else if (ignoreDownstream) {\n        j <- rep(1, nrow(pse))\n    } else {\n        j <- apply(pse, 1, function(i) which.min(abs(i)))\n    }\n\n    ## index\n    idx <- ps.idx\n    idx[j==2] <- pe.idx[j==2]\n\n    ## distance\n    dd <- psD\n    dd[j==2] <- peD[j==2]\n\n    index <- distanceToTSS <- 
rep(NA, length(peaks))\n    distanceToTSS[!na.idx] <- dd\n    index[!na.idx] <- idx\n\n    if (!ignoreOverlap) {\n        ## hit <- findOverlaps(peaks, unstrand(features))\n\n        if (overlap == \"all\") {\n            hit <- overlap_hit\n            if ( length(hit) != 0 ) {\n                qh <- queryHits(hit)\n                hit.idx <- getFirstHitIndex(qh)\n                hit <- hit[hit.idx]\n                peakIdx <- queryHits(hit)\n                featureIdx <- subjectHits(hit)\n\n                index[peakIdx] <- featureIdx\n                distance_both_end <- data.frame(start=start(peaks[peakIdx]) - start(features[featureIdx]),\n                                          end = end(peaks[peakIdx]) - start(features[featureIdx]))\n                distance_idx <- apply(distance_both_end, 1, function(i) which.min(abs(i)))\n                distance_minimal <- distance_both_end[,1]\n                distance_minimal[distance_idx == 2] <- distance_both_end[distance_idx==2, 2]\n\n                distanceToTSS[peakIdx] <- distance_minimal * ifelse(strand(features[featureIdx]) == \"+\", 1, -1)\n\n            }\n        }\n\n        hit <- findOverlaps(peaks, unstrand(features))\n\n        if ( length(hit) != 0 ) {\n            qh <- queryHits(hit)\n            hit.idx <- getFirstHitIndex(qh)\n            hit <- hit[hit.idx]\n            peakIdx <- queryHits(hit)\n            featureIdx <- subjectHits(hit)\n\n            index[peakIdx] <- featureIdx\n            distanceToTSS[peakIdx] <- 0\n        }\n\n    }\n\n    j <- is.na(distanceToTSS) | is.na(index)\n\n    res <- list(index=index[!j],\n                distance=distanceToTSS[!j],\n                peak=peaks[!j])\n\n    return(res)\n}\n\nisPeakFeatureOverlap <- function(peak, feature) {\n    peakRange <- ranges(peak)\n    featureRange <- ranges(feature)\n    x <- intersect(peakRange, featureRange)\n    return(length(x) != 0)\n}\n"
  },
  {
    "path": "R/plotAnno.R",
    "content": "##' plot feature distribution based on their chromosome region\n##'\n##' plot chromosome region features\n##' @title plotAnnoBar.data.frame\n##' @rdname plotAnnoBar\n##' @param anno.df annotation stats\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param title plot title\n##' @param categoryColumn category column\n##' @return bar plot that summarize genomic features of peaks\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 aes_string\n##' @importFrom ggplot2 geom_bar\n##' @importFrom ggplot2 coord_flip\n##' @importFrom ggplot2 theme_bw\n##' @importFrom ggplot2 scale_x_continuous\n##' @importFrom ggplot2 scale_fill_manual\n##' @importFrom ggplot2 xlab\n##' @importFrom ggplot2 ylab\n##' @importFrom ggplot2 ggtitle\n##' @importFrom ggplot2 guide_legend\n##' @seealso \\code{\\link{annotatePeak}} \\code{\\link{plotAnnoPie}}\n##' @author Guangchuang Yu \\url{https://yulab-smu.top}\nplotAnnoBar.data.frame <- function(anno.df,\n                                   xlab=\"\",\n                                   ylab=\"Percentage(%)\",\n                                   title=\"Feature Distribution\",\n                                   categoryColumn) {\n\n\n    anno.df$Feature <- factor(anno.df$Feature, levels = rev(levels(anno.df$Feature)))\n\n    p <- ggplot(anno.df, aes_string(x = categoryColumn,\n                                    fill = \"Feature\",\n                                    y = \"Frequency\"))\n\n    p <- p + geom_bar(stat=\"identity\") + coord_flip() + theme_bw()\n    p <- p + ylab(ylab) + xlab(xlab) + ggtitle(title)\n\n    if (categoryColumn == 1) {\n        p <- p + scale_x_continuous(breaks=NULL)\n        p <- p+scale_fill_manual(values=rev(getCols(nrow(anno.df))), guide=guide_legend(reverse=TRUE))\n    } else {\n        p <- p+scale_fill_manual(values=rev(getCols(length(unique(anno.df$Feature)))), guide=guide_legend(reverse=TRUE))\n    }\n\n    return(p)\n}\n\n##' pieplot from peak genomic annotation\n##'\n##'\n##' @title 
plotAnnoPie\n##' @rdname plotAnnoPie\n##' @param x csAnno object\n##' @param ndigit number of digits to round\n##' @param cex label cex\n##' @param col color\n##' @param legend.position topright or other.\n##' @param pie3D plot in 3D or not\n##' @param radius radius of Pie\n##' @param ... extra parameter\n##' @return pie plot of peak genomic feature annotation\n##' @examples\n##' \\dontrun{\n##' require(TxDb.Hsapiens.UCSC.hg19.knownGene)\n##' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n##' peakfile <- system.file(\"extdata\", \"sample_peaks.txt\", package=\"ChIPseeker\")\n##' peakAnno <- annotatePeak(peakfile, TxDb=txdb)\n##' plotAnnoPie(peakAnno)\n##' }\n##' @seealso \\code{\\link{annotatePeak}} \\code{\\link{plotAnnoBar}}\n##' @export\n##' @author Guangchuang Yu \\url{https://yulab-smu.top}\nplotAnnoPie.csAnno <- function(x,\n                        ndigit=2,\n                        cex=0.8,\n                        col=NA,\n                        legend.position=\"rightside\",\n                        pie3D=FALSE,\n                        radius=0.8,\n                        ...){\n\n    anno.df <- getAnnoStat(x)\n    if (is.na(col[1])) {\n        col <- getCols(nrow(anno.df))\n    }\n\n    if (pie3D)\n        annoPie3D(anno.df, ndigit=ndigit, cex=cex, col=col, ...)\n\n    annoPie(anno.df, ndigit=ndigit, cex=cex, col=col, legend.position=legend.position, radius=radius, ...)\n }\n\n##' @importFrom grDevices colorRampPalette\n##' @importFrom graphics par\n##' @importFrom graphics layout\n##' @importFrom graphics pie\n##' @importFrom graphics legend\n##' @importFrom graphics plot.new\nannoPie <- function(anno.df, ndigit=2, cex=0.8, col=NA, legend.position, radius=0.8, ...) {\n    if ( ! 
all(c(\"Feature\", \"Frequency\") %in% colnames(anno.df))) {\n        stop(\"check your input...\")\n    }\n\n    if (legend.position == \"rightside\") {\n        labels=paste(anno.df$Feature, \" (\",\n            round(anno.df$Frequency/sum(anno.df$Frequency)*100, ndigit),\n            \"%)\", sep=\"\")\n\n        par(mai = c(0,0,0,0))\n        layout(matrix(c(1,2), ncol=2), widths=c(0.6,0.4))\n        pie(anno.df$Frequency, labels=NA, cex=cex, col=col, ...)\n        plot.new()\n        legend(\"center\", legend = labels, fill = col, bty = \"n\", cex = cex)\n    } else {\n        par(mai = c(0,0,0,0))\n        pie(anno.df$Frequency,\n            ##     ## labels=paste(round(anno.df$Frequency/sum(anno.df$Frequency)*100, 2), \"%\", sep=\"\"),\n            labels=paste(anno.df$Feature, \" (\",\n                round(anno.df$Frequency/sum(anno.df$Frequency)*100, ndigit),\n                \"%)\", sep=\"\"),\n            cex=cex,\n            col=col,\n            radius=radius,\n            ...\n            )\n    }\n}\n\n## @param ndigit ndigit\n## @param radius the radius of the pie\n## @param explode the amount to \"explode\" the pie\n## @param labelcex label font size\n## @importFrom plotrix pie3D\nannoPie3D <- function(anno.df,\n                      ndigit=2,\n                      cex=1,\n                      ...){\n\n    ## anno.df <- getGenomicAnnoStat(peakAnno)\n\n    pkg <- \"plotrix\"\n    require(pkg, character.only=TRUE)\n    pie3D <- eval(parse(text=\"pie3D\"))\n    pie3D(anno.df$Frequency,\n          labels=paste(\n              anno.df$Feature,\n              \"(\",\n              paste(round(anno.df$Frequency, ndigit), \"%\", sep=\"\"),\n              \")\",\n              sep=\"\"),\n          labelcex=cex,\n          col=col,\n          ...)\n}\n\ngetGenomicAnnoStat <- function(peakAnno) {\n    if(inherits(peakAnno,\"GRanges\"))\n        peakAnno <- as.data.frame(peakAnno)\n    anno <- peakAnno$annotation\n    ## anno <- sub(\" \\\\(.+\", \"\", 
anno)\n\n    e1 <- getOption(\"ChIPseeker.ignore_1st_exon\")\n    i1 <- getOption(\"ChIPseeker.ignore_1st_intron\")\n    ids <- getOption(\"ChIPseeker.ignore_downstream\")\n\n    if (is.null(e1) || !e1) {\n        e1lab <- \"1st Exon\"\n        anno[grep(\"exon 1 of\", anno)] <- e1lab\n        exonlab <- \"Other Exon\"\n    } else {\n        e1lab <- NULL\n        exonlab <- \"Exon\"\n    }\n\n    if (is.null(i1) || !i1) {\n        i1lab <- \"1st Intron\"\n        anno[grep(\"intron 1 of\", anno)] <- i1lab\n        intronlab <- \"Other Intron\"\n    } else {\n        i1lab <- NULL\n        intronlab <- \"Intron\"\n    }\n\n    anno[grep(\"Exon \\\\(\", anno)] <- exonlab\n    anno[grep(\"Intron \\\\(\", anno)] <- intronlab\n\n    if (is.null(ids) || !ids) {\n        dsd <- getOption(\"ChIPseeker.downstreamDistance\")\n        if (is.null(dsd))\n            dsd <- 3000 ## downstream 3k by default\n        if (dsd > 1000) {\n            dsd <- round(dsd/1000, 1)\n            dsd <- paste0(dsd, \"kb\")\n        }\n        dslab <- paste0(\"Downstream (<=\", dsd, \")\")\n\n        anno[grep(\"Downstream\", anno)] <- dslab\n        iglab <- \"Distal Intergenic\"\n    } else {\n        dslab <- NULL\n        iglab <- \"Intergenic\"\n        anno[grep(\"Downstream\", anno)] <- iglab\n    }\n    anno[grep(\"^Distal\", anno)] <- iglab\n\n    lvs <- c(\n        \"5' UTR\",\n        \"3' UTR\",\n        e1lab,\n        exonlab,\n        i1lab,\n        intronlab,\n        dslab,\n        iglab\n    )\n\n    promoter <- unique(anno[grep(\"Promoter\", anno)])\n    ip <- getOption(\"ChIPseeker.ignore_promoter_subcategory\")\n    if ((is.null(ip) || !ip) && (length(promoter) > 0)) {\n        plab <- sort(as.character(promoter))\n    } else {\n        plab <- \"Promoter\"\n        anno[grep(\"^Promoter\", anno)] <- plab\n    }\n    lvs <- c(plab, lvs)\n\n    ## count frequency\n    anno.table <- table(anno)\n\n    ## calculate ratio\n    anno.ratio <- anno.table/ sum(anno.table) * 
100\n    anno.df <- as.data.frame(anno.ratio)\n    colnames(anno.df) <- c(\"Feature\", \"Frequency\")\n\n    anno.df$Feature <- factor(anno.df$Feature, levels=lvs[lvs %in% anno.df$Feature])\n    anno.df <- anno.df[order(anno.df$Feature),]\n    return(anno.df)\n}\n\n\n\n\n"
  },
  {
    "path": "R/plotDistToTSS.R",
    "content": "merge_two_si = function(x1, x2){\n  if (length(unique(gsub(\"^[0-9]+\",\"\",c(x1, x2)))) == 1){\n    return(paste0(gsub(\"[^0-9]*$\",\"\",x1), \"-\", x2))\n  } else {\n    return(paste0(x1, \"-\", x2))\n  }\n}\n\ngenerate_break_lbs = function(breaks) {\n  lbs = c()\n  \n  # break labels\n  break_labels = scales::label_number(scale_cut = scales::cut_si(unit = \"b\"))(breaks)\n  break_labels = gsub(\" b$\",\" bp\", break_labels)\n\n  # category labels\n  for (i in 2:length(breaks)) {\n    if (i == length(breaks)) {\n      lbs = c(lbs, paste0(\">\", break_labels[i-1]))\n    } else {\n      lbs = c(lbs, merge_two_si(break_labels[i-1], break_labels[i]))\n    }\n  }\n  \n  return(lbs)\n}\n\ngenerate_colors = function(palette = NULL, n) {\n  # old color in version <= 1.41.1\n  old_color = c(\"#9ecae1\", \"#3182bd\", \"#C7A76C\", \"#86B875\", \"#39BEB1\", \"#CD99D8\")\n  if (is.null(palette)){\n    brewer_cols = old_color\n  } else if (length(palette) == 1 && is_valid_palette(palette)){\n    brewer_cols = RColorBrewer::brewer.pal(\n      name = palette, \n      n = RColorBrewer::brewer.pal.info[palette, \"maxcolors\"]\n    ) |> rev()     \n  } else if (all(is_valid_color(palette))){\n    brewer_cols = palette\n  }\n  else {\n    warning(\"Your palette is non-valid, switching to default...\")\n    brewer_cols = old_color\n  }\n  \n  if (length(brewer_cols) >= n) {\n    cols = brewer_cols[1:length(brewer_cols)]\n  } else {\n    cols = grDevices::colorRampPalette(brewer_cols)(n)\n  }\n  \n  return(cols)\n}\n\nis_valid_palette = function(palette){\n  palette %in% rownames(RColorBrewer::brewer.pal.info)\n}\n\nis_valid_color = function(color){\n  tryCatch({\n    grDevices::col2rgb(color)\n    TRUE\n  }, error = function(e) {\n    FALSE\n  })\n}\n\n##' plot feature distribution based on the distances to the TSS\n##'\n##'\n##' @title plotDistToTSS.data.frame\n##' @param peakDist peak annotation\n##' @param distanceColumn column name of the distance from peak to 
nearest gene\n##' @param distanceBreaks default is 'c(0, 1000, 3000, 5000, 10000, 100000)'\n##' @param palette palette name for coloring different distances. Run `RColorBrewer::display.brewer.all()` to see all applicable values.\n##' @param xlab x label\n##' @param ylab y label\n##' @param title figure title\n##' @param categoryColumn category column, default is \".id\"\n##' @return bar plot that summarizes distance from peak to\n##' TSS of the nearest gene.\n##' @importFrom magrittr %<>%\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 aes\n##' @importFrom ggplot2 aes_string\n##' @importFrom ggplot2 geom_bar\n##' @importFrom ggplot2 geom_hline\n##' @importFrom ggplot2 theme_bw\n##' @importFrom ggplot2 coord_flip\n##' @importFrom ggplot2 xlab\n##' @importFrom ggplot2 ylab\n##' @importFrom ggplot2 ggtitle\n##' @importFrom ggplot2 geom_hline\n##' @importFrom ggplot2 scale_y_continuous\n##' @importFrom ggplot2 scale_x_continuous\n##' @importFrom ggplot2 scale_fill_brewer\n##' @importFrom ggplot2 scale_fill_hue\n##' @importFrom ggplot2 scale_fill_manual\n##' @importFrom ggplot2 geom_text\n##' @importFrom rlang .data\n##' @examples\n##' \\dontrun{\n##' require(TxDb.Hsapiens.UCSC.hg19.knownGene)\n##' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n##' peakfile <- system.file(\"extdata\", \"sample_peaks.txt\", package=\"ChIPseeker\")\n##' peakAnno <- annotatePeak(peakfile, TxDb=txdb)\n##' plotDistToTSS(peakAnno)\n##' }\n##' @seealso \\code{\\link{annotatePeak}}\n##' @author Guangchuang Yu \\url{https://guangchuangyu.github.io}\nplotDistToTSS.data.frame <- function(peakDist,\n                                     distanceColumn=\"distanceToTSS\",\n                                     distanceBreaks=c(0, 1000, 3000, 5000, 10000, 100000),\n                                     palette = NULL,\n                                     xlab=\"\",\n                                     ylab=\"Binding sites (%) (5'->3')\",\n                                     title=\"Distribution of 
transcription factor-binding loci relative to TSS\",\n                                     categoryColumn = \".id\") {\n\n    distanceBreaks = sort(distanceBreaks)\n    hasZero = sum(distanceBreaks == 0)\n    if (!hasZero) distanceBreaks = c(0, distanceBreaks)\n    hasInf = sum(is.infinite(distanceBreaks))\n    if (!hasInf) distanceBreaks = c(distanceBreaks, Inf)\n    lbs = generate_break_lbs(distanceBreaks)\n    peakDist$Feature = cut(abs(peakDist[[distanceColumn]]), \n                           breaks = distanceBreaks,\n                           labels = lbs,\n                           include.lowest = TRUE)\n\n    ## sign containing -1 and 1 for upstream and downstream\n    peakDist$sign <- sign(peakDist[,distanceColumn])\n\n    ## count frequencies\n    if (categoryColumn == 1) {\n      peakDist = peakDist |> \n        summarise(freq = length(.data$Feature), .by = c(\"Feature\", \"sign\")) |> \n        mutate(freq = .data$freq/sum(.data$freq) * 100)\n    } else {\n      peakDist = peakDist |> \n        summarise(freq = length(.data$Feature), .by = c(categoryColumn, \"Feature\", \"sign\")) |> \n        mutate(freq = .data$freq/sum(.data$freq) * 100, .by = categoryColumn)\n    }\n\n    if (any(peakDist$sign == 0)) {\n        zeroDist <- peakDist[peakDist$sign == 0,]\n        zeroDist$freq <- zeroDist$freq/2\n        zeroDist$sign <- -1\n        peakDist[peakDist$sign == 0,] <- zeroDist\n        zeroDist$sign <- 1\n        peakDist <- rbind(peakDist, zeroDist)\n    }\n\n    if (categoryColumn == 1) {\n        peakDist %<>% group_by(.data$Feature, .data$sign) %>%\n            summarise(freq = sum(.data$freq))\n\n        totalFreq <- peakDist %>% group_by(.data$sign) %>%\n            summarise(total = sum(.data$freq))\n    } else {\n        peakDist %<>% group_by(.data$.id, .data$Feature, .data$sign) %>%\n            summarise(freq = sum(.data$freq))\n        totalFreq <- peakDist %>% group_by(.data$.id, .data$sign) %>%\n            summarise(total = 
sum(.data$freq))\n    }\n\n\n    ## preparing ylim and y tick labels\n    ds = max(totalFreq$total[totalFreq$sign == 1])\n    dslim = ceiling(ds/10) * 10\n    us = max(totalFreq$total[totalFreq$sign == -1])\n    uslim = ceiling(us/10) * 10\n    ybreaks <- seq(-uslim, dslim, by=10)\n    ylbs <- abs(ybreaks)\n    ylbs[ylbs == 0] <- \"TSS\"\n\n    peakDist$Feature <- factor(peakDist$Feature, levels=rev(levels(peakDist$Feature)))\n    if (categoryColumn == 1) {\n        p <- ggplot(peakDist, aes(x=1, fill=.data$Feature))\n    } else {\n        p <- ggplot(peakDist, aes(x=.data[[categoryColumn]], fill=.data$Feature))\n    }\n\n    p <- p + geom_bar(data=subset(peakDist, sign==1), aes(y=.data$freq), stat=\"identity\") +\n        geom_bar(data=subset(peakDist, sign==-1), aes(y=-.data$freq), stat=\"identity\")\n\n    p <- p + geom_hline(yintercept = 0, colour = \"black\") +\n        coord_flip() + theme_bw() +\n            scale_y_continuous(breaks=ybreaks,labels=ylbs)\n\n    p <- p + ylab(ylab) + xlab(xlab) + ggtitle(title)\n\n    if (categoryColumn == 1) {\n        p <- p + scale_x_continuous(breaks=NULL)\n    }\n\n    cols <- generate_colors(palette = palette, n = length(lbs))\n    p <- p + scale_fill_manual(values=rev(cols), guide=guide_legend(reverse=TRUE))\n\n    return(p)\n}\n"
  },
  {
    "path": "R/plotTagMatrix.R",
    "content": "##' plot the profile of peaks\n##'`\n##' \\code{plotPeakProf_MultiWindows()} is almost the same as \\code{plotPeakProf2()}, having\n##' the main difference of accepting two or more granges objects. Accepting more\n##' granges objects can help compare the same peaks in different windows.\n##' \n##' \\code{TxDb} parameter can accept txdb object.\n##' But many regions can not be obtained by txdb object. In this case,\n##' Users can provide self-made granges served the same role \n##' as txdb object and pass to \\code{TxDb} object.\n##' \n##' \\code{by} the features of interest. \n##' \n##' (1) if users use \\code{txdb}, \\code{by} can be one of 'gene', 'transcript', 'exon', \n##' 'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.\n##' \n##' (2) if users use self-made granges object, \\code{by} can be everything. Because this \\code{by}\n##' will not pass to functions to get features, which is different from the case of using \n##' txdb object. This \\code{by} is only used to made labels showed in picture.\n##' \n##' \\code{type} means the property of the region. one of the \"start site\",\n##' \"end site\" and \"body\".\n##' \n##' \\code{upstream} and \\code{downstream} parameter have different usages:\n##' \n##' (1) if \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can use to extend \n##' the flank of body region.\n##' \n##' (2) if \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} refer to\n##' the upstream and downstream of the start_site or the end_site.\n##' \n##' \\code{weightCol} refers to column in peak file. This column acts as a weight value. Details\n##' see \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}\n##' \n##' \\code{nbin} refers to the number of bins. 
\\code{getTagMatrix()} provides a binning method\n##' to get the tag matrix.\n##' \n##' There are two ways to input a list of windows.\n##' \n##' (1) Users can input a list of self-made granges objects\n##' \n##' (2) Users can input a list of \\code{by} and only one \\code{type}. In this way, \n##' \\code{plotPeakProf_MultiWindows()} can make a list of windows from txdb object based on \\code{by} and \\code{type}.\n##' \n##' Warning: \n##' \n##' (1) All of these windows should be the same type. It means users can only\n##' compare a list of \"start site\"/\"end site\"/\"body region\" with the same upstream\n##' and downstream.\n##' \n##' (2) So it will be only one \\code{type} and several \\code{by}.\n##' \n##' (3) Users can make window by txdb object or self-made granges object. Users can only\n##' choose one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR' or 'UTR' in the\n##' way of using txdb object. User can input any \\code{by} in the way of using \n##' self-made granges object.\n##' \n##' (4) Users can mingle the \\code{by} designed for the two ways. \\code{plotPeakProf_MultiWindows} can\n##' accept the hybrid \\code{by}. 
But the above rules should be followed.\n##' \n##' \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}\n##'\n##' @title plotPeakProf_MultiWindows\n##' \n##' @param tagMatrix tagMatrix or a list of tagMatrix\n##' @param peak peak file or GRanges object\n##' @param weightCol column name of weight\n##' @param TxDb TxDb object or self-made granges objects\n##' @param upstream upstream position\n##' @param downstream downstream position\n##' @param by feature of interest\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @param windows_name the name for each window, which will also be showed in the picture as labels\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param verbose print message or not\n##' @param nbin the amount of bines \n##' @param ignore_strand ignore the strand information or not\n##' @param ... additional parameter\n##' @return ggplot object\n##' @importFrom methods is\n##' @importFrom methods as\n##' @importFrom methods missingArg\n##' @importFrom methods new\n##' @export\nplotPeakProf <- function(tagMatrix = NULL,\n                         peak,\n                         upstream,\n                         downstream,\n                         conf,\n                         by,\n                         type,\n                         windows_name = NULL,\n                         weightCol = NULL,\n                         TxDb = NULL,\n                         xlab = \"Genomic Region (5'->3')\",\n                         ylab = \"Peak Count Frequency\",\n                         facet = \"row\",\n                         free_y = TRUE,\n                         verbose = TRUE,\n                         nbin = NULL,\n                         ignore_strand = FALSE,\n                         ...){\n  \n  if(is.null(tagMatrix)){\n    \n    conf <- if(missingArg(conf)) NA else conf\n   
 upstream <- if(missingArg(upstream)) NULL else upstream\n    downstream <- if(missingArg(downstream)) NULL else downstream\n    \n    if(length(by) == 1){\n      \n      plotPeakProf2(peak = peak, \n                    upstream = upstream, \n                    downstream = downstream,\n                    conf = conf,\n                    by = by,\n                    type = type,\n                    weightCol = weightCol, \n                    TxDb = TxDb,\n                    xlab = xlab,\n                    ylab = ylab,\n                    facet = facet,\n                    free_y = free_y,\n                    verbose = verbose, \n                    nbin = nbin,\n                    ignore_strand = ignore_strand,\n                    ...)\n      \n    }else{\n      \n      if(is.null(windows_name) && !is.null(names(TxDb)))\n        windows_name <- names(TxDb)\n      \n      plotPeakProf_MultiWindows(peak = peak,\n                                upstream = upstream,\n                                downstream = downstream,\n                                conf = conf,\n                                by = by,\n                                type = type,\n                                windows_name = windows_name,\n                                weightCol = weightCol,\n                                TxDb = TxDb,\n                                xlab = xlab,\n                                ylab = ylab,\n                                facet = facet,\n                                free_y = free_y,\n                                verbose = verbose,\n                                nbin = nbin,\n                                ignore_strand = ignore_strand,\n                                ...)\n      \n    }\n    \n  }else{\n    \n    if(is(tagMatrix, \"list\")){\n      upstream <- attr(tagMatrix[[1]], 'upstream')\n      downstream <- attr(tagMatrix[[1]], 'downstream')\n      label <- attr(tagMatrix[[1]], 'label')\n      attr(tagMatrix, 'type') <- 
attr(tagMatrix[[1]], 'type')\n      attr(tagMatrix, 'is.binning') <- attr(tagMatrix[[1]], 'is.binning')\n      \n    }else{\n      upstream <- attr(tagMatrix, 'upstream')\n      downstream <- attr(tagMatrix, 'downstream')\n      label <- attr(tagMatrix, 'label')\n    }\n    \n    \n    if(attr(tagMatrix, 'is.binning')){\n      \n      if (!(missingArg(conf) || is.na(conf))){\n        \n        plotAvgProf.binning(tagMatrix = tagMatrix, \n                            xlab = xlab,\n                            ylab = ylab,\n                            conf = conf,\n                            facet = facet, \n                            free_y = free_y,\n                            upstream = upstream,\n                            downstream = downstream,\n                            label = label,\n                            ...)\n        \n      }else{\n        \n        plotAvgProf.binning(tagMatrix = tagMatrix, \n                            xlab = xlab,\n                            ylab = ylab,\n                            facet = facet, \n                            free_y = free_y,\n                            upstream = upstream,\n                            downstream = downstream,\n                            label = label,\n                            ...)\n        \n      }\n      \n      \n    }else{\n      \n      xlim <- c(-upstream, downstream)\n      \n      if (!(missingArg(conf) || is.na(conf))){\n        \n        plotAvgProf (tagMatrix = tagMatrix, \n                     xlim = xlim,\n                     xlab = xlab,\n                     ylab = ylab,\n                     conf = conf,\n                     facet = facet, \n                     free_y = free_y,\n                     origin_label = label,\n                     ...)\n        \n      }else{\n        \n        plotAvgProf (tagMatrix = tagMatrix, \n                     xlim = xlim,\n                     xlab = xlab,\n                     ylab = ylab,\n                     facet = 
facet, \n                     free_y = free_y,\n                     origin_label = label,\n                     ...)\n        \n      }\n      \n      \n    }\n  }\n  \n}\n\n\n##' plot the profile of peaks\n##'\n##'\n##' @title plotAvgProf\n##' @param tagMatrix tagMatrix or a list of tagMatrix\n##' @param xlim xlim\n##' @param xlab x label\n##' @param ylab y label\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param origin_label label of the center\n##' @param verbose print message or not\n##' @param ... additional parameter\n##' @return ggplot object\n##' @author G Yu; Y Yan\n##' @export\nplotAvgProf <- function(tagMatrix, xlim,\n                        xlab=\"Genomic Region (5'->3')\",\n                        ylab = \"Peak Count Frequency\",\n                        conf,\n                        facet=\"none\", \n                        free_y = TRUE, \n                        origin_label = \"TSS\",\n                        verbose = TRUE,\n                        ...) 
{\n  \n  ## S4Vectors change the behavior of ifelse\n  ## see https://support.bioconductor.org/p/70871/\n  ##\n  ## conf <- ifelse(missingArg(conf), NA, conf)\n\n  if (verbose) {\n      cat(\">> plotting figure...\\t\\t\\t\",\n          format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n  }\n  \n  conf <- if(missingArg(conf)) NA else conf\n  \n  if (!(missingArg(conf) || is.na(conf))){\n    p <- plotAvgProf.internal(tagMatrix = tagMatrix, \n                              conf = conf, \n                              xlim = xlim,\n                              xlab = xlab, \n                              ylab = ylab,\n                              facet = facet, \n                              free_y = free_y, \n                              origin_label = origin_label,\n                              ...)\n  } else {\n    p <- plotAvgProf.internal(tagMatrix, \n                              xlim = xlim,\n                              xlab = xlab, \n                              ylab = ylab,\n                              facet = facet, \n                              free_y = free_y, \n                              origin_label = origin_label,\n                              ...)\n  }\n  return(p)\n}\n\n\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 geom_line\n##' @importFrom ggplot2 geom_vline\n##' @importFrom ggplot2 geom_ribbon\n##' @importFrom ggplot2 scale_x_continuous\n##' @importFrom ggplot2 scale_color_manual\n##' @importFrom ggplot2 xlab\n##' @importFrom ggplot2 ylab\n##' @importFrom ggplot2 theme_bw\n##' @importFrom ggplot2 theme\n##' @importFrom ggplot2 element_blank\n##' @importFrom ggplot2 facet_grid\nplotAvgProf.internal <- function(tagMatrix, conf,\n                                 xlim = c(-3000,3000),\n                                 xlab = \"Genomic Region (5'->3')\",\n                                 ylab = \"Peak Count Frequency\",\n                                 facet=\"none\", \n                                 free_y = TRUE,\n            
                     origin_label, \n                                 ...) {\n  \n  listFlag <- FALSE\n  if (is(tagMatrix, \"list\")) {\n    if ( is.null(names(tagMatrix)) ) {\n      nn <- paste0(\"peak\", seq_along(tagMatrix))\n      warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse=' '))\n      names(tagMatrix) <- nn\n      ## stop(\"tagMatrix should be a named list...\")\n    }\n    listFlag <- TRUE\n  }\n  \n  if ( listFlag ) {\n    facet <- match.arg(facet, c(\"none\", \"row\", \"column\"))\n    if ( (xlim[2]-xlim[1]+1) != ncol(tagMatrix[[1]]) ) {\n      stop(\"please specify appropreate xcoordinations...\")\n    }\n  } else {\n    if ( (xlim[2]-xlim[1]+1) != ncol(tagMatrix) ) {\n      stop(\"please specify appropreate xcoordinations...\")\n    }\n  }\n  \n  ## S4Vectors change the behavior of ifelse\n  ## see https://support.bioconductor.org/p/70871/\n  ##\n  ## conf <- ifelse(missingArg(conf), NA, conf)\n  ##\n  conf <- if(missingArg(conf)) NA else conf\n  \n  pos <- value <- .id <- Lower <- Upper <- NULL\n  \n  if ( listFlag ) {\n    tagCount <- lapply(tagMatrix, function(x) getTagCount(x, xlim = xlim, conf = conf, ...))\n    tagCount <- list_to_dataframe(tagCount)\n    tagCount$.id <- factor(tagCount$.id, levels=names(tagMatrix))\n    p <- ggplot(tagCount, aes(pos, group=.id, color=.id))\n    if (!(is.na(conf))) {\n      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = .id),\n                           linetype = 0, alpha = 0.2)\n    }\n  } else {\n    tagCount <- getTagCount(tagMatrix, xlim = xlim, conf = conf, ...)\n    p <- ggplot(tagCount, aes(pos))\n    if (!(is.na(conf))) {\n      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper),\n                           linetype = 0, alpha = 0.2)\n    }\n  }\n  \n  p <- p + geom_line(aes(y = value))\n  \n  if ( 0 > xlim[1] && 0 < xlim[2] ) {\n    p <- p + geom_vline(xintercept=0,\n                        linetype=\"longdash\")\n    p <- p + 
scale_x_continuous(breaks=c(xlim[1], floor(xlim[1]/2),\n                                         0,\n                                         floor(xlim[2]/2), xlim[2]),\n                                labels=c(paste0(xlim[1],\"bp\"), paste0(floor(xlim[1]/2),\"bp\"),\n                                         origin_label, \n                                         paste0(floor(xlim[2]/2),\"bp\"), paste0(xlim[2], \"bp\")))\n  }\n  \n  if (listFlag) {\n    cols <- getCols(length(tagMatrix))\n    p <- p + scale_color_manual(values=cols)\n    if (facet == \"row\") {\n      if (free_y) {\n        p <- p + facet_grid(.id ~ ., scales = \"free_y\")\n      } else {\n        p <- p + facet_grid(.id ~ .)\n      }\n    } else if (facet == \"column\") {\n      if (free_y) {\n        p <-  p + facet_grid(. ~ .id, scales = \"free_y\")\n      } else {\n        p <-  p + facet_grid(. ~ .id)\n      }\n    }\n  }\n  p <- p+xlab(xlab)+ylab(ylab)\n  p <- p + theme_bw() + theme(legend.title=element_blank())\n  if(facet != \"none\") {\n    p <- p + theme(legend.position=\"none\")\n  }\n  return(p)\n}\n\n##' plot the profile of peaks that align to flank sequences of TSS\n##'\n##' This function is the old function of \\code{plotPeakProf2}. It can\n##' only plot the start site region of gene.\n##'\n##' @title plotAvgProf\n##' @param peak peak file or GRanges object\n##' @param weightCol column name of weight\n##' @param TxDb TxDb object\n##' @param upstream upstream position\n##' @param downstream downstream position\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param verbose print message or not\n##' @param ignore_strand ignore the strand information or not\n##' @param ... 
additional parameter\n##' @return ggplot object\n##' @export\n##' @author G Yu, Ming L\nplotAvgProf2 <- function(peak, weightCol = NULL, TxDb = NULL,\n                         upstream = 1000, downstream = 1000,\n                         xlab = \"Genomic Region (5'->3')\",\n                         ylab = \"Peak Count Frequency\",\n                         conf,\n                         facet = \"none\",\n                         free_y = TRUE,\n                         verbose = TRUE, \n                         ignore_strand = FALSE,\n                         ...) {\n  \n  plotPeakProf2(peak = peak, \n                upstream = upstream, \n                downstream = downstream,\n                conf,\n                by = \"gene\",\n                type = \"start_site\",\n                weightCol = weightCol, \n                TxDb = TxDb,\n                xlab = xlab,\n                ylab = ylab,\n                facet = facet,\n                free_y = free_y,\n                verbose = verbose, \n                ignore_strand = ignore_strand,\n                ...)\n  \n}\n\n##' plot the profile of peaks  by binning\n##' \n##' \n##' @title plotAvgProf.binning\n##' @param tagMatrix tagMatrix or a list of tagMatrix\n##' @param xlab x label\n##' @param ylab y label \n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled \n##' @param upstream rel object reflects the percentage of flank extension, e.g rel(0.2)\n##'                 integer reflects the actual length of flank extension or TSS region\n##'                 NULL reflects the gene body with no extension\n##' @param downstream rel object reflects the percentage of flank extension, e.g rel(0.2)\n##'                   integer reflects the actual length of flank extension or TSS region\n##'                   NULL reflects the gene body with no extension\n##' @param label label\n##' @param ... 
additional parameter\n##' @return ggplot object\n##' @importFrom ggplot2 rel\nplotAvgProf.binning <- function(tagMatrix, \n                                xlab = \"Genomic Region (5'->3')\",\n                                ylab = \"Peak Count Frequency\",\n                                conf,\n                                facet =\"none\", \n                                free_y = TRUE,\n                                upstream = NULL,\n                                downstream = NULL,\n                                label,\n                                ...) {\n  \n  ## S4Vectors change the behavior of ifelse\n  ## see https://support.bioconductor.org/p/70871/\n  ##\n  ## conf <- ifelse(missingArg(conf), NA, conf)\n  conf <- if(missingArg(conf)) NA else conf\n  \n  if (!(missingArg(conf) || is.na(conf))){\n    p <- plotAvgProf.binning.internal(tagMatrix , \n                                      conf = conf, \n                                      xlab = xlab, \n                                      ylab = ylab,\n                                      facet = facet, \n                                      free_y = free_y,\n                                      upstream = upstream,\n                                      downstream = downstream,\n                                      label = label,\n                                      ...)\n  } else {\n    p <- plotAvgProf.binning.internal(tagMatrix , \n                                      xlab = xlab, \n                                      ylab = ylab,\n                                      facet = facet, \n                                      free_y = free_y, \n                                      upstream = upstream,\n                                      downstream = downstream,\n                                      label = label,\n                                      ...)\n  }\n  return(p)\n}\n\n\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 geom_line\n##' @importFrom ggplot2 
geom_vline\n##' @importFrom ggplot2 geom_ribbon\n##' @importFrom ggplot2 scale_x_continuous\n##' @importFrom ggplot2 scale_color_manual\n##' @importFrom ggplot2 xlab\n##' @importFrom ggplot2 ylab\n##' @importFrom ggplot2 theme_bw\n##' @importFrom ggplot2 theme\n##' @importFrom ggplot2 element_blank\n##' @importFrom ggplot2 facet_grid\n##' @importFrom ggplot2 rel\nplotAvgProf.binning.internal <- function(tagMatrix, \n                                         conf,\n                                         xlab = \"Genomic Region (5'->3')\",\n                                         ylab = \"Peak Count Frequency\",\n                                         facet=\"none\", \n                                         free_y = TRUE,\n                                         upstream = NULL,\n                                         downstream = NULL,\n                                         label,\n                                         ...) {\n  \n  listFlag <- FALSE\n  if (is(tagMatrix, \"list\")) {\n    if ( is.null(names(tagMatrix )) ) {\n      nn <- paste0(\"peak\", seq_along(tagMatrix ))\n      warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse=' '))\n      names(tagMatrix) <- nn\n      ## stop(\"tagMatrix should be a named list...\")\n    }\n    listFlag <- TRUE\n  }\n  \n  if(listFlag){\n    nbin <- dim(tagMatrix[[1]])[2]\n  }else{\n    nbin <- dim(tagMatrix)[2]\n  }\n  xlim <- c(1,nbin)\n  \n  if ( listFlag ) {\n    facet <- match.arg(facet, c(\"none\", \"row\", \"column\"))\n  }\n  \n  ## S4Vectors change the behavior of ifelse\n  ## see https://support.bioconductor.org/p/70871/\n  ##\n  ## conf <- ifelse(missingArg(conf), NA, conf)\n  ##\n  conf <- if(missingArg(conf)) NA else conf\n  \n  pos <- value <- .id <- Lower <- Upper <- NULL\n  \n  if ( listFlag ) {\n    tagCount <- lapply(tagMatrix , function(x) getTagCount(x, xlim = xlim, conf = conf, ...))\n    tagCount <- list_to_dataframe(tagCount)\n    tagCount$.id <- 
factor(tagCount$.id, levels=names(tagMatrix ))\n    p <- ggplot(tagCount, aes(pos, group=.id, color=.id))\n    if (!(is.na(conf))) {\n      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = .id),\n                           linetype = 0, alpha = 0.2)\n    }\n  } else {\n    tagCount <- getTagCount(tagMatrix , xlim = xlim, conf = conf, ...)\n    p <- ggplot(tagCount, aes(pos))\n    if (!(is.na(conf))) {\n      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper),\n                           linetype = 0, alpha = 0.2)\n    }\n  }\n  \n  p <- p + geom_line(aes(y = value))\n  \n  ## x_scale for genebody\n  if(attr(tagMatrix, 'type') == 'body'){\n    ## x_scale for gene body with no flank extension\n    if(is.null(upstream)){\n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*0.25),\n                                           floor(nbin*0.5),\n                                           floor(nbin*0.75),\n                                           nbin),\n                                  labels=c(label[1], \n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2]))\n    }\n    \n    \n    ## x_scale for flank extension by relative value\n    if(inherits(upstream, 'rel')){\n      \n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+25)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+50)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           
floor(nbin*((as.numeric(upstream)*100+75)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           nbin),\n                                  labels=c(paste0(\"-\",as.numeric(upstream)*100,\"%\"), \n                                           label[1],\n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2],\n                                           paste0(\"+\",as.numeric(downstream)*100,\"%\")))\n      p <- p + geom_vline(xintercept=floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                          linetype=\"longdash\")\n      \n      p <- p + geom_vline(xintercept=floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                          linetype=\"longdash\")\n    }\n    \n    ## x_scale for flank extension by absolute value\n    if(!is.null(upstream) & !inherits(upstream, 'rel')){\n      \n      upstreamPer <- floor(upstream/1000)*0.1\n      downstreamPer <- floor(downstream/1000)*0.1\n      \n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.25)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.5)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.75)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),\n                                           nbin),\n 
                                 labels=c(paste0(\"-\",upstream,\"bp\"), \n                                           label[1],\n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2],\n                                           paste0(downstream,\"bp\")))\n      p <- p + geom_vline(xintercept=floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),\n                          linetype=\"longdash\")\n      \n      p <- p + geom_vline(xintercept=floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),\n                          linetype=\"longdash\")\n    }\n  }\n  \n  \n  ## x_scale for start region\n  if(attr(tagMatrix, 'type') != 'body'){\n    \n    p <- p + scale_x_continuous(breaks=c(1, \n                                         floor(nbin*0.25),\n                                         floor(nbin*0.5),\n                                         floor(nbin*0.75),\n                                         nbin),\n                                labels=c(paste0(\"-\",upstream,\"bp\"), \n                                         paste0(\"-\",floor(upstream*0.5),\"bp\"),\n                                         label,\n                                         paste0(floor(downstream*0.5),\"bp\"),\n                                         paste0(downstream,\"bp\")))\n    \n    p <- p + geom_vline(xintercept=floor(nbin*0.5),\n                        linetype=\"longdash\")\n  }\n  \n  \n  if (listFlag) {\n    cols <- getCols(length(tagMatrix))\n    p <- p + scale_color_manual(values=cols)\n    if (facet == \"row\") {\n      if (free_y) {\n        p <- p + facet_grid(.id ~ ., scales = \"free_y\")\n      } else {\n        p <- p + facet_grid(.id ~ .)\n      }\n    } else if (facet == \"column\") {\n      if (free_y) {\n        p <-  p + facet_grid(. 
~ .id, scales = \"free_y\")\n      } else {\n        p <-  p + facet_grid(. ~ .id)\n      }\n    }\n  }\n  p <- p+xlab(xlab)+ylab(ylab)\n  p <- p + theme_bw() + theme(legend.title=element_blank())\n  if(facet != \"none\") {\n    p <- p + theme(legend.position=\"none\")\n  }\n  return(p)\n}\n\n\n##' plot the profile of peaks automatically\n##'\n##' \\code{peak} stands for the peak file. \n##' \n##' \\code{by} the features of interest. \n##' \n##' (1) if users use \\code{txdb}, \\code{by} can be one of 'gene', 'transcript', 'exon', \n##' 'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.\n##' \n##' (2) if users use self-made granges object, \\code{by} can be everything. Because this \\code{by}\n##' will not pass to functions to get features, which is different from the case of using \n##' txdb object. This \\code{by} is only used to made labels showed in picture.\n##' \n##' \\code{type} means the property of the region. one of the \"start site\",\n##' \"end site\" and \"body\".\n##' \n##' \\code{upstream} and \\code{downstream} parameter have different usages:\n##' \n##' (1) if \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can use to extend \n##' the flank of body region.\n##' \n##' (2) if \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} refer to\n##' the upstream and downstream of the start_site or the end_site.\n##' \n##' \\code{weightCol} refers to column in peak file. This column acts as a weight vaule. Details\n##' see \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}\n##' \n##' \\code{nbin} refers to the number of bins, providing a binning method\n##' to get the tag matrix.\n##' \n##' \\code{TxDb} parameter can accept txdb object.\n##' But many regions can not be obtained by txdb object. 
In this case,\n##' Users can provide self-made granges that serve the same role \n##' as txdb object and pass them to the \\code{TxDb} parameter.\n##' \n##' \\code{plotPeakProf2()} is different from the \\code{plotPeakProf()}. \\code{plotPeakProf2()} does not\n##' require a \\code{window} parameter, which means \\code{plotPeakProf2()} will call relevant\n##' functions to make \\code{window} automatically.\n##'\n##' @title plotPeakProf2\n##' @param peak peak file or GRanges object\n##' @param weightCol column name of weight\n##' @param TxDb TxDb object, or self-made granges object\n##' @param upstream upstream position\n##' @param downstream downstream position\n##' @param by e.g. 'gene', 'transcript', 'exon' or features of interest(e.g. \"enhancer\")\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param verbose print message or not\n##' @param nbin the number of bins \n##' @param ignore_strand ignore the strand information or not\n##' @param ... 
additional parameter\n##' @return ggplot object\n##' @export\n##' @author G Yu, Ming Li\nplotPeakProf2 <- function(peak, \n                          upstream, \n                          downstream,\n                          conf,\n                          by,\n                          type,\n                          weightCol = NULL, \n                          TxDb = NULL,\n                          xlab = \"Genomic Region (5'->3')\",\n                          ylab = \"Peak Count Frequency\",\n                          facet = \"none\",\n                          free_y = TRUE,\n                          verbose = TRUE, \n                          nbin = NULL,\n                          ignore_strand = FALSE,\n                          ...){\n  \n  conf <- if(missingArg(conf)) NA else conf\n  upstream <- if(missingArg(upstream)) NULL else upstream\n  downstream <- if(missingArg(downstream)) NULL else downstream\n  \n  if ( is(peak, \"list\") ) {\n    tagMatrix <- lapply(peak, getTagMatrix, \n                        upstream = upstream,\n                        downstream = downstream, \n                        type = type,\n                        TxDb = TxDb,\n                        by = by,\n                        weightCol = weightCol, \n                        nbin = nbin,\n                        verbose = verbose,\n                        ignore_strand = ignore_strand)\n  } else {\n    tagMatrix <- getTagMatrix(peak = peak, \n                              upstream = upstream,\n                              downstream = downstream, \n                              type = type,\n                              by = by,\n                              TxDb = TxDb,\n                              weightCol = weightCol, \n                              nbin = nbin,\n                              verbose = verbose,\n                              ignore_strand = ignore_strand)\n  }\n  \n  \n  if (!(missingArg(conf) || is.na(conf))){\n    p <- 
plotPeakProf(tagMatrix = tagMatrix,\n                      conf = conf,\n                      xlab = xlab,\n                      ylab = ylab,\n                      facet = facet, \n                      free_y = free_y,\n                      ...)\n    \n  } else {\n    p <- plotPeakProf(tagMatrix = tagMatrix,\n                      xlab = xlab,\n                      ylab = ylab,\n                      facet= facet, \n                      free_y = free_y,\n                      ...)\n  }\n  return(p)\n  \n}\n\n\n##' plot the profile of peaks in two or more windows\n##'\n##'\n##' This function comes from \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}\n##'\n##' \\code{plotPeakProf_MultiWindows()} is almost the same as \\code{plotPeakProf2()}, having\n##' the main difference of accepting two or more granges objects. Accepting more\n##' granges objects can help compare the same peaks in different windows.\n##' \n##' \\code{TxDb} parameter can accept txdb object.\n##' But many regions can not be obtained by txdb object. In this case,\n##' Users can provide self-made granges served the same role \n##' as txdb object and pass to \\code{TxDb} object.\n##' \n##' \\code{by} the features of interest. \n##' \n##' (1) if users use \\code{txdb}, \\code{by} can be one of 'gene', 'transcript', 'exon', \n##' 'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.\n##' \n##' (2) if users use self-made granges object, \\code{by} can be everything. Because this \\code{by}\n##' will not pass to functions to get features, which is different from the case of using \n##' txdb object. This \\code{by} is only used to made labels showed in picture.\n##' \n##' \\code{type} means the property of the region. 
one of the \"start site\",\n##' \"end site\" and \"body\".\n##' \n##' \\code{upstream} and \\code{downstream} parameter have different usages:\n##' \n##' (1) if \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can use to extend \n##' the flank of body region.\n##' \n##' (2) if \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} refer to\n##' the upstream and downstream of the start_site or the end_site.\n##' \n##' \\code{weightCol} refers to column in peak file. This column acts as a weight value. Details\n##' see \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}\n##' \n##' \\code{nbin} refers to the number of bins. \\code{getTagMatrix()} provide a binning method\n##' to get the tag matrix.\n##' \n##' There are two ways to input a list of windows.\n##' \n##' (1) Users can input a list of self-made granges objects\n##' \n##' (2) Users can input a list of \\code{by} and only one \\code{type}. In this way, \n##' \\code{plotPeakProf_MultiWindows()} can make a list of windows from txdb object based on \\code{by} and \\code{type}.\n##' \n##' Warning: \n##' \n##' (1) All of these window should be the same type. It means users can only\n##' compare a list of \"start site\"/\"end site\"/\"body region\" with the same upstream\n##' and downstream.\n##' \n##' (2) So it will be only one \\code{type} and several \\code{by}.\n##' \n##' (3) Users can make window by txdb object or self-made granges object. Users can only\n##' choose one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR' or 'UTR' in the\n##' way of using txdb object. User can input any \\code{by} in the way of using \n##' self-made granges object.\n##' \n##' (4) Users can mingle the \\code{by} designed for the two ways. \\code{plotPeakProf_MultiWindows} can\n##' accept the hybrid \\code{by}. 
But the above rules should be followed.\n##' \n##'\n##' @title plotPeakProf_MultiWindows\n##' @param peak peak file or GRanges object\n##' @param weightCol column name of weight\n##' @param TxDb TxDb object or self-made granges objects\n##' @param upstream upstream position\n##' @param downstream downstream position\n##' @param by feature of interest\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @param windows_name the name for each window, which will also be showed in the picture as labels\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param verbose print message or not\n##' @param nbin the amount of bines \n##' @param ignore_strand ignore the strand information or not\n##' @param ... additional parameter\n##' @return ggplot object\nplotPeakProf_MultiWindows <- function(peak,\n                                      upstream,\n                                      downstream,\n                                      conf,\n                                      by,\n                                      type,\n                                      windows_name = NULL,\n                                      weightCol = NULL,\n                                      TxDb = NULL,\n                                      xlab = \"Genomic Region (5'->3')\",\n                                      ylab = \"Peak Count Frequency\",\n                                      facet = \"row\",\n                                      free_y = TRUE,\n                                      verbose = TRUE,\n                                      nbin = NULL,\n                                      ignore_strand = FALSE,\n                                      ...){\n  \n  conf <- if(missingArg(conf)) NA else conf\n  upstream <- if(missingArg(upstream)) NULL else upstream\n  downstream <- if(missingArg(downstream)) NULL else 
downstream\n  \n  ## check type\n  if(length(type) != 1){\n    stop(\"It should be only one type...\")\n  }\n  \n  ## make the window name\n  if (is.null(windows_name)) {\n    nn <- by\n    warning(\"set the name automatically to \", paste(nn, collapse=' '))\n    windows_name <- nn\n  }else{\n    if (length(windows_name) != length(by)) {\n      stop(\"the length of the window name and the by should be equal...\")\n    }\n  }\n  \n  \n  if ( is(peak, \"list\") ) {\n    tagMatrix <- lapply(peak, getTagMatrix2,\n                        upstream=upstream,\n                        downstream=downstream,\n                        windows_name=windows_name,\n                        type=type,\n                        by=by,\n                        TxDb=TxDb,\n                        weightCol = weightCol, \n                        nbin = nbin,\n                        verbose = verbose,\n                        ignore_strand= ignore_strand)\n  } else {\n    tagMatrix <- getTagMatrix2(peak=peak, \n                               upstream=upstream,\n                               downstream=downstream,\n                               windows_name=windows_name,\n                               type=type,\n                               by=by,\n                               TxDb=TxDb,\n                               weightCol = weightCol, \n                               nbin = nbin,\n                               verbose = verbose,\n                               ignore_strand= ignore_strand)\n  }\n  \n  if (!(missingArg(conf) || is.na(conf))){\n    p <- plotMultiProf(tagMatrix = tagMatrix,\n                       conf = conf,\n                       xlab = xlab,\n                       ylab = ylab,\n                       facet = facet, \n                       free_y = free_y,\n                       ...)\n    \n  } else {\n    p <- plotMultiProf(tagMatrix = tagMatrix,\n                       xlab = xlab,\n                       ylab = ylab,\n                       facet= 
facet, \n                       free_y = free_y,\n                       ...)\n  }\n  return(p)\n  \n}\n\n\n##' internal function for plotPeakProf_MultiWindows\n##' \n##' @param tagMatrix tagMatrix\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param ... additional parameter\nplotMultiProf <- function(tagMatrix,\n                          conf,\n                          xlab=\"Genomic Region (5'->3')\",\n                          ylab = \"Peak Count Frequency\",\n                          facet=\"none\", \n                          free_y = TRUE,\n                          ...){\n  \n  \n  if(is(tagMatrix[[1]][[1]],\"matrix\")){\n    upstream <- attr(tagMatrix[[1]][[1]], 'upstream')\n    downstream <- attr(tagMatrix[[1]][[1]], 'downstream')\n    # attr(tagMatrix, 'type') <- attr(tagMatrix[[1]][[1]], 'type')\n    # attr(tagMatrix, 'is.binning') <- attr(tagMatrix[[1]][[1]], 'is.binning')\n    binFlag <- attr(tagMatrix[[1]][[1]], 'is.binning')\n    type <- attr(tagMatrix[[1]][[1]], 'type')\n    \n  }else{\n    upstream <- attr(tagMatrix[[1]], 'upstream')\n    downstream <- attr(tagMatrix[[1]], 'downstream')\n    binFlag <- attr(tagMatrix[[1]], 'is.binning')\n    type <- attr(tagMatrix[[1]], 'type')\n  }\n  \n  if(type == \"body\"){\n    \n    label <- c(\"SS\",\"TS\")\n    \n  }else if(type == \"start_site\"){\n    \n    label <- \"SS\"\n    \n  }else{\n    \n    label <- \"TS\"\n    \n  }\n  \n  \n  if(binFlag){\n    \n    if (!(missingArg(conf) || is.na(conf))){\n      \n      plotMultiProf.binning(tagMatrix = tagMatrix, \n                            xlab = xlab,\n                            ylab = ylab,\n                            conf = conf,\n                            facet = facet, \n                            free_y = free_y,\n                            upstream = upstream,\n                            
downstream = downstream,\n                            label = label,\n                            ...)\n      \n    }else{\n      \n      plotMultiProf.binning(tagMatrix = tagMatrix, \n                            xlab = xlab,\n                            ylab = ylab,\n                            facet = facet, \n                            free_y = free_y,\n                            upstream = upstream,\n                            downstream = downstream,\n                            label = label,\n                            ...)\n    }\n    \n    \n  }else{\n    \n    xlim <- c(-upstream, downstream)\n    \n    if (!(missingArg(conf) || is.na(conf))){\n      \n      plotMultiProf.normal(tagMatrix = tagMatrix, \n                           xlim = xlim,\n                           xlab = xlab,\n                           ylab = ylab,\n                           conf = conf,\n                           facet = facet, \n                           free_y = free_y,\n                           origin_label = label,\n                           ...)\n      \n    }else{\n      \n      plotMultiProf.normal(tagMatrix = tagMatrix, \n                           xlim = xlim,\n                           xlab = xlab,\n                           ylab = ylab,\n                           facet = facet, \n                           free_y = free_y,\n                           origin_label = label,\n                           ...)\n    }\n  }\n  \n}\n\n##' internal function\n##' \n##' @param tagMatrix tagMatrix\n##' @param xlim xlim\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param origin_label the label of the center\n##' @param verbose print message or not\n##' @param ... 
additional parameter\nplotMultiProf.normal <- function(tagMatrix, xlim,\n                                 xlab=\"Genomic Region (5'->3')\",\n                                 ylab = \"Peak Count Frequency\",\n                                 conf,\n                                 facet=\"none\", \n                                 free_y = TRUE, \n                                 origin_label = \"TSS\",\n                                 verbose = TRUE,\n                                 ...) {\n  \n  ## S4Vectors change the behavior of ifelse\n  ## see https://support.bioconductor.org/p/70871/\n  ##\n  ## conf <- ifelse(missingArg(conf), NA, conf)\n  \n  if (verbose) {\n    cat(\">> plotting figure...\\t\\t\\t\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n  }\n  \n  conf <- if(missingArg(conf)) NA else conf\n  \n  if (!(missingArg(conf) || is.na(conf))){\n    \n    p <- plotMultiProf.normal.internal(tagMatrix = tagMatrix, \n                                       conf = conf, \n                                       xlim = xlim,\n                                       xlab = xlab, \n                                       ylab = ylab,\n                                       facet = facet, \n                                       free_y = free_y, \n                                       origin_label = origin_label,\n                                       ...)\n    \n    \n  } else {\n    \n    p <- plotMultiProf.normal.internal(tagMatrix, \n                                       xlim = xlim,\n                                       xlab = xlab, \n                                       ylab = ylab,\n                                       facet = facet, \n                                       free_y = free_y, \n                                       origin_label = origin_label,\n                                       ...)\n  }\n  return(p)\n}\n\n##' internal function\n##' \n##' \n##' @param tagMatrix tagMatrix\n##' @param xlim xlim\n##' @param xlab xlab\n##' 
@param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param origin_label the label of the center\n##' @param ... additional parameter\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 geom_line\n##' @importFrom ggplot2 geom_vline\n##' @importFrom ggplot2 geom_ribbon\n##' @importFrom ggplot2 scale_x_continuous\n##' @importFrom ggplot2 scale_color_manual\n##' @importFrom ggplot2 xlab\n##' @importFrom ggplot2 ylab\n##' @importFrom ggplot2 theme_bw\n##' @importFrom ggplot2 theme\n##' @importFrom ggplot2 element_blank\n##' @importFrom ggplot2 facet_grid\nplotMultiProf.normal.internal <- function(tagMatrix, conf,\n                                          xlim = c(-3000,3000),\n                                          xlab = \"Genomic Region (5'->3')\",\n                                          ylab = \"Peak Count Frequency\",\n                                          facet=\"row\", \n                                          free_y = TRUE,\n                                          origin_label, \n                                          ...) 
{\n  \n  listFlag <- FALSE\n  if (is.null(attr(tagMatrix[[1]],'upstream'))) {\n    if ( is.null(names(tagMatrix)) ) {\n      nn <- paste0(\"peak\", seq_along(tagMatrix))\n      warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse=' '))\n      names(tagMatrix) <- nn\n      ## stop(\"tagMatrix should be a named list...\")\n    }\n    listFlag <- TRUE\n  }\n  \n  if ( listFlag ) {\n    facet <- match.arg(facet, c(\"none\", \"row\", \"column\"))\n    if ( (xlim[2]-xlim[1]+1) != ncol(tagMatrix[[1]][[1]]) ) {\n      stop(\"please specify appropreate xcoordinations...\")\n    }\n  } else {\n    if ( (xlim[2]-xlim[1]+1) != ncol(tagMatrix[[1]]) ) {\n      stop(\"please specify appropreate xcoordinations...\")\n    }\n  }\n  \n  ## S4Vectors change the behavior of ifelse\n  ## see https://support.bioconductor.org/p/70871/\n  ##\n  ## conf <- ifelse(missingArg(conf), NA, conf)\n  ##\n  conf <- if(missingArg(conf)) NA else conf\n  \n  pos <- value <- .id <- Lower <- Upper <- NULL\n  \n  if ( listFlag ) {\n    \n    tagCount <- lapply(as.list(names(tagMatrix)), function(x){\n      \n      tmp <- tagMatrix[[x]]\n      tagCount_tmp <- lapply(as.list(names(tmp)),function(x){\n        result <- getTagCount(tmp[[x]], xlim = xlim, conf = conf, ...)\n        result$type <- x\n        \n        return(result)\n      })\n      tagCount_tmp <- list_to_dataframe(tagCount_tmp)\n      return(tagCount_tmp)\n      \n    })\n    \n    names(tagCount) <- names(tagMatrix)\n    tagCount <- list_to_dataframe(tagCount)\n    tagCount$.id <- factor(tagCount$.id, levels=names(tagMatrix))\n    p <- ggplot(tagCount, aes(pos, group=type, color=type))\n    if (!(is.na(conf))) {\n      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = type),\n                           linetype = 0, alpha = 0.2)\n    }\n    \n  } else {\n    \n    tagCount <- lapply(as.list(names(tagMatrix)), function(x){\n      \n      result <- getTagCount(tagMatrix[[x]], xlim = xlim, conf = 
conf, ...)\n      result$type <- x\n      \n      return(result)\n    })\n    \n    tagCount <- do.call(\"rbind\",tagCount)\n    \n    p <- ggplot(tagCount, aes(x = pos))\n    if (!(is.na(conf))) {\n      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper,fill = type),\n                           linetype = 0, alpha = 0.2)\n    }\n  }\n  \n  p <- p + geom_line(aes(y = value,color = type))\n  \n  if ( 0 > xlim[1] && 0 < xlim[2] ) {\n    p <- p + geom_vline(xintercept=0,\n                        linetype=\"longdash\")\n    p <- p + scale_x_continuous(breaks=c(xlim[1], floor(xlim[1]/2),\n                                         0,\n                                         floor(xlim[2]/2), xlim[2]),\n                                labels=c(paste0(xlim[1],\"bp\"), paste0(floor(xlim[1]/2),\"bp\"),\n                                         origin_label, \n                                         paste0(floor(xlim[2]/2),\"bp\"), paste0(xlim[2], \"bp\")))\n  }\n  \n  if (listFlag) {\n    # cols <- getCols(length(tagMatrix[[1]]))\n    # p <- p + scale_color_manual(values=cols)\n    if (facet == \"row\") {\n      if (free_y) {\n        p <- p + facet_grid(.id ~ ., scales = \"free_y\")\n      } else {\n        p <- p + facet_grid(.id ~ .)\n      }\n    } else if (facet == \"column\") {\n      if (free_y) {\n        p <-  p + facet_grid(. ~ .id, scales = \"free_y\")\n      } else {\n        p <-  p + facet_grid(. 
~ .id)\n      }\n    }\n  }\n  \n  p <- p+xlab(xlab)+ylab(ylab)\n  p <- p + theme_bw() + theme(legend.title=element_blank())\n\n  # if(facet != \"none\") {\n  #   p <- p + theme(legend.position=\"none\")\n  # }\n  \n  return(p)\n}\n\n##' internal function\n##' \n##' @param tagMatrix tagMatrix\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param upstream the upstream extension\n##' @param downstream the downstream extension\n##' @param label the label of the center\n##' @param ... additional parameter\nplotMultiProf.binning <- function(tagMatrix, \n                                  xlab = \"Genomic Region (5'->3')\",\n                                  ylab = \"Peak Count Frequency\",\n                                  conf,\n                                  facet =\"none\", \n                                  free_y = TRUE,\n                                  upstream = NULL,\n                                  downstream = NULL,\n                                  label,\n                                  ...) 
{\n  \n  ## S4Vectors change the behavior of ifelse\n  ## see https://support.bioconductor.org/p/70871/\n  ##\n  ## conf <- ifelse(missingArg(conf), NA, conf)\n  conf <- if(missingArg(conf)) NA else conf\n  \n  if (!(missingArg(conf) || is.na(conf))){\n    p <- plotMultiProf.binning.internal(tagMatrix , \n                                        conf = conf, \n                                        xlab = xlab, \n                                        ylab = ylab,\n                                        facet = facet, \n                                        free_y = free_y,\n                                        upstream = upstream,\n                                        downstream = downstream,\n                                        label = label,\n                                        ...)\n  } else {\n    p <- plotMultiProf.binning.internal(tagMatrix , \n                                        xlab = xlab, \n                                        ylab = ylab,\n                                        facet = facet, \n                                        free_y = free_y, \n                                        upstream = upstream,\n                                        downstream = downstream,\n                                        label = label,\n                                        ...)\n  }\n  return(p)\n}\n\n##' internal function\n##' \n##' @param tagMatrix tagMatrix\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param upstream the upstream extension\n##' @param downstream the downstream extension\n##' @param label the label of the center\n##' @param ... 
additional parameter\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 geom_line\n##' @importFrom ggplot2 geom_vline\n##' @importFrom ggplot2 geom_ribbon\n##' @importFrom ggplot2 scale_x_continuous\n##' @importFrom ggplot2 scale_color_manual\n##' @importFrom ggplot2 xlab\n##' @importFrom ggplot2 ylab\n##' @importFrom ggplot2 theme_bw\n##' @importFrom ggplot2 theme\n##' @importFrom ggplot2 element_blank\n##' @importFrom ggplot2 facet_grid\n##' @importFrom ggplot2 rel\nplotMultiProf.binning.internal <- function(tagMatrix, \n                                           conf,\n                                           xlab = \"Genomic Region (5'->3')\",\n                                           ylab = \"Peak Count Frequency\",\n                                           facet=\"none\", \n                                           free_y = TRUE,\n                                           upstream = NULL,\n                                           downstream = NULL,\n                                           label,\n                                           ...) 
{\n  \n  listFlag <- FALSE\n  if (is(tagMatrix[[1]][[1]],\"matrix\")) {\n    if ( is.null(names(tagMatrix)) ) {\n      nn <- paste0(\"peak\", seq_along(tagMatrix))\n      warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse=' '))\n      names(tagMatrix) <- nn\n      ## stop(\"tagMatrix should be a named list...\")\n    }\n    listFlag <- TRUE\n  }\n  \n  if(listFlag){\n    nbin <- dim(tagMatrix[[1]][[1]])[2]\n    type <- attr(tagMatrix[[1]][[1]], 'type')\n  }else{\n    nbin <- dim(tagMatrix[[1]])[2]\n    type <- attr(tagMatrix[[1]], 'type')\n  }\n  xlim <- c(1,nbin)\n  \n  if ( listFlag ) {\n    facet <- match.arg(facet, c(\"none\", \"row\", \"column\"))\n  }\n  \n  ## S4Vectors change the behavior of ifelse\n  ## see https://support.bioconductor.org/p/70871/\n  ##\n  ## conf <- ifelse(missingArg(conf), NA, conf)\n  ##\n  conf <- if(missingArg(conf)) NA else conf\n  \n  pos <- value <- .id <- Lower <- Upper <- NULL\n  \n  if ( listFlag ) {\n    \n    tagCount <- lapply(as.list(names(tagMatrix)), function(x){\n      \n      tmp <- tagMatrix[[x]]\n      tagCount_tmp <- lapply(as.list(names(tmp)),function(x){\n        result <- getTagCount(tmp[[x]], xlim = xlim, conf = conf, ...)\n        result$type <- x\n        \n        return(result)\n      })\n      tagCount_tmp <- list_to_dataframe(tagCount_tmp)\n      return(tagCount_tmp)\n      \n    })\n    \n    names(tagCount) <- names(tagMatrix)\n    tagCount <- list_to_dataframe(tagCount)\n    tagCount$.id <- factor(tagCount$.id, levels=names(tagMatrix))\n    p <- ggplot(tagCount, aes(pos, group=type, color=type))\n    if (!(is.na(conf))) {\n      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = type),\n                           linetype = 0, alpha = 0.2)\n    }\n    \n  } else {\n    \n    tagCount <- lapply(as.list(names(tagMatrix)), function(x){\n      \n      result <- getTagCount(tagMatrix[[x]], xlim = xlim, conf = conf, ...)\n      result$type <- x\n      \n      
return(result)\n    })\n    \n    tagCount <- do.call(\"rbind\",tagCount)\n    \n    p <- ggplot(tagCount, aes(pos,group=type,color=type))\n    if (!(is.na(conf))) {\n      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper,fill = type),\n                           linetype = 0, alpha = 0.2)\n    }\n  }\n  \n  p <- p + geom_line(aes(y = value,color = type))\n  \n  ## x_scale for genebody\n  if(type == 'body'){\n    ## x_scale for gene body with no flank extension\n    if(is.null(upstream)){\n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*0.25),\n                                           floor(nbin*0.5),\n                                           floor(nbin*0.75),\n                                           nbin),\n                                  labels=c(label[1], \n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2]))\n    }\n    \n    \n    ## x_scale for flank extension by relative value\n    if(inherits(upstream, 'rel')){\n      \n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+25)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+50)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+75)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           nbin),\n            
                      labels=c(paste0(\"-\",as.numeric(upstream)*100,\"%\"), \n                                           label[1],\n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2],\n                                           paste0(\"+\",as.numeric(downstream)*100,\"%\")))\n      p <- p + geom_vline(xintercept=floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                          linetype=\"longdash\")\n      \n      p <- p + geom_vline(xintercept=floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                          linetype=\"longdash\")\n    }\n    \n    ## x_scale for flank extension by absolute value\n    if(!is.null(upstream) & !inherits(upstream, 'rel')){\n      \n      upstreamPer <- floor(upstream/1000)*0.1\n      downstreamPer <- floor(downstream/1000)*0.1\n      \n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.25)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.5)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.75)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),\n                                           nbin),\n                                  labels=c(paste0(\"-\",upstream,\"bp\"), \n                                           label[1],\n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                       
                    label[2],\n                                           paste0(downstream,\"bp\")))\n      p <- p + geom_vline(xintercept=floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),\n                          linetype=\"longdash\")\n      \n      p <- p + geom_vline(xintercept=floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),\n                          linetype=\"longdash\")\n    }\n  }\n  \n  \n  ## x_scale for start region\n  if(type != 'body'){\n    \n    p <- p + scale_x_continuous(breaks=c(1, \n                                         floor(nbin*0.25),\n                                         floor(nbin*0.5),\n                                         floor(nbin*0.75),\n                                         nbin),\n                                labels=c(paste0(\"-\",upstream,\"bp\"), \n                                         paste0(\"-\",floor(upstream*0.5),\"bp\"),\n                                         label,\n                                         paste0(floor(downstream*0.5),\"bp\"),\n                                         paste0(downstream,\"bp\")))\n    \n    p <- p + geom_vline(xintercept=floor(nbin*0.5),\n                        linetype=\"longdash\")\n  }\n  \n  \n  if (listFlag) {\n    \n    if (facet == \"row\") {\n      if (free_y) {\n        p <- p + facet_grid(.id ~ ., scales = \"free_y\")\n      } else {\n        p <- p + facet_grid(.id ~ .)\n      }\n    } else if (facet == \"column\") {\n      if (free_y) {\n        p <-  p + facet_grid(. ~ .id, scales = \"free_y\")\n      } else {\n        p <-  p + facet_grid(. 
~ .id)\n      }\n    }\n  }\n  p <- p+xlab(xlab)+ylab(ylab)\n  p <- p + theme_bw() + theme(legend.title=element_blank())\n  # if(facet != \"none\") {\n  #   p <- p + theme(legend.position=\"none\")\n  # }\n  return(p)\n}\n\n\n##' plot the heatmap of tagMatrix\n##'\n##'\n##' @title tagHeatmap\n##' @param tagMatrix tagMatrix or a list of tagMatrix\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param title title\n##' @param palette palette to be filled in,details see \\link[ggplot2]{scale_colour_brewer}\n##' @param nrow the nrow of plotting a list of peak\n##' @param ncol the ncol of plotting a list of peak\n##' @return figure\n##' @export\n##' @author G Yu\ntagHeatmap <- function(tagMatrix, \n                       xlab=\"\", \n                       ylab=\"\", \n                       title=NULL, \n                       palette=\"RdBu\",\n                       nrow = NULL,\n                       ncol = NULL) {\n  listFlag <- FALSE\n  if (is(tagMatrix, \"list\")) {\n    listFlag <- TRUE\n  }\n  peakHeatmap.internal2(tagMatrix = tagMatrix, \n                        listFlag = listFlag, \n                        palette = palette, \n                        xlab = xlab, \n                        ylab = ylab, \n                        title = title,\n                        ncol = ncol,\n                        nrow = nrow)\n}\n\n##' plot the heatmap of peaks \n##'\n##'\n##' @title peakHeatmap\n##' @param peak peak file or GRanges object\n##' @param weightCol column name of weight\n##' @param TxDb TxDb object\n##' @param upstream upstream position\n##' @param downstream downstream position\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param title title\n##' @param palette palette to be filled in,details see \\link[ggplot2]{scale_colour_brewer}\n##' @param verbose print message or not\n##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @param nbin the amount of 
nbines \n##' @param ignore_strand ignore the strand information or not\n##' @param windows a collection of region\n##' @param nrow the nrow of plotting a list of peak\n##' @param ncol the ncol of plotting a list of peak\n##' @return figure\n##' @export\n##' @author G Yu\npeakHeatmap <- function(peak, weightCol=NULL, TxDb=NULL,\n                        upstream=1000, downstream=1000,\n                        xlab=\"\", ylab=\"\", title=NULL,\n                        palette=NULL, verbose=TRUE,\n                        by=\"gene\", type=\"start_site\",\n                        nbin = NULL,ignore_strand = FALSE,\n                        windows,ncol = NULL, nrow = NULL) {\n  listFlag <- FALSE\n  if ( is(peak, \"list\") ) {\n    listFlag <- TRUE\n    if (is.null(names(peak)))\n      stop(\"peak should be a peak file or a name list of peak files...\")\n  }\n  \n  if (verbose) {\n    cat(\">> preparing promoter regions...\\t\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n  }\n  \n  if (verbose) {\n    cat(\">> preparing tag matrix...\\t\\t\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n  }\n  \n  if(missing(windows)){\n    windows <- getBioRegion(TxDb=TxDb,\n                            upstream=upstream,\n                            downstream=downstream,\n                            by=by,\n                            type=type)\n  }\n  \n  \n  if (listFlag) {\n    tagMatrix <- lapply(peak, getTagMatrix, \n                        weightCol=weightCol, \n                        windows = windows,\n                        upstream=upstream,\n                        downstream=downstream,\n                        TxDb = TxDb,\n                        nbin = nbin,\n                        verbose = verbose,\n                        ignore_strand= ignore_strand)\n    \n    names(tagMatrix) <- names(peak)\n    \n  } else {\n    tagMatrix <- getTagMatrix(peak, \n                              weightCol=weightCol, \n                              windows = 
windows,\n                              TxDb = TxDb,\n                              upstream=upstream,\n                              downstream=downstream,\n                              nbin = nbin,\n                              verbose = verbose,\n                              ignore_strand= ignore_strand)\n  }\n  \n  if (verbose) {\n    cat(\">> generating figure...\\t\\t\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n  }\n  \n  xlim <- NULL\n  \n  p <- peakHeatmap.internal2(tagMatrix = tagMatrix,\n                             listFlag = listFlag, \n                             palette = palette, \n                             xlab = xlab,\n                             ylab = ylab, \n                             title = title,\n                             nrow = nrow,\n                             ncol = ncol)\n  \n  if (verbose) {\n    cat(\">> done...\\t\\t\\t\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n  }\n  invisible(tagMatrix)\n  p\n}\n\n##' @importFrom aplot plot_list\npeakHeatmap.internal2 <- function(tagMatrix, \n                                  listFlag, \n                                  palette, \n                                  xlab, \n                                  ylab, \n                                  title,\n                                  nrow,\n                                  ncol) {\n  if ( is.null(xlab) || is.na(xlab))\n    xlab <- \"\"\n  if ( is.null(ylab) || is.na(ylab))\n    ylab <- \"\"\n  \n  if (listFlag) {\n    nc <- length(tagMatrix)\n    if ( is.null(palette) || is.na(palette) ) {\n      palette <- getPalette(nc)\n    } else if (length(palette) != nc) {\n      palette <- rep(palette[1], nc)\n    } else {\n      palette <- palette\n    }\n    \n    if (is.null(title) || is.na(title))\n      title <- names(tagMatrix)\n    if (length(xlab) != nc) {\n      xlab <- rep(xlab[1], nc)\n    }\n    if (length(ylab) != nc) {\n      ylab <- rep(ylab[1], nc)\n    }\n    if (length(title) != nc) {\n    
  title <- rep(title[1], nc)\n    }\n    \n    tmp <- list()\n    \n    for (i in 1:nc) {\n      \n      p <- peakHeatmap.internal(tagMatrix = tagMatrix[[i]], \n                                palette = palette[i], \n                                xlab = xlab[i], \n                                ylab = ylab[i], \n                                title= title[i])\n      \n      p <- p + theme(plot.title = element_text(hjust = 0.5))\n      \n      tmp[[i]] <- p\n    }\n    \n    if(is.null(nrow) && is.null(ncol))\n      nrow <- 1\n    \n    p <- plot_list(gglist = tmp,\n                   ncol = ncol,\n                   nrow = nrow)\n    return(p)\n    \n  } else {\n    if (is.null(palette) || is.na(palette))\n      palette <- \"RdBu\"\n    if (is.null(title) || is.na(title))\n      title <- \"\"\n    peakHeatmap.internal(tagMatrix = tagMatrix, \n                         palette = palette, \n                         xlab = xlab, \n                         ylab = ylab, \n                         title = title)\n  }\n}\n\n\n##' @import BiocGenerics\n##' @importFrom yulab.utils mat2df\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 aes\n##' @importFrom ggplot2 geom_tile\n##' @importFrom ggplot2 scale_fill_distiller\n##' @importFrom ggplot2 theme\n##' @importFrom ggplot2 element_blank\n##' @importFrom ggplot2 labs\n##' @importFrom ggplot2 scale_x_continuous\npeakHeatmap.internal <- function(tagMatrix, \n                                 palette=\"RdBu\", \n                                 xlab=\"\", \n                                 ylab=\"\",\n                                 title=\"\") {\n  \n  upstream <- attr(tagMatrix, \"upstream\")\n  downstream <- attr(tagMatrix, \"downstream\")\n  binning_Flag <- attr(tagMatrix,\"is.binning\")\n  type <- attr(tagMatrix,\"type\")\n  \n  body_Flag <- FALSE\n  if(type == \"body\"){\n    body_Flag <- TRUE\n    label <- attr(tagMatrix,\"label\")\n  }\n  \n  if(binning_Flag){\n    nbin <- dim(tagMatrix)[2]\n  }\n  \n  
tagMatrix <- t(apply(tagMatrix, 1, function(x) x/max(x)))\n  ii <- order(rowSums(tagMatrix))\n  tagMatrix <- tagMatrix[ii,]\n  \n  colnames(tagMatrix) <- seq_len(dim(tagMatrix)[2])\n  rownames(tagMatrix) <- seq_len(dim(tagMatrix)[1])\n  \n  tagMatrix <- mat2df(tagMatrix)\n  colnames(tagMatrix) <- c(\"values\",\"sample_ID\",\"coordinate\")\n\n  sample_ID <- coordinate <- NULL\n  \n  p <- ggplot(tagMatrix, aes(x = coordinate,y = sample_ID)) + \n    geom_tile(aes(fill = values)) +\n    scale_fill_distiller(palette = palette)  +\n    theme(axis.text.y=element_blank(),\n          axis.ticks.y=element_blank(),\n          axis.line.y = element_blank(),\n          panel.grid=element_blank(),\n          panel.background = element_blank()) +\n    labs(x = xlab, y = ylab, title = title)\n\n  if(body_Flag){\n    \n    if(inherits(upstream, 'rel')){\n      \n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+25)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+50)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+75)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           nbin),\n                                  labels=c(paste0(\"-\",as.numeric(upstream)*100,\"%\"), \n                                           label[1],\n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                  
                         label[2],\n                                           paste0(\"+\",as.numeric(downstream)*100,\"%\")))\n    }\n    \n    if(is.null(upstream)){\n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*0.25),\n                                           floor(nbin*0.5),\n                                           floor(nbin*0.75),\n                                           nbin),\n                                  labels=c(label[1], \n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2]))\n    }\n    \n    if(!is.null(upstream) && !inherits(upstream, 'rel')){\n      \n      upstreamPer <- floor(upstream/1000)*0.1\n      downstreamPer <- floor(downstream/1000)*0.1\n      \n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.25)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.5)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.75)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),\n                                           nbin),\n                                  labels=c(paste0(\"-\",upstream,\"bp\"), \n                                           label[1],\n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2],\n                                           paste0(downstream,\"bp\")))\n    }\n    \n    p <- p + 
scale_y_continuous(expand = c(0,0))\n    return(p)\n    \n  }\n  \n  if(binning_Flag){\n    \n    p <- p + scale_x_continuous(breaks = c(1,\n                                           floor(nbin*(downstream*0.5/(downstream+upstream))),\n                                           floor(nbin*(downstream/(downstream+upstream))),\n                                           floor(nbin*((downstream + upstream*0.5)/(downstream+upstream))),\n                                           nbin),\n                                labels = c((-1*downstream),\n                                           floor(-1*downstream*0.5),\n                                           0,\n                                           floor(upstream*0.5),\n                                           upstream))\n  }else{\n    \n    p <- p + scale_x_continuous(labels = function(x) x - upstream)    \n    \n  }\n  \n  p <- p + scale_y_continuous(expand = c(0,0))\n  \n  p\n}\n\n##' plot the heatmap of peaks align to a sets of regions\n##'\n##'\n##' @title peakHeatmap\n##' @param peak peak file or GRanges object\n##' @param weightCol column name of weight\n##' @param TxDb TxDb object\n##' @param upstream upstream position\n##' @param downstream downstream position\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param title title\n##' @param palette palette to be filled in,details see \\link[ggplot2]{scale_colour_brewer}\n##' @param verbose print message or not\n##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @param nbin the amount of nbines \n##' @param ignore_strand ignore the strand information or not\n##' @param windows_name the name for each window, which will also be showed in the picture as labels\n##' @param nrow the nrow of plotting a list of peak\n##' @param ncol the ncol of plotting a list of peak\n##' @param facet_label_text_size the size of facet label text\n##' @importFrom ggplot2 
ggplot\n##' @importFrom ggplot2 aes\n##' @importFrom ggplot2 geom_tile\n##' @importFrom ggplot2 scale_fill_distiller\n##' @importFrom ggplot2 theme\n##' @importFrom ggplot2 element_blank\n##' @importFrom ggplot2 labs\n##' @importFrom ggplot2 scale_x_continuous\n##' @return figure\n##' @export\npeakHeatmap_multiple_Sets <- function(peak, \n                                      weightCol=NULL,\n                                      TxDb=NULL,\n                                      upstream=1000, \n                                      downstream=1000,\n                                      xlab=\"\", \n                                      ylab=\"\", \n                                      title=NULL,\n                                      palette=NULL, \n                                      verbose=TRUE,\n                                      by=\"gene\", \n                                      type=\"start_site\",\n                                      nbin = NULL,\n                                      ignore_strand = FALSE,\n                                      windows_name = NULL,\n                                      ncol = NULL,\n                                      nrow = NULL,\n                                      facet_label_text_size = 12){\n  listFlag <- FALSE\n  if ( is(peak, \"list\") ) {\n    listFlag <- TRUE\n    if (is.null(names(peak)))\n      stop(\"peak should be a peak file or a name list of peak files...\")\n  }\n  \n  if (verbose) {\n    cat(\">> preparing promoter regions...\\t\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n  }\n  \n  \n  ## check type\n  if(length(type) != 1){\n    stop(\"It should be only one type...\")\n  }\n  \n  if(is.null(windows_name) && !is.null(names(TxDb)))\n    windows_name <- names(TxDb)\n  \n  ## make the window name\n  if (is.null(windows_name)) {\n    nn <- by\n    warning(\"set the name automatically to \", paste(nn, collapse=' '))\n    windows_name <- nn\n  }else{\n    if (length(windows_name) 
!= length(by)) {\n      stop(\"the length of the window name and the by should be equal...\")\n    }\n  }\n  \n  if ( is(peak, \"list\") ) {\n    tagMatrix <- lapply(peak, getTagMatrix2,\n                        upstream=upstream,\n                        downstream=downstream,\n                        windows_name=windows_name,\n                        type=type,\n                        by=by,\n                        TxDb=TxDb,\n                        weightCol = weightCol, \n                        nbin = nbin,\n                        verbose = verbose,\n                        ignore_strand= ignore_strand)\n  } else {\n    tagMatrix <- getTagMatrix2(peak=peak, \n                               upstream=upstream,\n                               downstream=downstream,\n                               windows_name=windows_name,\n                               type=type,\n                               by=by,\n                               TxDb=TxDb,\n                               weightCol = weightCol, \n                               nbin = nbin,\n                               verbose = verbose,\n                               ignore_strand= ignore_strand)\n  }\n  \n  if(listFlag){\n    \n    nc <- length(tagMatrix)\n    if ( is.null(palette) || is.na(palette) ) {\n      palette <- getPalette(nc)\n    } else if (length(palette) != nc) {\n      palette <- rep(palette[1], nc)\n    } else {\n      palette <- palette\n    }\n    \n    if (is.null(title) || is.na(title))\n      title <- names(tagMatrix)\n    if (length(xlab) != nc) {\n      xlab <- rep(xlab[1], nc)\n    }\n    if (length(ylab) != nc) {\n      ylab <- rep(ylab[1], nc)\n    }\n    if (length(title) != nc) {\n      title <- rep(title[1], nc)\n    }\n    \n    tmp <- list()\n    \n    for (i in 1:nc) {\n      \n      p <- peakHeatmap_multiple_Sets.internal(tagMatrix = tagMatrix[[i]],\n                                              upstream=upstream, \n                                              
downstream=downstream,\n                                              xlab=xlab[[i]], \n                                              ylab=ylab[[i]], \n                                              title=title[[i]],\n                                              palette=palette[[i]], \n                                              ncol = ncol,\n                                              nrow = nrow,\n                                              facet_label_text_size = facet_label_text_size)\n      \n      p <- p + theme(plot.title = element_text(hjust = 0.5))\n      \n      tmp[[i]] <- p\n    }\n    \n    if(is.null(nrow) && is.null(ncol))\n      nrow <- 1\n    \n    p <- plot_list(gglist = tmp,\n                   ncol = ncol,\n                   nrow = nrow)\n    \n  }else{\n    \n    if (is.null(palette) || is.na(palette))\n      palette <- \"RdBu\"\n    if (is.null(title) || is.na(title))\n      title <- \"\"\n    \n    p <- peakHeatmap_multiple_Sets.internal(tagMatrix = tagMatrix,\n                                            upstream=upstream, \n                                            downstream=downstream,\n                                            xlab=xlab, \n                                            ylab=ylab, \n                                            title=title,\n                                            palette=palette, \n                                            ncol = ncol,\n                                            nrow = nrow,\n                                            facet_label_text_size = facet_label_text_size)\n    \n  }\n  \n  return(p)\n  \n}\n\n\n##' @importFrom yulab.utils mat2df\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 aes\n##' @importFrom ggplot2 geom_tile\n##' @importFrom ggplot2 scale_fill_distiller\n##' @importFrom ggplot2 theme\n##' @importFrom ggplot2 element_blank\n##' @importFrom ggplot2 labs\n##' @importFrom ggplot2 scale_x_continuous\n##' @importFrom ggplot2 facet_grid\n##' @importFrom 
ggplot2 element_text\n##' @importFrom ggplot2 element_blank\npeakHeatmap_multiple_Sets.internal <- function(tagMatrix,\n                                               upstream=1000, \n                                               downstream=1000,\n                                               xlab=\"\", \n                                               ylab=\"\", \n                                               title=NULL,\n                                               palette=NULL, \n                                               ncol = NULL,\n                                               nrow = NULL,\n                                               facet_label_text_size = 12){\n\n  binning_Flag <- attr(tagMatrix[[1]],\"is.binning\")\n  if(binning_Flag) nbin <- dim(tagMatrix[[1]])[2]\n  \n  type <- attr(tagMatrix,\"type\")\n  body_Flag <- FALSE\n  if(attr(tagMatrix[[1]],\"type\") == \"body\"){\n    body_Flag <- TRUE\n    label <- attr(tagMatrix,\"label\")\n  }\n  \n  name_of_list <- as.list(names(tagMatrix))\n  \n  peak_list <- lapply(name_of_list,function(x){\n    \n    tagMatrix[[x]] <- t(apply(tagMatrix[[x]], 1, function(x) x/max(x)))\n    ii <- order(rowSums(tagMatrix[[x]]))\n    tagMatrix[[x]] <- tagMatrix[[x]][ii,]\n    \n    colnames(tagMatrix[[x]]) <- seq_len(dim(tagMatrix[[x]])[2])\n    rownames(tagMatrix[[x]]) <- seq_len(dim(tagMatrix[[x]])[1])\n    \n    tagMatrix[[x]] <- mat2df(tagMatrix[[x]])\n    colnames(tagMatrix[[x]]) <- c(\"values\",\"sample_ID\",\"coordinate\")\n    \n    tagMatrix[[x]]$sample <- x\n    return(tagMatrix[[x]])\n  })\n  \n  peak_df <- list_to_dataframe(peak_list)\n  \n  sample_ID <- coordinate <- NULL\n  \n  p <- ggplot(peak_df, aes(x = coordinate,y = sample_ID)) + \n    geom_tile(aes(fill = values)) +\n    scale_fill_distiller(palette = palette)  +\n    theme(axis.text.y=element_blank(),\n          axis.ticks.y=element_blank(),\n          axis.line.y = element_blank(),\n          panel.grid=element_blank(),\n          
panel.background = element_blank()) +\n    labs(x = xlab, y = ylab, title = title)\n  \n  if(body_Flag){\n    \n    if(inherits(upstream, 'rel')){\n      \n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+25)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+50)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+75)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),\n                                           nbin),\n                                  labels=c(paste0(\"-\",as.numeric(upstream)*100,\"%\"), \n                                           label[1],\n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2],\n                                           paste0(\"+\",as.numeric(downstream)*100,\"%\")))\n    }\n    \n    if(is.null(upstream)){\n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*0.25),\n                                           floor(nbin*0.5),\n                                           floor(nbin*0.75),\n                                           nbin),\n                                  labels=c(label[1], \n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                  
         label[2]))\n    }\n    \n    if(!is.null(upstream) && !inherits(upstream, 'rel')){\n      \n      upstreamPer <- floor(upstream/1000)*0.1\n      downstreamPer <- floor(downstream/1000)*0.1\n      \n      p <- p + scale_x_continuous(breaks=c(1, \n                                           floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.25)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.5)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+0.75)/(1+upstreamPer+downstreamPer))),\n                                           floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),\n                                           nbin),\n                                  labels=c(paste0(\"-\",upstream,\"bp\"), \n                                           label[1],\n                                           \"25%\",\n                                           \"50%\",\n                                           \"75%\",\n                                           label[2],\n                                           paste0(downstream,\"bp\")))\n    }\n    \n    p <-  p + facet_grid(sample ~ .,switch = \"y\",space = \"free_y\",scales = \"free_y\") +\n      theme(strip.text.y.left = element_text(color = \"black\",face = \"bold\",\n                                             size = facet_label_text_size),\n            strip.background = element_blank())\n    \n    return(p)\n    \n  }\n  \n  if(binning_Flag){\n    \n    p <- p + scale_x_continuous(breaks = c(1,\n                                           floor(nbin*(downstream*0.5/(downstream+upstream))),\n                                           floor(nbin*(downstream/(downstream+upstream))),\n                                           floor(nbin*((downstream + upstream*0.5)/(downstream+upstream))),\n                             
              nbin),\n                                labels = c((-1*downstream),\n                                           floor(-1*downstream*0.5),\n                                           0,\n                                           floor(upstream*0.5),\n                                           upstream))\n  }else{\n    \n    p <- p + scale_x_continuous(breaks = c(1,\n                                           floor(downstream*0.5),\n                                           (downstream + 1),\n                                           (downstream + 1 + floor(upstream * 0.5)), \n                                           upstream+downstream+1),\n                                labels = c((-1*downstream),\n                                           floor(-1*downstream*0.5),\n                                           0,\n                                           floor(upstream*0.5),\n                                           upstream))    \n    \n  }\n  \n  p <-  p + facet_grid(sample ~ .,switch = \"y\",scales = \"free_y\",space = \"free\") +\n    theme(strip.text.y.left = element_text(color = \"black\",face = \"bold\",\n                                           size = facet_label_text_size),\n          strip.background = element_blank()) +\n    scale_y_continuous(expand = c(0,0))\n  \n  return(p)\n  \n}\n\n\n\n\n##' plot peak heatmap and profile in a picture\n##' \n##' \n##' @title peak_Profile_Heatmap\n##' @param peak peak file or GRanges object\n##' @param weightCol column name of weight\n##' @param TxDb TxDb object\n##' @param upstream upstream position\n##' @param downstream downstream position\n##' @param xlab xlab\n##' @param ylab ylab\n##' @param title title\n##' @param palette palette to be filled in,details see \\link[ggplot2]{scale_colour_brewer}\n##' @param verbose print message or not\n##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' 
@param nbin the amount of nbines \n##' @param ignore_strand ignore the strand information or not\n##' @param windows_name the name for each window, which will also be showed in the picture as labels\n##' @param nrow the nrow of plotting a list of peak\n##' @param ncol the ncol of plotting a list of peak\n##' @param facet_label_text_size the size of facet label text\n##' @param conf confidence interval\n##' @param facet one of 'none', 'row' and 'column'\n##' @param free_y if TRUE, y will be scaled by AvgProf\n##' @param height_proportion the proportion of profiling picture and heatmap\n##' @importFrom aplot insert_bottom\n##' @importFrom aplot plot_list\n##' @export\npeak_Profile_Heatmap <- function(peak, \n                                 weightCol=NULL,\n                                 TxDb=NULL,\n                                 upstream=1000, \n                                 downstream=1000,\n                                 xlab=\"\", \n                                 ylab=\"\", \n                                 title=NULL,\n                                 palette=NULL, \n                                 verbose=TRUE,\n                                 by=\"gene\", \n                                 type=\"start_site\",\n                                 nbin = NULL,\n                                 ignore_strand = FALSE,\n                                 windows_name = NULL,\n                                 ncol = NULL,\n                                 nrow = NULL,\n                                 facet_label_text_size = 12,\n                                 conf,\n                                 facet = \"row\",\n                                 free_y = TRUE,\n                                 height_proportion = 4){\n  \n  conf <- if(missingArg(conf)) NA else conf\n  \n  if(is(peak, \"list\")){\n    \n    nc <- length(peak)\n    \n    tmp <- list()\n    \n    if ( is.null(names(peak)) ) {\n      nn <- paste0(\"peak\", seq_along(peak))\n      
warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse=' '))\n      names(peak) <- nn\n      ## stop(\"tagMatrix should be a named list...\")\n    }\n    \n    if(is.null(palette)) palette <- getPalette(nc)\n    \n    if(is.null(title)) title_of_plot <- names(peak)\n    \n    for (i in 1:nc) {\n      peak_profile <- plotPeakProf(peak = peak[[i]],\n                                   upstream = upstream,\n                                   downstream = downstream,\n                                   conf = conf,\n                                   by = by,\n                                   type = type,\n                                   windows_name = windows_name,\n                                   weightCol = weightCol,\n                                   TxDb = TxDb,\n                                   xlab = xlab,\n                                   ylab = ylab,\n                                   facet = facet,\n                                   free_y = free_y,\n                                   verbose = verbose,\n                                   nbin = nbin,\n                                   ignore_strand = ignore_strand)\n      \n      peak_profile <- peak_profile + labs(title = title_of_plot[i]) +\n        theme(plot.title = element_text(hjust = 0.5))\n      \n      if(length(by) != 1){\n        peak_heatmap <- peakHeatmap_multiple_Sets(peak = peak[[i]], \n                                                  weightCol=weightCol,\n                                                  TxDb=TxDb,\n                                                  upstream=upstream, \n                                                  downstream=downstream,\n                                                  xlab=xlab, \n                                                  ylab=ylab, \n                                                  title=title,\n                                                  palette=palette[[i]], \n                         
                         verbose=verbose,\n                                                  by=by, \n                                                  type=type,\n                                                  nbin = nbin,\n                                                  ignore_strand = ignore_strand,\n                                                  windows_name = windows_name,\n                                                  ncol = ncol,\n                                                  nrow = nrow,\n                                                  facet_label_text_size = facet_label_text_size)\n      }else{\n        \n        peak_heatmap <- peakHeatmap(peak[[i]], \n                                    weightCol=weightCol, \n                                    TxDb=TxDb,\n                                    upstream=upstream, \n                                    downstream=downstream,\n                                    xlab=xlab, \n                                    ylab=ylab, \n                                    title=title,\n                                    palette=palette[[i]], \n                                    verbose=verbose,\n                                    by=by, \n                                    type=type,\n                                    nbin = nbin,\n                                    ignore_strand = ignore_strand,\n                                    ncol = ncol,\n                                    nrow = nrow)\n        \n      }\n      \n      p <- peak_profile %>% \n        insert_bottom(peak_heatmap,height = height_proportion)\n      \n      tmp[[i]] <- p\n    }\n    \n    if (is.null(ncol) && is.null(nrow))\n      nrow <- 1\n    \n    p <- plot_list(gglist = tmp,\n                   ncol = ncol,\n                   nrow = nrow)\n    \n    return(p)\n    \n  }\n  \n  peak_profile <- plotPeakProf(peak = peak,\n                               upstream = upstream,\n                               downstream = 
downstream,\n                               conf = conf,\n                               by = by,\n                               type = type,\n                               windows_name = windows_name,\n                               weightCol = weightCol,\n                               TxDb = TxDb,\n                               xlab = xlab,\n                               ylab = ylab,\n                               facet = facet,\n                               free_y = free_y,\n                               verbose = verbose,\n                               nbin = nbin,\n                               ignore_strand = ignore_strand)\n  \n  \n  if(length(by) != 1){\n    peak_heatmap <- peakHeatmap_multiple_Sets(peak = peak, \n                                              weightCol=weightCol,\n                                              TxDb=TxDb,\n                                              upstream=upstream, \n                                              downstream=downstream,\n                                              xlab=xlab, \n                                              ylab=ylab, \n                                              title=title,\n                                              palette=palette, \n                                              verbose=verbose,\n                                              by=by, \n                                              type=type,\n                                              nbin = nbin,\n                                              ignore_strand = ignore_strand,\n                                              windows_name = windows_name,\n                                              ncol = ncol,\n                                              nrow = nrow,\n                                              facet_label_text_size = facet_label_text_size)\n  }else{\n    \n    peak_heatmap <- peakHeatmap(peak = peak, \n                                weightCol=weightCol, \n                            
    TxDb=TxDb,\n                                upstream=upstream, \n                                downstream=downstream,\n                                xlab=xlab, \n                                ylab=ylab, \n                                title=title,\n                                palette=palette, \n                                verbose=verbose,\n                                by=by, \n                                type=type,\n                                nbin = nbin,\n                                ignore_strand = ignore_strand,\n                                ncol = ncol,\n                                nrow = nrow)\n    \n  }\n  \n  p <- peak_profile %>% \n    insert_bottom(peak_heatmap,height = height_proportion)\n  \n\n  return(p)\n}"
  },
  {
    "path": "R/readPeakFile.R",
    "content": "##' read peak file and store in data.frame or GRanges object\n##'\n##' \n##' @title readPeakFile\n##' @param peakfile peak file\n##' @param as output format, one of GRanges or data.frame\n##' @param ... additional parameter (pass to `utils::read.delim()`)\n##' @return peak information, in GRanges or data.frame object\n##' @import IRanges GenomicRanges\n##' @export\n##' @examples\n##' peakfile <- system.file(\"extdata\", \"sample_peaks.txt\", package=\"ChIPseeker\")\n##' peak.gr <- readPeakFile(peakfile, as=\"GRanges\")\n##' peak.gr\n##' @author G Yu\nreadPeakFile <- function(peakfile, as=\"GRanges\", ...) {\n    as <- match.arg(as, c(\"GRanges\", \"data.frame\"))\n    peak.df <- peak2DF(peakfile, ...)\n    if (as == \"data.frame\")\n        return(peak.df)\n    peak.gr <- peakDF2GRanges(peak.df)\n    return(peak.gr)\n}\n\npeakDF2GRanges <- function(peak.df) {\n    peak.gr=GRanges(seqnames=peak.df[,1],\n        ranges=IRanges(peak.df[,2], peak.df[,3]))\n    cn <- colnames(peak.df)\n    if (length(cn) > 3) {\n        for (i in 4:length(cn)) {\n            mcols(peak.gr)[[cn[i]]] <- peak.df[, cn[i]]\n        }\n    }\n    return(peak.gr)\n}\n\n##' @importFrom utils read.delim\npeak2DF <- function(peakfile, header, ...) 
{\n    if (missing(header)) {\n        ## determine file format\n        if (isBedFile(peakfile)) {\n            header <- FALSE\n        } else {\n            header <- TRUE\n        }\n    }\n    peak.df <- read.delim(peakfile, header=header, comment.char=\"#\", ...)\n    ## coordinate system in BED file is start at 0\n    ## refer to http://asia.ensembl.org/info/website/upload/bed.html?redirect=no\n    ## The chromEnd base is not included in the display of the feature.\n    ## For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100,\n    ## and span the bases numbered 0-99.\n    ## so chromEnd, peak.df[,3], is not needed to +1\n    peak.df[,2] <- peak.df[,2] + 1\n    return(peak.df)\n}\n\nisBedFile <- function(peakfile) {\n    ## peakfile is a peak file name\n    grepl(\"\\\\.bed$\", peakfile) || grepl(\"\\\\.bed.gz$\", peakfile) || \n    grepl(\"\\\\Peak.gz$\", peakfile) || grepl(\"\\\\.bedGraph.gz$\", peakfile) || \n    grepl(\"\\\\.narrowPeak$\", peakfile) || grepl(\"\\\\.broadPeak$\",peakfile) ||\n    grepl(\"\\\\.gappedPeak$\", peakfile)\n}\n"
  },
  {
    "path": "R/seq2gene.R",
    "content": "##' annotate genomic regions to genes in many-to-many mapping\n##'\n##' This funciton associates genomic regions with coding genes in a many-to-many mapping. It first maps genomic regions to host genes (either located in exon or intron), proximal genes (located in promoter regions) and flanking genes (located in upstream and downstream within user specify distance).\n##' @title seq2gene\n##' @param seq genomic regions in GRanges object\n##' @param tssRegion TSS region\n##' @param flankDistance flanking search radius\n##' @param TxDb TranscriptDb object\n##' @param sameStrand logical whether find nearest/overlap gene in the same strand\n##' @return gene vector\n##' @export\n##' @examples\n##' \\dontrun{\n##' library(TxDb.Hsapiens.UCSC.hg19.knownGene)\n##' TxDb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n##' file <- getSampleFiles()[[1]] # a bed file\n##' gr <- readPeakFile(file)\n##' genes <- seq2gene(gr, tssRegion=c(-1000, 1000), flankDistance = 3000, TxDb) \n##' }\n##' @importFrom yulab.utils get_cache_element\n##' @importFrom yulab.utils update_cache_item\n##' @author Guangchuang Yu\nseq2gene <- function(seq, tssRegion, flankDistance, TxDb, sameStrand=FALSE) {\n    .ChIPseekerEnv(TxDb, item = ChIPseekerCache)\n    # ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n    \n    ## Exons\n    exonList <- get_cache_element(item = ChIPseekerCache, elements = \"exonList\")\n    if(is.null(exonList)){\n        exonList <- exonsBy(TxDb)\n        update_cache_item(item = ChIPseekerCache, list(\"exonList\" = exonList))\n    }\n\n    # if ( exists(\"exonList\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n    #     exonList <- get(\"exonList\", envir=ChIPseekerEnv)\n    # } else {\n    #     exonList <- exonsBy(TxDb)\n    #     assign(\"exonList\", exonList, envir=ChIPseekerEnv)\n    # }\n    exons <- getGenomicAnnotation.internal(seq, exonList, type = \"Exon\", sameStrand=sameStrand)\n    \n    ## Introns\n    intronList <- get_cache_element(item = 
ChIPseekerCache, elements = \"intronList\")\n\n    if(is.null(intronList)){\n        intronList <- intronsByTranscript(TxDb)\n        update_cache_item(item = ChIPseekerCache, list(\"intronList\" = intronList))\n    }\n\n    # if ( exists(\"intronList\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n    #     intronList <- get(\"intronList\", envir=ChIPseekerEnv)\n    # } else {\n    #     intronList <- intronsByTranscript(TxDb)\n    #     assign(\"intronList\", intronList, envir=ChIPseekerEnv)\n    # }\n    introns <- getGenomicAnnotation.internal(seq, intronList, type=\"Intron\", sameStrand=sameStrand)\n    \n    genes <- c(exons$gene, introns$gene)\n    ## > head(genes)\n    ## [1] \"uc001aed.3/126789\"    \"uc001aka.3/440556\"    \"uc001ako.3/49856\"    \n    ## [4] \"uc001alg.3/100133612\" \"uc009vly.2/390992\"    \"uc001awv.2/79814\"   \n    genes <- gsub(\"\\\\w+\\\\.*\\\\d*/(\\\\d+)\", \"\\\\1\", genes)\n    ## > head(genes)\n    ## [1] \"126789\"    \"440556\"    \"49856\"     \"100133612\" \"390992\"    \"79814\"   \n\n    features <- getGene(TxDb, by=\"gene\")\n    idx.dist <- getNearestFeatureIndicesAndDistances(seq, features, sameStrand=sameStrand)\n    nearestFeatures <- features[idx.dist$index] \n    \n    distance <- idx.dist$distance\n\n    pi <- distance > tssRegion[1] & distance < tssRegion[2]\n    promoters <- mcols(nearestFeatures[pi])[[\"gene_id\"]]\n\n    nearest_genes <- mcols(nearestFeatures[!pi][abs(distance[!pi]) < flankDistance])[[\"gene_id\"]]\n\n    genes <- c(genes, promoters, nearest_genes)\n    return(unique(genes))\n}\n"
  },
  {
    "path": "R/subset.R",
    "content": "##' @importFrom S4Vectors subset\r\n##' @importFrom BiocGenerics start\r\n##' @importFrom BiocGenerics end\r\n##' @method subset csAnno\r\n##' @export\r\nsubset.csAnno <- function(x, ... ){\r\n  \r\n  index <- paste(seqnames(x@anno),start(x@anno),end(x@anno), sep = \"_\")\r\n  # subset the GRanges\r\n  x@anno <- subset(x@anno, ...)\r\n  index2 <- paste(seqnames(x@anno),start(x@anno),end(x@anno), sep = \"_\")\r\n  \r\n  # the tssRegion, level, hasGenomicAnnotation remain unchanged\r\n  \r\n  # change the detailGenomicAnnotation\r\n  x@detailGenomicAnnotation <- x@detailGenomicAnnotation[index %in% index2,]\r\n  \r\n  # change the annotation stat \r\n  x@annoStat <- getGenomicAnnoStat(x@anno)\r\n  \r\n  # change peak number\r\n  x@peakNum <-  length(x@anno)\r\n  \r\n  return(x)\r\n  \r\n}\r\n"
  },
  {
    "path": "R/tagMatrix.R",
    "content": "##' prepare the promoter regions\n##'\n##'\n##' @title getPromoters\n##' @param TxDb TxDb\n##' @param upstream upstream from TSS site\n##' @param downstream downstream from TSS site\n##' @param by one of gene or transcript\n##' @return GRanges object\n##' @export\ngetPromoters <- function(TxDb=NULL,\n                         upstream=1000,\n                         downstream=1000,\n                         by = \"gene\") {\n  \n  getBioRegion(TxDb = TxDb,\n               upstream = upstream,\n               downstream = downstream,\n               by = by,\n               type = \"start_site\")\n}\n\n\n##' prepare a bioregion of selected feature\n##' \n##' this function combined previous functions getPromoters(), getBioRegion() and getGeneBody() in order\n##' to solve the following issues.\n##' \n##' (1) \\url{https://github.com/GuangchuangYu/ChIPseeker/issues/16}\n##' \n##' (2) \\url{https://github.com/GuangchuangYu/ChIPseeker/issues/87}\n##' \n##' The getBioRegion() function can prevoid a region of interest from\n##' \\code{txdb} object. There are three kinds of regions, \\code{start_site},\n##' \\code{end_site} and \\code{body}. \n##' \n##' We take transcript region to expain the differences of these three regions.\n##' tx: chr1 1000 1400. \n##' \n##' \\code{body} region refers to the 1000-1400bp.\n##' \n##' \\code{start_site} region with \\code{upstream = 100, downstream = 100} refers to 900-1100bp. 
\n##' \n##' \\code{end_site} region with \\code{upstream = 100, downstream = 100} refers to 1300-1500bp.\n##'\n##' @title getBioRegion\n##' @param TxDb TxDb\n##' @param upstream upstream from start site or end site\n##' @param downstream downstream from start site or end site\n##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @return GRanges object\n##' @import BiocGenerics IRanges GenomicRanges\n##' @importFrom yulab.utils get_cache_item\n##' @author Guangchuang Yu, Ming L\n##' @export\ngetBioRegion <- function(TxDb=NULL,\n                         upstream=1000,\n                         downstream=1000,\n                         by=\"gene\",\n                         type=\"start_site\"){\n  \n  by <- match.arg(by, c('gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR','UTR'))\n  type <- match.arg(type, c(\"start_site\", \"end_site\", \"body\"))\n  \n  TxDb <- loadTxDb(TxDb)\n  .ChIPseekerEnv(TxDb, item = ChIPseekerCache)\n  # ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n  \n  label <- make_label(type = type, by = by)\n  \n  \n  if(by == 'gene' || by == 'transcript'){\n    regions <- getGene(TxDb, by)\n  }\n  \n  if (by == \"exon\") {\n    # exonList <- get_exonList(ChIPseekerEnv)\n    exonList <- get_exonList(item = ChIPseekerCache)\n    regions <-  unlist(exonList)\n  }\n  \n  if (by == \"intron\") {\n    # intronList <- get_intronList(ChIPseekerEnv)\n    intronList <- get_intronList(item = ChIPseekerCache)\n    regions <- unlist(intronList)\n  }\n  \n  if (by == \"3UTR\") {\n    threeUTRList <- threeUTRsByTranscript(TxDb)\n    regions <- unlist(threeUTRList)\n  }\n  \n  if (by == \"5UTR\") {\n    fiveUTRList <- fiveUTRsByTranscript(TxDb)\n    regions <- unlist(fiveUTRList)\n  }\n  \n  if (by == 'UTR'){\n    three_URT <- threeUTRsByTranscript(TxDb)\n    three_UTR_regions <- unlist(three_URT)\n    five_UTR <- fiveUTRsByTranscript(TxDb)\n    
five_UTR_regions <- unlist(five_UTR)\n    regions <- c(three_UTR_regions,five_UTR_regions)\n  }\n  \n  if(type == \"start_site\"){\n    coordinate<- ifelse(strand(regions) == \"+\", start(regions), end(regions))\n  }else if(type == \"end_site\"){\n    coordinate<- ifelse(strand(regions) == \"+\", end(regions), start(regions))\n  }else{\n    ## assign attribute \n    attr(regions, 'type') = type\n    attr(regions, 'by') = by\n    attr(regions, 'label') = label\n    \n    return(regions)\n  }\n  \n  ## issue and code obtained from Chen Ting(NIH/NCI)\n  start_site <- ifelse(strand(regions) == \"+\",coordinate-upstream, coordinate-downstream)\n  end_site <- ifelse(strand(regions) == \"+\", coordinate+downstream, coordinate+upstream)\n  \n  bioRegion <- GRanges(seqnames=seqnames(regions),\n                       ranges=IRanges(start_site, end_site),\n                       strand=strand(regions))\n  bioRegion <- unique(bioRegion)\n  \n  ## assign attribute \n  attr(bioRegion, 'type') = type\n  attr(bioRegion, 'by') = by\n  \n  ## different region have different label to be added to the figures\n  ## so we attach label to the Granges object\n  attr(bioRegion, 'label') = label\n  \n  attr(bioRegion, 'upstream') = upstream\n  attr(bioRegion, 'downstream') = downstream\n  \n  return(bioRegion)\n}\n\n##' make windows from granges object\n##' \n##' \\code{makeBioRegionFromGranges()} function can make bioregion from granges object.\n##' \n##' The differences between \\code{makeBioRegionFromGranges()} and \\code{getBioRegion()} is that\n##' \\code{getBioRegion()} get the region object from \\code{txdb} object but\n##' \\code{makeBioRegionFromGranges()} get the region from the granges object provided by users.\n##' For example, \\code{txdb} object do not contain insulator or enhancer regions. Users can\n##' provide these regions through self-made granges object\n##' \n##' There are three kinds of regions, \\code{start_site}, \\code{end_site} and \\code{body}. 
\n##' \n##' We take enhancer region to explain the differences of these three regions.\n##' enhancer: chr1 1000 1400. \n##' \n##' \\code{body} region refers to the 1000-1400bp.\n##' \n##' \\code{start_site} region with \\code{upstream = 100, downstream = 100} refers to 900-1100bp. \n##' \n##' \\code{end_site} region with \\code{upstream = 100, downstream = 100} refers to 1300-1500bp.\n##'\n##' In \\code{makeBioRegionFromGranges()}, \\code{upstream} and \\code{downstream} can be\n##' \\code{NULL} if the \\code{type == 'body'}. \\code{by} should be specified by users and \n##' can not be omitted. \\code{by} parameter will be used to made labels. \\code{type} should also\n##' be specified.\n##' \n##' \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}\n##' \n##' @title makeBioRegionFromGranges\n##' \n##' @param gr a grange object contain region of interest\n##' @param upstream upstream from start site or end site, can be NULL if the type == 'body'\n##' @param downstream downstream from start site or end site, can be NULL if the type == 'body'\n##' @param by specify be users, e.g. 
gene, insulator, enhancer\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @return GRanges object\n##' @import BiocGenerics IRanges GenomicRanges\n##' @export\nmakeBioRegionFromGranges <- function(gr,\n                                     by,\n                                     type,\n                                     upstream=1000,\n                                     downstream=1000){\n  \n  if (!is(gr, \"GRanges\")) {\n    stop(\"windows should be a GRanges object...\")\n  }\n  \n  type <- match.arg(type, c(\"start_site\", \"end_site\", \"body\"))\n  \n  label <- make_label(type = type, by = by)\n  regions <- gr\n  \n  if(type == \"start_site\"){\n    coordinate<- ifelse(strand(regions) == \"+\", start(regions), end(regions))\n  }else if(type == \"end_site\"){\n    coordinate<- ifelse(strand(regions) == \"+\", end(regions), start(regions))\n  }else{\n    ## assign attribute \n    attr(regions, 'type') = type\n    attr(regions, 'by') = by\n    attr(regions, 'label') = label\n    \n    return(regions)\n  }\n  \n  ## issue and code obtained from Chen Ting(NIH/NCI)\n  start_site <- ifelse(strand(regions) == \"+\",coordinate-upstream, coordinate-downstream)\n  end_site <- ifelse(strand(regions) == \"+\", coordinate+downstream, coordinate+upstream)\n  \n  bioRegion <- GRanges(seqnames=seqnames(regions),\n                       ranges=IRanges(start_site, end_site),\n                       strand=strand(regions))\n  bioRegion <- unique(bioRegion)\n  \n  ## assign attribute \n  attr(bioRegion, 'type') = type\n  attr(bioRegion, 'by') = by\n  attr(bioRegion, 'label') = label\n  attr(bioRegion, 'upstream') = upstream\n  attr(bioRegion, 'downstream') = downstream\n  \n  return(bioRegion)\n  \n}\n\n\n##' calculate the tag matrix\n##' \n##' \\code{getTagMatrix()} function can produce the matrix for visualization.\n##' \\code{peak} stands for the peak file. \n##' \\code{window} stands for a collection of regions that users want to look into. 
\n##' Users can use \\code{window} to capture the peak of interest.\n##' There are two ways to input \\code{window}. \n##' \n##' The first way is that users can use\n##' \\code{getPromoters()/getBioRegion()/makeBioRegionFromGranges()} to \n##' get \\code{window} and put it into \\code{getTagMatrix()}. \n##' \n##' The second way is that users can use \\code{getTagMatrix()} to\n##' call \\code{getPromoters()/getBioRegion()/makeBioRegionFromGranges()}. In this way\n##' users do not need to input \\code{window} parameter but they need to input\n##' \\code{txdb}. \n##' \n##' \\code{txdb} is a set of packages containing annotations \n##' of regions of different genomes. Users can\n##' get the regions of interest through specific functions. These specific functions\n##' are built in \\code{getPromoters()/getBioRegion()}. Many regions cannot be obtained\n##' through \\code{txdb}, like insulator and enhancer regions. \n##' Users can provide these regions in the form of granges objects. \n##' These self-made granges objects will be passed to \\code{TxDb} parameter and they will\n##' be passed to \\code{makeBioRegionFromGranges()} to produce the \\code{window}.\n##' In a word, \\code{TxDb} parameter is a reference information. 
Users can\n##' pass \\code{txdb object} or self-made granges into it.\n##' \n##' Details see \\code{\\link{getPromoters}},\\code{\\link{getBioRegion}} and \\code{\\link{makeBioRegionFromGranges}}\n##' \n##' \\code{upstream} and \\code{downstream} parameter have different usages:\n##' \n##' (1) \\code{window} parameter is provided, \n##' \n##' if \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can be used to extend \n##' the flank of body region.\n##' \n##' if \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} do not\n##' play a role in \\code{getTagMatrix()} function.\n##' \n##' (2) \\code{window} parameter is missing,\n##' \n##' if \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can be used to extend \n##' the flank of body region.\n##' \n##' if \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} refer to\n##' the upstream and downstream of the start_site or the end_site.\n##' \n##' \\code{weightCol} refers to a column in the peak file. This column acts as a weight value. Details\n##' see \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}\n##' \n##' \\code{nbin} refers to the number of bins. 
\\code{getTagMatrix()} provide a binning method\n##' to get the tag matrix.\n##' \n##' @title getTagMatrix\n##'\n##' @param peak peak peak file or GRanges object\n##' @param upstream the distance of upstream extension\n##' @param downstream the distance of downstream extension\n##' @param windows a collection of region\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @param by one of 'gene', 'transcript', 'exon', 'intron', '3UTR' , '5UTR', or specified by users\n##' @param TxDb TxDb or self-made granges object, served as txdb\n##' @param weightCol column name of weight, default is NULL\n##' @param nbin the amount of nbines \n##' @param verbose print message or not\n##' @param ignore_strand ignore the strand information or not\n##' @return tagMatrix\n##' @importFrom ggplot2 rel\n##' @export\ngetTagMatrix <- function(peak, \n                         upstream,\n                         downstream, \n                         windows,\n                         type,\n                         by,\n                         TxDb=NULL,\n                         weightCol = NULL, \n                         nbin = NULL,\n                         verbose = TRUE,\n                         ignore_strand= FALSE){\n  \n  is_GRanges_of_TxDb <- FALSE\n  if (is(TxDb, \"GRanges\")) {\n    is_GRanges_of_TxDb <- TRUE\n    message(\"#\\n#.. 
'TxDb' is a self-defined 'GRanges' object...\\n#\")\n  }\n  \n  if(missingArg(windows)){\n    \n    if(is_GRanges_of_TxDb){\n      \n      ## make windows from self-made granges object\n      windows <- makeBioRegionFromGranges(gr=TxDb,\n                                          by=by,\n                                          type=type,\n                                          upstream=upstream,\n                                          downstream=downstream)\n      \n    }else{\n      \n      ## make windows from txdb object\n      windows <- getBioRegion(TxDb=TxDb,\n                              upstream=upstream,\n                              downstream=downstream,\n                              by=by,\n                              type=type)\n      \n      \n    }\n    \n  }else{\n    \n    if (!is(windows, \"GRanges\")) {\n      stop(\"windows should be a GRanges object...\")\n    }\n    \n    if(is.null(attr(windows,'type'))){\n      stop(\"windows should be made from getPromoters()/getBioRegion()/makeBioRegionFromGranges()\")\n    }\n    \n    type <- attr(windows, 'type')\n    by <- attr(windows, 'by')\n    \n  }\n  \n  # check the upstream and downstream parameter\n  if(type == \"body\"){\n    if(missingArg(upstream)){\n      upstream <- NULL\n    }\n    \n    if(missingArg(downstream)){\n      downstream <- NULL\n    }\n    \n  }else{\n    upstream <- attr(windows, 'upstream')\n    downstream <- attr(windows, 'downstream')\n  }\n  \n  ## check upstream and downstream parameter\n  check_upstream_and_downstream(upstream = upstream, downstream = downstream)\n  \n  if(type != 'body'){\n    if(inherits(upstream, 'rel') || is.null(upstream)){\n      stop(\"upstream and downstream for site region should be actual number...\")\n    }\n  }\n  \n  ## check nbin parameters\n  if(!is.null(nbin) && !is.numeric(nbin)){\n    stop('nbin should be NULL or numeric...')\n  }\n  \n  if(type == 'body' && is.null(nbin)){\n    stop('plotting body region should set the 
nbin parameter...')\n  }\n  \n  ## check nbin parameter\n  if(!is.null(nbin)){\n    cat(\">> binning method is used...\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\",sep = \"\")\n    \n    is.binning <- TRUE\n  }else{\n    \n    is.binning <- FALSE\n  }\n  \n  if (verbose) {\n    cat(\">> preparing \",type,\" regions\",\" by \",by,\"... \",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\",sep = \"\")\n  }\n  \n  \n  if(is.binning){\n    \n    if (verbose) {\n      cat(\">> preparing tag matrix by binning... \",\n          format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n    }\n    \n    tagMatrix <- getTagMatrix.binning.internal(peak = peak, \n                                               weightCol = weightCol, \n                                               windows = windows, \n                                               nbin = nbin,\n                                               upstream = upstream,\n                                               downstream = downstream,\n                                               ignore_strand = ignore_strand)\n  }else{\n    \n    if (verbose) {\n      cat(\">> preparing tag matrix... 
\",\n          format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n    }\n    \n    tagMatrix <- getTagMatrix.internal(peak=peak, \n                                       weightCol=weightCol, \n                                       windows=windows, \n                                       ignore_strand=ignore_strand)\n  }\n  \n  ## assign attribute \n  attr(tagMatrix, 'upstream') = upstream\n  attr(tagMatrix, 'downstream') = downstream\n  attr(tagMatrix, 'type') = attr(windows, 'type')\n  attr(tagMatrix, 'label') = attr(windows, 'label')\n  attr(tagMatrix, \"is.binning\") <- is.binning\n  \n  return(tagMatrix)\n}\n\n\n##' calculate the tag matrix\n##'\n##'\n##' @title getTagMatrix.internal\n##' @param peak peak file or GRanges object\n##' @param weightCol column name of weight, default is NULL\n##' @param windows a collection of region with equal size, eg. promoter region.\n##' @param ignore_strand ignore the strand information or not\n##' @return tagMatrix\n##' @import BiocGenerics S4Vectors IRanges GenomeInfoDb GenomicRanges\n##' @author G Yu\ngetTagMatrix.internal <- function(peak, \n                                  weightCol=NULL, \n                                  windows, \n                                  ignore_strand= FALSE) {\n  peak.gr <- loadPeak(peak)\n  \n  if (! 
is(windows, \"GRanges\")) {\n    stop(\"windows should be a GRanges object...\")\n  }\n  if (length(unique(width(windows))) != 1) {\n    stop(\"width of windows should be equal...\")\n  }\n  \n  ## if (!exists(\"ChIPseekerEnv\", envir = .GlobalEnv)) {\n  ##     assign(\"ChIPseekerEnv\", new.env(), .GlobalEnv)\n  ## }\n  ## ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir = .GlobalEnv)\n  \n  ## if (exists(\"peak\", envir=ChIPseekerEnv, inherits=FALSE) &&\n  ##     exists(\"promoters\", envir=ChIPseekerEnv, inherits=FALSE) &&\n  ##     exists(\"weightCol\", envir=ChIPseekerEnv, inherits=FALSE) &&\n  ##     exists(\"tagMatrix\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n  \n  ##     pp <- get(\"peak\", envir=ChIPseekerEnv)\n  ##     promoters <- get(\"promoters\", envir=ChIPseekerEnv)\n  ##     w <- get(\"weightCol\", envir=ChIPseekerEnv)\n  \n  ##     if (all(pp == peak)) {\n  ##         if (all(windows == promoters)) {\n  ##             if ( (is.null(w) && is.null(weightCol)) ||\n  ##                 (!is.null(w) && !is.null(weightCol) && w == weightCol)) {\n  ##                 tagMatrix <- get(\"tagMatrix\", envir=ChIPseekerEnv)\n  ##                 return(tagMatrix)\n  ##             } else {\n  ##                 assign(\"weightCol\", weightCol, envir=ChIPseekerEnv)\n  ##             }\n  ##         } else {\n  ##             assign(\"promoters\", windows)\n  ##             ## make sure it is not conflict with getPromoters\n  ##             if ( exists(\"upstream\", envir=ChIPseekerEnv, inherits=FALSE))\n  ##                 rm(\"upstream\", envir=ChIPseekerEnv)\n  ##         }\n  ##     } else {\n  ##         assign(\"peak\", peak, envir=ChIPseekerEnv)\n  ##     }\n  \n  ## }\n  \n  ## if ( !exists(\"peak\", envir=ChIPseekerEnv, inherits=FALSE)) {\n  ##     assign(\"peak\", peak, envir=ChIPseekerEnv)\n  ## }\n  \n  ## if ( !exists(\"promoters\", envir=ChIPseekerEnv, inherits=FALSE)) {\n  ##     assign(\"promoters\", windows, envir=ChIPseekerEnv)\n  ## }\n  \n  
## if (!exists(\"weightCol\", envir=ChIPseekerEnv, inherits=FALSE)) {\n  ##     assign(\"weightCol\", weightCol, envir=ChIPseekerEnv)\n  ## }\n  if (is.null(weightCol)) {\n    peak.cov <- coverage(peak.gr)\n  } else {\n    weight <- mcols(peak.gr)[[weightCol]]\n    peak.cov <- coverage(peak.gr, weight=weight)\n  }\n  cov.len <- elementNROWS(peak.cov)\n  cov.width <- GRanges(seqnames=names(cov.len),\n                       IRanges(start=rep(1, length(cov.len)),\n                               end=cov.len))\n  windows <- subsetByOverlaps(windows, cov.width,\n                              type=\"within\", ignore.strand=FALSE)\n  \n  chr.idx <- intersect(names(peak.cov),\n                       unique(as.character(seqnames(windows))))\n  \n  peakView <- Views(peak.cov[chr.idx], as(windows, \"IntegerRangesList\")[chr.idx])\n  tagMatrixList <- lapply(peakView, function(x) t(viewApply(x, as.vector)))\n  tagMatrix <- do.call(\"rbind\", tagMatrixList)\n  \n  ## get the index of windows, that are reorganized by as(windows, \"IntegerRangesList\")\n  idx.list <- split(1:length(windows),  as.factor(seqnames(windows)))\n  idx <- do.call(\"c\", idx.list)\n  \n  rownames(tagMatrix) <- idx\n  tagMatrix <- tagMatrix[order(idx),]\n  \n  ## minus strand\n  if (!ignore_strand) {\n    minus.idx <- which(as.character(strand(windows)) == \"-\")\n    tagMatrix[minus.idx,] <- tagMatrix[minus.idx, ncol(tagMatrix):1]\n  }\n  \n  tagMatrix <- tagMatrix[rowSums(tagMatrix)!=0,]\n  ## assign(\"tagMatrix\", tagMatrix, envir=ChIPseekerEnv)\n  return(tagMatrix)\n}\n\n\n##' calculate the tagMatrix by binning\n##' the idea was derived from the function of deeptools\n##' https://deeptools.readthedocs.io/en/develop/content/tools/computeMatrix.html \n##' \n##' @title getTagMatrix.binning.internal\n##' @param peak peak peak file or GRanges object\n##' @param weightCol weightCol column name of weight, default is NULL\n##' @param windows windows a collection of region with equal or not equal size, eg. 
promoter region, gene region.\n##' @param nbin the number of bins each region is split into; it should not be more than min_body_length\n##' @param upstream rel object, NULL or actual number\n##' @param downstream rel object, NULL or actual number\n##' @param ignore_strand ignore the strand information or not\n##' @import BiocGenerics S4Vectors IRanges GenomeInfoDb GenomicRanges \n##' @importFrom ggplot2 rel\n##' @return tagMatrix \ngetTagMatrix.binning.internal <- function(peak, \n                                          weightCol = NULL, \n                                          windows, \n                                          nbin = 800,\n                                          upstream = NULL,\n                                          downstream = NULL,\n                                          ignore_strand = FALSE){\n  \n  min_body_length <- filter_length <- nbin\n  peak.gr <- loadPeak(peak)\n  type <- attr(windows, 'type')\n  \n  \n  if (!is(windows, \"GRanges\")) {\n    stop(\"windows should be a GRanges object...\")\n  }\n  \n  if (is.null(weightCol)) {\n    peak.cov <- coverage(peak.gr)\n  } else {\n    weight <- mcols(peak.gr)[[weightCol]]\n    peak.cov <- coverage(peak.gr, weight=weight)\n  }\n  \n  \n  cov.len <- elementNROWS(peak.cov)\n  cov.width <- GRanges(seqnames=names(cov.len),\n                       IRanges(start=rep(1, length(cov.len)),\n                               end=cov.len))\n  \n  windows <- subsetByOverlaps(windows, \n                              cov.width,\n                              type=\"within\", \n                              ignore.strand=FALSE)\n  \n  ## extend the windows by rel object\n  if(inherits(upstream, 'rel')){\n    \n    windows1 <- windows\n    \n    if(!ignore_strand){\n      \n      positive_index <- which(as.character(strand(windows1)) == \"+\")\n      negative_index <- which(as.character(strand(windows1)) == \"-\")\n      start(windows1)[positive_index] <- 
suppressWarnings(start(windows1)[positive_index] - floor(width(windows)[positive_index]*as.numeric(upstream)))\n      end(windows1)[positive_index] <- suppressWarnings(end(windows1)[positive_index] + floor(width(windows)[positive_index]*as.numeric(downstream)))\n      \n      start(windows1)[negative_index] <- suppressWarnings(start(windows1)[negative_index] - floor(width(windows)[negative_index]*as.numeric(downstream)))\n      end(windows1)[negative_index] <- suppressWarnings(end(windows1)[negative_index] + floor(width(windows)[negative_index]*as.numeric(upstream)))\n      \n    }else{\n      \n      start(windows1) <- suppressWarnings(start(windows1) - floor(width(windows)*as.numeric(upstream)))\n      end(windows1) <- suppressWarnings(end(windows1) + floor(width(windows)*as.numeric(downstream)))\n      \n    }\n   \n    windows <- windows1\n    nbin <- floor(nbin*(1+as.numeric(downstream)+as.numeric(upstream)))\n    min_body_length <- min_body_length*(1+as.numeric(upstream)+as.numeric(downstream))\n    \n    cat(\">> preparing matrix with extension from (\",attr(windows,'label')[1],\"-\",\n        100*as.numeric(upstream),\"%)~(\",attr(windows,'label')[2],\"+\",\n        100*as.numeric(downstream),\"%)... \",\n        format(Sys.time(), \"%Y-%m-%d %X\"),\"\\n\",sep = \"\")\n  }\n  \n  ## do not extend\n  if(is.null(upstream)){\n    if(attr(windows, 'type') == 'body'){\n      cat(\">> preparing matrix for \",attr(windows, 'type'),\" region with no flank extension... \",\n          format(Sys.time(), \"%Y-%m-%d %X\"),\"\\n\",sep = \"\")\n    }else{\n      cat(\">> preparing matrix for \",attr(windows,'type'),\" region... 
\",\n          format(Sys.time(), \"%Y-%m-%d %X\"),\"\\n\",sep = \"\")\n    }\n  }\n  \n  ## extend the windows by actual number \n  if(!is.null(upstream) && !inherits(upstream, 'rel') && attr(windows, 'type')== 'body'){\n    \n    windows1 <- windows\n    \n    if(!ignore_strand){\n      \n      positive_index <- which(as.character(strand(windows1)) == \"+\")\n      negative_index <- which(as.character(strand(windows1)) == \"-\")\n      \n      start(windows1)[positive_index] <- suppressWarnings(start(windows1)[positive_index] - upstream)\n      end(windows1)[positive_index] <- suppressWarnings(end(windows1)[positive_index] + downstream)\n      \n      start(windows1)[negative_index] <- suppressWarnings(start(windows1)[negative_index] - downstream)\n      end(windows1)[negative_index] <- suppressWarnings(end(windows1)[negative_index] + upstream)\n      \n    }else{\n      \n      start(windows1) <- suppressWarnings(start(windows1) - upstream)\n      end(windows1) <- suppressWarnings(end(windows1) + downstream)\n      \n    }\n    \n    windows <- windows1\n    upstreamPer <- floor(upstream/1000)*0.1\n    downstreamPer <- floor(downstream/1000)*0.1\n    nbin <- floor(nbin*(1+upstreamPer+downstreamPer))\n    min_body_length <- min_body_length+upstream+downstream\n    \n    cat(\">> preparing matrix with flank extension from (\",attr(windows,'label')[1],\"-\",\n        upstream,\"bp)~(\",attr(windows,'label')[2],\"+\",downstream,\"bp)... 
\",\n        format(Sys.time(), \"%Y-%m-%d %X\"),\"\\n\",sep = \"\")\n  }\n  \n  chr.idx <- intersect(names(peak.cov),\n                       unique(as.character(seqnames(windows))))\n  \n  windows <- as(windows, \"IntegerRangesList\")[chr.idx]\n  attr(windows,'type') <- type\n  \n  peakView <- Views(peak.cov[chr.idx], \n                    windows)\n  \n  ## remove the gene that has no binding proteins\n  for (i in 1:length(peakView)) {\n    \n    index <- viewSums(peakView[[i]])!= 0\n    peakView[[i]] <- peakView[[i]][index]\n    windows[[i]] <- windows[[i]][index]\n  } \n  \n  tagMatrixList <- lapply(peakView, function(x) viewApply(x, as.vector))\n  \n  if(!attr(windows, 'type') == 'body'){\n    \n    tagMatrixList <- lapply(tagMatrixList, function(x) t(x))\n    \n    # to remove the chromosome that do not bind protein\n    index <- vapply(tagMatrixList, function(x) length(x)>0, FUN.VALUE = logical(1))\n    tagMatrixList <- tagMatrixList[index]\n    windows <- windows[index]\n    \n    ## create a matrix to receive binning results\n    tagMatrix <- list()\n    \n    ## this circulation is to deal with different chromosomes\n    for (i in 1:length(tagMatrixList)) {\n      \n      tagMatrix[[i]] <- matrix(nrow = nrow(tagMatrixList[[i]]),ncol = nbin)\n      \n      ## this circulation is to deal with different genes\n      for (j in 1:nrow(tagMatrixList[[i]])) {\n        \n        ## seq is the distance between different bins\n        seq <- floor(length(tagMatrixList[[i]][j,])/nbin)\n        \n        ## cursor record the position of calculation\n        cursor <- 1\n        \n        ## the third circulation is to calculate the binding strength\n        ## it has two parts\n        ## the first part is to for the nbin(1:nbin-1)\n        ## because the seq is not derived from exact division\n        ## the second part is to compensate the loss of non-exact-division\n        \n        ## this the first part for 1:(nbin-1)\n        for (k in 1:(nbin-1)) {\n         
 \n          read <- 0\n          \n          for (z in cursor:(cursor+seq-1)) {\n            read <- read + tagMatrixList[[i]][j,z]\n          }\n          \n          tagMatrix[[i]][j,k] <- read/seq\n          \n          cursor <- cursor+seq\n        }\n        \n        ## this is the second part, to compensate for the loss of non-exact-division\n        read <- 0\n        for (z in cursor:length(tagMatrixList[[i]][j,])) {\n          read <- read+tagMatrixList[[i]][j,z]\n        }\n        \n        tagMatrix[[i]][j,nbin] <- read/(length(tagMatrixList[[i]][j,])-cursor+1)\n      }\n      \n      if(!ignore_strand){\n        minus.idx <- which(as.character(mcols(windows[[i]])[[\"strand\"]]) == \"-\")\n        tagMatrix[[i]][minus.idx,] <- tagMatrix[[i]][minus.idx, ncol(tagMatrix[[i]]):1]\n      }\n    }\n    \n  }else{\n    \n    ## extend genebody by actual number\n    if(!is.null(upstream) & !inherits(upstream, 'rel')){\n      \n      for (i in 1:length(tagMatrixList)) {\n        if (length(class(tagMatrixList[[i]])) != 1) {\n          sample <- tagMatrixList[[i]]\n          tagMatrixList[[i]] <- lapply(seq_len(ncol(sample)), function(i) sample[,i])  \n        }\n      }\n      \n      index <- vapply(tagMatrixList, function(x) length(x)>0, FUN.VALUE = logical(1))\n      tagMatrixList <- tagMatrixList[index]\n      windows <- windows[index]\n      \n      ## count the amount before filtering\n      pre_amount <- 0\n      for(i in 1:length(tagMatrixList)){\n        pre_amount <- pre_amount+length(tagMatrixList[[i]])\n      }\n      \n      for (i in 1:length(tagMatrixList)) {\n        \n        index <- vapply(tagMatrixList[[i]], function(y) length(y)>min_body_length,FUN.VALUE = logical(1))\n        tagMatrixList[[i]] <- tagMatrixList[[i]][index]\n        windows[[i]] <- windows[[i]][index]\n      }\n      \n      ## count the amount after filtering\n      amount <- 0\n      for(i in 1:length(tagMatrixList)){\n        amount <- amount+length(tagMatrixList[[i]])\n      
}\n      \n      cat(\">> \",pre_amount-amount,\" peaks(\",100*((pre_amount-amount)/pre_amount),\n          \"%), having lengths smaller than \",filter_length,\"bp, are filtered... \",\n          format(Sys.time(), \"%Y-%m-%d %X\"),\"\\n\",sep = \"\")\n      \n      upstreamnbin <- floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer)))\n      bodynbin <- floor(nbin*(1/(1+upstreamPer+downstreamPer)))\n      downstreamnbin <- floor(nbin*(downstreamPer/(1+upstreamPer+downstreamPer)))\n      \n      tagMatrix <- list()\n      \n      for (i in 1:length(tagMatrixList)) {\n        \n        tagMatrix[[i]] <- matrix(nrow = length(tagMatrixList[[i]]),ncol = nbin)\n        \n        ## count the upstream \n        for (j in 1:length(tagMatrixList[[i]])) {\n          \n          seq <- floor(upstream/upstreamnbin)\n          cursor <- 1\n          \n          for (k in 1:(upstreamnbin-1)) {\n            \n            read <- 0\n            \n            for (z in cursor:(cursor+seq-1)) {\n              read <- read + tagMatrixList[[i]][[j]][z]\n            }\n            \n            tagMatrix[[i]][j,k] <- read/seq\n            \n            cursor <- cursor+seq\n          }\n          \n          \n          read <- 0\n          for (z in cursor:upstream) {\n            read <- read+tagMatrixList[[i]][[j]][z]\n          }\n          \n          tagMatrix[[i]][j,upstreamnbin] <- read/(upstream-cursor)\n          \n        }\n        \n        ## count genebody\n        for (j in 1:length(tagMatrixList[[i]])) {\n          \n          seq <- floor((length(tagMatrixList[[i]][[j]])-upstream-downstream)/bodynbin)\n          cursor <- upstream+1\n          \n          for (k in (upstreamnbin+1):(upstreamnbin+bodynbin-1)) {\n            \n            read <- 0\n            \n            for (z in cursor:(cursor+seq-1)) {\n              read <- read + tagMatrixList[[i]][[j]][z]\n            }\n            \n            tagMatrix[[i]][j,k] <- read/seq\n            \n            
cursor <- cursor+seq\n          }\n          \n          read <- 0\n          for (z in cursor:(length(tagMatrixList[[i]][[j]])-downstream)) {\n            read <- read+tagMatrixList[[i]][[j]][z]\n          }\n          \n          tagMatrix[[i]][j,bodynbin+upstreamnbin] <- read/(length(tagMatrixList[[i]][[j]])-downstream-cursor)\n        }\n        \n        ## count downstream\n        for (j in 1:length(tagMatrixList[[i]])) {\n          \n          seq <- floor(downstream/downstreamnbin)\n          cursor <- length(tagMatrixList[[i]][[j]])-downstream+1\n          \n          for (k in (upstreamnbin+bodynbin+1):(nbin-1)) {\n            \n            read <- 0\n            \n            for (z in cursor:(cursor+seq-1)) {\n              read <- read + tagMatrixList[[i]][[j]][z]\n            }\n            \n            tagMatrix[[i]][j,k] <- read/seq\n            \n            cursor <- cursor+seq\n          }\n          \n          read <- 0\n          for (z in cursor:length(tagMatrixList[[i]][[j]])) {\n            read <- read+tagMatrixList[[i]][[j]][z]\n          }\n          \n          tagMatrix[[i]][j,nbin] <- read/(length(tagMatrixList[[i]][[j]])-cursor+1)\n        }\n        \n        if(!ignore_strand){\n          minus.idx <- which(as.character(mcols(windows[[i]])[[\"strand\"]]) == \"-\")\n          tagMatrix[[i]][minus.idx,] <- tagMatrix[[i]][minus.idx, ncol(tagMatrix[[i]]):1]\n        }\n        \n      }\n      \n    }else{\n      \n      for (i in 1:length(tagMatrixList)) {\n        if (length(class(tagMatrixList[[i]])) != 1) {\n          sample <- tagMatrixList[[i]]\n          tagMatrixList[[i]] <- lapply(seq_len(ncol(sample)), function(i) sample[,i])  \n        }\n      }\n      \n      index <- vapply(tagMatrixList, function(x) length(x)>0, FUN.VALUE = logical(1))\n      tagMatrixList <- tagMatrixList[index]\n      windows <- windows[index]\n      \n      ## count the amount before filtering\n      pre_amount <- 0\n      for(i in 
1:length(tagMatrixList)){\n        pre_amount <- pre_amount+length(tagMatrixList[[i]])\n      }\n      \n      for (i in 1:length(tagMatrixList)) {\n        \n        index <- vapply(tagMatrixList[[i]], function(y) length(y)>min_body_length,FUN.VALUE = logical(1))\n        tagMatrixList[[i]] <- tagMatrixList[[i]][index]\n        windows[[i]] <- windows[[i]][index]\n      }\n      \n      ## count the amount after filtering\n      amount <- 0\n      for(i in 1:length(tagMatrixList)){\n        amount <- amount+length(tagMatrixList[[i]])\n      }\n      \n      cat(\">> \",pre_amount-amount,\" peaks(\",100*((pre_amount-amount)/pre_amount),\n          \"%), having lengths smaller than \",filter_length,\"bp, are filtered... \",\n          format(Sys.time(), \"%Y-%m-%d %X\"),\"\\n\",sep = \"\")\n  \n      tagMatrix <- list()\n      \n      for (i in 1:length(tagMatrixList)) {\n        \n        tagMatrix[[i]] <- matrix(nrow = length(tagMatrixList[[i]]),ncol = nbin)\n        \n        for (j in 1:length(tagMatrixList[[i]])) {\n          \n          seq <- floor(length(tagMatrixList[[i]][[j]])/nbin)\n          cursor <- 1\n          \n          for (k in 1:(nbin-1)) {\n            \n            read <- 0\n            \n            for (z in cursor:(cursor+seq-1)) {\n              read <- read + tagMatrixList[[i]][[j]][z]\n            }\n            \n            tagMatrix[[i]][j,k] <- read/seq\n            \n            cursor <- cursor+seq\n          }\n          \n          read <- 0\n          for (z in cursor:length(tagMatrixList[[i]][[j]])) {\n            read <- read+tagMatrixList[[i]][[j]][z]\n          }\n          \n          tagMatrix[[i]][j,nbin] <- read/(length(tagMatrixList[[i]][[j]])-cursor+1)\n          \n        }\n        \n        if(!ignore_strand){\n          minus.idx <- which(as.character(mcols(windows[[i]])[[\"strand\"]]) == \"-\")\n          tagMatrix[[i]][minus.idx,] <- tagMatrix[[i]][minus.idx, ncol(tagMatrix[[i]]):1]\n        }\n      }\n      \n 
   }\n    \n  }\n  \n  ## combine the results\n  tagMatrix <- do.call(\"rbind\",tagMatrix)\n  \n  return(tagMatrix)\n}\n\n\n##' Nested function for getTagMatrix() to deal with multiple windows\n##' \n##' This is an internal function.\n##' @title getTagMatrix2\n##'\n##' @param peak peak file or GRanges object\n##' @param upstream the distance of upstream extension\n##' @param downstream the distance of downstream extension\n##' @param windows_name the names of windows\n##' @param type one of \"start_site\", \"end_site\", \"body\"\n##' @param by one of 'gene', 'transcript', 'exon', 'intron', '3UTR' , '5UTR', or specified by users\n##' @param TxDb TxDb or self-made granges object, served as txdb\n##' @param weightCol column name of weight, default is NULL\n##' @param nbin the number of bins \n##' @param verbose print message or not\n##' @param ignore_strand ignore the strand information or not\n##' @return tagMatrix\n##' @importFrom ggplot2 rel\ngetTagMatrix2 <- function(peak, \n                          upstream,\n                          downstream,\n                          windows_name,\n                          type,\n                          by,\n                          TxDb=NULL,\n                          weightCol = NULL, \n                          nbin = NULL,\n                          verbose = TRUE,\n                          ignore_strand= FALSE){\n  \n  names(TxDb) <- by\n  \n  windows <- lapply(as.list(by), function(x){\n    \n    if(x %in% c('gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR')){\n      \n      result <- getBioRegion(TxDb=TxDb[[x]],\n                             upstream=upstream,\n                             downstream=downstream,\n                             by=x,\n                             type=type)\n    }else{\n      \n      result <- makeBioRegionFromGranges(gr=TxDb[[x]],\n                                         by=x,\n                                         type=type,\n                            
             upstream=upstream,\n                                         downstream=downstream)\n      \n    }\n    \n    return(result)\n    \n  })\n  \n  names(windows) <- windows_name\n  \n  # check the upstream and downstream parameter for body\n  if(type == \"body\"){\n    if(missingArg(upstream)){\n      upstream <- NULL\n    }\n    \n    if(missingArg(downstream)){\n      downstream <- NULL\n    }\n    \n  }else{\n    upstream <- attr(windows[[1]], 'upstream')\n    downstream <- attr(windows[[1]], 'downstream')\n  }\n  \n  ## check upstream and downstream parameter\n  check_upstream_and_downstream(upstream = upstream, downstream = downstream)\n  \n  if(type != 'body'){\n    if(inherits(upstream, 'rel') || is.null(upstream)){\n      stop(\"upstream and downstream for site region should be actual number...\")\n    }\n  }\n  \n  ## check nbin parameters\n  if(!is.null(nbin) && !is.numeric(nbin)){\n    stop('nbin should be NULL or numeric...')\n  }\n  \n  if(type == 'body' && is.null(nbin)){\n    stop('plotting body region should set the nbin parameter...')\n  }\n  \n  ## check nbin parameter\n  if(!is.null(nbin)){\n    cat(\">> binning method is used...\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\",sep = \"\")\n    \n    is.binning <- TRUE\n  }else{\n    \n    is.binning <- FALSE\n  }\n  \n  if (verbose) {\n    cat(\">> preparing \",type,\" regions\",\" by \",paste(by,collapse = \" \"),\"... \",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\",sep = \"\")\n  }\n  \n  \n  if(is.binning){\n    \n    if (verbose) {\n      cat(\">> preparing tag matrix by binning... 
\",\n          format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n    }\n    \n    tagMatrix <- getTagMatrix2.binning.internal(peak = peak, \n                                                weightCol = weightCol, \n                                                windows = windows, \n                                                windows_name=windows_name,\n                                                nbin = nbin,\n                                                upstream = upstream,\n                                                downstream = downstream,\n                                                ignore_strand = ignore_strand)\n  }else{\n    \n    if (verbose) {\n      cat(\">> preparing tag matrix... \",\n          format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n    }\n    \n    tagMatrix <- getTagMatrix2.internal(peak=peak, \n                                        weightCol=weightCol,\n                                        windows=windows,\n                                        windows_name=windows_name,\n                                        ignore_strand=ignore_strand)\n  }\n  \n  names(tagMatrix) <- windows_name\n  \n  ## assign attribute \n  tagMatrix <- lapply(tagMatrix, function(x){\n    attr(x, 'upstream') = upstream\n    attr(x, 'downstream') = downstream\n    attr(x, 'type') = attr(windows[[1]], 'type')\n    attr(x, 'label') = attr(windows[[1]], 'label')\n    attr(x, \"is.binning\") <- is.binning\n    return(x)\n  })\n  \n  return(tagMatrix)\n  \n}\n\n##' @title getTagMatrix2.internal\n##'\n##' @param peak peak peak file or GRanges object\n##' @param windows a collection of region\n##' @param windows_name the name of windows\n##' @param weightCol column name of weight, default is NULL\n##' @param ignore_strand ignore the strand information or not\ngetTagMatrix2.internal <- function(peak, \n                                   weightCol=NULL,\n                                   windows,\n                                   windows_name,\n         
                          ignore_strand= FALSE) {\n  \n  mt_list <- lapply(windows_name, function(x){\n    \n    windows_tmp <- windows[[x]]\n    \n    mt <- getTagMatrix.internal(peak=peak, \n                                weightCol=weightCol, \n                                windows=windows_tmp, \n                                ignore_strand=ignore_strand)\n    \n    return(mt)\n  })\n  \n  return(mt_list)\n}\n\n##' internal function\n##' \n##' @param peak peak file or GRanges object\n##' @param upstream the distance of upstream extension\n##' @param downstream the distance of downstream extension\n##' @param windows a collection of region\n##' @param windows_name the name of windows\n##' @param weightCol column name of weight, default is NULL\n##' @param nbin the number of bins \n##' @param ignore_strand ignore the strand information or not\ngetTagMatrix2.binning.internal <- function(peak, \n                                           weightCol = NULL, \n                                           windows, \n                                           windows_name,\n                                           nbin = 800,\n                                           upstream = NULL,\n                                           downstream = NULL,\n                                           ignore_strand = FALSE){\n  \n  mt_list <- lapply(windows_name, function(x){\n    \n    windows_tmp <- windows[[x]]\n    \n    mt <- getTagMatrix.binning.internal(peak = peak, \n                                        weightCol = weightCol, \n                                        windows = windows_tmp, \n                                        nbin = nbin,\n                                        upstream = upstream,\n                                        downstream = downstream,\n                                        ignore_strand = ignore_strand)\n    \n    return(mt)\n  })\n  \n  return(mt_list)\n  \n}"
  },
  {
    "path": "R/upsetplot.R",
    "content": "## @importFrom UpSetR upset\n## @importFrom grid viewport\n## @importFrom grid pushViewport\n## @importFrom grid popViewport\n## @importFrom gridBase gridPLT\n## @importFrom graphics plot.new\n##' @importFrom ggplot2 coord_fixed\n##' @importFrom ggplot2 ggplot\n##' @importFrom ggplot2 aes_\n##' @importFrom ggplot2 geom_bar\n##' @importFrom ggplot2 xlab\n##' @importFrom ggplot2 ylab\n##' @importFrom ggplot2 theme_minimal\n##' @author Guangchuang Yu\nupsetplot.csAnno <- function(x, order_by = \"freq\", vennpie=FALSE, vp = list(x=.6, y=.7, width=.8, height=.8)) {\n    y <- x@detailGenomicAnnotation\n    nn <- names(y)\n    y <- as.matrix(y)\n\n    res <- tibble::tibble(anno = lapply(1:nrow(y), function(i) nn[y[i,]]))\n    g <- ggplot(res, aes_(x = ~anno)) + geom_bar() +\n        xlab(NULL) + ylab(NULL) + theme_minimal() +\n        ggupset::scale_x_upset(n_intersections = 20, order_by = order_by) \n\n    if (!vennpie) return(g)\n\n    f <- function() vennpie(x, cex = .9)\n\n    p <- ggplotify::as.ggplot(f) + coord_fixed() \n\n    ggplotify::as.ggplot(g) +\n        ggimage::geom_subview(subview = p, x = vp$x, y = vp$y, width = vp$width, height = vp$height)\n\n\n    ## y[y] <- 1\n    ## y <- as.data.frame(y)\n    ## ## cn <- colnames(y)\n    ## ## cn[cn == \"fiveUTR\"] <- \"5 UTR\"\n    ## ## cn[cn == \"threeUTR\"] <- \"3 UTR\"\n    ## ## colnames(y) <- cn\n\n    ## if (is.null(sets)) {\n    ##     sets <- c(\"distal_intergenic\", \"downstream\",\n    ##               \"threeUTR\", \"fiveUTR\", \"Intron\",\n    ##               \"Exon\", \"Promoter\")\n    ##     if (vennpie && is.null(sets.bar.color)) {\n    ##         sets.bar.color <- c(\"#d95f0e\", \"#fee0d2\", \"#98D277\",\n    ##                             \"#6F9E4C\", \"#fc9272\", \"#9ecae1\", \"#ffeda0\")\n    ##     }\n    ## }\n\n    ## if (is.null(sets.bar.color)) {\n    ##     sets.bar.color <- \"black\"\n    ## }\n\n    ## if (vennpie) {\n    ##     plot.new()\n    ##     # grid.rect(gp = 
gpar(fill=\"white\"))\n    ##     upset(y, sets=sets, sets.bar.color=sets.bar.color,\n    ##           order.by = order.by, ...)\n    ##     pushViewport(vp)\n    ##     ##par(plt=gridPLT(), new=TRUE)\n    ##     vennpie(x)\n    ##     popViewport()\n    ## } else {\n    ##     upset(y, sets=sets,sets.bar.color=sets.bar.color,\n    ##           order.by = order.by, ...)\n    ## }\n}\n"
  },
  {
    "path": "R/utilities.R",
    "content": "#' @title env function for ChIPseeker\n#' @param TxDb txdb object\n#' @param item item name\n#' @param force force to update txdb item in cache or not.\n#' @importFrom yulab.utils get_cache_item\n#' @importFrom yulab.utils update_cache_item\n#' @importFrom yulab.utils rm_cache_item\n#' @importFrom yulab.utils initial_cache_item\n#' @importFrom S4Vectors metadata\n.ChIPseekerEnv <- function(TxDb, item = \"ChIPseekerEnv\", force = FALSE) {\n    \n    # get cache item\n    # it will create a list if there is no a cache item\n    cache_item <- get_cache_item(item)\n\n    # if there is no TXDB cached, write in cache\n    if (is.null(cache_item$TXDB)) {\n        update_cache_item(item = item, list(TXDB = TxDb))\n        cat(\">> Using Genome:\", get_env_genome(),\"...\\n\")\n        return(invisible(NULL))\n    }\n\n    # force to update item\n    if(force){\n        cat(\">> Force to update txdb in cache...\\n\")\n        rm_cache_item(item)           \n        initial_cache_item(item)      \n        update_cache_item(item, list(TXDB = TxDb))  \n        cat(\">> Using Genome:\", get_env_genome(),\"...\\n\")\n    }\n\n    # if exist TXDB\n    TXDB <- cache_item$TXDB\n    m1 <- tryCatch(unlist(metadata(TXDB)), error = function(e) NULL)\n    m2 <- tryCatch(unlist(metadata(TxDb)),  error = function(e) NULL)\n    if (!is.null(m1)) m1 <- m1[!is.na(m1)]\n    if (!is.null(m2)) m2 <- m2[!is.na(m2)]\n\n    txdb_flag <- is.character(all.equal(TXDB, TxDb))\n\n    if (is.null(m1) || is.null(m2) || length(m1) != length(m2) || any(m1 != m2) || txdb_flag) {\n        cat(\">> Update txdb in cache...\\n\")\n        rm_cache_item(item)           \n        initial_cache_item(item)      \n        update_cache_item(item, list(TXDB = TxDb))  \n    }\n\n    cat(\">> Using Genome:\", get_env_genome(),\"...\\n\")\n\n    invisible(NULL)\n\n    # pos <- 1\n    # envir <- as.environment(pos)\n    # if (!exists(\"ChIPseekerEnv\", envir=.GlobalEnv)) {\n    #     
assign(\"ChIPseekerEnv\", new.env(), envir = envir)\n    # }\n\n    # ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n    # if (!exists(\"TXDB\", envir=ChIPseekerEnv, inherits=FALSE)) {\n    #     ## first run\n    #     assign(\"TXDB\", TxDb, envir=ChIPseekerEnv)\n    # } else {\n    #     TXDB <- get(\"TXDB\", envir=ChIPseekerEnv)\n    #     m1 <- tryCatch(unlist(metadata(TXDB)), error=function(e) NULL)\n\n    #     m2 <- unlist(metadata(TxDb))\n\n    #     if (!is.null(m1)) {\n    #         m1 <- m1[!is.na(m1)]\n    #     }\n    #     m2 <- m2[!is.na(m2)]\n\n    #     if ( is.null(m1) || length(m1) != length(m2) || any(m1 != m2) ) {\n    #         rm(ChIPseekerEnv)\n    #         assign(\"ChIPseekerEnv\", new.env(), envir = envir)\n    #         ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n    #         assign(\"TXDB\", TxDb, envir=ChIPseekerEnv)\n    #     }\n    # }\n}\n\n\n##' @importFrom GenomicFeatures exonsBy\n##' @importFrom yulab.utils get_cache_element\n##' @importFrom yulab.utils update_cache_item\nget_exonList <- function(item = \"ChIPseekerEnv\") {\n    # TxDb <- get(\"TXDB\", envir=ChIPseekerEnv)\n    TxDb <- get_cache_element(item = item, elements = \"TXDB\")\n\n    exonList <- get_cache_element(item = item, elements = \"exonList\")\n\n    if(is.null(exonList)){\n        exonList <- exonsBy(TxDb)\n        update_cache_item(item = item, list(\"exonList\" = exonList))\n    }\n\n    # if ( exists(\"exonList\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n    #     exonList <- get(\"exonList\", envir=ChIPseekerEnv)\n    # } else {\n    #     exonList <- exonsBy(TxDb)\n    #     assign(\"exonList\", exonList, envir=ChIPseekerEnv)\n    # }\n    return(exonList)\n}\n\n##' @importFrom GenomicFeatures intronsByTranscript\n##' @importFrom yulab.utils get_cache_element\n##' @importFrom yulab.utils update_cache_item\nget_intronList <- function(item = \"ChIPseekerEnv\") {\n\n    # TxDb <- get(\"TXDB\", envir=ChIPseekerEnv)\n    TxDb <- 
get_cache_element(item = item, elements = \"TXDB\")\n\n    intronList <- get_cache_element(item = item, elements = \"intronList\")\n\n    if(is.null(intronList)){\n        intronList <- intronsByTranscript(TxDb)\n        update_cache_item(item = item, list(\"intronList\" = intronList))\n    }\n\n    # if ( exists(\"intronList\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n    #     intronList <- get(\"intronList\", envir=ChIPseekerEnv)\n    # } else {\n    #     intronList <- intronsByTranscript(TxDb)\n    #     assign(\"intronList\", intronList, envir=ChIPseekerEnv)\n    # }\n    return(intronList)\n}\n\n\ngetCols <- function(n) {\n    col <- c(\"#8dd3c7\", \"#ffffb3\", \"#bebada\",\n             \"#fb8072\", \"#80b1d3\", \"#fdb462\",\n             \"#b3de69\", \"#fccde5\", \"#d9d9d9\",\n             \"#bc80bd\", \"#ccebc5\", \"#ffed6f\")\n\n    col2 <- c(\"#1f78b4\", \"#ffff33\", \"#c2a5cf\",\n             \"#ff7f00\", \"#810f7c\", \"#a6cee3\",\n             \"#006d2c\", \"#4d4d4d\", \"#8c510a\",\n             \"#d73027\", \"#78c679\", \"#7f0000\",\n             \"#41b6c4\", \"#e7298a\", \"#54278f\")\n\n    col3 <- c(\"#a6cee3\", \"#1f78b4\", \"#b2df8a\",\n              \"#33a02c\", \"#fb9a99\", \"#e31a1c\",\n              \"#fdbf6f\", \"#ff7f00\", \"#cab2d6\",\n              \"#6a3d9a\", \"#ffff99\", \"#b15928\")\n\n    ## colorRampPalette(brewer.pal(12, \"Set3\"))(n)\n    col3[1:n]\n}\n\ngetPalette <- function(n){\n  \n  palette <- c(\"RdBu\", \"RdYlGn\", \"Spectral\",\n               \"RdYlBu\", \"PiYG\", \"PRGn\",\n               \"PuOr\", \"BrBG\", \"RdGy\")\n  \n  palette[1:n]\n  \n}\n\ngetSgn <- function(data, idx){\n    d <- data[idx, ]\n    ss <- colSums(d)\n    ss <- ss / sum(ss)\n    return(ss)\n}\nparseBootCiPerc <- function(bootCiPerc){\n    bootCiPerc <- bootCiPerc$percent\n    tmp <- length(bootCiPerc)\n    ciLo <- bootCiPerc[tmp - 1]\n    ciUp <- bootCiPerc[tmp]\n    return(c(ciLo, ciUp))\n}\n\n## estimate CI using bootstraping\n##' @importFrom boot 
boot\n##' @importFrom boot boot.ci\n##' @importFrom parallel detectCores\ngetTagCiMatrix <- function(tagMatrix, conf = 0.95, resample=500, ncpus=detectCores()-1){\n    RESAMPLE_TIME <- resample\n    trackLen <- ncol(tagMatrix)\n    if (Sys.info()[1] == \"Windows\") {\n        tagMxBoot <- boot(data = tagMatrix, statistic = getSgn, R = RESAMPLE_TIME)\n    } else {\n        tagMxBoot <- boot(data = tagMatrix, statistic = getSgn, R = RESAMPLE_TIME,\n                          parallel = \"multicore\", ncpus = ncpus)\n    }\n    cat(\">> Running bootstrapping for tag matrix...\\t\\t\",\n        format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n    tagMxBootCi <- sapply(seq_len(trackLen), function(i) {\n                        bootCiToken <- boot.ci(tagMxBoot, type = \"perc\", index = i)\n                        ## parse boot.ci results\n                        return(parseBootCiPerc(bootCiToken))\n                        }\n                    )\n    row.names(tagMxBootCi) <- c(\"Lower\", \"Upper\")\n    return(tagMxBootCi)\n}\n\ngetTagCount <- function(tagMatrix, xlim, conf, ...) 
{\n    ss <- colSums(tagMatrix)\n    ss <- ss/sum(ss)\n    ## plot(1:length(ss), ss, type=\"l\", xlab=xlab, ylab=ylab)\n    pos <- value <- NULL\n    dd <- data.frame(pos=c(xlim[1]:xlim[2]), value=ss)\n    if (!(missingArg(conf) || is.na(conf))){\n        tagCiMx <- getTagCiMatrix(tagMatrix, conf = conf, ...)\n        dd$Lower <- tagCiMx[\"Lower\", ]\n        dd$Upper <- tagCiMx[\"Upper\", ]\n    }\n    return(dd)\n}\n\n\nTXID2EG <- function(txid, geneIdOnly=FALSE) {\n    txid <- as.character(txid)\n    if (geneIdOnly == TRUE) {\n        res <- TXID2EGID(txid)\n    } else {\n        res <- TXID2TXEG(txid)\n    }\n    return(res)\n}\n\n##' @importFrom GenomicFeatures transcripts\n##' @importFrom yulab.utils get_cache_element\n##' @importFrom yulab.utils update_cache_item\nTXID2TXEG <- function(txid) {\n    # ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n\n    txid2geneid <- get_cache_element(item = ChIPseekerCache, elements = \"txid2geneid\")\n\n    if(is.null(txid2geneid)){\n        txdb <- get_cache_element(item = ChIPseekerCache, elements = \"TXDB\")\n        txidinfo <- transcripts(txdb, columns=c(\"tx_id\", \"tx_name\", \"gene_id\"))\n        idx <- which(sapply(txidinfo$gene_id, length) == 0)\n        txidinfo[idx,]$gene_id <- txidinfo[idx,]$tx_name\n        txid2geneid <- paste(mcols(txidinfo)[[\"tx_name\"]],\n                             mcols(txidinfo)[[\"gene_id\"]],\n                             sep=\"/\")\n        txid2geneid <- sub(\"/NA\", \"\", txid2geneid)\n\n        names(txid2geneid) <- mcols(txidinfo)[[\"tx_id\"]]\n        update_cache_item(item = ChIPseekerCache, list(\"txid2geneid\" = txid2geneid))\n    }\n\n    # if (exists(\"txid2geneid\", envir=ChIPseekerEnv, inherits=FALSE)) {\n    #     txid2geneid <- get(\"txid2geneid\", envir=ChIPseekerEnv)\n    # } else {\n    #     txdb <- get(\"TXDB\", envir=ChIPseekerEnv)\n    #     txidinfo <- transcripts(txdb, columns=c(\"tx_id\", \"tx_name\", \"gene_id\"))\n    #     idx <- 
which(sapply(txidinfo$gene_id, length) == 0)\n    #     txidinfo[idx,]$gene_id <- txidinfo[idx,]$tx_name\n    #     txid2geneid <- paste(mcols(txidinfo)[[\"tx_name\"]],\n    #                          mcols(txidinfo)[[\"gene_id\"]],\n    #                          sep=\"/\")\n    #     txid2geneid <- sub(\"/NA\", \"\", txid2geneid)\n\n    #     names(txid2geneid) <- mcols(txidinfo)[[\"tx_id\"]]\n    #     assign(\"txid2geneid\", txid2geneid, envir=ChIPseekerEnv)\n    # }\n    return(as.character(txid2geneid[txid]))\n}\n\n##' @importFrom yulab.utils get_cache_element\n##' @importFrom yulab.utils update_cache_item\nTXID2EGID <- function(txid) {\n    # ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n\n    txid2geneid <- get_cache_element(item = ChIPseekerCache, elements = \"txid2eg\")\n\n    if(is.null(txid2geneid)){\n        txdb <- get_cache_element(item = ChIPseekerCache, elements = \"TXDB\")\n        txidinfo <- transcripts(txdb, columns=c(\"tx_id\", \"tx_name\", \"gene_id\"))\n        idx <- which(sapply(txidinfo$gene_id, length) == 0)\n        txidinfo[idx,]$gene_id <- txidinfo[idx,]$tx_name\n        txid2geneid <- as.character(mcols(txidinfo)[[\"gene_id\"]])\n\n        names(txid2geneid) <- mcols(txidinfo)[[\"tx_id\"]]\n        update_cache_item(item = ChIPseekerCache, list(\"txid2eg\" = txid2geneid))\n    }\n\n    # if (exists(\"txid2eg\", envir=ChIPseekerEnv, inherits=FALSE)) {\n    #     txid2geneid <- get(\"txid2eg\", envir=ChIPseekerEnv)\n    # } else {\n    #     txdb <- get(\"TXDB\", envir=ChIPseekerEnv)\n    #     txidinfo <- transcripts(txdb, columns=c(\"tx_id\", \"tx_name\", \"gene_id\"))\n    #     idx <- which(sapply(txidinfo$gene_id, length) == 0)\n    #     txidinfo[idx,]$gene_id <- txidinfo[idx,]$tx_name\n    #     txid2geneid <- as.character(mcols(txidinfo)[[\"gene_id\"]])\n\n    #     names(txid2geneid) <- mcols(txidinfo)[[\"tx_id\"]]\n    #     assign(\"txid2eg\", txid2geneid, envir=ChIPseekerEnv)\n    # }\n    
return(as.character(txid2geneid[txid]))\n}\n\n## according to: https://support.bioconductor.org/p/70432/#70545\n## contributed by Hervé Pagès\ngetFirstHitIndex <- function(x) {\n    ## sapply(unique(x), function(i) which(x == i)[1])\n    which(!duplicated(x))\n}\n\n##' calculate the overlap matrix, which is useful for vennplot\n##'\n##'\n##' @title overlap\n##' @param Sets a list of objects\n##' @return data.frame\n##' @importFrom gtools permutations\n##' @export\n##' @author G Yu\noverlap <- function(Sets) {\n    ## this function is very generic.\n    ## it call the getIntersectLength function to calculate\n    ## the number of the intersection.\n    ## if it fail, take a look at the object type were supported by getIntersectLength function.\n\n    nn <- names(Sets)\n    w <- t(apply(permutations(2,length(Sets),0:1, repeats.allowed=TRUE), 1 , rev))\n    rs <- rowSums(w)\n    wd <- as.data.frame(w)\n    wd$n <- NA\n    for (i in length(nn):0) {\n        idx <- which(rs == i)\n        if (i == length(nn)) {\n            len <- getIntersectLength(Sets, as.logical(w[idx,]))\n            wd$n[idx] <- len\n        } else if (i == 0) {\n            wd$n[idx] <- 0\n        } else {\n            for (ii in idx) {\n                ##print(ii)\n                len <- getIntersectLength(Sets, as.logical(w[ii,]))\n                ww = w[ii,]\n                jj <- which(ww == 0)\n                pp <- permutations(2, length(jj), 0:1, repeats.allowed=TRUE)\n\n                for (aa in 2:nrow(pp)) {\n                    ## 1st row is all 0, abondoned\n                    xx <- jj[as.logical(pp[aa,])]\n                    ww[xx] =ww[xx] +1\n                    bb <-  t(apply(w, 1, function(i) i == ww))\n                    wd$n[rowSums(bb) == length(ww) ]\n                         ww <- w[ii,]\n                    len <- len - wd$n[rowSums(bb) == length(ww) ]\n                    ww <- w[ii,]\n                }\n                wd$n[ii] <- len\n            }\n        }\n    }\n  
  colnames(wd) = c(names(Sets), \"Weight\")\n    return(wd)\n}\n\n\ngetIntersectLength <- function(Sets, idx) {\n    ## only use intersect and length methods in this function\n    ## works fine with GRanges object\n    ## and easy to extend to other objects.\n    ss= Sets[idx]\n    ol <- ss[[1]]\n\n    if (sum(idx) == 1) {\n        return(length(ol))\n    }\n\n    for (j in 2:length(ss)) {\n        ol <-  intersect(ol, ss[[j]])\n    }\n    return(length(ol))\n}\n\nloadPeak <- function(peak, verbose=FALSE) {\n    if (is(peak, \"GRanges\")) {\n        peak.gr <- peak\n    } else if (file.exists(peak)) {\n        if (verbose)\n            cat(\">> loading peak file...\\t\\t\\t\\t\",\n                format(Sys.time(), \"%Y-%m-%d %X\"), \"\\n\")\n        peak.gr <- readPeakFile(peak, as=\"GRanges\")\n    } else {\n        stop(\"peak should be GRanges object or a peak file...\")\n    }\n    return(peak.gr)\n}\n\n##' @importFrom TxDb.Hsapiens.UCSC.hg19.knownGene TxDb.Hsapiens.UCSC.hg19.knownGene\nloadTxDb <- function(TxDb) {\n    if ( is.null(TxDb) ) {\n        warning(\">> TxDb is not specified, use 'TxDb.Hsapiens.UCSC.hg19.knownGene' by default...\")\n        TxDb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n    }\n    return(TxDb)\n}\n\n##' @importFrom AnnotationDbi get\n##' @importFrom GenomicFeatures genes\n##' @importFrom GenomicFeatures transcriptsBy\n##' @importFrom yulab.utils get_cache_element\n##' @importFrom yulab.utils update_cache_item\ngetGene <- function(TxDb, by=\"gene\") {\n    .ChIPseekerEnv(TxDb, item = ChIPseekerCache)\n    # ChIPseekerEnv <- get(\"ChIPseekerEnv\", envir=.GlobalEnv)\n\n    by <- match.arg(by, c(\"gene\", \"transcript\"))\n\n    if (by == \"gene\") {\n\n        features <- get_cache_element(item = ChIPseekerCache, elements = \"Genes\")\n\n        if(is.null(features)){\n            features <- suppressMessages(genes(TxDb))\n            update_cache_item(item = ChIPseekerCache, list(\"Genes\" = features))\n        }\n\n        # if ( 
exists(\"Genes\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n        #     features <- get(\"Genes\", envir=ChIPseekerEnv)\n        # } else {\n        #     features <- suppressMessages(genes(TxDb))\n        #     assign(\"Genes\", features, envir=ChIPseekerEnv)\n        # }\n    } else {\n\n        features <- get_cache_element(item = ChIPseekerCache, elements = \"Transcripts\")\n\n        if(is.null(features)){\n            features <- transcriptsBy(TxDb)\n            features <- unlist(features)\n            update_cache_item(item = ChIPseekerCache, list(\"Transcripts\" = features))\n        }\n\n        # if ( exists(\"Transcripts\", envir=ChIPseekerEnv, inherits=FALSE) ) {\n        #     features <- get(\"Transcripts\", envir=ChIPseekerEnv)\n        # } else {\n        #     features <- transcriptsBy(TxDb)\n        #     features <- unlist(features)\n        #     assign(\"Transcripts\", features, envir=ChIPseekerEnv)\n        # }\n    }\n\n    return(features)\n}\n\n\n##' get filenames of sample files\n##'\n##'\n##' @title getSampleFiles\n##' @return list of file names\n##' @export\n##' @author G Yu\ngetSampleFiles <- function() {\n    dir <- system.file(\"extdata\", \"GEO_sample_data\", package=\"ChIPseeker\")\n    files <- list.files(dir)\n    ## protein <- sub(\"GSM\\\\d+_\", \"\", files)\n    ## protein <- sub(\"_.+\", \"\", protein)\n    protein <- gsub(pattern='GSM\\\\d+_(\\\\w+_\\\\w+)_.*', replacement='\\\\1',files)\n    protein <- sub(\"_Chip.+\", \"\", protein)\n    res <- paste(dir, files, sep=\"/\")\n    res <- as.list(res)\n    names(res) <- protein\n    return(res)\n}\n## @importFrom RCurl getURL\n## getDirListing <- function (url) {\n##     ## from GEOquery\n##     print(url)\n##     a <- getURL(url)\n##     b <- textConnection(a)\n##     d <- read.table(b, header = FALSE)\n##     close(b)\n##     return(d)\n## }\n\n\nis.dir <- function(dir) {\n    if (file.exists(dir) == FALSE)\n        return(FALSE)\n    
return(file.info(dir)$isdir)\n}\n\n\nparse_targetPeak_Param <- function(targetPeak) {\n    if (length(targetPeak) == 1) {\n        if (is.dir(targetPeak)) {\n            files <- list.files(path=targetPeak)\n            idx <- unlist(sapply(c(\"bed\", \"bedGraph\", \"Peak\"), grep, x=files))\n            idx <- sort(unique(idx))\n            files <- files[idx]\n            targetPeak <- sub(\"/$\", \"\", targetPeak)\n            res <- paste(targetPeak, files, sep=\"/\")\n        } else {\n            if (!file.exists(targetPeak)) {\n                stop(\"bed file is not exists...\")\n            } else {\n                res <- targetPeak\n            }\n        }\n    } else {\n        if (is.dir(targetPeak[1])) {\n            stop(\"targetPeak should be a vector of bed file names or a folder containing bed files...\")\n        } else {\n            res <- targetPeak[file.exists(targetPeak)]\n            if (length(res) == 0) {\n                stop(\"targetPeak file not exists...\")\n            }\n        }\n    }\n    return(res)\n}\n\n\nIDType <- function(TxDb) {\n    ##\n    ## IDType <- metadata(TxDb)[8,2]\n    ##\n    ## update: 2015-10-27\n    ## now IDType change from metadata(TxDb)[8,2] to metadata(TxDb)[9,2]\n    ## it may change in future too\n    ##\n    ## it's safe to extract via grep\n\n    md <- metadata(TxDb)\n    md[grep(\"Type of Gene ID\", md[,1]), 2]\n}\n\nlist_to_dataframe <- function(dataList) {\n    if (is.null(names(dataList)))\n        return(do.call('rbind', dataList))\n\n    cn <- lapply(dataList, colnames) %>% unlist %>% unique\n    cn <- c('.id', cn)\n    dataList2 <- lapply(seq_along(dataList), function(i) {\n        data = dataList[[i]]\n        data$.id = names(dataList)[i]\n        idx <- ! 
cn %in% colnames(data)\n        if (sum(idx) > 0) {\n            for (i in cn[idx]) {\n                data[, i] <- NA\n            }\n        }\n        return(data[,cn])\n    })\n    res <- do.call('rbind', dataList2)\n    res$.id <- factor(res$.id, levels=rev(names(dataList)))\n    return(res)\n}\n\n##' @importFrom GenomicRanges GRangesList\n##' @export\nGenomicRanges::GRangesList\n\n## . function was from plyr package\n##' capture name of variable\n##'\n##' @rdname dotFun\n##' @export\n##' @title .\n##' @param ... expression\n##' @param .env environment\n##' @return expression\n##' @examples\n##' x <- 1\n##' eval(.(x)[[1]])\n. <- function (..., .env = parent.frame()) {\n    structure(as.list(match.call()[-1]), env = .env, class = \"quoted\")\n}\n\n\n##' check upstream and downstream parameter\n##' \n##' \n##' check_upstream_and_downstream\n##'\n##' @param upstream upstream\n##' @param downstream downstream\n##' @importFrom ggplot2 rel\ncheck_upstream_and_downstream <- function(upstream, downstream){\n    \n    ## upstream and downstream should be the same type\n    if(class(upstream) != class(downstream)){\n        stop(\"the type of upstream and downstream should be the same...\")\n    }\n    \n    ## downstream and upstream parameter should be numeric or NULL\n    if(!is.numeric(upstream) && !is.null(upstream)){\n        stop(\"upstream and downstream parameter should be numeric or NULL...\")\n    }\n    \n    ## the value of rel object should be in (0,1)\n    if(inherits(upstream, 'rel')){\n        if(as.numeric(upstream) < 0 || as.numeric(upstream) >1 ){\n            stop('the value of rel object should be in (0,1)...')\n        }\n    }\n    \n    ## check actual number\n    if(is.numeric(upstream) && !inherits(upstream, 'rel')){\n        if(upstream < 1 | downstream < 1){\n            stop('if upstream or downstream is integer, the value of it should be greater than 1...')\n        }\n    }\n}\n\n\n##' @importFrom ggplot2 rel\n##' \n##' 
@export\nggplot2::rel\n\n\n##' make label for figures\n##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'\n##' @param type one of \"start_site\", \"end_site\", \"body\"\nmake_label <- function(type, by){\n    \n    if(type == 'body'){\n        if(by %in% c('gene', 'transcript', 'exon', 'intron')){\n            label_SS <- paste0(\"T\",\"SS\")\n            label_TS <- paste0(\"T\",\"TS\")\n            label <- c(label_SS,label_TS)\n        }else{\n            label_SS <- paste0(by,\"_SS\")\n            label_TS <- paste0(by,\"_TS\")\n            label <- c(label_SS,label_TS)\n        }\n        \n    }else if(type == \"start_site\"){\n        if(by %in% c('gene', 'transcript', 'exon', 'intron')){\n            label <- paste0(\"T\",\"SS\")\n        }else{\n            label <- paste0(by,\"_SS\") \n        }\n        \n    }else{\n        if(by %in% c('gene', 'transcript', 'exon', 'intron')){\n            label <- paste0(\"T\",\"TS\")\n        }else{\n            label <- paste0(by,\"_TS\")\n        }\n    }\n    \n    return(label)\n}\n\n##' @importFrom yulab.utils get_cache_item\nget_env_genome <- function(){\n\n    current_env <- get_cache_item(item = ChIPseekerCache)\n\n    env_txdb <- current_env$TXDB\n    env_txdb_meta <- S4Vectors::metadata(env_txdb)\n    env_txdb_version <- env_txdb_meta[grep(\"Genome\",env_txdb_meta[,1]),2]\n\n    return(env_txdb_version)\n}"
  },
  {
    "path": "R/vennpie.R",
    "content": "##' @importFrom plotrix floating.pie\nvennpie.csAnno <- function(x, \n                           r = 0.2, \n                           cex = 1.2,\n                           col = NULL) {\n    detailGenomicAnnotation <- x@detailGenomicAnnotation\n\n    distance <- as.data.frame(x)$distanceToTSS\n    total <- nrow(detailGenomicAnnotation)\n    Genic <- sum(detailGenomicAnnotation$genic)\n\n    Intergenic <- total-Genic\n    Distal_Intergenic <- sum(detailGenomicAnnotation$distal_intergenic)\n    Intron <- sum(detailGenomicAnnotation$Intron)\n    Exon <- sum(detailGenomicAnnotation$Exon)\n    Upstream <- sum(detailGenomicAnnotation$Promoter & distance < 0)\n\n    ## fiveUTR <- sum(detailGenomicAnnotation$fiveUTR)\n    ## threeUTR <- sum(detailGenomicAnnotation$threeUTR)\n    Downstream <- sum(detailGenomicAnnotation$downstream)\n\n    ## fiveUTR='#e5f5e0',threeUTR='#a1d99b',\n    cols <- c(NO='white', Genic='#3182bd', Intergenic='#fec44f',\n              Intron='#fc9272', Exon='#9ecae1', Upstream='#ffeda0',\n              Downstream='#fee0d2', Distal_Intergenic='#d95f0e')\n    \n    cols[names(col)] <- col\n\n\n    ##par(mai = c(0,0,0,0))\n    ##layout(matrix(c(1,2), ncol=2), widths=c(0.7,0.3))\n    pie(1, radius=r, init.angle=90, col=\"white\", border=NA, labels='')\n\n    ## https://www.biostars.org/p/326456/\n    ## if count is 0, floating pie will ignore it\n    ## and the color will mismatch with the category\n    ## fixed by adding pseudo-count +1\n    floating.pie(0,0, c(Exon,\n                        Genic-Exon,\n                        Distal_Intergenic,\n                        Downstream,\n                        Intergenic-Distal_Intergenic-Downstream\n                        ) + 1,\n                 radius=4*r,\n                 startpos=pi/2,\n                 col=cols[c(\"Exon\", \"NO\", \"NO\", \"Downstream\", \"NO\")],\n                 border=NA)\n\n    floating.pie(0,0, c(Genic-Intron,\n                        Intron,\n              
          Distal_Intergenic,\n                        Intergenic-Upstream-Distal_Intergenic,\n                        Upstream) +1 ,\n                 radius=3*r,\n                 startpos=pi/2,\n                 col=cols[c(\"NO\", \"Intron\", \"Distal_Intergenic\",\n                     \"NO\", \"Upstream\")],\n                 border=NA)\n\n    floating.pie(0, 0, c(Genic, Intergenic) +1,\n                 radius=2*r,\n                 startpos=pi/2,\n                 col=cols[c(\"Genic\", \"Intergenic\")],\n                 border=NA)\n    ##plot.new()\n    ##legend(center), legend=names(cols)[-1], fill=cols[-1], bty=\"n\")\n    legend(3*r, 3*r, legend=sub(\"_\", \" \", names(cols)[-1]),\n           fill=cols[-1], bty=\"n\", cex=cex)\n}\n"
  },
  {
    "path": "R/vennplot.R",
    "content": "##' plot the overlap of a list of object\n##'\n##'\n##' There are two ways to plot, which users can specify through `by`.\n##' \n##' The first way is to use `gplots` packages, by setting `by = gplots`. This method\n##' is default method. The venn plot produced through this way has no color.\n##' \n##' The second way is to use `ggVennDiagram` packages, by setting `by = ggVennDiagram`. \n##' The venn plot produced through this way has colors which can be defined by users using\n##' ggplot2 grammar e.g.(scale_fill_distiller()). And users can specify any details, like digital number,\n##' text size and showing percentage or not, by inputting `...` extra parameters.\n##' \n##' @title vennplot\n##' @param Sets a list of object, can be vector or GRanges object\n##' @param by one of gplots, ggVennDiagram or Vennerable\n##' @param ... extra parameters using ggVennDiagram. Details see \\link[ggVennDiagram]{ggVennDiagram}\n##' @return venn plot that summarize the overlap of peaks\n##' from different experiments or gene annotation from\n##' different peak files.\n##' @importFrom gplots plot.venn\n## @importFrom ggVennDiagram ggVennDiagram\n## @importFrom Vennerable Venn\n## @importFrom grid grid.newpage\n##' @examples\n##' ## example not run\n##' ## require(TxDb.Hsapiens.UCSC.hg19.knownGene)\n##' ## txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n##' ## peakfiles <- getSampleFiles()\n##' ## peakAnnoList <- lapply(peakfiles, annotatePeak)\n##' ## names(peakAnnoList) <- names(peakfiles)\n##' ## genes= lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)\n##' ## vennplot(genes)\n##' @export\n##' @author G Yu\nvennplot <- function(Sets, by=\"gplots\",...) 
{\n    if (is.null(names(Sets))) {\n        nn <- paste0(\"Set\", seq_along(Sets))\n        warning(\"input is not a named list, set the name automatically to \", paste(nn, collapse = \" \"))\n        names(Sets) <- nn\n        ## stop(\"input object should be a named list...\")\n    }\n\n    overlapDF <- overlap(Sets)\n    if (by == \"Vennerable\") {\n        ## setRepositories(ind=7)\n        ## install.package(\"Vennerable\")\n        ## OR\n        ## install.packages(\"Vennerable\", repos=\"http://R-Forge.R-project.org\")\n        pkg <- \"Vennerable\"\n        require(pkg, character.only=TRUE)\n        Venn <- eval(parse(text=\"Venn\"))\n        v <- Venn(SetNames=names(Sets), Weight=overlapDF$Weight)\n        plotVenn <- eval(parse(text=\"Vennerable:::plotVenn\"))\n        plotVenn(v)\n    } else if (by == \"gplots\") {\n        n <- ncol(overlapDF)\n        colnames(overlapDF)[n] <- \"num\"\n        overlapDF <- overlapDF[, c(n, 1:(n-1))]\n        rownames(overlapDF)=apply(overlapDF, 1, function(i) paste(i[-1], sep=\"\", collapse=\"\"))\n        vennCount <- as.matrix(overlapDF)\n        class(vennCount) <- \"venn\"\n        plot.venn(vennCount)\n    } else if(by == \"ggVennDiagram\"){\n\t    ggVennDiagram::ggVennDiagram(Sets, ...)\n    } else {\n        stop(\"not supported...\")\n    }\n}\n\n##' vennplot for peak files\n##'\n##'\n##' @title vennplot.peakfile\n##' @param files peak files\n##' @param labels labels for peak files\n##' @return figure\n##' @export\n##' @author G Yu\nvennplot.peakfile <- function(files, labels=NULL) {\n    peak.Sets <- lapply(files, readPeakFile)\n    if (is.null(labels)) {\n        ## remove .xls or .bed of the file names as labels\n        labels <- sub(\"\\\\.\\\\w+$\", \"\", files)\n    }\n    names(peak.Sets) <- labels\n    vennplot(peak.Sets)\n}\n\n\n"
  },
  {
    "path": "R/zzz.R",
    "content": "##' @importFrom yulab.utils yulab_msg\n.onAttach <- function(libname, pkgname) {\n  packageStartupMessage(yulab_msg(pkgname))\n\n  options(ChIPseeker.downstreamDistance = 300)\n  options(ChIPseeker.ignore_1st_exon = FALSE)\n  options(ChIPseeker.ignore_1st_intron = FALSE)\n  options(ChIPseeker.ignore_downstream = FALSE)\n  options(ChIPseeker.ignore_promoter_subcategory= FALSE)\n  \n  options(aplot_align = 'y')\n\n}\n\n"
  },
  {
    "path": "README.Rmd",
    "content": "---\noutput:\n  md_document:\n    variant: gfm\nhtml_preview: false\n---\n\n\n```{r echo=FALSE, results=\"hide\", message=FALSE}\n#library(\"txtplot\")\nlibrary(\"badger\")\nlibrary(\"ypages\")\nlibrary(\"yulab.utils\")\n```\n\n\n# ChIPseeker: ChIP peak Annotation, Comparison, and Visualization\n\n<img src=\"https://raw.githubusercontent.com/Bioconductor/BiocStickers/master/ChIPseeker/ChIPseeker.png\" height=\"200\" align=\"right\" />\n\n\n`r badge_bioc_release(\"ChIPseeker\", \"green\")`\n`r badge_devel(\"guangchuangyu/ChIPseeker\", \"green\")`\n[![Bioc](http://www.bioconductor.org/shields/years-in-bioc/ChIPseeker.svg)](https://www.bioconductor.org/packages/devel/bioc/html/ChIPseeker.html#since)\n[![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/GuangchuangYu)\n\n\n[![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)\n[![codecov](https://codecov.io/gh/GuangchuangYu/ChIPseeker/branch/master/graph/badge.svg)](https://codecov.io/gh/GuangchuangYu/ChIPseeker/)\n[![Last-changedate](https://img.shields.io/badge/last%20change-`r gsub('-', '--', Sys.Date())`-green.svg)](https://github.com/GuangchuangYu/ChIPseeker/commits/master)\n[![GitHub forks](https://img.shields.io/github/forks/GuangchuangYu/ChIPseeker.svg)](https://github.com/GuangchuangYu/ChIPseeker/network)\n[![GitHub stars](https://img.shields.io/github/stars/GuangchuangYu/ChIPseeker.svg)](https://github.com/GuangchuangYu/ChIPseeker/stargazers)\n\n\n[![platform](http://www.bioconductor.org/shields/availability/devel/ChIPseeker.svg)](https://www.bioconductor.org/packages/devel/bioc/html/ChIPseeker.html#archives)\n[![Build Status](http://www.bioconductor.org/shields/build/devel/bioc/ChIPseeker.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/ChIPseeker/)\n[![Linux/Mac Travis Build 
Status](https://img.shields.io/travis/GuangchuangYu/ChIPseeker/master.svg?label=Mac%20OSX%20%26%20Linux)](https://travis-ci.org/GuangchuangYu/ChIPseeker)\n[![AppVeyor Build Status](https://img.shields.io/appveyor/ci/Guangchuangyu/ChIPseeker/master.svg?label=Windows)](https://ci.appveyor.com/project/GuangchuangYu/ChIPseeker)\n\n\n\n\n```{r comment=\"\", echo=FALSE, results='asis'}\ncat(packageDescription('ChIPseeker')$Description)\n```\n\n\n## :writing_hand: Authors\n\nGuangchuang YU \n\nSchool of Basic Medical Sciences, Southern Medical University\n\n<https://yulab-smu.top>\n\nIf you use `r Biocpkg('ChIPseeker')` in published research, please cite:\n\n\n+ Q Wang<sup>#</sup>, M Li<sup>#</sup>, T Wu, L Zhan, L Li, M Chen, W Xie, Z Xie, E Hu, S Xu, __G Yu__<sup>\\*</sup>. [Exploring epigenomic datasets by ChIPseeker](https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585). __*Current Protocols*__, 2022, 2(10): e585. \n+ __G Yu__<sup>\\*</sup>, LG Wang, QY He<sup>\\*</sup>. [ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization](http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btv145). __*Bioinformatics*__. 2015, 31(14):2382-2383. \n\n\n## :arrow_double_down: Installation\n\nGet the released version from Bioconductor:\n\n```r\n## try http:// if https:// URLs are not supported\nif (!requireNamespace(\"BiocManager\", quietly=TRUE))\n    install.packages(\"BiocManager\")\n## BiocManager::install(\"BiocUpgrade\") ## you may need this\nBiocManager::install(\"ChIPseeker\")\n```\n\nOr the development version from github:\n\n```r\n## install.packages(\"devtools\")\ndevtools::install_github(\"YuLab-SMU/ChIPseeker\")\n```\n\n\n## Contributing\n\nWe welcome any contributions! By participating in this project you agree to\nabide by the terms outlined in the [Contributor Code of Conduct](CONDUCT.md).\n\n"
  },
  {
    "path": "README.md",
    "content": "# ChIPseeker: ChIP peak Annotation, Comparison, and Visualization\n\n<img src=\"https://raw.githubusercontent.com/Bioconductor/BiocStickers/master/ChIPseeker/ChIPseeker.png\" height=\"200\" align=\"right\" />\n\n[![](https://img.shields.io/badge/release%20version-1.32.1-green.svg)](https://www.bioconductor.org/packages/ChIPseeker)\n[![](https://img.shields.io/badge/devel%20version-1.33.4-green.svg)](https://github.com/guangchuangyu/ChIPseeker)\n[![Bioc](http://www.bioconductor.org/shields/years-in-bioc/ChIPseeker.svg)](https://www.bioconductor.org/packages/devel/bioc/html/ChIPseeker.html#since)\n[![Say\nThanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/GuangchuangYu)\n\n[![Project Status: Active - The project has reached a stable, usable\nstate and is being actively\ndeveloped.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)\n[![codecov](https://codecov.io/gh/GuangchuangYu/ChIPseeker/branch/master/graph/badge.svg)](https://codecov.io/gh/GuangchuangYu/ChIPseeker/)\n[![Last-changedate](https://img.shields.io/badge/last%20change-2022--10--29-green.svg)](https://github.com/GuangchuangYu/ChIPseeker/commits/master)\n[![GitHub\nforks](https://img.shields.io/github/forks/GuangchuangYu/ChIPseeker.svg)](https://github.com/GuangchuangYu/ChIPseeker/network)\n[![GitHub\nstars](https://img.shields.io/github/stars/GuangchuangYu/ChIPseeker.svg)](https://github.com/GuangchuangYu/ChIPseeker/stargazers)\n\n[![platform](http://www.bioconductor.org/shields/availability/devel/ChIPseeker.svg)](https://www.bioconductor.org/packages/devel/bioc/html/ChIPseeker.html#archives)\n[![Build\nStatus](http://www.bioconductor.org/shields/build/devel/bioc/ChIPseeker.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/ChIPseeker/)\n[![Linux/Mac Travis 
Build\nStatus](https://img.shields.io/travis/GuangchuangYu/ChIPseeker/master.svg?label=Mac%20OSX%20%26%20Linux)](https://travis-ci.org/GuangchuangYu/ChIPseeker)\n[![AppVeyor Build\nStatus](https://img.shields.io/appveyor/ci/Guangchuangyu/ChIPseeker/master.svg?label=Windows)](https://ci.appveyor.com/project/GuangchuangYu/ChIPseeker)\n\nThis package implements functions to retrieve the nearest genes around\nthe peak, annotate genomic region of the peak, statistical methods for\nestimating the significance of overlap among ChIP peak data sets, and\nincorporate GEO database for users to compare their own dataset with those\ndeposited in database. The comparison can be used to infer cooperative\nregulation and thus can be used to generate hypotheses. Several\nvisualization functions are implemented to summarize the coverage of the\npeak experiment, average profile and heatmap of peaks binding to TSS\nregions, genomic annotation, distance to TSS, and overlap of peaks or\ngenes.\n\n## :writing_hand: Authors\n\nGuangchuang YU\n\nSchool of Basic Medical Sciences, Southern Medical University\n\n<https://yulab-smu.top>\n\nIf you use [ChIPseeker](http://bioconductor.org/packages/ChIPseeker) in\npublished research, please cite:\n\n-   Q Wang<sup>\\#</sup>, M Li<sup>\\#</sup>, T Wu, L Zhan, L Li, M Chen,\n    W Xie, Z Xie, E Hu, S Xu, **G Yu**<sup>\\*</sup>. [Exploring\n    epigenomic datasets by\n    ChIPseeker](https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585).\n    ***Current Protocols***, 2022, 2(10): e585.\n-   **G Yu**<sup>\\*</sup>, LG Wang, QY He<sup>\\*</sup>. [ChIPseeker: an\n    R/Bioconductor package for ChIP peak annotation, comparison and\n    visualization](http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btv145).\n    ***Bioinformatics***. 
2015, 31(14):2382-2383.\n\n## :arrow_double_down: Installation\n\nGet the released version from Bioconductor:\n\n``` r\n## try http:// if https:// URLs are not supported\nif (!requireNamespace(\"BiocManager\", quietly=TRUE))\n    install.packages(\"BiocManager\")\n## BiocManager::install(\"BiocUpgrade\") ## you may need this\nBiocManager::install(\"ChIPseeker\")\n```\n\nOr the development version from github:\n\n``` r\n## install.packages(\"devtools\")\ndevtools::install_github(\"YuLab-SMU/ChIPseeker\")\n```\n\n## Contributing\n\nWe welcome any contributions! By participating in this project you agree\nto abide by the terms outlined in the [Contributor Code of\nConduct](CONDUCT.md).\n"
  },
  {
    "path": "appveyor.yml",
    "content": "environment:\n  matrix:\n    - R_VERSION: devel\n      R_ARCH: x64\n      USE_RTOOLS: true\n  _R_CHECK_FORCE_SUGGESTS_: false\n\n# DO NOT CHANGE the \"init\" and \"install\" sections below\n\n# Download script file from GitHub\ninit:\n  ps: |\n        $ErrorActionPreference = \"Stop\"\n        Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile \"..\\appveyor-tool.ps1\"\n        Import-Module '..\\appveyor-tool.ps1'\n\ninstall:\n  ps: Bootstrap\n\n# Adapt as necessary starting from here\n\nbuild_script:\n  - travis-tool.sh install_bioc BiocStyle IRanges graphite ReactomePA AnnotationDbi DO.db DOSE org.Hs.eg.db TxDb.Hsapiens.UCSC.hg19.knownGene clusterProfiler\n  - travis-tool.sh install_deps\n\ntest_script:\n  - travis-tool.sh run_tests\n\non_failure:\n  - 7z a failure.zip *.Rcheck\\*\n  - appveyor PushArtifact failure.zip\n\nartifacts:\n  - path: '*.Rcheck\\**\\*.log'\n    name: Logs\n\n  - path: '*.Rcheck\\**\\*.out'\n    name: Logs\n\n  - path: '*.Rcheck\\**\\*.fail'\n    name: Logs\n\n  - path: '*.Rcheck\\**\\*.Rout'\n    name: Logs\n\n  - path: '\\*_*.tar.gz'\n    name: Linux Package\n\n  - path: '\\*_*.zip'\n    name: Windows Package\n\nnotifications:\n- provider: Email\n  to:\n  - gcyu@connect.hku.hk\n  on_build_success: false\n\n"
  },
  {
    "path": "inst/CITATION",
    "content": "citHeader(\"Please cite Q. Wang (2022) or G. Yu (2015) for using ChIPseeker. In addition, please cite clusterProfiler/DOSE/ReactomePA when using functional enrichment analyses.\")\n\n\ncitEntry(entry  =\"ARTICLE\",\n         title  = \"Exploring epigenomic datasets by ChIPseeker\",\n         author = c(\n             person(\"Qianwen\", \"Wang\"),\n             person(\"Ming\", \"Li\"),\n             person(\"Tianzhi\", \"Wu\"),\n             person(\"Li\", \"Zhan\"),\n             person(\"Lin\", \"Li\"),\n             person(\"Meijun\", \"Chen\"),\n             person(\"Wenqin\", \"Xie\"),\n             person(\"Zijing\", \"Xie\"),\n             person(\"Erqiang\", \"Hu\"),\n             person(\"Shuangbin\", \"Xu\"),\n             person(\"Guangchuang\", \"Yu\", email = \"guangchuangyu@gmail.com\")\n             ),\n         journal = \"Current Protocols\",\n         year    = \"2022\",\n         volume  = \"2\",\n         number  = \"10\",\n         pages   = \"e585\",\n         PMID    = \"36286622\",\n         doi     = \"10.1002/cpz1.585\",\n         url     = \"https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585\",\n         textVersion = paste(\"Qianwen Wang, Ming Li, Tianzhi Wu, Li Zhan, Lin Li, Meijun Chen, Wenqin Xie, Zijing Xie, Erqiang Hu, Shuangbin Xu, Guangchuang Yu.\",\n         \"Exploring epigenomic datasets by ChIPseeker.\",\n         \"Current Protocols 2022, 2(10): e585\")\n         )\n\n\ncitEntry(entry  =\"ARTICLE\",\n         title  = \"ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization\",\n         author = personList(\n             as.person(\"Guangchuang Yu\"),\n             as.person(\"Li-Gen Wang\"),\n             as.person(\"Qing-Yu He\")\n             ),\n         journal = \"Bioinformatics\",\n         year    = \"2015\",\n         volume  = \"31\",\n         number  = \"14\",\n         pages   = \"2382-2383\",\n         PMID    = 
\"25765347\",\n         doi     = \"10.1093/bioinformatics/btv145\",\n         textVersion = paste(\"Guangchuang Yu, LiGen Wang, and QingYu He.\",\n             \"ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization.\",\n             \"Bioinformatics 2015, 31(14):23822383\")\n         )\n"
  },
  {
    "path": "inst/extdata/sample_peaks.txt",
    "content": "chr\tstart\tend\tlength\tsummit\ttags\tX.10.log10.pvalue.\tfold_enrichment\tFDR...\nchr10\t105137980\t105138593\t614\t174\t7\t52.8\t15.32\tNA\nchr10\t42644416\t42645383\t968\t669\t27\t77.15\t9.13\t0.79\nchr6\t162188189\t162188742\t554\t174\t8\t59.88\t17.82\tNA\nchr4\t2307246\t2307622\t377\t188\t9\t85.25\t26.62\t1.03\nchr13\t51791808\t51792240\t433\t267\t8\t65.32\t15.08\t0.74\nchr19\t58731678\t58732653\t976\t472\t15\t55\t8.04\tNA\nchr16\t47717334\t47717718\t385\t193\t8\t75.05\t18.38\tNA\nchr5\t133997712\t133998281\t570\t216\t11\t59.14\t10.48\tNA\nchr17\t21730549\t21731638\t1090\t685\t15\t64.79\t9.19\tNA\nchr1\t108313793\t108314347\t555\t189\t7\t58.81\t18.16\t0.78\nchr7\t66713702\t66714183\t482\t285\t7\t50.52\t14.54\t0.97\nchr6\t102911566\t102912390\t825\t476\t17\t53.89\t6.13\tNA\nchr7\t99693575\t99694953\t1379\t285\t19\t54.14\t6.93\t0.92\nchr14\t21930630\t21931094\t465\t289\t7\t57.91\t11.83\t0.78\nchr7\t140460467\t140461015\t549\t221\t10\t51.62\t9.66\t0.98\nchr1\t204827779\t204828392\t614\t236\t10\t54.94\t11.73\t0.85\nchr22\t31055016\t31055701\t686\t486\t9\t68.84\t24.51\tNA\nchr12\t121813018\t121813753\t736\t507\t12\t63.05\t13.41\t0.77\nchr9\t34128226\t34128829\t604\t266\t9\t62.93\t15.08\t0.77\nchr8\t51449160\t51449457\t298\t150\t7\t63.71\t9.59\tNA\nchr7\t152266697\t152267502\t806\t218\t13\t66.03\t10.72\tNA\nchr7\t5200045\t5200648\t604\t217\t10\t51.25\t9.72\tNA\nchr20\t32330916\t32331273\t358\t179\t7\t51.33\t14.66\t0.98\nchr13\t53041893\t53042341\t449\t256\t7\t65.63\t21.79\t0.74\nchr4\t188271660\t188272266\t607\t340\t10\t51.25\t11.16\tNA\nchr11\t57336216\t57336780\t565\t167\t8\t55.92\t15.08\t0.85\nchr7\t147992515\t147993057\t543\t199\t8\t70.42\t18.16\t0.7\nchr9\t126707435\t126708020\t586\t198\t10\t89.63\t28.73\t1.04\nchr8\t146099083\t146099655\t573\t205\t8\t65.26\t18.38\tNA\nchr3\t18887631\t18888098\t468\t210\t10\t80.46\t19.36\tNA\nchr15\t75296138\t75296986\t849\t382\t14\t62.33\t7.8\tNA\nchr7\t76678195\t76678851\t657\t217\t12\t68.53\t10.06\t0.62\nchr1
0\t72271427\t72271869\t443\t225\t8\t50.24\t14.25\tNA\nchr3\t179584381\t179584987\t607\t177\t11\t50.71\t9.19\tNA\nchr4\t163405047\t163405425\t379\t189\t7\t64.2\t22\t0.77\nchr7\t72657437\t72658247\t811\t241\t19\t81.36\t8.15\t0.91\nchr17\t47813522\t47813967\t446\t184\t7\t50.24\t12.3\tNA\nchr19\t17019713\t17020403\t691\t501\t10\t51.25\t11.12\tNA\nchr2\t112229859\t112230262\t404\t203\t8\t56.88\t14.85\tNA\nchr10\t69646478\t69646970\t493\t313\t7\t56.06\t17.74\t0.84\nchr22\t21699160\t21699710\t551\t365\t7\t58.69\t17.96\t0.79\nchrX\t119427920\t119428427\t508\t197\t7\t50.24\t14.01\tNA\nchr11\t12358590\t12358928\t339\t169\t7\t53.91\t11.93\t0.91\nchr10\t23293727\t23294379\t653\t446\t8\t50.61\t9.19\tNA\nchr16\t19008911\t19009452\t542\t178\t8\t53.74\t10.45\tNA\nchr11\t71999828\t72000241\t414\t242\t8\t79.93\t25.14\t0.83\nchr17\t38168330\t38169125\t796\t363\t11\t57.63\t15.91\tNA\nchr7\t56124436\t56124960\t525\t314\t11\t88.99\t20.68\tNA\nchr22\t26998729\t26999306\t578\t216\t10\t72.04\t17.51\tNA\nchr11\t108369506\t108370050\t545\t349\t7\t50.52\t13.7\t0.97\nchr1\t247012833\t247013232\t400\t200\t9\t69.75\t18.1\t0.61\nchr4\t57030170\t57030662\t493\t181\t7\t50.52\t14.87\t0.97\nchr2\t85878694\t85879282\t589\t288\t7\t54.39\t16.76\t0.86\nchr8\t57064347\t57064991\t645\t474\t10\t80.87\t22.7\t0.83\nchr18\t60283518\t60284080\t563\t167\t10\t51.62\t9.91\t0.98\nchr14\t19009999\t19010720\t722\t547\t118\t90.28\t6.43\t1\nchr4\t123592232\t123592745\t514\t168\t8\t59.23\t15.08\t0.79\nchr17\t6492901\t6493361\t461\t211\t7\t50.24\t10.59\tNA\nchr1\t201432452\t201432945\t494\t338\t8\t60.21\t12.42\t0.74\nchr5\t25353549\t25354499\t951\t468\t23\t91.86\t10.49\tNA\nchr21\t41376711\t41377162\t452\t162\t8\t60.21\t13.63\t0.74\nchr10\t135193549\t135194110\t562\t159\t8\t51.28\t8.74\t0.98\nchr2\t165659676\t165660060\t385\t192\t10\t106.7\t36.32\t1.94\nchr1\t18733754\t18734314\t561\t405\t7\t50.52\t14.1\t0.97\nchr3\t181501405\t181501917\t513\t333\t11\t91.78\t18.05\t1.01\nchr15\t44748091\t44748538\t448\t246\t9\t74.71\t17.6
\t0.72\nchr17\t64850164\t64850608\t445\t267\t8\t50.18\t11.73\t0.97\nchr3\t50293565\t50294504\t940\t683\t16\t95.98\t16.76\t1.24\nchr1\t6780373\t6780997\t625\t429\t11\t70.72\t16.09\t0.68\nchr11\t60722310\t60723055\t746\t403\t14\t55.49\t6.89\tNA\nchr16\t47690030\t47690397\t368\t185\t7\t84.74\t28.08\tNA\nchr3\t42881399\t42881811\t413\t207\t8\t64.41\t18.38\tNA\nchr17\t73738304\t73739104\t801\t267\t11\t54.12\t13.13\tNA\nchr7\t72886499\t72887134\t636\t426\t14\t55.22\t8.77\tNA\nchr19\t479390\t480356\t967\t608\t15\t62.85\t9.19\tNA\nchr1\t223554052\t223554581\t530\t360\t7\t50.24\t12.18\tNA\nchr19\t55642648\t55643120\t473\t316\t7\t55.56\t11.17\t0.86\nchr2\t160150507\t160150819\t313\t156\t10\t55.46\t7.87\t0.85\nchr6\t108379579\t108380048\t470\t218\t12\t135.66\t40.11\tNA\nchr7\t66659861\t66660350\t490\t334\t7\t60.23\t20.11\t0.79\nchr21\t45158341\t45159044\t704\t246\t12\t57.42\t9.19\tNA\nchr17\t12142123\t12142576\t454\t256\t10\t95.08\t28.28\t1.31\nchr12\t111535820\t111536662\t843\t508\t13\t55.22\t10.33\tNA\nchr4\t74053898\t74054391\t494\t220\t7\t50.24\t13.62\tNA\nchr17\t41107403\t41107930\t528\t362\t12\t82.37\t13.13\tNA\nchr5\t37022195\t37022565\t371\t186\t9\t90.79\t22.98\tNA\nchr14\t89037667\t89038455\t789\t465\t11\t91.33\t25.38\tNA\nchr19\t55988686\t55989638\t953\t457\t17\t103.53\t17.24\tNA\nchrX\t73187260\t73187705\t446\t145\t7\t65.84\t18.16\t0.71\nchr16\t68160728\t68161185\t458\t216\t7\t70.84\t24.07\tNA\nchr1\t72383088\t72383669\t582\t167\t8\t54.89\t12.57\t0.85\nchr3\t65290871\t65291616\t746\t399\t12\t101.35\t21.79\t1.55\nchr17\t4396192\t4396640\t449\t265\t7\t57.21\t16.76\t0.81\nchr7\t129241408\t129242063\t656\t201\t10\t66.64\t15.32\tNA\nchr8\t105375364\t105376057\t694\t485\t11\t92.72\t22.98\tNA\nchr8\t48921895\t48922221\t327\t164\t7\t60.45\t11.09\tNA\nchr22\t47339473\t47339954\t482\t276\t8\t59.45\t15.32\tNA\nchr7\t5545814\t5546438\t625\t216\t11\t52.8\t12.42\tNA\nchr13\t77761699\t77762182\t484\t158\t7\t63.2\t18.16\t0.78\nchr9\t6582021\t6582849\t829\t425\t15\t106.11\t17.6\t1.8
7\nchr5\t1430193\t1430994\t802\t506\t13\t51.97\t8.04\tNA\nchr11\t64532961\t64533474\t514\t199\t7\t50.52\t12.92\t0.95\nchr21\t42055751\t42056309\t559\t218\t7\t50.24\t15.42\tNA\nchr12\t107211163\t107211704\t542\t359\t10\t80.87\t19.07\t0.83\nchr3\t161238645\t161239047\t403\t201\t7\t60.41\t19.55\t0.8\nchr17\t3680122\t3680663\t542\t281\t14\t67.79\t10.11\tNA\nchr17\t20455343\t20455801\t459\t191\t10\t51.62\t9.23\t0.97\nchr2\t196378101\t196378811\t711\t519\t11\t59.14\t14.71\tNA\nchr3\t194245019\t194245449\t431\t268\t7\t55.17\t12.57\t0.84\nchr16\t14626541\t14627285\t745\t509\t11\t54.27\t10.72\tNA\nchr2\t236773772\t236774222\t451\t251\t8\t59.88\t14.51\tNA\nchr11\t67551724\t67552171\t448\t168\t7\t50.24\t12.35\tNA\nchr10\t54421011\t54421536\t526\t249\t7\t60.53\t14.53\t0.8\nchr11\t121028043\t121028655\t613\t198\t10\t51.25\t11.27\tNA\nchr10\t27600469\t27601208\t740\t354\t12\t82.79\t17.6\t0.92\nchr12\t83275694\t83276252\t559\t296\t8\t69.37\t21.79\t0.65\nchr14\t63851416\t63851824\t409\t209\t9\t70.34\t16.45\t0.64\nchr4\t185478067\t185478514\t448\t299\t7\t58.8\t16.22\tNA\nchr14\t51818623\t51819136\t514\t341\t7\t50.52\t12.92\t0.95\nchr3\t179394999\t179395365\t367\t183\t7\t68.9\t25.42\t0.61\nchr6\t90496197\t90496532\t336\t168\t7\t53.32\t11.73\t0.94\nchr3\t48664669\t48665218\t550\t301\t8\t50.25\t11.49\tNA\nchr16\t17028641\t17029305\t665\t407\t12\t107.4\t29.05\t2.05\nchr14\t101874985\t101876019\t1035\t571\t13\t74.1\t13.79\tNA\nchr12\t42519772\t42520228\t457\t260\t7\t53.33\t15.08\t0.96\nchr9\t135276277\t135277082\t806\t298\t12\t73.73\t16\t0.72\nchr14\t104160631\t104161383\t753\t182\t11\t60.5\t11.6\t0.8\nchr3\t85070344\t85071044\t701\t413\t8\t59.88\t19.33\tNA\nchr11\t107900936\t107901419\t484\t287\t7\t51.54\t12.57\t0.97\nchr12\t130291640\t130292042\t403\t201\t7\t50.3\t14.18\t0.97\nchr21\t41159981\t41160669\t689\t191\t9\t59.49\t18.79\t100\nchr6\t118954241\t118954912\t672\t336\t10\t80.87\t23.65\t0.83\nchr10\t70228566\t70229168\t603\t226\t10\t72.72\t17.6\t0.69\nchr22\t46914023\t46914630\t608\
t424\t9\t51.27\t11.17\t0.98\nchr14\t61232829\t61233495\t667\t481\t10\t80.46\t21.46\tNA\nchr4\t74134806\t74135475\t670\t418\t9\t58.57\t13.79\tNA\nchr10\t7869332\t7869993\t662\t176\t10\t78.36\t15.71\t0.76\nchr5\t120189408\t120189950\t543\t325\t8\t56.98\t16.09\tNA\nchr1\t11542794\t11543378\t585\t312\t8\t59.88\t16.13\tNA\nchr1\t9413700\t9414272\t573\t159\t8\t55.43\t12.57\t0.85\nchr12\t131218246\t131219043\t798\t602\t12\t58.77\t10.72\tNA\nchr1\t29032216\t29033010\t795\t333\t11\t59.42\t14.71\tNA\nchr2\t187827920\t187828577\t658\t291\t11\t78.27\t20.68\tNA\nchr11\t28235164\t28235576\t413\t214\t10\t71.56\t15.08\t0.64\nchr4\t173579304\t173579706\t403\t201\t9\t78.66\t22.62\t0.77\nchr12\t80194469\t80194868\t400\t200\t8\t81.06\t29.05\t0.84\nchr11\t26134912\t26135581\t670\t472\t12\t103.04\t23.58\t1.57\nchr9\t88771969\t88772469\t501\t371\t8\t50.17\t13.79\tNA\nchr8\t146115421\t146115783\t363\t182\t7\t65.63\t22.99\tNA\nchr6\t140093330\t140093770\t441\t169\t7\t54.45\t15.08\t0.86\n"
  },
  {
    "path": "inst/test-plot/test-plotPeakProf.R",
    "content": "library(ChIPseeker)\nlibrary(TxDb.Hsapiens.UCSC.hg19.knownGene)\n\ncontext(\"test plotPeakProf() for a list of windows\")\n\npeak <- getSampleFiles()[[4]]\npeak_list <- getSampleFiles()[4:5]\ntxdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n\n## self-made enhancer region in the form of granges object\nenhancer <- transcripts(txdb)[1:5000,]\n\n## self-made non-enhancer region in the form of granges object\nnon_enhancer <- unlist(fiveUTRsByTranscript(txdb))[1:5000]\n\ngr <- list(enhancer,non_enhancer)\n\ntest_that(\"input two self-made granges object\",{\n  \n  p <- plotPeakProf(peak = peak,\n                    conf = 0.95,\n                    by = c(\"enhancer\",\"non-enhancer\"),\n                    windows_name = c(\"enhancer\",\"non-enhancer\"),\n                    weightCol = \"V5\",\n                    type = \"start_site\",\n                    upstream = 1000,\n                    downstream = 1000,\n                    TxDb = list(enhancer,non_enhancer))\n  \n  expect_is(p,\"gg\")\n  \n})\n\ntest_that(\"input a list of peaks\",{\n  \n  p <- plotPeakProf(peak = peak_list,\n                    TxDb = list(enhancer,non_enhancer),\n                    conf = 0.95,\n                    by = c(\"enhancer\",\"non-enhancer\"),\n                    windows_name = c(\"enhancer\",\"non-enhancer\"),\n                    weightCol = \"V5\",\n                    type = \"start_site\",\n                    upstream = 1000,\n                    downstream = 1000)\n  \n  expect_is(p,\"gg\")\n})\n\n\ntest_that(\"input gr and txdb input\",{\n  \n  p <- plotPeakProf(peak = peak,\n                    TxDb = list(enhancer,txdb),\n                    conf = 0.95,\n                    by = c(\"enhancer\",\"gene\"),\n                    windows_name = c(\"enhancer\",\"gene\"),\n                    weightCol = \"V5\",\n                    type = \"start_site\",\n                    upstream = 1000,\n                    downstream = 1000)\n  \n  
expect_is(p,\"gg\")\n})\n\n\ntest_that(\"check body region\",{\n  \n  p <- plotPeakProf(peak = peak,\n                    TxDb = list(enhancer,txdb),\n                    conf = 0.95,\n                    by = c(\"enhancer\",\"gene\"),\n                    windows_name = c(\"enhancer\",\"gene\"),\n                    weightCol = \"V5\",\n                    type = \"body\",\n                    upstream = 1000,\n                    downstream = 1000,\n                    nbin = 800)\n  \n  expect_is(p,\"gg\")\n  \n  p <- plotPeakProf(peak = peak,\n                    TxDb = list(enhancer,txdb),\n                    conf = 0.95,\n                    by = c(\"enhancer\",\"gene\"),\n                    windows_name = c(\"enhancer\",\"gene\"),\n                    weightCol = \"V5\",\n                    type = \"body\",\n                    nbin = 800)\n  \n  expect_is(p,\"gg\")\n  \n  p <- plotPeakProf(peak = peak,\n                    TxDb = list(enhancer,txdb),\n                    conf = 0.95,\n                    by = c(\"enhancer\",\"gene\"),\n                    windows_name = c(\"enhancer\",\"gene\"),\n                    weightCol = \"V5\",\n                    type = \"body\",\n                    upstream = rel(0.2),\n                    downstream = rel(0.2),\n                    nbin = 800)\n  \n  expect_is(p,\"gg\")\n  \n  p <- plotPeakProf(peak = peak_list,\n                    TxDb = list(enhancer,txdb),\n                    conf = 0.95,\n                    by = c(\"enhancer\",\"gene\"),\n                    windows_name = c(\"enhancer\",\"gene\"),\n                    weightCol = \"V5\",\n                    type = \"body\",\n                    upstream = rel(0.2),\n                    downstream = rel(0.2),\n                    nbin = 800)\n  \n  expect_is(p,\"gg\")\n})"
  },
  {
    "path": "inst/test-plot/test-plotTagMatrix.R",
    "content": "library(ChIPseeker)\nlibrary(TxDb.Hsapiens.UCSC.hg19.knownGene)\n\ncontext(\"test plotTagMatrix() and related functions\")\n\ntest_that(\"test plotPeakProf2 function use txdb\",{\n  \n  peak <- getSampleFiles()[[4]]\n  peak_list <- getSampleFiles()[4:5]\n  \n  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n  \n  # test single peak file\n  p1_1 <- plotPeakProf(peak = peak,\n                       upstream = 1000,\n                       downstream = 1000,\n                       by = \"gene\",\n                       type = \"start_site\",\n                       TxDb = txdb,\n                       nbin = 800)\n  \n  expect_is(p1_1, \"gg\")\n  \n  # test a list of peak files\n  p1_2 <- plotPeakProf(peak = peak_list,\n                       upstream = 1000,\n                       downstream = 1000,\n                       by = \"gene\",\n                       type = \"start_site\",\n                       TxDb = txdb,\n                       nbin = 800)\n  \n  expect_is(p1_2, \"gg\")\n  \n  # test body region\n  # without extension\n  p2_1 <- plotPeakProf(peak = peak_list,\n                       by = \"gene\",\n                       type = \"body\",\n                       TxDb = txdb,\n                       nbin = 800)\n  \n  expect_is(p2_1, \"gg\")\n  \n  # extend with rel object\n  p2_2 <- plotPeakProf(peak = peak_list,\n                       by = \"gene\",\n                       type = \"body\",\n                       TxDb = txdb,\n                       upstream = rel(0.2),\n                       downstream = rel(0.2),\n                       nbin = 800)\n  \n  expect_is(p2_2, \"gg\")\n  \n  # extend with actual number\n  p2_3 <- plotPeakProf(peak = peak_list,\n                       by = \"gene\",\n                       type = \"body\",\n                       TxDb = txdb,\n                       upstream = 1000,\n                       downstream = 1000,\n                       nbin = 800)\n  \n  expect_is(p2_3, \"gg\")\n  
\n})\n\n\ntest_that(\"test plotPeakProf2 function use self-made granges\",{\n  \n  peak <- getSampleFiles()[[4]]\n  peak_list <- getSampleFiles()[4:5]\n  \n  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n  \n  # we consider transcript region as enhancer region\n  # and make self-made granges object\n  # they can be the same in the form of granges object\n  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n  enhancer <- transcripts(txdb)[1:5000,]\n  \n  # test single peak file\n  p1_1 <- plotPeakProf(peak = peak,\n                       upstream = 1000,\n                       downstream = 1000,\n                       by = \"gene\",\n                       type = \"start_site\",\n                       TxDb = enhancer,\n                       nbin = 800)\n  \n  expect_is(p1_1, \"gg\")\n  \n  # test a list of peak files\n  p1_2 <- plotPeakProf(peak = peak_list,\n                       upstream = 1000,\n                       downstream = 1000,\n                       by = \"gene\",\n                       type = \"start_site\",\n                       TxDb = enhancer,\n                       nbin = 800)\n  \n  expect_is(p1_2, \"gg\")\n  \n  # test body region\n  # without extension\n  p2_1 <- plotPeakProf(peak = peak_list,\n                       by = \"gene\",\n                       type = \"body\",\n                       TxDb = enhancer,\n                       nbin = 800)\n  \n  expect_is(p2_1, \"gg\")\n  \n  # extend with rel object\n  p2_2 <- plotPeakProf(peak = peak_list,\n                       by = \"gene\",\n                       type = \"body\",\n                       TxDb = enhancer,\n                       upstream = rel(0.2),\n                       downstream = rel(0.2),\n                       nbin = 800)\n  \n  expect_is(p2_2, \"gg\")\n  \n  # extend with actual number\n  p2_3 <- plotPeakProf(peak = peak_list,\n                       by = \"gene\",\n                       type = \"body\",\n                       TxDb = enhancer,\n                       
upstream = 1000,\n                       downstream = 1000,\n                       nbin = 800)\n  \n  expect_is(p2_3, \"gg\")\n  \n})"
  },
  {
    "path": "man/ChIPseeker-package.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ChIPseeker-package.R\n\\docType{package}\n\\name{ChIPseeker-package}\n\\alias{ChIPseeker}\n\\alias{ChIPseeker-package}\n\\title{ChIPseeker: ChIPseeker for ChIP peak Annotation, Comparison, and Visualization}\n\\description{\nThis package implements functions to retrieve the nearest genes around the peak, annotate genomic region of the peak, statstical methods for estimate the significance of overlap among ChIP peak data sets, and incorporate GEO database for user to compare the own dataset with those deposited in database. The comparison can be used to infer cooperative regulation and thus can be used to generate hypotheses. Several visualization functions are implemented to summarize the coverage of the peak experiment, average profile and heatmap of peaks binding to TSS regions, genomic annotation, distance to TSS, and overlap of peaks or genes.\n}\n\\seealso{\nUseful links:\n\\itemize{\n  \\item \\url{https://yulab-smu.top/contribution-knowledge-mining/}\n  \\item Report bugs at \\url{https://github.com/YuLab-SMU/ChIPseeker/issues}\n}\n\n}\n\\author{\n\\strong{Maintainer}: Guangchuang Yu \\email{guangchuangyu@gmail.com} (\\href{https://orcid.org/0000-0002-6485-8781}{ORCID})\n\nOther contributors:\n\\itemize{\n  \\item Ming Li \\email{limiang929@gmail.com} [contributor]\n  \\item Qianwen Wang \\email{treywea@gmail.com} [contributor]\n  \\item Yun Yan \\email{youryanyun@gmail.com} [contributor]\n  \\item Hervé Pagès \\email{hpages.on.github@gmail.com} [contributor]\n  \\item Michael Kluge \\email{michael.kluge@bio.ifi.lmu.de} [contributor]\n  \\item Thomas Schwarzl \\email{schwarzl@embl.de} [contributor]\n  \\item Zhougeng Xu \\email{xuzhougeng@163.com} [contributor]\n  \\item Chun-Hui Gao \\email{gaospecial@gmail.com} (\\href{https://orcid.org/0000-0002-1445-7939}{ORCID}) [contributor]\n}\n\n}\n\\keyword{internal}\n"
  },
  {
    "path": "man/ChIPseekerCache.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ChIPseeker-package.R\n\\docType{data}\n\\name{ChIPseekerCache}\n\\alias{ChIPseekerCache}\n\\title{Name of the ChIPseeker cache environment (internal static variable)}\n\\format{\ncharacter vector\n}\n\\usage{\nChIPseekerCache\n}\n\\description{\nName of the ChIPseeker cache environment (internal static variable)\n}\n\\keyword{datasets}\n"
  },
  {
    "path": "man/annotatePeak.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/annotatePeak.R\n\\name{annotatePeak}\n\\alias{annotatePeak}\n\\title{annotatePeak}\n\\usage{\nannotatePeak(\n  peak,\n  tssRegion = c(-3000, 3000),\n  TxDb = NULL,\n  level = \"transcript\",\n  assignGenomicAnnotation = TRUE,\n  genomicAnnotationPriority = c(\"Promoter\", \"5UTR\", \"3UTR\", \"Exon\", \"Intron\",\n    \"Downstream\", \"Intergenic\"),\n  annoDb = NULL,\n  addFlankGeneInfo = FALSE,\n  flankDistance = 5000,\n  sameStrand = FALSE,\n  ignoreOverlap = FALSE,\n  ignoreUpstream = FALSE,\n  ignoreDownstream = FALSE,\n  overlap = \"TSS\",\n  verbose = TRUE,\n  columns = c(\"ENTREZID\", \"ENSEMBL\", \"SYMBOL\", \"GENENAME\")\n)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{tssRegion}{Region Range of TSS}\n\n\\item{TxDb}{TxDb or EnsDb annotation object}\n\n\\item{level}{one of transcript and gene}\n\n\\item{assignGenomicAnnotation}{logical, assign peak genomic annotation or not}\n\n\\item{genomicAnnotationPriority}{genomic annotation priority}\n\n\\item{annoDb}{annotation package}\n\n\\item{addFlankGeneInfo}{logical, add flanking gene information from the peaks}\n\n\\item{flankDistance}{distance of flanking sequence}\n\n\\item{sameStrand}{logical, whether find nearest/overlap gene in the same strand}\n\n\\item{ignoreOverlap}{logical, whether ignore overlap of TSS with peak}\n\n\\item{ignoreUpstream}{logical, if True only annotate gene at the 3' of the peak.}\n\n\\item{ignoreDownstream}{logical, if True only annotate gene at the 5' of the peak.}\n\n\\item{overlap}{one of 'TSS' or 'all', if overlap=\"all\", then gene overlap with peak will be reported as nearest gene, no matter the overlap is at TSS region or not.}\n\n\\item{verbose}{print message or not}\n\n\\item{columns}{names of columns to be obtained from database}\n}\n\\value{\ndata.frame or GRanges object with columns of:\n\nall columns provided by input.\n\nannotation: genomic feature of 
the peak, for instance if the peak is\nlocated in 5'UTR, it will be annotated as 5'UTR. Possible annotation is\nPromoter-TSS, Exon, 5' UTR, 3' UTR, Intron, and Intergenic.\n\ngeneChr: Chromosome of the nearest gene\n\ngeneStart: gene start\n\ngeneEnd: gene end\n\ngeneLength: gene length\n\ngeneStrand: gene strand\n\ngeneId: entrezgene ID\n\ndistanceToTSS: distance from peak to gene TSS\n\nif annoDb is provided, extra column will be included:\n\nENSEMBL: ensembl ID of the nearest gene\n\nSYMBOL: gene symbol\n\nGENENAME: full gene name\n}\n\\description{\nAnnotate peaks\n}\n\\examples{\n\\dontrun{\nrequire(TxDb.Hsapiens.UCSC.hg19.knownGene)\ntxdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\npeakfile <- system.file(\"extdata\", \"sample_peaks.txt\", package=\"ChIPseeker\")\npeakAnno <- annotatePeak(peakfile, tssRegion=c(-3000, 3000), TxDb=txdb)\npeakAnno\n}\n}\n\\seealso{\n\\code{\\link{plotAnnoBar}} \\code{\\link{plotAnnoPie}} \\code{\\link{plotDistToTSS}}\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/as.GRanges.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/csAnno.R\n\\name{as.GRanges}\n\\alias{as.GRanges}\n\\title{as.GRanges}\n\\usage{\nas.GRanges(x)\n}\n\\arguments{\n\\item{x}{csAnno object}\n}\n\\value{\nGRanges object\n}\n\\description{\nconvert csAnno object to GRanges\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/as.data.frame.csAnno.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/csAnno.R\n\\name{as.data.frame.csAnno}\n\\alias{as.data.frame.csAnno}\n\\title{as.data.frame.csAnno}\n\\usage{\n\\method{as.data.frame}{csAnno}(x, row.names = NULL, optional = FALSE, ...)\n}\n\\arguments{\n\\item{x}{csAnno object}\n\n\\item{row.names}{row names}\n\n\\item{optional}{should be omitted.}\n\n\\item{...}{additional parameters}\n}\n\\value{\ndata.frame\n}\n\\description{\nconvert csAnno object to data.frame\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/check_upstream_and_downstream.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utilities.R\n\\name{check_upstream_and_downstream}\n\\alias{check_upstream_and_downstream}\n\\title{check upstream and downstream parameter}\n\\usage{\ncheck_upstream_and_downstream(upstream, downstream)\n}\n\\arguments{\n\\item{upstream}{upstream}\n\n\\item{downstream}{downstream}\n}\n\\description{\ncheck_upstream_and_downstream\n}\n"
  },
  {
    "path": "man/combine_csAnno.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/csAnno.R\n\\name{combine_csAnno}\n\\alias{combine_csAnno}\n\\title{combine_csAnno}\n\\usage{\ncombine_csAnno(x, ...)\n}\n\\arguments{\n\\item{x}{csAnno object}\n\n\\item{...}{csAnno objects}\n}\n\\value{\ncsAnno object\n}\n\\description{\nCombine csAnno Object\n}\n\\details{\nhttps://github.com/YuLab-SMU/ChIPseeker/issues/157\n}\n"
  },
  {
    "path": "man/covplot.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/covplot.R\n\\name{covplot}\n\\alias{covplot}\n\\title{covplot}\n\\usage{\ncovplot(\n  peak,\n  weightCol = NULL,\n  xlab = \"Chromosome Size (bp)\",\n  ylab = \"\",\n  title = \"ChIP Peaks over Chromosomes\",\n  chrs = NULL,\n  xlim = NULL,\n  lower = 1,\n  fill_color = \"black\"\n)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{weightCol}{weight column of peak}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{title}{title}\n\n\\item{chrs}{selected chromosomes to plot, all chromosomes by default}\n\n\\item{xlim}{ranges to plot, default is whole chromosome}\n\n\\item{lower}{lower cutoff of coverage signal}\n\n\\item{fill_color}{specify the color/palette for the plot. Order matters}\n}\n\\value{\nggplot2 object\n}\n\\description{\nplot peak coverage\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/csAnno-class.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/csAnno.R\n\\docType{class}\n\\name{csAnno-class}\n\\alias{csAnno-class}\n\\alias{show,csAnno-method}\n\\alias{vennpie,csAnno-method}\n\\alias{plotDistToTSS,csAnno-method}\n\\alias{plotAnnoBar,csAnno-method}\n\\alias{plotAnnoPie,csAnno-method}\n\\alias{upsetplot,csAnno-method}\n\\alias{subset,csAnno-method}\n\\title{Class \"csAnno\"\nThis class represents the output of ChIPseeker Annotation}\n\\description{\nClass \"csAnno\"\nThis class represents the output of ChIPseeker Annotation\n}\n\\section{Slots}{\n\n\\describe{\n\\item{\\code{anno}}{annotation}\n\n\\item{\\code{tssRegion}}{TSS region}\n\n\\item{\\code{level}}{transcript or gene}\n\n\\item{\\code{hasGenomicAnnotation}}{logical}\n\n\\item{\\code{detailGenomicAnnotation}}{Genomic Annotation in detail}\n\n\\item{\\code{annoStat}}{annotation statistics}\n\n\\item{\\code{peakNum}}{number of peaks}\n}}\n\n\\seealso{\n\\code{\\link{annotatePeak}}\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n\\keyword{classes}\n"
  },
  {
    "path": "man/dot-ChIPseekerEnv.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utilities.R\n\\name{.ChIPseekerEnv}\n\\alias{.ChIPseekerEnv}\n\\title{env function for ChIPseeker}\n\\usage{\n.ChIPseekerEnv(TxDb, item = \"ChIPseekerEnv\", force = FALSE)\n}\n\\arguments{\n\\item{TxDb}{txdb object}\n\n\\item{item}{item name}\n\n\\item{force}{force to update txdb item in cache or not.}\n}\n\\description{\nenv function for ChIPseeker\n}\n"
  },
  {
    "path": "man/dotFun.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utilities.R\n\\name{.}\n\\alias{.}\n\\title{.}\n\\usage{\n.(..., .env = parent.frame())\n}\n\\arguments{\n\\item{...}{expression}\n\n\\item{.env}{environment}\n}\n\\value{\nexpression\n}\n\\description{\ncapture name of variable\n}\n\\examples{\nx <- 1\neval(.(x)[[1]])\n}\n"
  },
  {
    "path": "man/downloadGEObedFiles.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GEO.R\n\\name{downloadGEObedFiles}\n\\alias{downloadGEObedFiles}\n\\title{downloadGEObedFiles}\n\\usage{\ndownloadGEObedFiles(genome, destDir = getwd())\n}\n\\arguments{\n\\item{genome}{genome version}\n\n\\item{destDir}{destination folder}\n}\n\\description{\ndownload all BED files of a particular genome version\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/downloadGSMbedFiles.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GEO.R\n\\name{downloadGSMbedFiles}\n\\alias{downloadGSMbedFiles}\n\\title{downloadGSMbedFiles}\n\\usage{\ndownloadGSMbedFiles(GSM, destDir = getwd())\n}\n\\arguments{\n\\item{GSM}{GSM accession numbers}\n\n\\item{destDir}{destination folder}\n}\n\\description{\ndownload BED supplementary files of a list of GSM accession numbers\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/dropAnno.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/annotatePeak.R\n\\name{dropAnno}\n\\alias{dropAnno}\n\\title{dropAnno}\n\\usage{\ndropAnno(csAnno, distanceToTSS_cutoff = 10000)\n}\n\\arguments{\n\\item{csAnno}{output of annotatePeak}\n\n\\item{distanceToTSS_cutoff}{distance to TSS cutoff}\n}\n\\value{\ncsAnno object\n}\n\\description{\ndropAnno\n}\n\\details{\ndrop annotation exceeding distanceToTSS_cutoff\n}\n\\author{\nGuangchuang Yu\n}\n"
  },
  {
    "path": "man/enrichAnnoOverlap.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/enrichOverlap.R\n\\name{enrichAnnoOverlap}\n\\alias{enrichAnnoOverlap}\n\\title{enrichAnnoOverlap}\n\\usage{\nenrichAnnoOverlap(\n  queryPeak,\n  targetPeak,\n  TxDb = NULL,\n  pAdjustMethod = \"BH\",\n  chainFile = NULL,\n  distanceToTSS_cutoff = NULL\n)\n}\n\\arguments{\n\\item{queryPeak}{query bed file}\n\n\\item{targetPeak}{target bed file(s) or folder containing bed files}\n\n\\item{TxDb}{TxDb}\n\n\\item{pAdjustMethod}{pvalue adjustment method}\n\n\\item{chainFile}{chain file for liftOver}\n\n\\item{distanceToTSS_cutoff}{restrict nearest gene annotation by distance cutoff}\n}\n\\value{\ndata.frame\n}\n\\description{\ncalculate overlap significance of ChIP experiments based on their nearest gene annotation\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/enrichPeakOverlap.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/enrichOverlap.R\n\\name{enrichPeakOverlap}\n\\alias{enrichPeakOverlap}\n\\title{enrichPeakOverlap}\n\\usage{\nenrichPeakOverlap(\n  queryPeak,\n  targetPeak,\n  TxDb = NULL,\n  pAdjustMethod = \"BH\",\n  nShuffle = 1000,\n  chainFile = NULL,\n  pool = TRUE,\n  mc.cores = detectCores() - 1,\n  verbose = TRUE\n)\n}\n\\arguments{\n\\item{queryPeak}{query bed file or GRanges object}\n\n\\item{targetPeak}{target bed file(s) or folder containing bed files or a list of GRanges objects}\n\n\\item{TxDb}{TxDb}\n\n\\item{pAdjustMethod}{pvalue adjustment method}\n\n\\item{nShuffle}{shuffle numbers}\n\n\\item{chainFile}{chain file for liftOver}\n\n\\item{pool}{logical, whether pool target peaks}\n\n\\item{mc.cores}{number of cores, see \\link[parallel]{mclapply}}\n\n\\item{verbose}{logical}\n}\n\\value{\ndata.frame\n}\n\\description{\ncalculate overlap significance of ChIP experiments based on the genomic coordinates\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getAnnoStat.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/csAnno.R\n\\name{getAnnoStat}\n\\alias{getAnnoStat}\n\\title{getAnnoStat}\n\\usage{\ngetAnnoStat(x)\n}\n\\arguments{\n\\item{x}{csAnno object}\n}\n\\description{\ngetting status of annotation\n}\n"
  },
  {
    "path": "man/getBioRegion.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{getBioRegion}\n\\alias{getBioRegion}\n\\title{getBioRegion}\n\\usage{\ngetBioRegion(\n  TxDb = NULL,\n  upstream = 1000,\n  downstream = 1000,\n  by = \"gene\",\n  type = \"start_site\"\n)\n}\n\\arguments{\n\\item{TxDb}{TxDb}\n\n\\item{upstream}{upstream from start site or end site}\n\n\\item{downstream}{downstream from start site or end site}\n\n\\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n}\n\\value{\nGRanges object\n}\n\\description{\nprepare a bioregion of selected feature\n}\n\\details{\nthis function combined previous functions getPromoters(), getBioRegion() and getGeneBody() in order\nto solve the following issues.\n\n(1) \\url{https://github.com/GuangchuangYu/ChIPseeker/issues/16}\n\n(2) \\url{https://github.com/GuangchuangYu/ChIPseeker/issues/87}\n\nThe getBioRegion() function can provide a region of interest from\n\\code{txdb} object. There are three kinds of regions, \\code{start_site},\n\\code{end_site} and \\code{body}. \n\nWe take transcript region to explain the differences of these three regions.\ntx: chr1 1000 1400. \n\n\\code{body} region refers to the 1000-1400bp.\n\n\\code{start_site} region with \\code{upstream = 100, downstream = 100} refers to 900-1100bp. \n\n\\code{end_site} region with \\code{upstream = 100, downstream = 100} refers to 1300-1500bp.\n}\n\\author{\nGuangchuang Yu, Ming L\n}\n"
  },
  {
    "path": "man/getGEOInfo.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GEO.R\n\\name{getGEOInfo}\n\\alias{getGEOInfo}\n\\title{getGEOInfo}\n\\usage{\ngetGEOInfo(genome, simplify = TRUE)\n}\n\\arguments{\n\\item{genome}{genome version}\n\n\\item{simplify}{simplify result or not}\n}\n\\value{\ndata.frame\n}\n\\description{\nget subset of GEO information by genome version keyword\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getGEOgenomeVersion.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GEO.R\n\\name{getGEOgenomeVersion}\n\\alias{getGEOgenomeVersion}\n\\title{getGEOgenomeVersion}\n\\usage{\ngetGEOgenomeVersion()\n}\n\\value{\ndata.frame\n}\n\\description{\nget genome version statistics collected from GEO ChIPseq data\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getGEOspecies.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GEO.R\n\\name{getGEOspecies}\n\\alias{getGEOspecies}\n\\title{getGEOspecies}\n\\usage{\ngetGEOspecies()\n}\n\\value{\ndata.frame\n}\n\\description{\naccessing species statistics collected from GEO database\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getGeneAnno.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/addGeneAnno.R\n\\name{getGeneAnno}\n\\alias{getGeneAnno}\n\\title{getGeneAnno}\n\\usage{\ngetGeneAnno(annoDb, geneID, type, columns)\n}\n\\arguments{\n\\item{annoDb}{annotation package}\n\n\\item{geneID}{query geneID}\n\n\\item{type}{gene ID type}\n\n\\item{columns}{names of columns to be obtained from database}\n}\n\\value{\ndata.frame\n}\n\\description{\nget gene annotation, symbol, gene name etc.\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getGenomicAnnotation.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/getGenomicAnnotation.R\n\\name{getGenomicAnnotation}\n\\alias{getGenomicAnnotation}\n\\title{getGenomicAnnotation}\n\\usage{\ngetGenomicAnnotation(\n  peaks,\n  distance,\n  tssRegion = c(-3000, 3000),\n  TxDb,\n  level,\n  genomicAnnotationPriority,\n  sameStrand = FALSE\n)\n}\n\\arguments{\n\\item{peaks}{peaks in GRanges object}\n\n\\item{distance}{distance of peak to TSS}\n\n\\item{tssRegion}{tssRegion, default is -3kb to +3kb}\n\n\\item{TxDb}{TxDb object}\n\n\\item{level}{one of gene or transcript}\n\n\\item{genomicAnnotationPriority}{genomic Annotation Priority}\n\n\\item{sameStrand}{whether annotate gene in same strand}\n}\n\\value{\ncharacter vector\n}\n\\description{\nget Genomic Annotation of peaks\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getNearestFeatureIndicesAndDistances.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/getNearestFeatureIndicesAndDistances.R\n\\name{getNearestFeatureIndicesAndDistances}\n\\alias{getNearestFeatureIndicesAndDistances}\n\\title{getNearestFeatureIndicesAndDistances}\n\\usage{\ngetNearestFeatureIndicesAndDistances(\n  peaks,\n  features,\n  sameStrand = FALSE,\n  ignoreOverlap = FALSE,\n  ignoreUpstream = FALSE,\n  ignoreDownstream = FALSE,\n  overlap = \"TSS\"\n)\n}\n\\arguments{\n\\item{peaks}{peak in GRanges}\n\n\\item{features}{features in GRanges}\n\n\\item{sameStrand}{logical, whether find nearest gene in the same strand}\n\n\\item{ignoreOverlap}{logical, whether ignore overlap of TSS with peak}\n\n\\item{ignoreUpstream}{logical, if True only annotate gene at the 3' of the peak.}\n\n\\item{ignoreDownstream}{logical, if True only annotate gene at the 5' of the peak.}\n\n\\item{overlap}{one of \"TSS\" or \"all\"}\n}\n\\value{\nlist\n}\n\\description{\nget index of features that are closest to peak and calculate distance\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getPromoters.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{getPromoters}\n\\alias{getPromoters}\n\\title{getPromoters}\n\\usage{\ngetPromoters(TxDb = NULL, upstream = 1000, downstream = 1000, by = \"gene\")\n}\n\\arguments{\n\\item{TxDb}{TxDb}\n\n\\item{upstream}{upstream from TSS site}\n\n\\item{downstream}{downstream from TSS site}\n\n\\item{by}{one of gene or transcript}\n}\n\\value{\nGRanges object\n}\n\\description{\nprepare the promoter regions\n}\n"
  },
  {
    "path": "man/getSampleFiles.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utilities.R\n\\name{getSampleFiles}\n\\alias{getSampleFiles}\n\\title{getSampleFiles}\n\\usage{\ngetSampleFiles()\n}\n\\value{\nlist of file names\n}\n\\description{\nget filenames of sample files\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getTagMatrix.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{getTagMatrix}\n\\alias{getTagMatrix}\n\\title{getTagMatrix}\n\\usage{\ngetTagMatrix(\n  peak,\n  upstream,\n  downstream,\n  windows,\n  type,\n  by,\n  TxDb = NULL,\n  weightCol = NULL,\n  nbin = NULL,\n  verbose = TRUE,\n  ignore_strand = FALSE\n)\n}\n\\arguments{\n\\item{peak}{peak peak file or GRanges object}\n\n\\item{upstream}{the distance of upstream extension}\n\n\\item{downstream}{the distance of downstream extension}\n\n\\item{windows}{a collection of region}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{by}{one of 'gene', 'transcript', 'exon', 'intron', '3UTR' , '5UTR', or specified by users}\n\n\\item{TxDb}{TxDb or self-made granges object, served as txdb}\n\n\\item{weightCol}{column name of weight, default is NULL}\n\n\\item{nbin}{the amount of nbines}\n\n\\item{verbose}{print message or not}\n\n\\item{ignore_strand}{ignore the strand information or not}\n}\n\\value{\ntagMatrix\n}\n\\description{\ncalculate the tag matrix\n}\n\\details{\n\\code{getTagMatrix()} function can produce the matrix for visualization.\n\\code{peak} stands for the peak file. \n\\code{window} stands for a collection of regions that users want to look into. \nUsers can use \\code{window} to capture the peak of interest.\nThere are two ways to input \\code{window}. \n\nThe first way is that users can use\n\\code{getPromoters()/getBioRegion()/makeBioRegionFromGranges()} to \nget \\code{window} and put it into \\code{getTagMatrix()}. \n\nThe second way is that users can use \\code{getTagMatrix()} to\ncall \\code{getPromoters()/getBioRegion()/makeBioRegionFromGranges()}. In this way\nusers do not need to input \\code{window} parameter but they need to input\n\\code{txdb}. \n\n\\code{txdb} is a set of packages contained annotation \nof regions of different genomes. Users can\nget the regions of interest through specific functions. 
These specific functions\nare built in \\code{getPromoters()/getBioRegion()}. Many regions can not be gain\nthrough \\code{txdb}, like insulator and enhancer regions. \nUsers can provide these regions in the form of granges object. \nThese self-made granges object will be passed to \\code{TxDb} parameter and they will\nbe passed to \\code{makeBioRegionFromGranges()} to produce the \\code{window}.\nIn a word, \\code{TxDb} parameter is a reference information. Users can\npass \\code{txdb object} or self-made granges into it.\n\nDetails see \\code{\\link{getPromoters}},\\code{\\link{getBioRegion}} and \\code{\\link{makeBioRegionFromGranges}}\n\n\\code{upstream} and \\code{downstream} parameter have different usages:\n\n(1) \\code{window} parameter is provided, \n\nif \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can use to extend \nthe flank of body region.\n\nif \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} do not\nplay a role in \\code{getTagMatrix()} function.\n\n(2) \\code{window} parameter is missing,\n\nif \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can use to extend \nthe flank of body region.\n\nif \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} refer to\nthe upstream and downstream of the start_site or the end_site.\n\n\\code{weightCol} refers to column in peak file. This column acts as a weight vaule. Details\nsee \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}\n\n\\code{nbin} refers to the number of bins. \\code{getTagMatrix()} provide a binning method\nto get the tag matrix.\n}\n"
  },
  {
    "path": "man/getTagMatrix.binning.internal.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{getTagMatrix.binning.internal}\n\\alias{getTagMatrix.binning.internal}\n\\title{getTagMatrix.binning.internal}\n\\usage{\ngetTagMatrix.binning.internal(\n  peak,\n  weightCol = NULL,\n  windows,\n  nbin = 800,\n  upstream = NULL,\n  downstream = NULL,\n  ignore_strand = FALSE\n)\n}\n\\arguments{\n\\item{peak}{peak peak file or GRanges object}\n\n\\item{weightCol}{weightCol column name of weight, default is NULL}\n\n\\item{windows}{windows a collection of region with equal or not equal size, eg. promoter region, gene region.}\n\n\\item{nbin}{the amount of nbines needed to be splited and it should not be more than min_body_length}\n\n\\item{upstream}{rel object, NULL or actual number}\n\n\\item{downstream}{rel object, NULL or actual number}\n\n\\item{ignore_strand}{ignore the strand information or not}\n}\n\\value{\ntagMatrix\n}\n\\description{\ncalculate the tagMatrix by binning\nthe idea was derived from the function of deeptools\nhttps://deeptools.readthedocs.io/en/develop/content/tools/computeMatrix.html\n}\n"
  },
  {
    "path": "man/getTagMatrix.internal.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{getTagMatrix.internal}\n\\alias{getTagMatrix.internal}\n\\title{getTagMatrix.internal}\n\\usage{\ngetTagMatrix.internal(peak, weightCol = NULL, windows, ignore_strand = FALSE)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{weightCol}{column name of weight, default is NULL}\n\n\\item{windows}{a collection of region with equal size, eg. promoter region.}\n\n\\item{ignore_strand}{ignore the strand information or not}\n}\n\\value{\ntagMatrix\n}\n\\description{\ncalculate the tag matrix\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/getTagMatrix2.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{getTagMatrix2}\n\\alias{getTagMatrix2}\n\\title{getTagMatrix2}\n\\usage{\ngetTagMatrix2(\n  peak,\n  upstream,\n  downstream,\n  windows_name,\n  type,\n  by,\n  TxDb = NULL,\n  weightCol = NULL,\n  nbin = NULL,\n  verbose = TRUE,\n  ignore_strand = FALSE\n)\n}\n\\arguments{\n\\item{peak}{peak peak file or GRanges object}\n\n\\item{upstream}{the distance of upstream extension}\n\n\\item{downstream}{the distance of downstream extension}\n\n\\item{windows_name}{the names of windows}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{by}{one of 'gene', 'transcript', 'exon', 'intron', '3UTR' , '5UTR', or specified by users}\n\n\\item{TxDb}{TxDb or self-made granges object, served as txdb}\n\n\\item{weightCol}{column name of weight, default is NULL}\n\n\\item{nbin}{the amount of nbines}\n\n\\item{verbose}{print message or not}\n\n\\item{ignore_strand}{ignore the strand information or not}\n}\n\\value{\ntagMatrix\n}\n\\description{\nNested function for getTagMatrix() to deal with multiple windows\n}\n\\details{\nThis is an internal function.\n}\n"
  },
  {
    "path": "man/getTagMatrix2.binning.internal.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{getTagMatrix2.binning.internal}\n\\alias{getTagMatrix2.binning.internal}\n\\title{internal function}\n\\usage{\ngetTagMatrix2.binning.internal(\n  peak,\n  weightCol = NULL,\n  windows,\n  windows_name,\n  nbin = 800,\n  upstream = NULL,\n  downstream = NULL,\n  ignore_strand = FALSE\n)\n}\n\\arguments{\n\\item{peak}{peak peak file or GRanges object}\n\n\\item{weightCol}{column name of weight, default is NULL}\n\n\\item{windows}{a collection of region}\n\n\\item{windows_name}{the name of windows}\n\n\\item{nbin}{the amount of nbines}\n\n\\item{upstream}{the distance of upstream extension}\n\n\\item{downstream}{the distance of downstream extension}\n\n\\item{ignore_strand}{ignore the strand information or not}\n}\n\\description{\ninternal function\n}\n"
  },
  {
    "path": "man/getTagMatrix2.internal.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{getTagMatrix2.internal}\n\\alias{getTagMatrix2.internal}\n\\title{getTagMatrix2.internal}\n\\usage{\ngetTagMatrix2.internal(\n  peak,\n  weightCol = NULL,\n  windows,\n  windows_name,\n  ignore_strand = FALSE\n)\n}\n\\arguments{\n\\item{peak}{peak peak file or GRanges object}\n\n\\item{weightCol}{column name of weight, default is NULL}\n\n\\item{windows}{a collection of region}\n\n\\item{windows_name}{the name of windows}\n\n\\item{ignore_strand}{ignore the strand information or not}\n}\n\\description{\ngetTagMatrix2.internal\n}\n"
  },
  {
    "path": "man/info.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ChIPseeker-package.R\n\\docType{data}\n\\name{info}\n\\alias{info}\n\\alias{ucsc_release}\n\\alias{gsminfo}\n\\alias{tagMatrixList}\n\\title{Information Datasets}\n\\description{\nucsc genome version, precalculated data and gsm information\n}\n\\keyword{datasets}\n"
  },
  {
    "path": "man/makeBioRegionFromGranges.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/tagMatrix.R\n\\name{makeBioRegionFromGranges}\n\\alias{makeBioRegionFromGranges}\n\\title{makeBioRegionFromGranges}\n\\usage{\nmakeBioRegionFromGranges(gr, by, type, upstream = 1000, downstream = 1000)\n}\n\\arguments{\n\\item{gr}{a grange object contain region of interest}\n\n\\item{by}{specify be users, e.g. gene, insulator, enhancer}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{upstream}{upstream from start site or end site, can be NULL if the type == 'body'}\n\n\\item{downstream}{downstream from start site or end site, can be NULL if the type == 'body'}\n}\n\\value{\nGRanges object\n}\n\\description{\nmake windows from granges object\n}\n\\details{\n\\code{makeBioRegionFromGranges()} function can make bioregion from granges object.\n\nThe differences between \\code{makeBioRegionFromGranges()} and \\code{getBioRegion()} is that\n\\code{getBioRegion()} get the region object from \\code{txdb} object but\n\\code{makeBioRegionFromGranges()} get the region from the granges object provided by users.\nFor example, \\code{txdb} object do not contain insulator or enhancer regions. Users can\nprovide these regions through self-made granges object\n\nThere are three kinds of regions, \\code{start_site}, \\code{end_site} and \\code{body}. \n\nWe take enhancer region to explain the differences of these three regions.\nenhancer: chr1 1000 1400. \n\n\\code{body} region refers to the 1000-1400bp.\n\n\\code{start_site} region with \\code{upstream = 100, downstream = 100} refers to 900-1100bp. \n\n\\code{end_site} region with \\code{upstream = 100, downstream = 100} refers to 1300-1500bp.\n\nIn \\code{makeBioRegionFromGranges()}, \\code{upstream} and \\code{downstream} can be\n\\code{NULL} if the \\code{type == 'body'}. \\code{by} should be specified by users and \ncan not be omitted. \\code{by} parameter will be used to made labels. 
\\code{type} should also\nbe specified.\n\n\\url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}\n}\n"
  },
  {
    "path": "man/make_label.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utilities.R\n\\name{make_label}\n\\alias{make_label}\n\\title{make label for figures}\n\\usage{\nmake_label(type, by)\n}\n\\arguments{\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}\n}\n\\description{\nmake label for figures\n}\n"
  },
  {
    "path": "man/overlap.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utilities.R\n\\name{overlap}\n\\alias{overlap}\n\\title{overlap}\n\\usage{\noverlap(Sets)\n}\n\\arguments{\n\\item{Sets}{a list of objects}\n}\n\\value{\ndata.frame\n}\n\\description{\ncalculate the overlap matrix, which is useful for vennplot\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/peakHeatmap.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{peakHeatmap}\n\\alias{peakHeatmap}\n\\title{peakHeatmap}\n\\usage{\npeakHeatmap(\n  peak,\n  weightCol = NULL,\n  TxDb = NULL,\n  upstream = 1000,\n  downstream = 1000,\n  xlab = \"\",\n  ylab = \"\",\n  title = NULL,\n  palette = NULL,\n  verbose = TRUE,\n  by = \"gene\",\n  type = \"start_site\",\n  nbin = NULL,\n  ignore_strand = FALSE,\n  windows,\n  ncol = NULL,\n  nrow = NULL\n)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{weightCol}{column name of weight}\n\n\\item{TxDb}{TxDb object}\n\n\\item{upstream}{upstream position}\n\n\\item{downstream}{downstream position}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{title}{title}\n\n\\item{palette}{palette to be filled in,details see \\link[ggplot2]{scale_colour_brewer}}\n\n\\item{verbose}{print message or not}\n\n\\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{nbin}{the amount of nbines}\n\n\\item{ignore_strand}{ignore the strand information or not}\n\n\\item{windows}{a collection of region}\n\n\\item{ncol}{the ncol of plotting a list of peak}\n\n\\item{nrow}{the nrow of plotting a list of peak}\n}\n\\value{\nfigure\n}\n\\description{\nplot the heatmap of peaks\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/peakHeatmap_multiple_Sets.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{peakHeatmap_multiple_Sets}\n\\alias{peakHeatmap_multiple_Sets}\n\\title{peakHeatmap}\n\\usage{\npeakHeatmap_multiple_Sets(\n  peak,\n  weightCol = NULL,\n  TxDb = NULL,\n  upstream = 1000,\n  downstream = 1000,\n  xlab = \"\",\n  ylab = \"\",\n  title = NULL,\n  palette = NULL,\n  verbose = TRUE,\n  by = \"gene\",\n  type = \"start_site\",\n  nbin = NULL,\n  ignore_strand = FALSE,\n  windows_name = NULL,\n  ncol = NULL,\n  nrow = NULL,\n  facet_label_text_size = 12\n)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{weightCol}{column name of weight}\n\n\\item{TxDb}{TxDb object}\n\n\\item{upstream}{upstream position}\n\n\\item{downstream}{downstream position}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{title}{title}\n\n\\item{palette}{palette to be filled in,details see \\link[ggplot2]{scale_colour_brewer}}\n\n\\item{verbose}{print message or not}\n\n\\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{nbin}{the amount of nbines}\n\n\\item{ignore_strand}{ignore the strand information or not}\n\n\\item{windows_name}{the name for each window, which will also be showed in the picture as labels}\n\n\\item{ncol}{the ncol of plotting a list of peak}\n\n\\item{nrow}{the nrow of plotting a list of peak}\n\n\\item{facet_label_text_size}{the size of facet label text}\n}\n\\value{\nfigure\n}\n\\description{\nplot the heatmap of peaks align to a sets of regions\n}\n"
  },
  {
    "path": "man/peak_Profile_Heatmap.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{peak_Profile_Heatmap}\n\\alias{peak_Profile_Heatmap}\n\\title{peak_Profile_Heatmap}\n\\usage{\npeak_Profile_Heatmap(\n  peak,\n  weightCol = NULL,\n  TxDb = NULL,\n  upstream = 1000,\n  downstream = 1000,\n  xlab = \"\",\n  ylab = \"\",\n  title = NULL,\n  palette = NULL,\n  verbose = TRUE,\n  by = \"gene\",\n  type = \"start_site\",\n  nbin = NULL,\n  ignore_strand = FALSE,\n  windows_name = NULL,\n  ncol = NULL,\n  nrow = NULL,\n  facet_label_text_size = 12,\n  conf,\n  facet = \"row\",\n  free_y = TRUE,\n  height_proportion = 4\n)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{weightCol}{column name of weight}\n\n\\item{TxDb}{TxDb object}\n\n\\item{upstream}{upstream position}\n\n\\item{downstream}{downstream position}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{title}{title}\n\n\\item{palette}{palette to be filled in,details see \\link[ggplot2]{scale_colour_brewer}}\n\n\\item{verbose}{print message or not}\n\n\\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{nbin}{the amount of nbines}\n\n\\item{ignore_strand}{ignore the strand information or not}\n\n\\item{windows_name}{the name for each window, which will also be showed in the picture as labels}\n\n\\item{ncol}{the ncol of plotting a list of peak}\n\n\\item{nrow}{the nrow of plotting a list of peak}\n\n\\item{facet_label_text_size}{the size of facet label text}\n\n\\item{conf}{confidence interval}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{height_proportion}{the proportion of profiling picture and heatmap}\n}\n\\description{\nplot peak heatmap and profile in a picture\n}\n"
  },
  {
    "path": "man/plotAnnoBar-methods.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/AllGenerics.R, R/csAnno.R\n\\docType{methods}\n\\name{plotAnnoBar}\n\\alias{plotAnnoBar}\n\\alias{plotAnnoBar,list-method}\n\\alias{plotAnnoBar,csAnno,ANY-method}\n\\title{plotAnnoBar method generics}\n\\usage{\nplotAnnoBar(\n  x,\n  xlab = \"\",\n  ylab = \"Percentage(\\%)\",\n  title = \"Feature Distribution\",\n  ...\n)\n\n\\S4method{plotAnnoBar}{list}(\n  x,\n  xlab = \"\",\n  ylab = \"Percentage(\\%)\",\n  title = \"Feature Distribution\",\n  ...\n)\n\nplotAnnoBar(x, xlab=\"\", ylab='Percentage(\\%)',title=\"Feature Distribution\", ...)\n}\n\\arguments{\n\\item{x}{\\code{csAnno} instance}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{title}{title}\n\n\\item{...}{additional parameter}\n}\n\\value{\nplot\n}\n\\description{\nplotAnnoBar method for \\code{csAnno} instance\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/plotAnnoBar.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotAnno.R\n\\name{plotAnnoBar.data.frame}\n\\alias{plotAnnoBar.data.frame}\n\\title{plotAnnoBar.data.frame}\n\\usage{\nplotAnnoBar.data.frame(\n  anno.df,\n  xlab = \"\",\n  ylab = \"Percentage(\\%)\",\n  title = \"Feature Distribution\",\n  categoryColumn\n)\n}\n\\arguments{\n\\item{anno.df}{annotation stats}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{title}{plot title}\n\n\\item{categoryColumn}{category column}\n}\n\\value{\nbar plot that summarize genomic features of peaks\n}\n\\description{\nplot feature distribution based on their chromosome region\n}\n\\details{\nplot chromosome region features\n}\n\\seealso{\n\\code{\\link{annotatePeak}} \\code{\\link{plotAnnoPie}}\n}\n\\author{\nGuangchuang Yu \\url{https://yulab-smu.top}\n}\n"
  },
  {
    "path": "man/plotAnnoPie-methods.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/AllGenerics.R, R/csAnno.R\n\\docType{methods}\n\\name{plotAnnoPie}\n\\alias{plotAnnoPie}\n\\alias{plotAnnoPie,csAnno,ANY-method}\n\\title{plotAnnoPie method generics}\n\\usage{\nplotAnnoPie(\n  x,\n  ndigit = 2,\n  cex = 0.9,\n  col = NA,\n  legend.position = \"rightside\",\n  pie3D = FALSE,\n  radius = 0.8,\n  ...\n)\n\nplotAnnoPie(x,ndigit=2,cex=0.9,col=NA,legend.position=\"rightside\",pie3D=FALSE,radius=0.8,...)\n}\n\\arguments{\n\\item{x}{\\code{csAnno} instance}\n\n\\item{ndigit}{number of digit to round}\n\n\\item{cex}{label cex}\n\n\\item{col}{color}\n\n\\item{legend.position}{topright or other.}\n\n\\item{pie3D}{plot in 3D or not}\n\n\\item{radius}{radius of the pie}\n\n\\item{...}{extra parameter}\n}\n\\value{\nplot\n}\n\\description{\nplotAnnoPie method for \\code{csAnno} instance\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/plotAnnoPie.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotAnno.R\n\\name{plotAnnoPie.csAnno}\n\\alias{plotAnnoPie.csAnno}\n\\title{plotAnnoPie}\n\\usage{\nplotAnnoPie.csAnno(\n  x,\n  ndigit = 2,\n  cex = 0.8,\n  col = NA,\n  legend.position = \"rightside\",\n  pie3D = FALSE,\n  radius = 0.8,\n  ...\n)\n}\n\\arguments{\n\\item{x}{csAnno object}\n\n\\item{ndigit}{number of digit to round}\n\n\\item{cex}{label cex}\n\n\\item{col}{color}\n\n\\item{legend.position}{topright or other.}\n\n\\item{pie3D}{plot in 3D or not}\n\n\\item{radius}{radius of Pie}\n\n\\item{...}{extra parameter}\n}\n\\value{\npie plot of peak genomic feature annotation\n}\n\\description{\npieplot from peak genomic annotation\n}\n\\examples{\n\\dontrun{\nrequire(TxDb.Hsapiens.UCSC.hg19.knownGene)\ntxdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\npeakfile <- system.file(\"extdata\", \"sample_peaks.txt\", package=\"chipseeker\")\npeakAnno <- annotatePeak(peakfile, TxDb=txdb)\nplotAnnoPie(peakAnno)\n}\n}\n\\seealso{\n\\code{\\link{annotatePeak}} \\code{\\link{plotAnnoBar}}\n}\n\\author{\nGuangchuang Yu \\url{https://yulab-smu.top}\n}\n"
  },
  {
    "path": "man/plotAvgProf.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotAvgProf}\n\\alias{plotAvgProf}\n\\title{plotAvgProf}\n\\usage{\nplotAvgProf(\n  tagMatrix,\n  xlim,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  conf,\n  facet = \"none\",\n  free_y = TRUE,\n  origin_label = \"TSS\",\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix or a list of tagMatrix}\n\n\\item{xlim}{xlim}\n\n\\item{xlab}{x label}\n\n\\item{ylab}{y label}\n\n\\item{conf}{confidence interval}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{origin_label}{label of the center}\n\n\\item{verbose}{print message or not}\n\n\\item{...}{additional parameter}\n}\n\\value{\nggplot object\n}\n\\description{\nplot the profile of peaks\n}\n\\author{\nG Yu; Y Yan\n}\n"
  },
  {
    "path": "man/plotAvgProf.binning.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotAvgProf.binning}\n\\alias{plotAvgProf.binning}\n\\title{plotAvgProf.binning}\n\\usage{\nplotAvgProf.binning(\n  tagMatrix,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  conf,\n  facet = \"none\",\n  free_y = TRUE,\n  upstream = NULL,\n  downstream = NULL,\n  label,\n  ...\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix or a list of tagMatrix}\n\n\\item{xlab}{x label}\n\n\\item{ylab}{y label}\n\n\\item{conf}{confidence interval}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled}\n\n\\item{upstream}{rel object reflects the percentage of flank extension, e.g rel(0.2)\ninteger reflects the actual length of flank extension or TSS region\nNULL reflects the gene body with no extension}\n\n\\item{downstream}{rel object reflects the percentage of flank extension, e.g rel(0.2)\ninteger reflects the actual length of flank extension or TSS region\nNULL reflects the gene body with no extension}\n\n\\item{label}{label}\n\n\\item{...}{additional parameter}\n}\n\\value{\nggplot object\n}\n\\description{\nplot the profile of peaks  by binning\n}\n"
  },
  {
    "path": "man/plotAvgProf2.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotAvgProf2}\n\\alias{plotAvgProf2}\n\\title{plotAvgProf}\n\\usage{\nplotAvgProf2(\n  peak,\n  weightCol = NULL,\n  TxDb = NULL,\n  upstream = 1000,\n  downstream = 1000,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  conf,\n  facet = \"none\",\n  free_y = TRUE,\n  verbose = TRUE,\n  ignore_strand = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{weightCol}{column name of weight}\n\n\\item{TxDb}{TxDb object}\n\n\\item{upstream}{upstream position}\n\n\\item{downstream}{downstream position}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{conf}{confidence interval}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{verbose}{print message or not}\n\n\\item{ignore_strand}{ignore the strand information or not}\n\n\\item{...}{additional parameter}\n}\n\\value{\nggplot object\n}\n\\description{\nplot the profile of peaks that align to flank sequences of TSS\n}\n\\details{\nThis function is the old function of \\code{plotPeakProf2}. It can\nonly plot the start site region of gene.\n}\n\\author{\nG Yu, Ming L\n}\n"
  },
  {
    "path": "man/plotDistToTSS-methods.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/AllGenerics.R, R/csAnno.R\n\\docType{methods}\n\\name{plotDistToTSS}\n\\alias{plotDistToTSS}\n\\alias{plotDistToTSS,list-method}\n\\alias{plotDistToTSS,csAnno,ANY-method}\n\\title{plotDistToTSS method generics}\n\\usage{\nplotDistToTSS(\n  x,\n  distanceColumn = \"distanceToTSS\",\n  xlab = \"\",\n  ylab = \"Binding sites (\\%) (5'->3')\",\n  title = \"Distribution of transcription factor-binding loci relative to TSS\",\n  ...\n)\n\n\\S4method{plotDistToTSS}{list}(\n  x,\n  distanceColumn = \"distanceToTSS\",\n  xlab = \"\",\n  ylab = \"Binding sites (\\%) (5'->3')\",\n  title = \"Distribution of transcription factor-binding loci relative to TSS\",\n  distanceBreaks = c(0, 1000, 3000, 5000, 10000, 1e+05),\n  palette = NULL,\n  ...\n)\n\nplotDistToTSS(x,distanceColumn=\"distanceToTSS\", xlab=\"\",\nylab=\"Binding sites (\\%) (5'->3')\",\ntitle=\"Distribution of transcription factor-binding loci relative to TSS\",...)\n}\n\\arguments{\n\\item{x}{\\code{csAnno} instance}\n\n\\item{distanceColumn}{distance column name}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{title}{title}\n\n\\item{...}{additional parameter}\n\n\\item{distanceBreaks}{breaks of distance, default is 'c(0, 1000, 3000, 5000, 10000, 100000)'}\n\n\\item{palette}{palette name for coloring different distances. Run `RColorBrewer::display.brewer.all()` to see all applicable values.}\n}\n\\value{\nplot\n}\n\\description{\nplotDistToTSS method for \\code{csAnno} instance\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/plotDistToTSS.data.frame.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotDistToTSS.R\n\\name{plotDistToTSS.data.frame}\n\\alias{plotDistToTSS.data.frame}\n\\title{plotDistToTSS.data.frame}\n\\usage{\nplotDistToTSS.data.frame(\n  peakDist,\n  distanceColumn = \"distanceToTSS\",\n  distanceBreaks = c(0, 1000, 3000, 5000, 10000, 1e+05),\n  palette = NULL,\n  xlab = \"\",\n  ylab = \"Binding sites (\\%) (5'->3')\",\n  title = \"Distribution of transcription factor-binding loci relative to TSS\",\n  categoryColumn = \".id\"\n)\n}\n\\arguments{\n\\item{peakDist}{peak annotation}\n\n\\item{distanceColumn}{column name of the distance from peak to nearest gene}\n\n\\item{distanceBreaks}{default is 'c(0, 1000, 3000, 5000, 10000, 100000)'}\n\n\\item{palette}{palette name for coloring different distances. Run `RColorBrewer::display.brewer.all()` to see all applicable values.}\n\n\\item{xlab}{x label}\n\n\\item{ylab}{y lable}\n\n\\item{title}{figure title}\n\n\\item{categoryColumn}{category column, default is \".id\"}\n}\n\\value{\nbar plot that summarize distance from peak to\nTSS of the nearest gene.\n}\n\\description{\nplot feature distribution based on the distances to the TSS\n}\n\\examples{\n\\dontrun{\nrequire(TxDb.Hsapiens.UCSC.hg19.knownGene)\ntxdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\npeakfile <- system.file(\"extdata\", \"sample_peaks.txt\", package=\"ChIPseeker\")\npeakAnno <- annotatePeak(peakfile, TxDb=txdb)\nplotDistToTSS(peakAnno)\n}\n}\n\\seealso{\n\\code{\\link{annotatePeak}}\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/plotMultiProf.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotMultiProf}\n\\alias{plotMultiProf}\n\\title{internal function for plotPeakProf_MultiWindows}\n\\usage{\nplotMultiProf(\n  tagMatrix,\n  conf,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  facet = \"none\",\n  free_y = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix}\n\n\\item{conf}{confidence interval}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{...}{additional parameter}\n}\n\\description{\ninternal function for plotPeakProf_MultiWindows\n}\n"
  },
  {
    "path": "man/plotMultiProf.binning.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotMultiProf.binning}\n\\alias{plotMultiProf.binning}\n\\title{internal function}\n\\usage{\nplotMultiProf.binning(\n  tagMatrix,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  conf,\n  facet = \"none\",\n  free_y = TRUE,\n  upstream = NULL,\n  downstream = NULL,\n  label,\n  ...\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{conf}{confidence interval}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{upstream}{the upstream extension}\n\n\\item{downstream}{the downstream extension}\n\n\\item{label}{the label of the center}\n\n\\item{...}{additional parameter}\n}\n\\description{\ninternal function\n}\n"
  },
  {
    "path": "man/plotMultiProf.binning.internal.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotMultiProf.binning.internal}\n\\alias{plotMultiProf.binning.internal}\n\\title{internal function}\n\\usage{\nplotMultiProf.binning.internal(\n  tagMatrix,\n  conf,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  facet = \"none\",\n  free_y = TRUE,\n  upstream = NULL,\n  downstream = NULL,\n  label,\n  ...\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix}\n\n\\item{conf}{confidence interval}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{upstream}{the upstream extension}\n\n\\item{downstream}{the downstream extension}\n\n\\item{label}{the label of the center}\n\n\\item{...}{additional parameter}\n}\n\\description{\ninternal function\n}\n"
  },
  {
    "path": "man/plotMultiProf.normal.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotMultiProf.normal}\n\\alias{plotMultiProf.normal}\n\\title{internal function}\n\\usage{\nplotMultiProf.normal(\n  tagMatrix,\n  xlim,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  conf,\n  facet = \"none\",\n  free_y = TRUE,\n  origin_label = \"TSS\",\n  verbose = TRUE,\n  ...\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix}\n\n\\item{xlim}{xlim}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{conf}{confidence interval}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{origin_label}{the label of the center}\n\n\\item{verbose}{print message or not}\n\n\\item{...}{additional parameter}\n}\n\\description{\ninternal function\n}\n"
  },
  {
    "path": "man/plotMultiProf.normal.internal.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotMultiProf.normal.internal}\n\\alias{plotMultiProf.normal.internal}\n\\title{internal function}\n\\usage{\nplotMultiProf.normal.internal(\n  tagMatrix,\n  conf,\n  xlim = c(-3000, 3000),\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  facet = \"row\",\n  free_y = TRUE,\n  origin_label,\n  ...\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix}\n\n\\item{conf}{confidence interval}\n\n\\item{xlim}{xlim}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{origin_label}{the label of the center}\n\n\\item{...}{additional parameter}\n}\n\\description{\ninternal function\n}\n"
  },
  {
    "path": "man/plotPeakProf.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotPeakProf}\n\\alias{plotPeakProf}\n\\title{plotPeakProf_MultiWindows}\n\\usage{\nplotPeakProf(\n  tagMatrix = NULL,\n  peak,\n  upstream,\n  downstream,\n  conf,\n  by,\n  type,\n  windows_name = NULL,\n  weightCol = NULL,\n  TxDb = NULL,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  facet = \"row\",\n  free_y = TRUE,\n  verbose = TRUE,\n  nbin = NULL,\n  ignore_strand = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix or a list of tagMatrix}\n\n\\item{peak}{peak file or GRanges object}\n\n\\item{upstream}{upstream position}\n\n\\item{downstream}{downstream position}\n\n\\item{conf}{confidence interval}\n\n\\item{by}{feature of interest}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{windows_name}{the name for each window, which will also be showed in the picture as labels}\n\n\\item{weightCol}{column name of weight}\n\n\\item{TxDb}{TxDb object or self-made granges objects}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{verbose}{print message or not}\n\n\\item{nbin}{the amount of bines}\n\n\\item{ignore_strand}{ignore the strand information or not}\n\n\\item{...}{additional parameter}\n}\n\\value{\nggplot object\n}\n\\description{\nplot the profile of peaks\n`\n\\code{plotPeakProf_MultiWindows()} is almost the same as \\code{plotPeakProf2()}, having\nthe main difference of accepting two or more granges objects. Accepting more\ngranges objects can help compare the same peaks in different windows.\n}\n\\details{\n\\code{TxDb} parameter can accept txdb object.\nBut many regions can not be obtained by txdb object. 
In this case,\nUsers can provide self-made granges served the same role \nas txdb object and pass to \\code{TxDb} object.\n\n\\code{by} the features of interest. \n\n(1) if users use \\code{txdb}, \\code{by} can be one of 'gene', 'transcript', 'exon', \n'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.\n\n(2) if users use self-made granges object, \\code{by} can be everything. Because this \\code{by}\nwill not pass to functions to get features, which is different from the case of using \ntxdb object. This \\code{by} is only used to made labels showed in picture.\n\n\\code{type} means the property of the region. one of the \"start site\",\n\"end site\" and \"body\".\n\n\\code{upstream} and \\code{downstream} parameter have different usages:\n\n(1) if \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can use to extend \nthe flank of body region.\n\n(2) if \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} refer to\nthe upstream and downstream of the start_site or the end_site.\n\n\\code{weightCol} refers to column in peak file. This column acts as a weight value. Details\nsee \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}\n\n\\code{nbin} refers to the number of bins. \\code{getTagMatrix()} provide a binning method\nto get the tag matrix.\n\nThere are two ways input a list of window.\n\n(1) Users can input a list of self-made granges objects\n\n(2) Users can input a list of \\code{by} and only one \\code{type}. In this way, \n\\code{plotPeakProf_MultiWindows()} can made a list of window from txdb object based on \\code{by} and \\code{type}.\n\nWarning: \n\n(1) All of these window should be the same type. It means users can only\ncompare a list of \"start site\"/\"end site\"/\"body region\" with the same upstream\nand downstream.\n\n(2) So it will be only one \\code{type} and several \\code{by}.\n\n(3) Users can make window by txdb object or self-made granges object. 
Users can only\nchoose one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR' or 'UTR' in the\nway of using txdb object. User can input any \\code{by} in the way of using \nself-made granges object.\n\n(4) Users can mingle the \\code{by} designed for the two ways. \\code{plotPeakProf_MultiWindows} can\naccept the hybrid \\code{by}. But the above rules should be followed.\n\n\\url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}\n}\n"
  },
  {
    "path": "man/plotPeakProf2.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotPeakProf2}\n\\alias{plotPeakProf2}\n\\title{plotPeakProf2}\n\\usage{\nplotPeakProf2(\n  peak,\n  upstream,\n  downstream,\n  conf,\n  by,\n  type,\n  weightCol = NULL,\n  TxDb = NULL,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  facet = \"none\",\n  free_y = TRUE,\n  verbose = TRUE,\n  nbin = NULL,\n  ignore_strand = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{upstream}{upstream position}\n\n\\item{downstream}{downstream position}\n\n\\item{conf}{confidence interval}\n\n\\item{by}{e.g. 'gene', 'transcript', 'exon' or features of interest(e.g. \"enhancer\")}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{weightCol}{column name of weight}\n\n\\item{TxDb}{TxDb object, or self-made granges object}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{verbose}{print message or not}\n\n\\item{nbin}{the amount of nbines}\n\n\\item{ignore_strand}{ignore the strand information or not}\n\n\\item{...}{additional parameter}\n}\n\\value{\nggplot object\n}\n\\description{\nplot the profile of peaks automatically\n}\n\\details{\n\\code{peak} stands for the peak file. \n\n\\code{by} the features of interest. \n\n(1) if users use \\code{txdb}, \\code{by} can be one of 'gene', 'transcript', 'exon', \n'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.\n\n(2) if users use self-made granges object, \\code{by} can be everything. Because this \\code{by}\nwill not pass to functions to get features, which is different from the case of using \ntxdb object. This \\code{by} is only used to made labels showed in picture.\n\n\\code{type} means the property of the region. 
one of the \"start site\",\n\"end site\" and \"body\".\n\n\\code{upstream} and \\code{downstream} parameter have different usages:\n\n(1) if \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can use to extend \nthe flank of body region.\n\n(2) if \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} refer to\nthe upstream and downstream of the start_site or the end_site.\n\n\\code{weightCol} refers to column in peak file. This column acts as a weight value. Details\nsee \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}\n\n\\code{nbin} refers to the number of bins, providing a binning method\nto get the tag matrix.\n\n\\code{TxDb} parameter can accept txdb object.\nBut many regions can not be obtained by txdb object. In this case,\nUsers can provide self-made granges served the same role \nas txdb object and pass to \\code{TxDb} object.\n\n\\code{plotPeakProf2()} is different from the \\code{plotPeakProf()}. \\code{plotPeakProf2()} do not\nneed to provide \\code{window} parameter, which means \\code{plotPeakProf2()} will call relevant\nfunctions to make \\code{window} automatically.\n}\n\\author{\nG Yu, Ming Li\n}\n"
  },
  {
    "path": "man/plotPeakProf_MultiWindows.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{plotPeakProf_MultiWindows}\n\\alias{plotPeakProf_MultiWindows}\n\\title{plotPeakProf_MultiWindows}\n\\usage{\nplotPeakProf_MultiWindows(\n  peak,\n  upstream,\n  downstream,\n  conf,\n  by,\n  type,\n  windows_name = NULL,\n  weightCol = NULL,\n  TxDb = NULL,\n  xlab = \"Genomic Region (5'->3')\",\n  ylab = \"Peak Count Frequency\",\n  facet = \"row\",\n  free_y = TRUE,\n  verbose = TRUE,\n  nbin = NULL,\n  ignore_strand = FALSE,\n  ...\n)\n}\n\\arguments{\n\\item{peak}{peak file or GRanges object}\n\n\\item{upstream}{upstream position}\n\n\\item{downstream}{downstream position}\n\n\\item{conf}{confidence interval}\n\n\\item{by}{feature of interest}\n\n\\item{type}{one of \"start_site\", \"end_site\", \"body\"}\n\n\\item{windows_name}{the name for each window, which will also be showed in the picture as labels}\n\n\\item{weightCol}{column name of weight}\n\n\\item{TxDb}{TxDb object or self-made granges objects}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{facet}{one of 'none', 'row' and 'column'}\n\n\\item{free_y}{if TRUE, y will be scaled by AvgProf}\n\n\\item{verbose}{print message or not}\n\n\\item{nbin}{the amount of bines}\n\n\\item{ignore_strand}{ignore the strand information or not}\n\n\\item{...}{additional parameter}\n}\n\\value{\nggplot object\n}\n\\description{\nplot the profile of peaks in two or more windows\n}\n\\details{\nThis function comes from \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}\n`\n\\code{plotPeakProf_MultiWindows()} is almost the same as \\code{plotPeakProf2()}, having\nthe main difference of accepting two or more granges objects. Accepting more\ngranges objects can help compare the same peaks in different windows.\n\n\\code{TxDb} parameter can accept txdb object.\nBut many regions can not be obtained by txdb object. 
In this case,\nUsers can provide self-made granges served the same role \nas txdb object and pass to \\code{TxDb} object.\n\n\\code{by} the features of interest. \n\n(1) if users use \\code{txdb}, \\code{by} can be one of 'gene', 'transcript', 'exon', \n'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.\n\n(2) if users use self-made granges object, \\code{by} can be everything. Because this \\code{by}\nwill not pass to functions to get features, which is different from the case of using \ntxdb object. This \\code{by} is only used to made labels showed in picture.\n\n\\code{type} means the property of the region. one of the \"start site\",\n\"end site\" and \"body\".\n\n\\code{upstream} and \\code{downstream} parameter have different usages:\n\n(1) if \\code{type == 'body'}, \\code{upstream} and \\code{downstream} can use to extend \nthe flank of body region.\n\n(2) if \\code{type == 'start_site'/'end_site'}, \\code{upstream} and \\code{downstream} refer to\nthe upstream and downstream of the start_site or the end_site.\n\n\\code{weightCol} refers to column in peak file. This column acts as a weight value. Details\nsee \\url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}\n\n\\code{nbin} refers to the number of bins. \\code{getTagMatrix()} provide a binning method\nto get the tag matrix.\n\nThere are two ways input a list of window.\n\n(1) Users can input a list of self-made granges objects\n\n(2) Users can input a list of \\code{by} and only one \\code{type}. In this way, \n\\code{plotPeakProf_MultiWindows()} can made a list of window from txdb object based on \\code{by} and \\code{type}.\n\nWarning: \n\n(1) All of these window should be the same type. It means users can only\ncompare a list of \"start site\"/\"end site\"/\"body region\" with the same upstream\nand downstream.\n\n(2) So it will be only one \\code{type} and several \\code{by}.\n\n(3) Users can make window by txdb object or self-made granges object. 
Users can only\nchoose one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR' or 'UTR' in the\nway of using txdb object. User can input any \\code{by} in the way of using \nself-made granges object.\n\n(4) Users can mingle the \\code{by} designed for the two ways. \\code{plotPeakProf_MultiWindows} can\naccept the hybrid \\code{by}. But the above rules should be followed.\n}\n"
  },
  {
    "path": "man/readPeakFile.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/readPeakFile.R\n\\name{readPeakFile}\n\\alias{readPeakFile}\n\\title{readPeakFile}\n\\usage{\nreadPeakFile(peakfile, as = \"GRanges\", ...)\n}\n\\arguments{\n\\item{peakfile}{peak file}\n\n\\item{as}{output format, one of GRanges or data.frame}\n\n\\item{...}{additional parameter (pass to `utils::read.delim()`)}\n}\n\\value{\npeak information, in GRanges or data.frame object\n}\n\\description{\nread peak file and store in data.frame or GRanges object\n}\n\\examples{\npeakfile <- system.file(\"extdata\", \"sample_peaks.txt\", package=\"ChIPseeker\")\npeak.gr <- readPeakFile(peakfile, as=\"GRanges\")\npeak.gr\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/reexports.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utilities.R\n\\docType{import}\n\\name{reexports}\n\\alias{reexports}\n\\alias{GRangesList}\n\\alias{rel}\n\\title{Objects exported from other packages}\n\\keyword{internal}\n\\description{\nThese objects are imported from other packages. Follow the links\nbelow to see their documentation.\n\n\\describe{\n  \\item{GenomicRanges}{\\code{\\link[GenomicRanges:GRangesList-class]{GRangesList}}}\n\n  \\item{ggplot2}{\\code{\\link[ggplot2:element]{rel}}}\n}}\n\n"
  },
  {
    "path": "man/seq2gene.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/seq2gene.R\n\\name{seq2gene}\n\\alias{seq2gene}\n\\title{seq2gene}\n\\usage{\nseq2gene(seq, tssRegion, flankDistance, TxDb, sameStrand = FALSE)\n}\n\\arguments{\n\\item{seq}{genomic regions in GRanges object}\n\n\\item{tssRegion}{TSS region}\n\n\\item{flankDistance}{flanking search radius}\n\n\\item{TxDb}{TranscriptDb object}\n\n\\item{sameStrand}{logical whether find nearest/overlap gene in the same strand}\n}\n\\value{\ngene vector\n}\n\\description{\nannotate genomic regions to genes in many-to-many mapping\n}\n\\details{\nThis funciton associates genomic regions with coding genes in a many-to-many mapping. It first maps genomic regions to host genes (either located in exon or intron), proximal genes (located in promoter regions) and flanking genes (located in upstream and downstream within user specify distance).\n}\n\\examples{\n\\dontrun{\nlibrary(TxDb.Hsapiens.UCSC.hg19.knownGene)\nTxDb <- TxDb.Hsapiens.UCSC.hg19.knownGene\nfile <- getSampleFiles()[[1]] # a bed file\ngr <- readPeakFile(file)\ngenes <- seq2gene(gr, tssRegion=c(-1000, 1000), flankDistance = 3000, TxDb) \n}\n}\n\\author{\nGuangchuang Yu\n}\n"
  },
  {
    "path": "man/show-methods.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/csAnno.R\n\\docType{methods}\n\\name{show}\n\\alias{show}\n\\alias{show,csAnno,ANY-method}\n\\title{show method}\n\\usage{\nshow(object)\n}\n\\arguments{\n\\item{object}{A \\code{csAnno} instance}\n}\n\\value{\nmessage\n}\n\\description{\nshow method for \\code{csAnno} instance\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/shuffle.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/enrichOverlap.R\n\\name{shuffle}\n\\alias{shuffle}\n\\title{shuffle}\n\\usage{\nshuffle(peak.gr, TxDb)\n}\n\\arguments{\n\\item{peak.gr}{GRanges object}\n\n\\item{TxDb}{TxDb}\n}\n\\value{\nGRanges object\n}\n\\description{\nshuffle the position of peak\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/tagHeatmap.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/plotTagMatrix.R\n\\name{tagHeatmap}\n\\alias{tagHeatmap}\n\\title{tagHeatmap}\n\\usage{\ntagHeatmap(\n  tagMatrix,\n  xlab = \"\",\n  ylab = \"\",\n  title = NULL,\n  palette = \"RdBu\",\n  nrow = NULL,\n  ncol = NULL\n)\n}\n\\arguments{\n\\item{tagMatrix}{tagMatrix or a list of tagMatrix}\n\n\\item{xlab}{xlab}\n\n\\item{ylab}{ylab}\n\n\\item{title}{title}\n\n\\item{palette}{palette to be filled in,details see \\link[ggplot2]{scale_colour_brewer}}\n\n\\item{nrow}{the nrow of plotting a list of peak}\n\n\\item{ncol}{the ncol of plotting a list of peak}\n}\n\\value{\nfigure\n}\n\\description{\nplot the heatmap of tagMatrix\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/upsetplot-methods.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/csAnno.R\n\\docType{methods}\n\\name{upsetplot}\n\\alias{upsetplot}\n\\title{upsetplot method}\n\\usage{\nupsetplot(x, ...)\n}\n\\arguments{\n\\item{x}{A \\code{csAnno} instance}\n\n\\item{...}{additional parameter}\n}\n\\value{\nplot\n}\n\\description{\nupsetplot method generics\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/vennpie-methods.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/AllGenerics.R, R/csAnno.R\n\\docType{methods}\n\\name{vennpie}\n\\alias{vennpie}\n\\title{vennpie method generics}\n\\usage{\nvennpie(x, r = 0.2, cex = 1.2, ...)\n\nvennpie(x, r = 0.2, cex=1.2, ...)\n}\n\\arguments{\n\\item{x}{A \\code{csAnno} instance}\n\n\\item{r}{initial radius}\n\n\\item{cex}{value to adjust legend}\n\n\\item{...}{additional parameter}\n}\n\\value{\nplot\n}\n\\description{\nvennpie method generics\n}\n\\author{\nGuangchuang Yu \\url{https://guangchuangyu.github.io}\n}\n"
  },
  {
    "path": "man/vennplot.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/vennplot.R\n\\name{vennplot}\n\\alias{vennplot}\n\\title{vennplot}\n\\usage{\nvennplot(Sets, by = \"gplots\", ...)\n}\n\\arguments{\n\\item{Sets}{a list of object, can be vector or GRanges object}\n\n\\item{by}{one of gplots, ggVennDiagram or Vennerable}\n\n\\item{...}{extra parameters using ggVennDiagram. Details see \\link[ggVennDiagram]{ggVennDiagram}}\n}\n\\value{\nvenn plot that summarize the overlap of peaks\nfrom different experiments or gene annotation from\ndifferent peak files.\n}\n\\description{\nplot the overlap of a list of object\n}\n\\details{\nThere are two ways to plot, which users can specify through `by`.\n\nThe first way is to use `gplots` packages, by setting `by = gplots`. This method\nis default method. The venn plot produced through this way has no color.\n\nThe second way is to use `ggVennDiagram` packages, by setting `by = ggVennDiagram`. \nThe venn plot produced through this way has colors which can be defined by users using\nggplot2 grammar e.g.(scale_fill_distiller()). And users can specify any details, like digital number,\ntext size and showing percentage or not, by inputting `...` extra parameters.\n}\n\\examples{\n## example not run\n## require(TxDb.Hsapiens.UCSC.hg19.knownGene)\n## txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n## peakfiles <- getSampleFiles()\n## peakAnnoList <- lapply(peakfiles, annotatePeak)\n## names(peakAnnoList) <- names(peakfiles)\n## genes= lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)\n## vennplot(genes)\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "man/vennplot.peakfile.Rd",
    "content": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/vennplot.R\n\\name{vennplot.peakfile}\n\\alias{vennplot.peakfile}\n\\title{vennplot.peakfile}\n\\usage{\nvennplot.peakfile(files, labels = NULL)\n}\n\\arguments{\n\\item{files}{peak files}\n\n\\item{labels}{labels for peak files}\n}\n\\value{\nfigure\n}\n\\description{\nvennplot for peak files\n}\n\\author{\nG Yu\n}\n"
  },
  {
    "path": "tests/testthat/test-bed.R",
    "content": "library(ChIPseeker)\n\ncontext(\"bed file\")\n\ntest_that(\"parse bed file\", {\n    files <- getSampleFiles()\n    for (i in seq_along(files)) {\n        expect_true(is(readPeakFile(files[[i]]), \"GRanges\"))\n    }\n})\n\n"
  },
  {
    "path": "tests/testthat/test-getTagMatrix.R",
    "content": "library(ChIPseeker)\nlibrary(TxDb.Hsapiens.UCSC.hg19.knownGene)\n\ncontext(\"test getTagMatrix() and related functions\")\n\ntest_that(\"getBioRegion function\", {\n  \n  # test three kinds of regions derived from getBioRegion()\n  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n  \n  gene_start <- getBioRegion(TxDb = txdb,\n                             upstream = 1000,\n                             downstream = 1000,\n                             by = 'gene',\n                             type = \"start_site\")\n  expect_is(gene_start,\"GRanges\")\n  \n  gene_end <- getBioRegion(TxDb = txdb,\n                           upstream = 1000,\n                           downstream = 1000,\n                           by = 'gene',\n                           type = \"end_site\")\n  expect_is(gene_end,\"GRanges\")\n  \n  gene_body <- getBioRegion(TxDb = txdb,\n                            upstream = 1000,\n                            downstream = 1000,\n                            by = 'gene',\n                            type = \"body\")\n  expect_is(gene_body,\"GRanges\")\n  \n  })\n\ntest_that(\"getPromoters functions\",{\n  \n  # test two kinds of regions derived from getPromoters\n  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n  \n  gene <- getPromoters(TxDb=txdb,\n                       upstream=1000,\n                       downstream=1000,\n                       by = \"gene\")\n  \n  transcript <- getPromoters(TxDb=txdb,\n                             upstream=1000,\n                             downstream=1000,\n                             by = \"transcript\")\n  \n  expect_is(gene,\"GRanges\")\n  expect_is(transcript,\"GRanges\")\n  \n})\n\ntest_that(\"makeBioRegionFromGranges function\",{\n  \n  # we consider transcript region as enhancer region\n  # and make self-made granges object\n  # they can be the same in the form of granges object\n  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n  enhancer <- transcripts(txdb)[1:5000,]\n  \n  ## we test three kinds 
of region, start_site, end_site and body\n  enhancer_body <- makeBioRegionFromGranges(gr = enhancer,\n                                            by = \"enhancer\",\n                                            type = \"body\")\n  \n  enhancer_start <- makeBioRegionFromGranges(gr = enhancer,\n                                             by = \"enhancer\",\n                                             type = \"start_site\",\n                                             upstream = 1000,\n                                             downstream = 1000)\n  \n  enhancer_end <- makeBioRegionFromGranges(gr = enhancer,\n                                           by = \"enhancer\",\n                                           type = \"end_site\",\n                                           upstream = 1000,\n                                           downstream = 1000)\n  \n  expect_is(enhancer_body,\"GRanges\")\n  expect_is(enhancer_start,\"GRanges\")\n  expect_is(enhancer_end,\"GRanges\")\n  \n  ## test the label\n  expect_equal(attr(enhancer_body,'label'),c(\"enhancer_SS\",\"enhancer_TS\"))\n  expect_equal(attr(enhancer_start,'label'),\"enhancer_SS\")\n  expect_equal(attr(enhancer_end,'label'),\"enhancer_TS\")\n  \n})\n\ntest_that(\"getTagMatrix function for single peak file\",{\n  \n  peak <- getSampleFiles()[[4]]\n  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n  \n  # make the window by getBioRegion()\n  gene_start <- getBioRegion(TxDb = txdb,\n                             upstream = 1000,\n                             downstream = 1000,\n                             by = 'gene',\n                             type = \"start_site\")\n  \n  # make the window by makeBioRegionFromGranges()\n  enhancer <- transcripts(txdb)[1:5000,]\n  \n  enhancer_body <- makeBioRegionFromGranges(gr = enhancer,\n                                            by = \"enhancer\",\n                                            type = \"body\")\n  \n  # test input window parameter\n  mt1 <- 
getTagMatrix(peak = peak,\n                      windows = gene_start,\n                      weightCol = \"V5\")\n  \n  expect_is(mt1, \"matrix\")\n  \n  # without extending flank\n  mt2_1 <- getTagMatrix(peak = peak,\n                        windows = enhancer_body,\n                        weightCol = \"V5\",\n                        nbin = 800)\n  \n  expect_is(mt2_1, \"matrix\")\n  \n  # extend flank by rel object\n  mt2_2 <- getTagMatrix(peak = peak,\n                        windows = enhancer_body,\n                        weightCol = \"V5\",\n                        upstream = rel(0.2),\n                        downstream = rel(0.2),\n                        nbin = 800)\n  \n  expect_is(mt2_2, \"matrix\")\n  \n  # extend flank by actual number\n  mt2_3 <- getTagMatrix(peak = peak,\n                        windows = enhancer_body,\n                        weightCol = \"V5\",\n                        upstream = 1000,\n                        downstream = 1000,\n                        nbin = 800)\n  \n  expect_is(mt2_3, \"matrix\")\n  \n  # test input without window parameter \n  \n  # make window through txdb object\n  mt3 <- getTagMatrix(peak = peak,\n                      weightCol = \"V5\",\n                      TxDb = txdb,\n                      by = \"gene\",\n                      type = \"start_site\",\n                      upstream = 3000,\n                      downstream = 3000)\n  \n  expect_is(mt3, \"matrix\")\n  \n  # make window through self-made grange object\n  mt4 <- getTagMatrix(peak = peak,\n                      weightCol = \"V5\",\n                      TxDb = enhancer,\n                      by = \"gene\",\n                      type = \"start_site\",\n                      upstream = 1000,\n                      downstream = 1000)\n  \n  expect_is(mt4, \"matrix\")\n  \n  # without extending flank\n  mt5_1 <- getTagMatrix(peak = peak,\n                        weightCol = \"V5\",\n                        TxDb = txdb,\n                 
       by = \"gene\",\n                        type = \"body\",\n                        nbin = 800)\n  \n  expect_is(mt5_1, \"matrix\")\n  \n  # extend flank by rel object\n  mt5_2 <- getTagMatrix(peak = peak,\n                        TxDb = enhancer,\n                        weightCol = \"V5\",\n                        by = \"enhancer\",\n                        type = \"body\",\n                        upstream = rel(0.2),\n                        downstream = rel(0.2),\n                        nbin = 800)\n  \n  expect_is(mt5_2, \"matrix\")\n  \n  # extend flank by actual number\n  mt5_3 <- getTagMatrix(peak = peak,\n                        TxDb = txdb,\n                        weightCol = \"V5\",\n                        by = \"gene\",\n                        type = \"body\",\n                        upstream = 1000,\n                        downstream = 1000,\n                        nbin = 800)\n  \n  expect_is(mt5_3, \"matrix\")\n  \n})\n"
  },
  {
    "path": "tests/testthat/test-txdb.R",
    "content": "library(TxDb.Hsapiens.UCSC.hg19.knownGene)\nlibrary(TxDb.Hsapiens.UCSC.hg38.knownGene)\nlibrary(ChIPseeker)\nlibrary(yulab.utils)\n\ncontext(\"TXDB\")\n\ntest_that(\"Update txdb\", {\n    hg19_txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n    ChIPseeker:::.ChIPseekerEnv(hg19_txdb)\n    expect_equal(ChIPseeker:::get_env_genome(), \"hg19\")\n\n    hg38_txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene\n    ChIPseeker:::.ChIPseekerEnv(hg38_txdb)\n    expect_equal(ChIPseeker:::get_env_genome(), \"hg38\")\n})\n\n\ntest_that(\"txdb\", {\n    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\n    ChIPseeker:::.ChIPseekerEnv(txdb)\n    expect_equal(ChIPseeker:::IDType(txdb), \"Entrez Gene ID\")\n\n    if (packageVersion(\"TxDb.Hsapiens.UCSC.hg19.knownGene\") > \"3.22\") {\n        expect_equal(\n            ChIPseeker:::TXID2EG(\"70455\"),\n            \"ENST00000487630.1_3/ENST00000487630.1_3\"\n        )\n        expect_equal(\n            ChIPseeker:::TXID2EG(\"70455\", geneIdOnly = TRUE),\n            \"ENST00000487630.1_3\"\n        )\n    } else {\n        expect_equal(ChIPseeker:::TXID2EG(\"70455\"), \"uc002qsd.4/1\")\n        expect_equal(ChIPseeker:::TXID2EG(\"70455\", geneIdOnly = TRUE), \"1\")\n    }\n})\n"
  },
  {
    "path": "tests/testthat.R",
    "content": "library(testthat)\nlibrary(ChIPseeker)\n\ntest_check(\"ChIPseeker\")\n"
  },
  {
    "path": "vignettes/ChIPseeker.Rmd",
    "content": "---\ntitle: \"ChIPseeker: an R package for ChIP peak Annotation, Comparison and Visualization\"\nauthor: \"Guangchuang Yu\\\\\n\n        School of Basic Medical Sciences, Southern Medical University\"\ndate: \"`r Sys.Date()`\"\nbibliography: ChIPseeker.bib\nbiblio-style: apalike\noutput:\n  prettydoc::html_pretty:\n    toc: true\n    theme: cayman\n    highlight: github\n  pdf_document:\n    toc: true\nvignette: >\n  %\\VignetteIndexEntry{ChIPseeker: an R package for ChIP peak Annotation, Comparison and Visualization}\n  %\\VignetteEngine{knitr::rmarkdown}\n  %\\usepackage[utf8]{inputenc}\n  %\\VignetteEncoding{UTF-8}\n---\n\n\n```{r style, echo=FALSE, results='asis', message=FALSE}\nknitr::opts_chunk$set(tidy         = FALSE,\n                      warning      = FALSE,\n                      message      = FALSE)\n\nlibrary(yulab.utils)\nBiocannopkg <- yulab.utils::Biocpkg\n```\n\n```{r echo=FALSE, results='hide', message=FALSE}\nlibrary(GenomicFeatures)\nlibrary(GenomicRanges)\nlibrary(TxDb.Hsapiens.UCSC.hg19.knownGene)\nlibrary(org.Hs.eg.db)\nlibrary(ggplot2)\nlibrary(clusterProfiler)\nlibrary(ReactomePA)\nlibrary(ChIPseeker)\n```\n\n# Abstract\n\nChIPseeker is an R package for annotating ChIP-seq data analysis. It supports annotating ChIP peaks and provides functions to visualize ChIP peaks coverage over chromosomes and profiles of peaks binding to TSS regions. Comparison of ChIP peak profiles and annotation are also supported. Moreover, it supports evaluating significant overlap among ChIP-seq datasets. Currently, ChIPseeker contains 17,000 bed file information from GEO database. 
These datasets can be downloaded and compared with user's own data to explore significant overlap datasets for inferring co-regulation or transcription factor complex for further investigation.\n\n\n# Citation\n\nIf you use `r Biocpkg(\"ChIPseeker\")`[@yu_chipseeker_2015] in published research, please cite:\n\n+ Q Wang<sup>#</sup>, M Li<sup>#</sup>, T Wu, L Zhan, L Li, M Chen, W Xie, Z Xie, E Hu, S Xu, __G Yu__<sup>\\*</sup>. [Exploring epigenomic datasets by ChIPseeker](https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585). __*Current Protocols*__, 2022, 2(10): e585. \n+ __G Yu__<sup>\\*</sup>, LG Wang, QY He<sup>\\*</sup>. [ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization](http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btv145). __*Bioinformatics*__. 2015, 31(14):2382-2383. \n\n\n# Introduction\n\nChromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) has become a standard technology for genome wide identification of DNA-binding protein target sites. After read mapping and peak calling, the peaks should be annotated to answer the biological questions. Annotation also creates the possibility of integrating expression profile data to predict gene expression regulation. `r Biocpkg(\"ChIPseeker\")`[@yu_chipseeker_2015] was developed for annotating nearest genes and genomic features to peaks.\n\nChIP peak data set comparison is also very important. We can use it as an index to estimate how well biological replications are. Even more important is applying to infer cooperative regulation. If two ChIP seq data, obtained by two different binding proteins, overlap significantly, these two proteins may form a complex or have interaction in regulation chromosome remodelling or gene expression. 
`r Biocpkg(\"ChIPseeker\")`[@yu_chipseeker_2015] supports statistical testing of significant overlap among ChIP seq data sets, and incorporates the open access database GEO for users to compare their own dataset to those deposited in the database. Protein interaction hypothesis can be generated by mining data deposited in the database. Converting genome coordinates from one genome version to another is also supported, making this comparison available for different genome versions and different species.\n\nSeveral visualization functions are implemented to visualize the coverage of the ChIP seq data, peak annotation, average profile and heatmap of peaks binding to TSS region.\n\nFunctional enrichment analysis of the peaks can be performed by my Bioconductor packages `r Biocpkg(\"DOSE\")`[@yu_dose_2015], `r Biocpkg(\"ReactomePA\")`[@yu_reactomepa_2016], `r Biocpkg(\"clusterProfiler\")`[@yu_clusterprofiler_2012].\n\n```{r}\n## loading packages\nlibrary(ChIPseeker)\nlibrary(TxDb.Hsapiens.UCSC.hg19.knownGene)\ntxdb <- TxDb.Hsapiens.UCSC.hg19.knownGene\nlibrary(clusterProfiler)\n```\n\n# ChIP profiling\nThe datasets _CBX6_ and _CBX7_ in this vignette were downloaded from _GEO (GSE40740)_[@pemberton_genome-wide_2014] while _ARmo\\_0M_, _ARmo\\_1nM_ and _ARmo\\_100nM_ were downloaded from _GEO (GSE48308)_[@urbanucci_overexpression_2012]. `r Biocpkg(\"ChIPseeker\")` provides `readPeakFile` to load the peaks and store them in a `GRanges` object.\n\n```{r}\nfiles <- getSampleFiles()\nprint(files)\npeak <- readPeakFile(files[[4]])\npeak\n```\n\n## ChIP peaks coverage plot\n\nAfter peak calling, we would like to know the peak locations over the whole genome. The `covplot` function calculates the coverage of peak regions over chromosomes and generates a figure to visualize it. 
[GRangesList](https://guangchuangyu.github.io/2016/02/covplot-supports-grangeslist) is also supported and can be used to compare coverage of multiple bed files.\n\n\n```{r fig.height=8, fig.width=10}\ncovplot(peak, weightCol=\"V5\")\n```\n\n```{r fig.height=4, fig.width=10}\ncovplot(peak, weightCol=\"V5\", chrs=c(\"chr17\", \"chr18\"), xlim=c(4.5e7, 5e7))\n```\n\nWhen `peak` is a `GRangesList` object, users can set the colors directly or by passing a palette to `fill_color`.\n\n```{r fig.height=8, fig.width=10}\npeaks = lapply(files[4:5], readPeakFile)\ncovplot(peaks, weightCol = \"V5\", fill_color = c(\"red\",\"blue\")) +\n  theme(legend.position = \"inside\",\n        legend.position.inside = c(0.8,0.2))\n```\n\n\n## Profile of ChIP peaks binding to TSS regions\n\nFirst of all, for calculating the profile of ChIP peaks binding to TSS regions, we should prepare the TSS regions, which are defined as the flanking sequence of the TSS sites. Then align the peaks that are mapping to these regions, and generate the tagMatrix.\n\n\n```{r}\n## promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)\n## tagMatrix <- getTagMatrix(peak, windows=promoter)\n##\n## to speed up the compilation of this vignette, we use a precalculated tagMatrix\ndata(\"tagMatrixList\")\ntagMatrix <- tagMatrixList[[4]]\n```\n\nIn the above code, you should notice that tagMatrix is not restricted to TSS regions. The regions can be other types that are defined by the user. `r Biocpkg(\"ChIPseeker\")` expanded the scope of region. Users can input the `type` and `by` parameters to get the regions they want.\n\n### Heatmap of ChIP binding to TSS regions\n\n```{r fig.cap=\"Heatmap of ChIP peaks binding to TSS regions\", fig.align=\"center\", fig.height=9, fig.width=6}\ntagHeatmap(tagMatrix)\n```\n\n`r Biocpkg(\"ChIPseeker\")` provides a one-step function to generate this figure from a bed file. 
The following function will generate the same figure as above.\n\n```{r eval=FALSE}\npeakHeatmap(files[[4]], TxDb=txdb, upstream=3000, downstream=3000)\n```\n\nUsers can use `nbin` parameter to speed up.\n```{r eval=FALSE}\npeakHeatmap(files[[4]],TxDb = txdb,nbin = 800,upstream=3000, downstream=3000)\n\n```\n\nUsers can also use ggplot method to change the details of the figures.\n```{r eval=FALSE}\npeakHeatmap(files[[4]],TxDb = txdb,nbin = 800,upstream=3000, downstream=3000) +\n  scale_fill_distiller(palette = \"RdYlGn\")\n```\n\nUsers can also profile genebody regions with `peakHeatmap()`.\n```{r fig.cap=\"Heatmap of genebody regions\", fig.align=\"center\", fig.height=9, fig.width=6,results='hide'}\npeakHeatmap(peak = files[[4]],\n            TxDb = txdb,\n            upstream = rel(0.2),\n            downstream = rel(0.2),\n            by = \"gene\",\n            type = \"body\",\n            nbin = 800)\n```\n\nSometimes there will be a need to explore the comparison of the peak heatmap over two regions, for example, the following picture is the peak over two gene sets. One possible scenery of using this method is to compare the peak heatmap over up-regulating genes and down-regulating genes. Here `txdb1` and `txdb2` is the simulated gene sets obtain from `TxDb.Hsapiens.UCSC.hg19.knownGene`. Using `peakHeatmap_multiple_Sets()`, accepting `list` object containing different regions information. 
The length of each part is correlated to the amount of regions.\n```{r fig.cap=\"Heatmap of over two regions\", fig.align=\"center\", fig.height=9, fig.width=6,results='hide'}\ntxdb1 <- transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene)\ntxdb2 <- unlist(fiveUTRsByTranscript(TxDb.Hsapiens.UCSC.hg19.knownGene))[1:10000,]\n\nregion_list <- list(geneX = txdb1, geneY = txdb2)\npeakHeatmap_multiple_Sets(peak = files[[4]],\n                          upstream = 1000,downstream = 1000,\n                          by = c(\"geneX\",\"geneY\"),\n                          type = \"start_site\",\n                          TxDb = region_list,nbin = 800)\n```\n\nWe also meet the need of ploting heatmap and peak profiling together.\n```{r fig.cap=\"Combination of heatmap and peak profiling\", fig.align=\"center\", fig.height=9, fig.width=6,results='hide'}\npeak_Profile_Heatmap(peak = files[[4]],\n                     upstream = 1000,\n                     downstream = 1000,\n                     by = \"gene\",\n                     type = \"start_site\",\n                     TxDb = txdb,\n                     nbin = 800)\n```\n\nExploring several regions with heatmap and peak profiling is also supported.\n```{r fig.cap=\"Combination of heatmap and peak profiling over several regions\", fig.align=\"center\", fig.height=12, fig.width=6,results='hide'}\ntxdb1 <- transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene)\ntxdb2 <- unlist(fiveUTRsByTranscript(TxDb.Hsapiens.UCSC.hg19.knownGene))[1:10000,]\n\nregion_list <- list(geneX = txdb1, geneY = txdb2)\npeak_Profile_Heatmap(peak = files[[4]],\n                     upstream = 1000,\n                     downstream = 1000,\n                     by = c(\"geneX\",\"geneY\"),\n                     type = \"start_site\",\n                     TxDb = region_list,nbin = 800)\n```\n\n### Average Profile of ChIP peaks binding to TSS region\n\n```{r eval=TRUE, fig.cap=\"Average Profile of ChIP peaks binding to TSS region\", fig.align=\"center\", fig.height=4, 
fig.width=7}\nplotAvgProf(tagMatrix, xlim=c(-3000, 3000),\n            xlab=\"Genomic Region (5'->3')\", ylab = \"Read Count Frequency\")\n```\n\n\nThe function `plotAvgProf2` provides a one-step path from a bed file to an average profile plot. The following command will generate the same figure as shown above.\n\n```{r eval=FALSE}\nplotAvgProf2(files[[4]], TxDb=txdb, upstream=3000, downstream=3000,\n             xlab=\"Genomic Region (5'->3')\", ylab = \"Read Count Frequency\")\n```\n\nConfidence interval estimated by bootstrap method is also supported for characterizing ChIP binding profiles.\n\n```{r fig.cap=\"Average Profile of ChIP peaks binding to TSS region\", fig.align=\"center\", fig.height=4, fig.width=7, eval=F}\nplotAvgProf(tagMatrix, xlim=c(-3000, 3000), conf = 0.95, resample = 1000)\n```\n\n![](figures/plotAvgProf_boot.png)\n\n## Profile of ChIP peaks binding to different regions\n\nReferring to the [issue #16](https://github.com/GuangchuangYu/ChIPseeker/issues/16), we developed and improved several functions to support start site region, end site region and body region of Gene/Transcript/Exon/Intron/3UTR/5UTR. `getBioRegion` can prepare the different regions for ChIP peaks to bind. `getTagMatrix` can accept `type`, `by`, `upstream` and `downstream` parameters to get tagmatrix according to different needs. `plotPeakProf` and `plotPeakProf2` support the plotting of profiles of peaks binding to different regions. Users can also create heatmap or average profile of ChIP peaks binding to these regions.\n\nIn order to plot body regions, a new method, `binning`, was introduced to `getTagMatrix`. The idea of `binning` was derived from [deeptools](https://deeptools.readthedocs.io/en/develop/content/tools/computeMatrix.html)[@ramirez2016deeptools2]. `binning` scales regions of different lengths to an equal length by dividing the regions into the same number of boxes. 
Because the amount of boxes is equal, the regions can be thought of scaling to equal length.`binning` method can speed up the `getTagMatrix` by changing the precision from bp to box(several bps).\n\nThere are three ways to plot these regions. First, users can use `getBioRegion` to prepare the regions. Then align the peaks that are mapping to these regions, and generate the tagMatrix by `getTagMatrix`. At Last, plot the figures by `plotPeakProf`. Second, users can input `type` and `by` parameters to `getTagMatrix` to get the tagMatrix and plot the figures. Third, users can use `plotPeakProf2` to do everything in one step.\n\n### Binning method for profile of ChIP peaks binding to TSS regions\nHere uses the method of inputting `type` and `by` parameters. `type = \"start_site\"` means the start site region. `by = \"gene\"` means that it is the start site region of gene(TSS regions). If users want to use binning method, the `nbin` method must be set.\n```{r eval=F}\n## The results of binning method and normal method are nearly the same. \ntagMatrix_binning <- getTagMatrix(peak = peak, TxDb = txdb, \n                                  upstream = 3000, downstream = 3000, \n                                  type = \"start_site\", by = \"gene\", \n                                  weightCol = \"V5\", nbin = 800)\n```\n\n### Profile of ChIP peaks binding to body regions \nWe improved and developed several functions to plot body region of Gene/Transcript/Exon/Intron/3UTR/5UTR. If users want to get more information from the body region, we added `upstream` and `downstream` parameters to functions in order to get flank extension of body regions. `upstream` and `downstream` can be NULL(default), rel object and actual numbers. NULL(default) reflects body regions with no flank extension. Rel object reflects the percentage of total length of body regions. Actual numbers reflects the actual length of flank extension. 
\n\n```{r eval=F}\n## Here uses `plotPeakProf2` to do all things in one step.\n## Gene body regions having lengths smaller than nbin will be filtered\n## A message will be given to warning users about that.\n## >> 9 peaks(0.872093%), having lengths smaller than 800bp, are filtered...\n\n## the ignore_strand is FALSE in default. We put here to emphasize that.\n## We will not show it again in the below example\nplotPeakProf2(peak = peak, upstream = rel(0.2), downstream = rel(0.2),\n              conf = 0.95, by = \"gene\", type = \"body\", nbin = 800,\n              TxDb = txdb, weightCol = \"V5\",ignore_strand = F)\n```\n![](figures/plotPeakProf2_body_extend.png)\n\nUsers can also get the profile ChIP peaks binding to gene body regions with no flank extension or flank extension decided by actual length.\n```{r eval=F}\n## The first method using getBioRegion(), getTagMatrix() and plotPeakProf() to plot in three steps.\ngenebody <- getBioRegion(TxDb = txdb,\n                         by = \"gene\",\n                         type = \"body\")\n\nmatrix_no_flankextension <- getTagMatrix(peak,windows = genebody, nbin = 800)\n\nplotPeakProf(matrix_no_flankextension,conf = 0.95)\n\n## The second method of using getTagMatrix() and plotPeakProf() to plot in two steps\nmatrix_actual_extension <- getTagMatrix(peak,windows = genebody, nbin = 800,\n                                        upstream = 1000,downstream = 1000)\nplotPeakProf(matrix_actual_extension,conf = 0.95)\n\n```\n\nUsers can also get the body region of 5UTR/3UTR.\n```{r eval=F}\nfive_UTR_body <- getTagMatrix(peak = peak, \n                              TxDb = txdb,\n                              upstream = rel(0.2),\n                              downstream = rel(0.2), \n                              type = \"body\",\n                              by = \"5UTR\",\n                              weightCol = \"V5\",\n                              nbin = 50)\n\nplotPeakProf(tagMatrix = five_UTR_body, conf = 
0.95)\n```\n\n### Profile of ChIP peaks binding to TTS regions \n```{r eval=F}\nTTS_matrix <- getTagMatrix(peak = peak, \n                           TxDb = txdb,\n                           upstream = 3000,\n                           downstream = 3000, \n                           type = \"end_site\",\n                           by = \"gene\",\n                           weightCol = \"V5\")\n\nplotPeakProf(tagMatrix = TTS_matrix, conf = 0.95)\n```\n\n# Peak Annotation\n```{r}\npeakAnno <- annotatePeak(files[[4]], tssRegion=c(-3000, 3000),\n                         TxDb=txdb, annoDb=\"org.Hs.eg.db\")\n```\n\nNote that it would also be possible to use Ensembl-based `EnsDb` annotation \ndatabases created by the `r Biocpkg(\"ensembldb\")` package for the\npeak annotations by providing it with the `TxDb` parameter. Since UCSC-style \nchromosome names are used we have to change the style of the chromosome names\nfrom *Ensembl* to *UCSC* in the example below.\n\n```{r, eval = FALSE}\nlibrary(EnsDb.Hsapiens.v75)\nedb <- EnsDb.Hsapiens.v75\nseqlevelsStyle(edb) <- \"UCSC\"\n\npeakAnno.edb <- annotatePeak(files[[4]], tssRegion=c(-3000, 3000),\n                             TxDb=edb, annoDb=\"org.Hs.eg.db\")\n```\n\nPeak Annotation is performed by `annotatePeak`. User can define TSS (transcription start site) region, by default TSS is defined from -3kb to +3kb. The output of `annotatePeak` is `csAnno` instance. `r Biocpkg(\"ChIPseeker\")` provides `as.GRanges` to convert `csAnno` to `GRanges` instance, and `as.data.frame` to convert `csAnno` to `data.frame` which can be exported to file by `write.table`.\n\n`TxDb` object contained transcript-related features of a particular genome. 
Bioconductor provides several packages containing `TxDb` objects of model organisms with multiple commonly used genome versions, for instance `r Biocannopkg(\"TxDb.Hsapiens.UCSC.hg38.knownGene\")`, `r Biocannopkg(\"TxDb.Hsapiens.UCSC.hg19.knownGene\")` for human genome hg38 and hg19, `r Biocannopkg(\"TxDb.Mmusculus.UCSC.mm10.knownGene\")` and `r Biocannopkg(\"TxDb.Mmusculus.UCSC.mm9.knownGene\")` for mouse genome mm10 and mm9, etc. Users can also prepare their own `TxDb` object by retrieving information from UCSC Genome Bioinformatics and BioMart data resources with the R functions `makeTxDbFromBiomart` and `makeTxDbFromUCSC`. `TxDb` object should be passed for peak annotation.\n\nAll the peak information contained in peakfile will be retained in the output of `annotatePeak`. The position and strand information of nearest genes are reported. The distance from peak to the TSS of its nearest gene is also reported. The genomic region of the peak is reported in annotation column. Since some annotation may overlap, `r Biocpkg(\"ChIPseeker\")` adopted the following priority in genomic annotation.\n\n* Promoter\n* 5' UTR\n* 3' UTR\n* Exon\n* Intron\n* Downstream\n* Intergenic\n\n\n_Downstream_ is defined as the downstream of gene end.\n\n`r Biocpkg(\"ChIPseeker\")` also provides parameter _genomicAnnotationPriority_ for user to prioritize this hierarchy.\n\n`annotatePeak` reports detailed information when the annotation is Exon or Intron, for instance \"Exon (uc002sbe.3/9736, exon 69 of 80)\", which means that the peak overlaps with an Exon of transcript uc002sbe.3, and the corresponding Entrez gene ID is 9736 (Transcripts that belong to the same gene ID may differ in splice events), and this overlapped exon is the 69th exon of the 80 exons that this transcript uc002sbe.3 possesses.\n\nParameter annoDb is optional, if provided, extra columns including SYMBOL, GENENAME, ENSEMBL/ENTREZID will be added. The geneId column in annotation output will be consistent with the geneID in TxDb. 
If it is ENTREZID, ENSEMBL will be added if annoDb is provided, while if it is ENSEMBL ID, ENTREZID will be added.\n\n## Visualize Genomic Annotation\n\nTo annotate the location of a given peak in terms of genomic features, `annotatePeak` assigns peaks to genomic annotation in \"annotation\" column of the output, which includes whether a peak is in the TSS, Exon, 5' UTR, 3' UTR, Intronic or Intergenic. Many researchers are very interested in these annotations. TSS region can be defined by user and `annotatePeak` output in details of which exon/intron of which genes as illustrated in previous section.\n\nPie and bar plots are supported to visualize the genomic annotation.\n```{r fig.cap=\"Genomic Annotation by pieplot\", fig.align=\"center\", fig.height=6, fig.width=8}\nplotAnnoPie(peakAnno)\n```\n\n```{r fig.cap=\"Genomic Annotation by barplot\", fig.align=\"center\", fig.height=4, fig.width=10}\nplotAnnoBar(peakAnno)\n```\n\nSince some annotations overlap, users may be interested in viewing the full annotation with their overlap, which can be partially resolved by `vennpie` function.\n\n```{r fig.cap=\"Genomic Annotation by vennpie\", fig.align=\"center\", fig.height=8, fig.width=11}\nvennpie(peakAnno)\n```\n\nWe extend `r CRANpkg(\"UpSetR\")` to view full annotation overlap. Users can use the `upsetplot` function.\n\n```{r eval=F, fig.cap=\"Genomic Annotation by upsetplot\", fig.align=\"center\", fig.height=8, fig.width=12}\nupsetplot(peakAnno)\n```\n![](figures/upset.png)\n\nWe can combine `vennpie` with `upsetplot` by setting *vennpie = TRUE*.\n```{r eval=F, fig.cap=\"Genomic Annotation by upsetplot\", fig.align=\"center\", fig.height=8, fig.width=12}\nupsetplot(peakAnno, vennpie=TRUE)\n```\n![](figures/upset_vennpie.png)\n\n\n## Visualize distribution of TF-binding loci relative to TSS\n\nThe distance from the peak (binding site) to the TSS of the nearest gene is calculated by `annotatePeak` and reported in the output. 
We provide `plotDistToTSS` to calculate the percentage of binding sites upstream and downstream from the TSS of the nearest genes, and visualize the distribution.\n```{r fig.cap=\"Distribution of Binding Sites\", fig.align=\"center\", fig.height=2, fig.width=6}\nplotDistToTSS(peakAnno,\n              title=\"Distribution of transcription factor-binding loci\\nrelative to TSS\")\n```\n\n# Functional enrichment analysis\n\nOnce we have obtained the annotated nearest genes, we can perform functional enrichment analysis to identify predominant biological themes among these genes by incorporating biological knowledge provided by biological ontologies. For instance, Gene Ontology (GO)[@ashburner_gene_2000] annotates genes to biological processes, molecular functions, and cellular components in a directed acyclic graph structure, Kyoto Encyclopedia of Genes and Genomes (KEGG)[@kanehisa_kegg_2004] annotates genes to pathways, Disease Ontology (DO)[@schriml_disease_2011] annotates genes with human disease association, and Reactome[@croft_reactome_2013] annotates genes to pathways and reactions.\n\n`r Biocpkg(\"ChIPseeker\")` also provides a function, __*seq2gene*__, for linking genomic regions to genes in a many-to-many mapping. It considers the host gene (exon/intron), promoter region and flanking gene from intergenic region that may be under control via cis-regulation. This function is designed to link both coding and non-coding genomic regions to coding genes and facilitate functional analysis.\n\n\nEnrichment analysis is a widely used approach to identify biological themes. 
I have developed several Bioconductor packages for investigating whether the number of selected genes associated with a particular biological term is larger than expected, including `r Biocpkg(\"DOSE\")`[@yu_dose_2015] for Disease Ontology, `r Biocpkg(\"ReactomePA\")` for reactome pathway, `r Biocpkg(\"clusterProfiler\")`[@yu_clusterprofiler_2012] for Gene Ontology and KEGG enrichment analysis.\n\n```{r fig.width=8, fig.height=5}\nlibrary(ReactomePA)\n\npathway1 <- enrichPathway(as.data.frame(peakAnno)$geneId)\nhead(pathway1, 2)\n\ngene <- seq2gene(peak, tssRegion = c(-1000, 1000), flankDistance = 3000, TxDb=txdb)\npathway2 <- enrichPathway(gene)\nhead(pathway2, 2)\ndotplot(pathway2)\n```\n\nMore information can be found in the vignettes of Bioconductor packages `r Biocpkg(\"DOSE\")`[@yu_dose_2015], `r Biocpkg(\"ReactomePA\")`, `r Biocpkg(\"clusterProfiler\")`[@yu_clusterprofiler_2012], which also provide several methods to visualize enrichment results. The `r Biocpkg(\"clusterProfiler\")`[@yu_clusterprofiler_2012] is designed for comparing and visualizing functional profiles among gene clusters, and can directly applied to compare biological themes at GO, DO, KEGG, Reactome perspective.\n\n\n# ChIP peak data set comparison\n\n## Profile of several ChIP peak data binding to TSS region\n\nFunction `plotAvgProf`, `tagHeatmap` and `plotPeakProf` can accept a list of `tagMatrix` and visualize profile or heatmap among several ChIP experiments, while `plotAvgProf2` , `peakHeatmap` and `plotPeakProf2` can accept a list of bed files and perform the same task in one step.\n\n\n### Average profiles\n\n```{r eval=TRUE, fig.cap=\"Average Profiles of ChIP peaks among different experiments\", fig.align=\"center\", fig.height=4, fig.width=6}\n## promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)\n## tagMatrixList <- lapply(files, getTagMatrix, windows=promoter)\n##\n## to speed up the compilation of this vigenette, we load a precaculated 
tagMatrixList\ndata(\"tagMatrixList\")\nplotAvgProf(tagMatrixList, xlim=c(-3000, 3000))\n```\n\n```{r eval=FALSE, fig.cap=\"Average Profiles of ChIP peaks among different experiments\", fig.align=\"center\", fig.height=7, fig.width=6}\nplotAvgProf(tagMatrixList, xlim=c(-3000, 3000), conf=0.95,resample=500, facet=\"row\")\n```\n\n![](figures/plotAvgProf_boot_list.png)\n\n```{r eval=F}\n## normal method\nplotPeakProf2(files, upstream = 3000, downstream = 3000, conf = 0.95,\n              by = \"gene\", type = \"start_site\", TxDb = txdb,\n              facet = \"row\")\n\n## binning method \nplotPeakProf2(files, upstream = 3000, downstream = 3000, conf = 0.95,\n              by = \"gene\", type = \"start_site\", TxDb = txdb,\n              facet = \"row\", nbin = 800)\n\n```\n\n### Peak heatmaps\n\n```{r eval=TRUE, fig.cap=\"Heatmap of ChIP peaks among different experiments\", fig.align=\"center\", fig.height=8, fig.width=16}\ntagHeatmap(tagMatrixList)\n```\n\n## Profile of several ChIP peak data binding to body region\nFunctions `plotPeakProf` and `plotPeakProf2` also support to plot profile of several ChIP peak data binding to body region.\n```{r eval=F}\nplotPeakProf2(files, upstream = rel(0.2), downstream = rel(0.2),\n              conf = 0.95, by = \"gene\", type = \"body\",\n              TxDb = txdb, facet = \"row\", nbin = 800)\n```\n![](figures/plotPeakProf_body_boot_list_.png)\n\n## ChIP peak annotation comparision\nThe `plotAnnoBar` and `plotDistToTSS` can also accept input of a named list of annotated peaks (output of `annotatePeak`).\n\n\n```{r}\npeakAnnoList <- lapply(files, annotatePeak, TxDb=txdb,\n                       tssRegion=c(-3000, 3000), verbose=FALSE)\n```\n\nWe can use `plotAnnoBar` to comparing their genomic annotation.\n```{r fig.cap=\"Genomic Annotation among different ChIPseq data\", fig.align=\"center\", fig.height=4, fig.width=6}\nplotAnnoBar(peakAnnoList)\n```\n\nR function `plotDistToTSS` can use to comparing distance to TSS 
profiles among ChIPseq data.\n```{r fig.cap=\"Distribution of Binding Sites among different ChIPseq data\", fig.align=\"center\", fig.height=5, fig.width=8}\nplotDistToTSS(peakAnnoList)\n```\n\n## Functional profiles comparison\nAs shown in section 4, the annotated genes can be analyzed by `r\nBiocpkg(\"clusterProfiler\")`[@yu_clusterprofiler_2012], `r\nBiocpkg(\"DOSE\")`[@yu_dose_2015], `r Biocpkg(\"meshes\")` and `r\nBiocpkg(\"ReactomePA\")` for Gene Ontology, KEGG, Disease Ontology, MeSH and Reactome Pathway enrichment analysis.\n\nThe `r Biocpkg(\"clusterProfiler\")`[@yu_clusterprofiler_2012] package provides the `compareCluster` function for comparing biological themes among gene clusters, and can be easily adapted to compare different ChIP peak experiments.\n\n```{r fig.width=8.5, fig.height=8.5}\ngenes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)\nnames(genes) = sub(\"_\", \"\\n\", names(genes))\ncompKEGG <- compareCluster(geneCluster   = genes,\n                         fun           = \"enrichKEGG\",\n                         pvalueCutoff  = 0.05,\n                         pAdjustMethod = \"BH\")\ndotplot(compKEGG, showCategory = 15, title = \"KEGG Pathway Enrichment Analysis\")\n```\n\n\n## Overlap of peaks and annotated genes\n\nUsers may want to compare the overlapping peaks of replicate experiments or of different experiments. `r Biocpkg(\"ChIPseeker\")` provides `peak2GRanges` that can read a peak file and store it in a GRanges object. Several files can be read simultaneously using lapply, and then passed to `vennplot` to calculate their overlap and draw a Venn plot.\n\n`vennplot` accepts a list of objects, which can be a list of GRanges or a list of vectors. 
Here, I will demonstrate using `vennplot` to visualize the overlap of the nearest genes stored in peakAnnoList.\n\n```{r fig.cap=\"Overlap of annotated genes\", fig.align=\"center\", fig.height=7, fig.width=7}\ngenes= lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)\nvennplot(genes)\n```\n\n# Statistical testing of ChIP seq overlap\n\nOverlap is very important: if two ChIP experiments by two different proteins overlap in a large fraction of their peaks, they may cooperate in regulation. Calculating the overlap only touches the surface. `r Biocpkg(\"ChIPseeker\")` implemented statistical methods to measure the significance of the overlap.\n\n## Shuffle genome coordination\n\n```{r}\np <- GRanges(seqnames=c(\"chr1\", \"chr3\"),\n             ranges=IRanges(start=c(1, 100), end=c(50, 130)))\nshuffle(p, TxDb=txdb)\n```\n\nWe implement the `shuffle` function to randomly permute the genomic locations of ChIP peaks defined in a genome which is stored in a `TxDb` object.\n\n## Peak overlap enrichment analysis\n\nWith the ease of this `shuffle` method, we can generate thousands of random ChIP data and calculate the background null distribution of the overlap among ChIP data sets.\n\n```{r}\nenrichPeakOverlap(queryPeak     = files[[5]],\n                  targetPeak    = unlist(files[1:4]),\n                  TxDb          = txdb,\n                  pAdjustMethod = \"BH\",\n                  nShuffle      = 50,\n                  chainFile     = NULL,\n                  verbose       = FALSE)\n```\n\nParameter _queryPeak_ is the query ChIP data, while _targetPeak_ is a bed file name or a vector of bed file names for comparison; _nShuffle_ is the number of times to shuffle the peaks in _targetPeak_. To speed up the compilation of this vignette, we only set _nShuffle_ to 50 for demonstration. Users should set the number to 1000 or above for more robust results. 
Parameter _chainFile_ is the chain file name for mapping the _targetPeak_ to the genome version consistent with _queryPeak_ when their genome versions are different. This creates the possibility of comparison among different genome versions and across species.\n\nIn the output, _qSample_ is the name of _queryPeak_ and _qLen_ is the number of peaks in _queryPeak_. _N\\_OL_ is the number of overlaps between _queryPeak_ and _targetPeak_.\n\n\n# Data Mining with ChIP seq data deposited in GEO\n\nThere are many ChIP seq data sets that have been published and deposited in the GEO database. We can compare our own dataset to those deposited in GEO to search for significant overlap data. Significant overlap of ChIP seq data by different binding proteins may be used to infer cooperative regulation and thus can be used to generate hypotheses.\n\nWe collected about **17,000** bed files deposited in GEO; users can use `getGEOspecies` to get a summary based on species.\n\n## GEO data collection\n\n```{r}\ngetGEOspecies()\n```\n\n\nThe summary can also be based on genome version as illustrated below:\n\n```{r}\ngetGEOgenomeVersion()\n```\n\nUsers can access the detailed information by `getGEOInfo`, for each genome version.\n\n```{r}\nhg19 <- getGEOInfo(genome=\"hg19\", simplify=TRUE)\nhead(hg19)\n```\n\nIf _simplify_ is set to _FALSE_, extra information including _source\\_name_, _extract\\_protocol_, _description_, _data\\_processing_ and _submission\\_date_ will be incorporated.\n\n## Download GEO ChIP data sets\n\n`r Biocpkg(\"ChIPseeker\")` provides function `downloadGEObedFiles` to download all the bed files of a particular genome.\n\n```{r eval=FALSE}\ndownloadGEObedFiles(genome=\"hg19\", destDir=\"hg19\")\n```\n\nOr a vector of GSM accession numbers by `downloadGSMbedFiles`.\n```{r eval=FALSE}\ngsm <- hg19$gsm[sample(nrow(hg19), 10)]\ndownloadGSMbedFiles(gsm, destDir=\"hg19\")\n```\n\n\n## Overlap significant testing\n\nAfter downloading the bed files from GEO, we can pass them to 
`enrichPeakOverlap` for testing the significance of overlap. Parameter _targetPeak_ can be the folder, _e.g._ hg19, that contains bed files. `enrichPeakOverlap` will parse the folder and compare all the bed files. It is possible to test the overlap with bed files that are mapped to a different genome or different genome versions; `enrichPeakOverlap` provides a parameter _chainFile_ that can pass a chain file and liftOver the _targetPeak_ to the genome version consistent with _queryPeak_. Significant overlap can be used to generate hypotheses of cooperative regulation. By mining the data deposited in GEO, we can identify some putative complexes or interacting regulators in gene expression regulation or chromosome remodelling for further validation.\n\n\n# Need helps?\n\n\nIf you have questions/issues, please visit\n[ChIPseeker homepage](https://guangchuangyu.github.io/software/ChIPseeker/) first.\nYour problems are mostly documented. If you think you found a bug, please follow\n[the guide](https://guangchuangyu.github.io/2016/07/how-to-bug-author/) and\nprovide a reproducible example to be posted\non\n[github issue tracker](https://github.com/GuangchuangYu/ChIPseeker/issues).\nFor questions, please post\nto [Bioconductor support site](https://support.bioconductor.org/) and tag your\npost with *ChIPseeker*.\n\n\nFor Chinese users, you can follow me on [WeChat (微信)](https://guangchuangyu.github.io/blog_images/biobabble.jpg).\n\n\n# Session Information\n\nHere is the output of `sessionInfo()` on the system on which this document was compiled:\n\n```{r echo=FALSE}\nsessionInfo()\n```\n\n# References\n"
  },
  {
    "path": "vignettes/ChIPseeker.bib",
    "content": "\n@article{yu_reactomepa_2016,\n\ttitle = {{ReactomePA}: an R/Bioconductor package for reactome pathway analysis and visualization},\n\tvolume = {12},\n\tissn = {1742-2051},\n\turl = {http://pubs.rsc.org.eproxy2.lib.hku.hk/en/content/articlelanding/2016/mb/c5mb00663e},\n\tdoi = {10.1039/C5MB00663E},\n\tshorttitle = {{ReactomePA}},\n\tabstract = {Reactome is a manually curated pathway annotation database for unveiling high-order biological pathways from high-throughput data. {ReactomePA} is an R/Bioconductor package providing enrichment analyses, including hypergeometric test and gene set enrichment analyses. A functional analysis can be applied to the genomic coordination obtained from a sequencing experiment to analyze the functional significance of genomic loci including cis-regulatory elements and non-coding regions. Comparison among different experiments is also supported. Moreover, {ReactomePA} provides several visualization functions to produce highly customizable, publication-quality figures. The source code and documents of {ReactomePA} are freely available through Bioconductor (http://www.bioconductor.org/packages/{ReactomePA}).},\n\tpages = {477--479},\n\tnumber = {2},\n\tjournaltitle = {Molecular {BioSystems}},\n\tshortjournal = {Mol. 
{BioSyst}.},\n\tauthor = {Yu, Guangchuang and He, Qing-Yu},\n\turldate = {2016-02-17},\n\tdate = {2016-01-26},\n\tlangid = {english}\n}\n\n@article{yu_chipseeker_2015,\n         title  = \"ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization\",\n         author = {Yu, Guangchuang and Wang, Li-Gen and He, Qing-Yu},\n         journal = \"Bioinformatics\",\n         year    = \"2015\",\n         volume  = \"31\",\n         number  = \"14\",\n         pages   = \"2382-2383\",\n         PMID    = \"25765347\",\n\t url     = {http://bioinformatics.oxfordjournals.org/content/31/14/2382.abstract},\n         doi     = \"10.1093/bioinformatics/btv145\",\n}\n\n@article{yu_dose_2015,\n\ttitle = {{DOSE}: an R/Bioconductor package for disease ontology semantic and enrichment analysis},\n\tvolume = {31},\n\tissn = {1367-4803, 1460-2059},\n\turl = {http://bioinformatics.oxfordjournals.org/content/31/4/608},\n\tdoi = {10.1093/bioinformatics/btu684},\n\tshorttitle = {{DOSE}},\n\tabstract = {Summary: Disease ontology ({DO}) annotates human genes in the context of disease. {DO} is important annotation in translating molecular findings from high-throughput data to clinical relevance. {DOSE} is an R package providing semantic similarity computations among {DO} terms and genes which allows biologists to explore the similarities of diseases and of gene functions in disease perspective. Enrichment analyses including hypergeometric model and gene set enrichment analysis are also implemented to support discovering disease associations of high-throughput biological data. This allows biologists to verify disease relevance in a biological experiment and identify unexpected disease associations. Comparison among gene clusters is also supported.\nAvailability and implementation: {DOSE} is released under Artistic-2.0 License. 
The source code and documents are freely available through Bioconductor (http://www.bioconductor.org/packages/release/bioc/html/{DOSE}.html).\nSupplementary information: Supplementary data are available at Bioinformatics online.\nContact: gcyu@connect.hku.hk or tqyhe@jnu.edu.cn},\n\tpages = {608--609},\n\tnumber = {4},\n\tjournaltitle = {Bioinformatics},\n\tshortjournal = {Bioinformatics},\n\tauthor = {Yu, Guangchuang and Wang, Li-Gen and Yan, Guang-Rong and He, Qing-Yu},\n\turldate = {2015-02-13},\n\tdate = {2015-02-15},\n\tlangid = {english}\n}\n\n@article{urbanucci_overexpression_2012,\n\ttitle = {Overexpression of androgen receptor enhances the binding of the receptor to the chromatin in prostate cancer},\n\tvolume = {31},\n\tissn = {1476-5594},\n\tdoi = {10.1038/onc.2011.401},\n\tabstract = {Androgen receptor ({AR)} is overexpressed in the majority of castration-resistant prostate cancers ({CRPCs).} Our goal was to study the effect of {AR} overexpression on the chromatin binding of the receptor and to identify {AR} target genes that may be important in the emergence of {CRPC.} We have established two sublines of {LNCaP} prostate cancer ({PC)} cell line, one overexpressing {AR} 2-3-fold and the other 4-5-fold compared with the control cells. We used chromatin immunoprecipitation ({ChIP)} and deep-sequencing (seq) to identify {AR-binding} sites ({ARBSs).} We found that the number of {ARBSs} and the {AR-binding} strength were positively associated with the level of {AR} when cells were stimulated with low concentrations of androgens. In cells overexpressing {AR}, the chromatin binding of the receptor took place in 100-fold lower concentration of the ligand than in control cells. We confirmed the association of {AR} level and chromatin binding in two {PC} xenografts, one containing {AR} gene amplification with high {AR} expression, and the other with low expression. 
By combining the {ChIP-seq} and expression profiling, we identified {AR} target genes that are upregulated in {PC.} Of them, the expression of {ZWINT}, {SKP2} (S-phase kinase-associated protein 2 (p45)) and {FEN1} (flap structure-specific endonuclease 1) was demonstrated to be increased in {CRPC}, while the expression of {SNAI2} was decreased in both {PC} and {CRPC.} {FEN1} protein expression was also associated with poor prognosis in prostatectomy-treated patients. Finally, the knock-down of {FEN1} with small interfering {RNA} inhibited the growth of {LNCaP} cells. Our data demonstrate that the overexpression of {AR} sensitizes the receptor binding to chromatin, thus, explaining how {AR} signaling pathway is reactivated in {CRPC} cells.},\n\tpages = {2153-2163},\n\tnumber = {17},\n\tjournaltitle = {Oncogene},\n\tshortjournal = {Oncogene},\n\tauthor = {Urbanucci, A and Sahu, B and Seppälä, J and Larjo, A and Latonen, L M and Waltering, K K and Tammela, T L J and Vessella, R L and Lähdesmäki, H and Jänne, O A and Visakorpi, T},\n\tdate = {2012-04-26},\n\tnote = {{PMID:} 21909140},\n\tkeywords = {Animals, Binding Sites, Cell Line, Tumor, Chromatin, Flap Endonucleases, Gene Amplification, Gene Expression Profiling, Humans, Intracellular Signaling Peptides and Proteins, Male, Mice, Nuclear Proteins, Nucleic Acid Amplification Techniques, Prostatic Neoplasms, Receptors, Androgen, S-Phase Kinase-Associated Proteins, Transplantation, Heterologous}\n}\n\n@article{pemberton_genome-wide_2014,\n\ttitle = {Genome-wide co-localization of Polycomb orthologs and their effects on gene expression in human fibroblasts},\n\tvolume = {15},\n\tissn = {1465-6914},\n\tdoi = {10.1186/gb-2014-15-2-r23},\n\tabstract = {{BACKGROUND:} Polycomb group proteins form multicomponent complexes that are important for establishing lineage-specific patterns of gene expression. 
Mammalian cells encode multiple permutations of the prototypic Polycomb repressive complex 1 ({PRC1)} with little evidence for functional specialization. An aim of this study is to determine whether the multiple orthologs that are co-expressed in human fibroblasts act on different target genes and whether their genomic location changes during cellular senescence.\n{RESULTS:} Deep sequencing of chromatin immunoprecipitated with antibodies against {CBX6}, {CBX7}, {CBX8}, {RING1} and {RING2} reveals that the orthologs co-localize at multiple sites. {PCR-based} validation at representative loci suggests that a further six {PRC1} proteins have similar binding patterns. Importantly, sequential chromatin immunoprecipitation with antibodies against different orthologs implies that multiple variants of {PRC1} associate with the same {DNA.} At many loci, the binding profiles have a distinctive architecture that is preserved in two different types of fibroblast. Conversely, there are several hundred loci at which {PRC1} binding is cell type-specific and, contrary to expectations, the presence of {PRC1} does not necessarily equate with transcriptional silencing. Interestingly, the {PRC1} binding profiles are preserved in senescent cells despite changes in gene expression.\n{CONCLUSIONS:} The multiple permutations of {PRC1} in human fibroblasts congregate at common rather than specific sites in the genome and with overlapping but distinctive binding profiles in different fibroblasts. 
The data imply that the effects of {PRC1} complexes on gene expression are more subtle than simply repressing the loci at which they bind.},\n\tpages = {R23},\n\tnumber = {2},\n\tjournaltitle = {Genome biology},\n\tshortjournal = {Genome Biol.},\n\tauthor = {Pemberton, Helen and Anderton, Emma and Patel, Harshil and Brookes, Sharon and Chandler, Hollie and Palermo, Richard and Stock, Julie and Rodriguez-Niedenführ, Marc and Racek, Tomas and de Breed, Lucas and Stewart, Aengus and Matthews, Nik and Peters, Gordon},\n\tdate = {2014-02-03},\n\tnote = {{PMID:} 24485159}\n}\n\n@article{yu_clusterprofiler_2012,\n\ttitle = {{clusterProfiler:} an R Package for Comparing Biological Themes Among Gene Clusters},\n\tvolume = {16},\n\tissn = {1536-2310, 1557-8100},\n\tshorttitle = {{clusterProfiler}},\n\turl = {http://online.liebertpub.com/doi/abs/10.1089/omi.2011.0118},\n\tdoi = {10.1089/omi.2011.0118},\n\tnumber = {5},\n\turldate = {2012-05-05},\n\tjournal = {{OMICS:} A Journal of Integrative Biology},\n\tauthor = {Yu, Guangchuang and Wang, Li-Gen and Han, Yanyan and He, Qing-Yu},\n\tmonth = may,\n\tyear = {2012},\n\tpages = {284--287},\n}\n\n@article{ashburner_gene_2000,\n\ttitle = {Gene Ontology: tool for the unification of biology},\n\tvolume = {25},\n\tissn = {1061-4036},\n\turl = {http://dx.doi.org/10.1038/75556},\n\tdoi = {10.1038/75556},\n\tshorttitle = {Gene Ontology},\n\tissue = {1},\n\tpages = {25-29},\n\tjournaltitle = {Nat Genet},\n\tshortjournal = {Nat Genet},\n\tauthor = {Ashburner, Michael and Ball, Catherine A. and Blake, Judith A. and Botstein, David and Butler, Heather and Cherry, J. Michael and Davis, Allan P. and Dolinski, Kara and Dwight, Selina S. and Eppig, Janan T. and Harris, Midori A. and Hill, David P. and Issel-Tarver, Laurie and Kasarskis, Andrew and Lewis, Suzanna and Matese, John C. and Richardson, Joel E. and Ringwald, Martin and Rubin, Gerald M. 
and Sherlock, Gavin},\n\turldate = {2010-04-13},\n\tdate = {2000-05},\n}\n\n@article{kanehisa_kegg_2004,\n\ttitle = {The {KEGG} resource for deciphering the genome},\n\tvolume = {32},\n\tissn = {0305-1048, 1362-4962},\n\turl = {http://nar.oxfordjournals.org/content/32/suppl_1/D277},\n\tdoi = {10.1093/nar/gkh063},\n\tlanguage = {en},\n\tissue = {suppl 1},\n\tpages = {D277-D280},\n\tjournaltitle = {Nucleic Acids Research},\n\tshortjournal = {Nucl. Acids Res.},\n\tauthor = {Kanehisa, Minoru and Goto, Susumu and Kawashima, Shuichi and Okuno, Yasushi and Hattori, Masahiro},\n\turldate = {2013-10-15},\n\tdate = {2004},\n\tnote = {{PMID:} 14681412},\n}\n\n@article{schriml_disease_2011,\n\ttitle = {Disease Ontology: a backbone for disease semantic integration},\n\tvolume = {40},\n\tissn = {0305-1048, 1362-4962},\n\turl = {http://nar.oxfordjournals.org/content/40/D1/D940.long},\n\tdoi = {10.1093/nar/gkr972},\n\tshorttitle = {Disease Ontology},\n\tissue = {D1},\n\tpages = {D940-D946},\n\tjournaltitle = {Nucleic Acids Research},\n\tauthor = {Schriml, L. M. and Arze, C. and Nadendla, S. and Chang, Y.-W. W. and Mazaitis, M. and Felix, V. and Feng, G. and Kibbe, W. A.},\n\turldate = {2012-03-01},\n\tdate = {2011-11-12},\n}\n\n\n@article{croft_reactome_2013,\n\ttitle = {The Reactome pathway knowledgebase},\n\tvolume = {42},\n\tissn = {0305-1048, 1362-4962},\n\turl = {http://nar.oxfordjournals.org/content/42/D1/D472.long},\n\tdoi = {10.1093/nar/gkt1102},\n\tissue = {D1},\n\tpages = {D472-D477},\n\tjournaltitle = {Nucleic Acids Research},\n\tauthor = {Croft, D. and Mundo, A. F. and Haw, R. and Milacic, M. and Weiser, J. and Wu, G. and Caudy, M. and Garapati, P. and Gillespie, M. and Kamdar, M. R. and Jassal, B. and Jupe, S. and Matthews, L. and May, B. and Palatnik, S. and Rothfels, K. and Shamovsky, V. and Song, H. and Williams, M. and Birney, E. and Hermjakob, H. and Stein, L. 
and {D'Eustachio}, P.},\n\turldate = {2014-02-18},\n\tdate = {2013-11-15},\n}\n\n\n@article{ramirez2016deeptools2,\n  title={deepTools2: a next generation web server for deep-sequencing data analysis},\n  author={Ram{\\'\\i}rez, Fidel and Ryan, Devon P and Gr{\\\"u}ning, Bj{\\\"o}rn and Bhardwaj, Vivek and Kilpert, Fabian and Richter, Andreas S and Heyne, Steffen and D{\\\"u}ndar, Friederike and Manke, Thomas},\n  journal={Nucleic acids research},\n  volume={44},\n  number={W1},\n  pages={W160--W165},\n  year={2016},\n  publisher={Oxford University Press}\n}"
  }
]