Repository: YuLab-SMU/ChIPseeker
Branch: devel
Commit: c1f4a507f226
Files: 119
Total size: 433.4 KB

Directory structure:
gitextract_66qccjqf/

├── .Rbuildignore
├── .github/
│   └── issue_template.md
├── .gitignore
├── .svnignore
├── .travis.yml
├── CONDUCT.md
├── DESCRIPTION
├── GEODATA
├── Makefile
├── NAMESPACE
├── NEWS
├── NEWS.md
├── R/
│   ├── AllGenerics.R
│   ├── ChIPseeker-package.R
│   ├── GEO.R
│   ├── addGeneAnno.R
│   ├── annotatePeak.R
│   ├── covplot.R
│   ├── csAnno.R
│   ├── dplyr-verb.R
│   ├── enrichOverlap.R
│   ├── getFlankingGene.R
│   ├── getGenomicAnnotation.R
│   ├── getNearestFeatureIndicesAndDistances.R
│   ├── plotAnno.R
│   ├── plotDistToTSS.R
│   ├── plotTagMatrix.R
│   ├── readPeakFile.R
│   ├── seq2gene.R
│   ├── subset.R
│   ├── tagMatrix.R
│   ├── upsetplot.R
│   ├── utilities.R
│   ├── vennpie.R
│   ├── vennplot.R
│   └── zzz.R
├── README.Rmd
├── README.md
├── appveyor.yml
├── data/
│   ├── gsminfo.rda
│   ├── tagMatrixList.rda
│   └── ucsc_release.rda
├── inst/
│   ├── CITATION
│   ├── extdata/
│   │   ├── processedGSM.rda
│   │   └── sample_peaks.txt
│   └── test-plot/
│       ├── test-plotPeakProf.R
│       └── test-plotTagMatrix.R
├── man/
│   ├── ChIPseeker-package.Rd
│   ├── ChIPseekerCache.Rd
│   ├── annotatePeak.Rd
│   ├── as.GRanges.Rd
│   ├── as.data.frame.csAnno.Rd
│   ├── check_upstream_and_downstream.Rd
│   ├── combine_csAnno.Rd
│   ├── covplot.Rd
│   ├── csAnno-class.Rd
│   ├── dot-ChIPseekerEnv.Rd
│   ├── dotFun.Rd
│   ├── downloadGEObedFiles.Rd
│   ├── downloadGSMbedFiles.Rd
│   ├── dropAnno.Rd
│   ├── enrichAnnoOverlap.Rd
│   ├── enrichPeakOverlap.Rd
│   ├── getAnnoStat.Rd
│   ├── getBioRegion.Rd
│   ├── getGEOInfo.Rd
│   ├── getGEOgenomeVersion.Rd
│   ├── getGEOspecies.Rd
│   ├── getGeneAnno.Rd
│   ├── getGenomicAnnotation.Rd
│   ├── getNearestFeatureIndicesAndDistances.Rd
│   ├── getPromoters.Rd
│   ├── getSampleFiles.Rd
│   ├── getTagMatrix.Rd
│   ├── getTagMatrix.binning.internal.Rd
│   ├── getTagMatrix.internal.Rd
│   ├── getTagMatrix2.Rd
│   ├── getTagMatrix2.binning.internal.Rd
│   ├── getTagMatrix2.internal.Rd
│   ├── info.Rd
│   ├── makeBioRegionFromGranges.Rd
│   ├── make_label.Rd
│   ├── overlap.Rd
│   ├── peakHeatmap.Rd
│   ├── peakHeatmap_multiple_Sets.Rd
│   ├── peak_Profile_Heatmap.Rd
│   ├── plotAnnoBar-methods.Rd
│   ├── plotAnnoBar.Rd
│   ├── plotAnnoPie-methods.Rd
│   ├── plotAnnoPie.Rd
│   ├── plotAvgProf.Rd
│   ├── plotAvgProf.binning.Rd
│   ├── plotAvgProf2.Rd
│   ├── plotDistToTSS-methods.Rd
│   ├── plotDistToTSS.data.frame.Rd
│   ├── plotMultiProf.Rd
│   ├── plotMultiProf.binning.Rd
│   ├── plotMultiProf.binning.internal.Rd
│   ├── plotMultiProf.normal.Rd
│   ├── plotMultiProf.normal.internal.Rd
│   ├── plotPeakProf.Rd
│   ├── plotPeakProf2.Rd
│   ├── plotPeakProf_MultiWindows.Rd
│   ├── readPeakFile.Rd
│   ├── reexports.Rd
│   ├── seq2gene.Rd
│   ├── show-methods.Rd
│   ├── shuffle.Rd
│   ├── tagHeatmap.Rd
│   ├── upsetplot-methods.Rd
│   ├── vennpie-methods.Rd
│   ├── vennplot.Rd
│   └── vennplot.peakfile.Rd
├── tests/
│   ├── testthat/
│   │   ├── test-bed.R
│   │   ├── test-getTagMatrix.R
│   │   └── test-txdb.R
│   └── testthat.R
└── vignettes/
    ├── ChIPseeker.Rmd
    └── ChIPseeker.bib

================================================
FILE CONTENTS
================================================

================================================
FILE: .Rbuildignore
================================================
.travis.yml
.svnignore
.gitignore
^.*\.DS_Store
Makefile
README.Rmd
appveyor.yml
GEODATA
docs
mkdocs
.github
^CONDUCT\.md$


================================================
FILE: .github/issue_template.md
================================================
### Prerequisites

+ [ ] Have you read [Feedback](https://guangchuangyu.github.io/chipseeker/#feedback) and follow the [guide](https://guangchuangyu.github.io/2016/07/how-to-bug-author/)?
	* [ ] make sure your are using the latest release version
	* [ ] read the [documents](https://guangchuangyu.github.io/chipseeker/documentation/)
	* [ ] google your quesion/issue

### Describe you issue

* [ ] Make a reproducible example (*e.g.* [1](https://gist.github.com/talonsensei/e1fad082657054207f249ec98f0920eb))
* [ ] your code should contain comments to describe the problem (*e.g.* what expected and actually happened?)


### Ask in right place

* [ ] for bugs or feature requests, post here (github issue)
* [ ] for questions, please post to [Bioconductor](https://support.bioconductor.org/) or [Biostars](https://www.biostars.org/) with tag `ChIPseeker`


================================================
FILE: .gitignore
================================================
.DS_Store
data/.DS_Store
inst/.DS_Store
inst/extdata/.DS_Store
inst/extdata/GEO_sample_data/.DS_Store
.svn
*~
docs/__init__.py
docs/__pycache__
__init__.pyc


================================================
FILE: .svnignore
================================================
.git
*.Rhistory
.travis.yml
appveyor.yml
docs
mkdocs
.github


================================================
FILE: .travis.yml
================================================
## reference: http://docs.travis-ci.com/user/languages/r/

language: r
r: bioc-devel
  
cache: packages
bioc_required: true
bioc_use_devel: true

os:
  - linux
  - osx


env:
  global:
    - _R_CHECK_FORCE_SUGGESTS_=False
    - R_LIBS="http://cran.rstudio.com"

r_packages:
    - knitr
    - rmarkdown

bioc_packages:
    - DO.db
    - DOSE
    - graphite
    - ReactomePA
    - org.Hs.eg.db
    - TxDb.Hsapiens.UCSC.hg19.knownGene
    - GenomicRanges
    - GenomicFeatures
    
after_failure:
  - ./travis-tool.sh dump_logs

r_github_packages:
  - jimhester/covr

after_success:
  - Rscript -e 'library(covr); codecov()'

notifications:
  email:
    recipients: gcyu@connect.hku.hk
    on_success: never
    on_failure: always


================================================
FILE: CONDUCT.md
================================================
# Contributor Code of Conduct

As contributors and maintainers of this project, we pledge to respect all people who 
contribute through reporting issues, posting feature requests, updating documentation,
submitting pull requests or patches, and other activities.

We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.

Examples of unacceptable behavior by participants include the use of sexual language or
imagery, derogatory comments or personal attacks, trolling, public or private harassment,
insults, or other unprofessional conduct.

Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this 
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
from the project team.

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
opening an issue or contacting one or more of the project maintainers.

This Code of Conduct is adapted from the Contributor Covenant 
(http:contributor-covenant.org), version 1.0.0, available at 
http://contributor-covenant.org/version/1/0/0/


================================================
FILE: DESCRIPTION
================================================
Package: ChIPseeker
Type: Package
Title: ChIPseeker for ChIP peak Annotation, Comparison, and Visualization
Version: 1.49.0
Authors@R: c( 
    person(given = "Guangchuang", family = "Yu",       email = "guangchuangyu@gmail.com",      role  = c("aut", "cre"), comment = c(ORCID = "0000-0002-6485-8781")),
    person(given = "Ming",        family = "Li",       email = "limiang929@gmail.com",         role = "ctb"),
    person(given = "Qianwen",     family = "Wang",     email = "treywea@gmail.com",            role = "ctb"),
    person(given = "Yun",         family = "Yan",      email = "youryanyun@gmail.com",         role = "ctb"),
    person(given = "Hervé",       family = "Pagès",    email = "hpages.on.github@gmail.com",   role  = "ctb"),
    person(given = "Michael",     family = "Kluge",    email = "michael.kluge@bio.ifi.lmu.de", role  = "ctb"),
    person(given = "Thomas",      family = "Schwarzl", email = "schwarzl@embl.de",             role  = "ctb"),
    person(given = "Zhougeng",    family = "Xu",       email = "xuzhougeng@163.com",           role  = "ctb"),
	person(given = "Chun-Hui",    family = "Gao",      email="gaospecial@gmail.com",           role  = "ctb", comment=c(ORCID = "0000-0002-1445-7939"))
	)
Maintainer: Guangchuang Yu <guangchuangyu@gmail.com>
Description: This package implements functions to retrieve the nearest genes
    around the peak, annotate genomic region of the peak, statstical methods
    for estimate the significance of overlap among ChIP peak data sets, and
    incorporate GEO database for user to compare the own dataset with those
    deposited in database. The comparison can be used to infer cooperative
    regulation and thus can be used to generate hypotheses. Several
    visualization functions are implemented to summarize the coverage of the
    peak experiment, average profile and heatmap of peaks binding to TSS
    regions, genomic annotation, distance to TSS, and overlap of peaks or
    genes.
Depends:
    R (>= 3.5.0)
Imports:
    AnnotationDbi,
    aplot,
    BiocGenerics,
    boot,
    dplyr,
    enrichplot,
    IRanges,
    GenomeInfoDb,
    GenomicRanges,
    GenomicFeatures,
    ggplot2,
    gplots,
    graphics,
    grDevices,
    gtools,
    magrittr,
    methods,
    plotrix,
    parallel,
    RColorBrewer,
    rlang,
    rtracklayer,
    S4Vectors,
    scales,
    stats,
    tibble,
    TxDb.Hsapiens.UCSC.hg19.knownGene,
    utils,
    yulab.utils (>= 0.2.0)
Suggests:
    clusterProfiler,
    ggimage,
    ggplotify,
    ggupset,
    ggVennDiagram,
    knitr,
    org.Hs.eg.db,
    prettydoc,
    ReactomePA,
    rmarkdown,
    testthat,
    TxDb.Hsapiens.UCSC.hg38.knownGene
Remotes:
    GuangchuangYu/enrichplot
URL: https://yulab-smu.top/contribution-knowledge-mining/
BugReports: https://github.com/YuLab-SMU/ChIPseeker/issues
Encoding: UTF-8
VignetteBuilder: knitr
ByteCompile: true
License: Artistic-2.0
biocViews: Annotation, ChIPSeq, Software, Visualization, MultipleComparison
RoxygenNote: 7.3.3


================================================
FILE: GEODATA
================================================
UPDATE OF GEO DATA
 + 20947 bed file information in ChIPseeker (version >=1.9.8) <2016-09-20, Tue>
 + 19348 bed file information in ChIPseeker (version >= 1.7.15) <2016-03-21, Mon>
 + 18813 bed file information in ChIPseeker (version >= 1.5.11, BioC 3.2 devel) <2015-09-24, Thu>
 + >17,726 bed file information in ChIPseeker (version>=1.4.0, BioC 3.1)
 + >15,000 bed file information ChIPseeker (version>=1.2.0, BioC 3.0)


================================================
FILE: Makefile
================================================
PKGNAME := $(shell sed -n "s/Package: *\([^ ]*\)/\1/p" DESCRIPTION)
PKGVERS := $(shell sed -n "s/Version: *\([^ ]*\)/\1/p" DESCRIPTION)
PKGSRC  := $(shell basename `pwd`)
BIOCVER := RELEASE_3_23


all: rd check clean

alldocs: rd readme

rd:
	Rscript -e 'roxygen2::roxygenise(".")'

readme:
	Rscript -e 'rmarkdown::render("README.Rmd", encoding="UTF-8")'

build:
	#cd ..;\
	#R CMD build $(PKGSRC)
	Rscript -e 'devtools::build()'
	
build2:
	cd ..;\
	R CMD build --no-build-vignettes $(PKGSRC)

install:
	cd ..;\
	R CMD INSTALL $(PKGNAME)_$(PKGVERS).tar.gz

check: 
	Rscript -e 'devtools::check()'
	## cd ..;\
	## Rscript -e 'rcmdcheck::rcmdcheck("$(PKGNAME)_$(PKGVERS).tar.gz")'

check2: build
	cd ..;\
	R CMD check $(PKGNAME)_$(PKGVERS).tar.gz

bioccheck:
	cd ..;\
	Rscript -e 'BiocCheck::BiocCheck("$(PKGNAME)_$(PKGVERS).tar.gz")'

clean:
	cd ..;\
	$(RM) -r $(PKGNAME).Rcheck/


gitmaintain:
	git gc --auto;\
	git prune -v;\
	git fsck --full

rmrelease:
	git branch -D $(BIOCVER)

release:
	git checkout $(BIOCVER);\
	git fetch --all


update:
	git fetch --all;\
	git checkout devel;\
	git merge upstream/devel;\
	git merge origin/devel

biocinit:
	git remote add upstream git@git.bioconductor.org:packages/$(PKGNAME).git;\
	git fetch --all

push:
	git push upstream devel;\
	git push origin devel


================================================
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand

S3method(arrange,GRanges)
S3method(as.data.frame,csAnno)
S3method(filter,GRanges)
S3method(mutate,GRanges)
S3method(rename,GRanges)
S3method(subset,csAnno)
export(.)
export(GRangesList)
export(annotatePeak)
export(as.GRanges)
export(combine_csAnno)
export(covplot)
export(downloadGEObedFiles)
export(downloadGSMbedFiles)
export(dropAnno)
export(enrichAnnoOverlap)
export(enrichPeakOverlap)
export(getAnnoStat)
export(getBioRegion)
export(getGEOInfo)
export(getGEOgenomeVersion)
export(getGEOspecies)
export(getPromoters)
export(getSampleFiles)
export(getTagMatrix)
export(makeBioRegionFromGranges)
export(overlap)
export(peakHeatmap)
export(peakHeatmap_multiple_Sets)
export(peak_Profile_Heatmap)
export(plotAnnoBar)
export(plotAnnoPie)
export(plotAnnoPie.csAnno)
export(plotAvgProf)
export(plotAvgProf2)
export(plotDistToTSS)
export(plotPeakProf)
export(plotPeakProf2)
export(readPeakFile)
export(rel)
export(seq2gene)
export(shuffle)
export(tagHeatmap)
export(vennpie)
export(vennplot)
export(vennplot.peakfile)
exportClasses(csAnno)
exportMethods(plotAnnoBar)
exportMethods(plotAnnoPie)
exportMethods(plotDistToTSS)
exportMethods(show)
exportMethods(upsetplot)
exportMethods(vennpie)
import(BiocGenerics)
import(GenomeInfoDb)
import(GenomicRanges)
import(IRanges)
import(S4Vectors)
importFrom(AnnotationDbi,get)
importFrom(AnnotationDbi,select)
importFrom(BiocGenerics,end)
importFrom(BiocGenerics,start)
importFrom(GenomicFeatures,exonsBy)
importFrom(GenomicFeatures,fiveUTRsByTranscript)
importFrom(GenomicFeatures,genes)
importFrom(GenomicFeatures,intronsByTranscript)
importFrom(GenomicFeatures,threeUTRsByTranscript)
importFrom(GenomicFeatures,transcripts)
importFrom(GenomicFeatures,transcriptsBy)
importFrom(GenomicRanges,GRangesList)
importFrom(S4Vectors,metadata)
importFrom(S4Vectors,subset)
importFrom(TxDb.Hsapiens.UCSC.hg19.knownGene,TxDb.Hsapiens.UCSC.hg19.knownGene)
importFrom(aplot,insert_bottom)
importFrom(aplot,plot_list)
importFrom(boot,boot)
importFrom(boot,boot.ci)
importFrom(dplyr,arrange)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,summarise)
importFrom(enrichplot,upsetplot)
importFrom(ggplot2,aes)
importFrom(ggplot2,aes_)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,coord_fixed)
importFrom(ggplot2,coord_flip)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_text)
importFrom(ggplot2,facet_grid)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_blank)
importFrom(ggplot2,geom_hline)
importFrom(ggplot2,geom_line)
importFrom(ggplot2,geom_rect)
importFrom(ggplot2,geom_ribbon)
importFrom(ggplot2,geom_segment)
importFrom(ggplot2,geom_text)
importFrom(ggplot2,geom_tile)
importFrom(ggplot2,geom_vline)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,ggtitle)
importFrom(ggplot2,guide_legend)
importFrom(ggplot2,labs)
importFrom(ggplot2,rel)
importFrom(ggplot2,scale_color_manual)
importFrom(ggplot2,scale_fill_brewer)
importFrom(ggplot2,scale_fill_distiller)
importFrom(ggplot2,scale_fill_hue)
importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,scale_x_continuous)
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_bw)
importFrom(ggplot2,theme_classic)
importFrom(ggplot2,theme_minimal)
importFrom(ggplot2,xlab)
importFrom(ggplot2,xlim)
importFrom(ggplot2,ylab)
importFrom(gplots,plot.venn)
importFrom(grDevices,colorRampPalette)
importFrom(graphics,layout)
importFrom(graphics,legend)
importFrom(graphics,par)
importFrom(graphics,pie)
importFrom(graphics,plot.new)
importFrom(gtools,permutations)
importFrom(magrittr,"%<>%")
importFrom(magrittr,"%>%")
importFrom(methods,as)
importFrom(methods,is)
importFrom(methods,missingArg)
importFrom(methods,new)
importFrom(methods,show)
importFrom(parallel,detectCores)
importFrom(parallel,mclapply)
importFrom(plotrix,floating.pie)
importFrom(rlang,.data)
importFrom(rlang,quos)
importFrom(rtracklayer,import.chain)
importFrom(rtracklayer,liftOver)
importFrom(stats,p.adjust)
importFrom(stats,phyper)
importFrom(utils,data)
importFrom(utils,download.file)
importFrom(utils,read.delim)
importFrom(utils,setTxtProgressBar)
importFrom(utils,txtProgressBar)
importFrom(yulab.utils,get_cache_element)
importFrom(yulab.utils,get_cache_item)
importFrom(yulab.utils,initial_cache_item)
importFrom(yulab.utils,mat2df)
importFrom(yulab.utils,rm_cache_item)
importFrom(yulab.utils,update_cache_item)
importFrom(yulab.utils,yulab_msg)


================================================
FILE: NEWS
================================================
CHANGES IN VERSION 1.15.2
------------------------
 o bug fixed for 'overlap = "all"' to consider strand information <2017-12-12, Tue>

CHANGES IN VERSION 1.15.1
------------------------
 o define downstream distance via options(ChIPseeker.downstreamDistance = 3000)
   + https://support.bioconductor.org/p/103135/

CHANGES IN VERSION 1.13.1
------------------------
 o fixed issue of naming intronList <2017-07-06, Thu>
   + https://github.com/GuangchuangYu/ChIPseeker/issues/57#issuecomment-313342399

CHANGES IN VERSION 1.12.0
------------------------
 o BioC 3.5 release <2017-04-26, Wed>

CHANGES IN VERSION 1.11.4
------------------------
 o bug fixed of intron rank <2017-04-19, Wed>
   + https://github.com/GuangchuangYu/ChIPseeker/issues/54

CHANGES IN VERSION 1.11.3
------------------------
 o bug fixed of dropAnno <2017-04-10, Mon>
 o bug fixed of peak width generated by shuffle <2017-03-31, Fri>
   + <https://github.com/GuangchuangYu/ChIPseeker/issues/51>

CHANGES IN VERSION 1.11.2
------------------------
 o optimize getGeneAnno <2016-12-21, Wed>
 o change plotAnnoBar and plotDistToTSS according to stacking bar order change in ggplot2 (v2.2.0) <2016-12-16, Fri>
   + https://github.com/GuangchuangYu/ChIPseeker/issues/47
   + https://blog.rstudio.org/2016/11/14/ggplot2-2-2-0/

CHANGES IN VERSION 1.11.1
------------------------
 o update startup message <2016-11-09, Wed>

CHANGES IN VERSION 1.10.0
------------------------
 o BioC 3.4 released <2016-10-18, Tue>

CHANGES IN VERSION 1.9.8
------------------------
 o plotAvgProf/plotAvgProf2 order of panel by names of input tagMatrix List <2016-09-25, Sun>
 o test ENSEMBL ID using '^ENS' instead of '^ENSG' <2016-09-20, Tue>
   + https://github.com/GuangchuangYu/ChIPseeker/issues/41

CHANGES IN VERSION 1.9.7
------------------------
 o unit test <2016-08-16, Tue>

CHANGES IN VERSION 1.9.6
------------------------
 o update vignette <2016-08-16, Tue>

CHANGES IN VERSION 1.9.5
------------------------
 o when TxDb doesn't have gene_id information, converting gene ID (ensembl/entrez and symbol) will be omitted instead of throw error. <2016-08-02, Tue>
   + https://www.biostars.org/p/204142
 o bug fixed if testing targetPeak is a list of GRanges objects in enrichPeakOverlap function <2016-07-20, Wed>
   + https://github.com/GuangchuangYu/ChIPseeker/issues/37
   + https://github.com/GuangchuangYu/ChIPseeker/issues/36
 o fixed typo in determine gene ID type <2016-06-21, Tue>
   + https://github.com/GuangchuangYu/ChIPseeker/issues/28#issuecomment-227212519
 o move upsetplot generics to DOSE and import from DOSE to prevent function name conflict <2016-06-14, Tue>

CHANGES IN VERSION 1.9.4
------------------------
 o bug fixed <2016-06-08, Wed>
   + https://github.com/GuangchuangYu/ChIPseeker/issues/17#issuecomment-224407402
   + https://github.com/GuangchuangYu/ChIPseeker/pull/24/files

CHANGES IN VERSION 1.9.3
------------------------
 o use byte compiler <2016-05-18, Wed>
 o https://github.com/Bioconductor-mirror/ChIPseeker/commit/f1ada57b9c66a1a44355bbbbdaf5b0a88e10cf7d

CHANGES IN VERSION 1.9.2
------------------------
 o name tagMatrix in plotAvgProf automatically if missing <2016-05-12, Thu>
 o https://github.com/Bioconductor-mirror/ChIPseeker/commit/d5f16b2bc01725e30282c3acb33007ef521a514c

CHANGES IN VERSION 1.9.1
------------------------
 o bug fixed in getNearestFeatureIndicesAndDistances <2016-05-11, Wed>
   + correct metadata in dummy NA feature

CHANGES IN VERSION 1.8.0
------------------------
 o BioC 3.3 released <2016-05-05, Thu>

CHANGES IN VERSION 1.7.15
------------------------
 o update GEO data <2016-03-21, Mon>

CHANGES IN VERSION 1.7.14
------------------------
 o list_to_dataframe works with data frames that have different colnames <2016-03-10, Thu>

CHANGES IN VERSION 1.7.13
------------------------
 o support annotate peaks with custom regions via passing TxDb=user_defined_GRanges to annotatePeak <2016-03-06, Sun>

CHANGES IN VERSION 1.7.12
------------------------
 o fixed R check <2016-03-05, Sat>
 o implement list_to_dataframe that mimic ldply and remove ldply dependency <2016-03-05, Sat>

CHANGES IN VERSION 1.7.11
------------------------
 o fixed issue in testing list in covplot introduced in 1.7.9 <2016-03-02, Wed>

CHANGES IN VERSION 1.7.10
------------------------
 o determined gene ID type if TxDb doesn't contain corresponding metadata <2016-03-01, Tue>
   + fixed https://github.com/GuangchuangYu/ChIPseeker/issues/28

CHANGES IN VERSION 1.7.9
------------------------
 o covplot support GRangesList <2016-02-24, Wed>
 o update ReactomePA citation info <2016-02-17, Wed>

CHANGES IN VERSION 1.7.8
------------------------
 o fixed BUG of Peaks upstream first or downstream last gene not annotated <2016-01-20, Wed>
   + contributed by Michael Kluge
   + see https://github.com/GuangchuangYu/ChIPseeker/pull/24

CHANGES IN VERSION 1.7.7
------------------------
 o bug fixed in newly introduced parameter 'overlap'. solve NA issue. <2016-01-13, Wed>

CHANGES IN VERSION 1.7.6
------------------------
 o introduce 'overlap' parameter in annotatePeak, by default overlap="TSS" and only overlap with TSS will be reported as the nearest gene.
   if overlap="all", then gene overlap with peak will be reported as nearest gene, no matter the overlap is at TSS region or not. <2016-01-12, Tue>
 o bug fixed in find overlap with peaks have strand info. <2016-01-12, Tue>
   + see https://github.com/GuangchuangYu/ChIPseeker/issues/23

CHANGES IN VERSION 1.7.5
------------------------
 o add paramters, sameStrand,ignoreOverlap, ignoreUpstream and ignoreDownstream in annotatePeak <2016-01-10, Sun>
   + see https://github.com/GuangchuangYu/ChIPseeker/issues/17
 o bug fixed in peak orientation <2016-01-10, Sun>
   + see https://github.com/GuangchuangYu/ChIPseeker/issues/22

CHANGES IN VERSION 1.7.4
------------------------
 o stop if input list of csAnno object has no name attribute
   + see https://github.com/GuangchuangYu/ChIPseeker/issues/21
   + plotAnnoBar
   + plotDistToTSS
 o [covplot] xlim now not only restrict the window of data but also set the limit of the graphic object <2015-12-30, Wed>
   + see https://github.com/GuangchuangYu/ChIPseeker/issues/20

CHANGES IN VERSION 1.7.3
------------------------
 o fixed R check <2015-12-29, Tue>

CHANGES IN VERSION 1.7.2
------------------------
 o use geom_rect instead of geom_segment in covplot <2015-11-30, Mon>
 o open lower parameter (by default =1) to specific lower cutoff of coverage signal <2015-11-29, Sun>
 o fixed covplot to work with None RleViews of specific chromosome <2015-11-29, Sun>
 o addFlankGeneInfo now works with level="gene" <2015-11-19, Thu>
   + see https://github.com/GuangchuangYu/ChIPseeker/issues/18

CHANGES IN VERSION 1.7.1
------------------------
 o fixed extracting ID type from TxDb object, since the change of metadata(TxDb). now using grep to extract. <2015-10-27, Tue>
 o add vp parameter to set viewport of vennpie on top of upsetplot by user request <2015-10-26, Mon>
   + see http://ygc.name/2015/07/28/upsetplot-in-chipseeker/#comment-19470
 o getBioRegion function <2015-10-20, Tue>
   + see https://github.com/GuangchuangYu/ChIPseeker/issues/16

CHANGES IN VERSION 1.7.0
------------------------
 o BioC 3.3 branch

CHANGES IN VERSION 1.5.11
------------------------
 o remove ellipsis parameter in enrichPeakOverlap function and extend it to support GRanges objects <2015-10-08, Thu>
    + see https://support.bioconductor.org/p/73069/
 o fixed the issue, https://github.com/GuangchuangYu/ChIPseeker/issues/13 <2015-10-05, Mon>
 o update GEO info, now contains >18,000 bed file information <2015-09-24, Thu>

CHANGES IN VERSION 1.5.10
------------------------
 o dropAnno function, eg. drop nearest gene annotation that far from TSS (>10k). <2015-09-17, Thu>
   + see https://github.com/GuangchuangYu/ChIPseeker/issues/9
   + add parameter distanceToTSS_cutoff to enrichAnnoOverlap
 o use base::subset in plotDistToTSS instead of subsetting data within geom_bar <2015-09-17, Thu>
   + see https://github.com/hadley/ggplot2/issues/1295
   + subset parameter in layer will be removed in next release of ggplot2.

CHANGES IN VERSION 1.5.9
------------------------
 o bug fixed of enrichAnnoOverlap <2015-08-26, Wed>
 o change parameter order.matrix to order.by in upsetplot to meet the change of UpSetR pkg <2015-08-26, Wed>

CHANGES IN VERSION 1.5.8
------------------------
 o better implementation of getFirstHitIndex.  <2015-07-29, Wed>
   + contributed by Herve Pages.
   + see https://support.bioconductor.org/p/70432/#70545.

CHANGES IN VERSION 1.5.7
------------------------
 o add vennpie parameter in upsetplot <2015-07-20, Mon>
 o upsetplot function for csAnno object <2015-07-20, Mon>

CHANGES IN VERSION 1.5.6
------------------------
 o update citation info <2015-07-09, Thu>
 o BED file +1 shift for BED coordinate system start at 0 <2015-07-07, Tue>

CHANGES IN VERSION 1.5.5
------------------------
 o seq2gene for linking genomic regions to genes by many-to-many mapping. <2015-06-29, Mon>

CHANGES IN VERSION 1.5.4
------------------------
 o add pseudocount in enrichPeakOverlap to prevent 0 pvalue <2015-05-22, Fri>

CHANGES IN VERSION 1.5.3
------------------------
 o convert the vignette from Rnw to Rmd format <2015-05-17, Sun>

CHANGES IN VERSION 1.5.1
------------------------
 o minor bug fixed in getChrCov <2015-04-27, Mon>

CHANGES IN VERSION 1.3.15
------------------------
 o update vignette <2015-03-31, Tue>

CHANGES IN VERSION 1.3.14
------------------------
 o add pool parameter in enrichPeakOverlap <2015-03-30, Mon>

CHANGES IN VERSION 1.3.13
------------------------
 o update enrichPeakOverlap to support nShuffle = 0, which now will report only overlay with pvalue = NA <2015-03-29, Sun>
 o add facet and free_y parameter for plotAvgProf and plotAvgProf2 <2015-03-29, Sun>
 o update docs <2015-03-29, Sun>
 o update plotAvgProf and plotAvgProf2 to fully supporting confidence interval,
   see https://github.com/GuangchuangYu/ChIPseeker/pull/6 <2015-03-29, Sun>

CHANGES IN VERSION 1.3.12
------------------------
 o add confidence interval for plotAvgProf, see https://github.com/GuangchuangYu/ChIPseeker/issues/3 <2015-03-26, Thu>

CHANGES IN VERSION 1.3.11
------------------------
 o add citation <2015-03-16, Mon>

CHANGES IN VERSION 1.3.10
------------------------
 o update GEO data <2015-03-03, Tue>

CHANGES IN VERSION 1.3.9
------------------------
 o add parameter *genomicAnnotationPriority* for annotatePeak function <2015-02-27, Fri>

CHANGES IN VERSION 1.3.8
------------------------
 o add DOSE citation <2015-02-13, Fri>

CHANGES IN VERSION 1.3.7
------------------------
 o bug fixed in plotDistToTSS <2015-02-06, Fri>

CHANGES IN VERSION 1.3.6
------------------------
 o when peak is exactly located at gene end and near the end of chromosome,
   NA will be generated and throw error when assigning downstream of gene end.
   This bug has been fixed <2015-02-03, Tue>

CHANGES IN VERSION 1.3.5
------------------------
 o bug fixed in getNearestFeatureIndicesAndDistances when peak in the
   very begining or end of the chromosome <2015-01-30, Fri>

CHANGES IN VERSION 1.3.4
------------------------
 o bug fixed for introducing dplyr in plotDistToTSS <2015-01-28, Wed>

CHANGES IN VERSION 1.3.3
------------------------
 o update vignette to use BiocStyle::latex() <2015-01-26, Mon>

CHANGES IN VERSION 1.3.2
------------------------
 o fixed import issue to meet the changes of AnnotationDbi and S4Vectors <2015-01-22, Thu>

CHANGES IN VERSION 1.1.21
------------------------
 o use data.table instead of data.frame to optimize covplot <2014-10-06, Mon>

CHANGES IN VERSION 1.1.20
------------------------
 o update annotatePeak to store the seqinfo information <2014-09-30, Tue>
 o modified runValue(x) to sapply(x, runValue) <2014-09-30, Tue>

CHANGES IN VERSION 1.1.19
------------------------
 o implement csAnno S4 object <2014-09-28, Sun>
 o modify plot function for csAnno instance <2014-09-28, Sun>
 o implement vennpie function <2014-09-28, Sun>

CHANGES IN VERSION 1.1.17
------------------------
 o deprecate plotChrCov to new function covplot <2014-08-18, Mon>
 o add new paramter chrs and xlim to covplot <2014-08-18, Mon>

CHANGES IN VERSION 1.1.16
------------------------
 o optimize plotChrCov, running time reduce drastically <2014-08-15, Fri>

CHANGES IN VERSION 1.1.15
------------------------
 o remove un-mappable peak to prevent fail in peak annotation <2014-08-14, Thu>

CHANGES IN VERSION 1.1.14
------------------------
 o bug fixed in plotDistToTSS <2014-08-14, Thu>

CHANGES IN VERSION 1.1.13
------------------------
 o change TranscriptDb to TxDb according to GenomicFeatures <2014-07-29, Tue>

CHANGES IN VERSION 1.1.12
------------------------
 o bug fixed in plotChrCov <2014-07-21, Mon>

CHANGES IN VERSION 1.1.10
------------------------
 o bug fixed in calculating distances from peak end <2014-06-18, Wed>

CHANGES IN VERSION 1.1.9
------------------------
 o add level parameter to annotatePeak, and set it to "transcript" by default.
   Now annotatePeak will annotate peaks in transcript level
   except user specify level = "gene" <2014-06-16, Mon>
 o add addFlankGeneInfo parameter to annotatePeak.
   If it set to true, all features within the flankDistance will be annotated. <2014-06-16, Mon>

CHANGES IN VERSION 1.1.8
------------------------
 o bug fixed when peak overlap with feature <2014-06-11, Wed>
 o optimize for getting overlap features of peaks <2014-06-11, Wed>
 o update plotAnnoPie, separate the pie and legend to prevent label overlap <2014-06-12, Thu>

CHANGES IN VERSION 1.1.7
------------------------
 o bug fixed in calculating distanceToTSS <2014-06-03, Tue>

CHANGES IN VERSION 1.1.6
------------------------
 o add chainFile parameter in enrichAnnoOverlap and enrichPeakOverlap to support different genome version comparision <2014-06-01, Sun>
 o fixed color bug in peakHeatmap.internal2 and plotAnnoBar <2014-06-02, Mon>
 o update vignettes <2014-06-02, Mon>

CHANGES IN VERSION 1.1.5
------------------------
 o export getPromoters and getTagMatrix <2014-05-31, Sat>
 o rename plotAvgProf to plotAvgProf2 and implement plotAvgProf based on tagMatrix  <2014-05-31, Sat>
 o implement tagHeatmap for visualize heatmap of the tagMatrix or a list of tagMatrix <2014-05-31, Sat>
 o implement shuffle function to generate a random ChIP data based on a real one <2014-05-31, Sat>
 o implement enrichPeakOverlap to calcuate significant of ChIP experiments based on the genome coordinations <2014-05-31, Sat>
 o implement enrichAnnoOverlap to calculate significant of ChIP experiments based on their nearest gene annotation <2014-05-31, Sat>
 o incorporate GEO database for mining significant overlap of ChIP data <2014-05-31, Sat>
   + getGEOspecies summarize the collected data by species
   + getGEOgenomeVersion summarize the colleted data by genome version
   + getGEOInfo extract the information by genome version query
   + downloadGEObedFiles download all bed files of a particular genome version
   + downloadGSMbedFiles download the bed files of the input GSM list.

CHANGES IN VERSION 1.1.4
------------------------
 o in the annotation column of output of annotatePeak function,
	if Exon/Intron, the output change to 'Transcript_Name/GeneID, Exon no. of total_no.' <2014-05-14, Wed>

CHANGES IN VERSION 1.1.3
------------------------
 o bug fixed when metadata(TranscriptDb) contained NA <2014-04-30, Wed>
 o support ID type of Ensembl in annotatePeak (Entrez was supported) <2014-04-30, Wed>

CHANGES IN VERSION 1.1.2
------------------------
 o implemented plotChrCov <2014-04-25, Fri>
 o implemented plotAvgProf and peakHeatmap <2014-04-24, Thu>

CHANGES IN VERSION 1.1.1
------------------------
 o output of annotatePeak now contain chromosome length information <2014-04-22, Tue>
 o re-implement plotAnnoPie to use ordinary pie plot instead of pie3D <2014-04-21, Mon>

CHANGES IN VERSION 1.0.0
------------------------
 o initial version with the following functions implemented:
   + annotatePeak
   + overlap
   + plotAnnoBar
   + plotAnnoPie
   + plotDistToTSS
   + readPeakFile
   + vennplot
   + vennplot.peakfile


================================================
FILE: NEWS.md
================================================
# ChIPseeker 1.48.0

+ Bioconductor RELEASE_3_23 (2026-04-29, Wed)

# ChIPseeker 1.47.1

+ fixed issue in 'test-txdb.R' as 'TxDb.Hsapiens.UCSC.hg19.knownGene' changes its transcript ID from UCSC (e.g., uc002qsd.4) to Ensembl (e.g., ENST00000487630.1_3) (2025-11-04, Tue)

# ChIPseeker 1.46.0

+ Bioconductor RELEASE_3_22 (2025-11-01, Sat)

# ChIPseeker 1.45.2

+ new cache mechanism from 'yulab.utils' (2025-10-15, Wed)

# ChIPseeker 1.44.0

+ Bioconductor RELEASE_3_21 (2025-04-17, Thu)

# ChIPseeker 1.42.0

+ Bioconductor RELEASE_3_20 (2024-10-30, Wed)

# ChIPseeker 1.41.3

+ Better `covplot()`. Support universal chromosome names, and keep the default order of multiple peaks when plot a list of `GRanges` object.
+ Robust `generate_colors()`. Edit the logical of decision, and can validate color code automatically.
+ Extend dplyr verbs (`filter()`, `mutate()`, `arrange()`, `rename()`) to peak (`GRanges` object or `data.frame`), see #242.

# ChIPseeker 1.41.2

+ Enhancement of `plotDistToTSS()`, see #241.

# ChIPseeker 1.41.1

+ use `yulab.utils::yulab_msg()` for startup message (2024-07-26, Fri)

# ChIPseeker 1.40.0

+ Bioconductor RELEASE_3_19 (2024-05-15, Wed)

# ChIPseeker 1.38.0

+ Bioconductor RELEASE_3_18 (2023-10-25, Wed)

# ChIPseeker 1.36.0

+ Bioconductor RELEASE_3_17 (2023-05-03, Wed)

# ChIPseeker 1.35.3

+ fixed R check by removing calling `BiocStyle::Biocpkg()` in vignette, instead we use `yulab.utils::Biocpkg()` (2023-04-11, Tue)

# ChIPseeker 1.35.2

+ fixed R check by adding 'prettydoc' to Suggests (2023-04-04, Tue)

# ChIPseeker 1.35.1

+ use `ggplot` to plot heatmap (2022-12-30, Fri, #203)
+ update startup message to display the 'Current Protocols (2022)' paper. 

# ChIPseeker 1.34.0

+ Bioconductor RELEASE_3_16 (2022-11-02, Wed)


# ChIPseeker 1.33.4

+ add citation Q. Wang (2022) (2022-10-29, Sat)

# ChIPseeker 1.33.3

+ allows passing user defined color to `vennpie()` (2022-10-20, Thu, #202, #207)
+ add `columns` paramter to `annotatePeak()` to better support passing `EnsDb` to `annoDb` (#193, #205)
+ export `getAnnoStat()` (#200, #204)

# ChIPseeker 1.33.2

+ supports `by = "ggVennDiagram"` in `vennplot` function (2022-09-13, Tue)

# ChIPseeker 1.33.1

+ `plotPeakProf()` allows passing GRanges object or a list of GRanges objects to TxDb parameter (2022-06-04, Sat)
+ add test files for `getTagMatrix()` and `plotTagMatrix()`
+ `getBioRegion()` supports UTR regions (3'UTR + 5'UTR)
+ `makeBioRegionFromGranges()` supports generating windoes from self-made GRanges object
+ allow specify colors in `covplot()` (2022-05-09, Mon, #185, #188)

# ChIPseeker 1.32.0

+ Bioconductor 3.15 release

# ChIPseeker 1.31.4

+ `readPeakFile` now supports `.broadPeak` and `.gappedPeak` files (2021-12-17, Fri, #173) 

# ChIPseeker 1.31.3

+ bug fixed of determining promoter region in minus strand (2021-12-16, Thu, #172)

# ChIPseeker 1.31.2

+ update vignette

# ChIPseeker 1.31.1

+ bug fixed to take strand information (2021-11-10, Wed, #167)

# ChIPseeker 1.30.0

+ Bioconductor 3.14 release

# ChIPseeker 1.29.2

+ extend functions for plotting peak profiles to support other types of bioregions (2021-10-15, Fri, @MingLi-929, #156, #160, #162, #163)

# ChIPseeker 1.29.1

+ add example for `seq2gene` function (2021-05-21, Fri)

# ChIPseeker 1.28.0

+ Bioconductor 3.13 release (2021-05-20, Thu)

# ChIPseeker 1.27.5

+ update GEO data (103398/1973025 GSM) (2021-05-14, Fri)

# ChIPseeker 1.27.4

+ bug fixed in determine downstream gene (2021-04-27, Thu)
  - <https://github.com/YuLab-SMU/ChIPseeker/pull/148>
+ `getBioRegion` now supports '3UTR' and '5UTR' (2021-03-30, Tue)
  - <https://github.com/YuLab-SMU/ChIPseeker/pull/146>

# ChIPseeker 1.27.3

+ add two parameter, cex and radius, to `plotAnnoPie` (2021-03-12, Fri)
  - <https://github.com/YuLab-SMU/ChIPseeker/pull/144>

# ChIPseeker 1.27.2

+ bug fixed of `getGenomicAnnotation` (2021-03-03, Wed)
  - <https://github.com/YuLab-SMU/ChIPseeker/issues/142>

# ChIPseeker 1.27.1

+ Add support for `EnsDb` annotation databases in `annotatePeak`. 
  - <https://github.com/YuLab-SMU/ChIPseeker/pull/120>

# ChIPseeker 1.26.0

+ Bioconductor 3.12 release (2020-10-28, Wed)


# ChIPseeker 1.23.1

+ update GEO data (51079/762820 GSM) (2019-12-20, Fri)

# ChIPseeker 1.22.0

+ Bioconductor 3.10 release
 
# ChIPseeker 1.21.1

+ new implementation of `upsetplot` (2019-08-29, Thu)
  - use `ggupset`, `ggimage` and `ggplotify`
+ `subset` method for `csAnno` object (2019-08-27, Tue)

# ChIPseeker 1.20.0

+ Bioconductor 3.9 release

# ChIPseeker 1.19.1

+ add `origin_label = "TSS"` parameter to `plotAvgProf` (2018-12-12, Wed)
  - <https://github.com/GuangchuangYu/ChIPseeker/issues/91>
  
# ChIPseeker 1.18.0

+ Bioconductor 3.8 release

# ChIPseeker 1.17.2

+ add `flip_minor_strand` parameter in `getTagMatrix` (2018-08-10, Fri)
  - should set to FALSE if windows if not symetric
  
# ChIPseeker 1.17.1

+ fixed issue of `vennpie` by adding pseudo-count +1 (2018-07-21, Sat)
  - <https://www.biostars.org/p/326456/>

# ChIPseeker 1.16.0

+ Bioconductor 3.7 release

# ChIPseeker 1.15.4

+ If the required input is a named list and user input a list without name,
  set the name automatically and throw warning msg instead of error <2018-03-14,
  Wed>
    - <https://support.bioconductor.org/p/106903/#106936>
+ change `plotAvgProf`'s default y label <2018-03-14, Wed>
    - <https://github.com/GuangchuangYu/ChIPseeker/issues/76>
+ plotAnnoBar now visualize barplot according to the order of input list
  (y-axis) (2018-02-27, Tue)
    - <https://github.com/GuangchuangYu/ChIPseeker/issues/73>
+ follow renaming of RangesList class -> IntegerRangesList in IRanges v2.13.12
    - <https://github.com/GuangchuangYu/ChIPseeker/commit/b62d7922fb61e58620bbb685e4def4fb863c8e81>

# ChIPseeker 1.15.3

+ options to ignore '1st exon', '1st intron', 'downstream' and promoter
  subcategory when summarizing result and visualization (2018-01-09, Tue)
    - <https://support.bioconductor.org/p/104676/#104689>
+ throw msg of 'file not found and skip' when requested url is not available
  when downloading BED file from GEO (2017-12-28, Thu)
    - <https://support.bioconductor.org/p/104491/#104507>
+ bug fixed of getGene (2017-12-27, Wed)


================================================
FILE: R/AllGenerics.R
================================================
##' vennpie method generics
##'
##'
##' @docType methods
##' @name vennpie
##' @rdname vennpie-methods
##' @export
setGeneric("vennpie", 
  function(x, r = 0.2, cex = 1.2, ...) 
  standardGeneric("vennpie")
)


##' plotDistToTSS method generics
##'
##'
##' @docType methods
##' @name plotDistToTSS
##' @rdname plotDistToTSS-methods
##' @export
setGeneric("plotDistToTSS", 
  function(x, 
    distanceColumn="distanceToTSS",
    xlab="", ylab="Binding sites (%) (5'->3')",
    title="Distribution of transcription factor-binding loci relative to TSS", 
    ...)
  standardGeneric("plotDistToTSS")
)

##' plotAnnoBar method generics
##'
##'
##' @docType methods
##' @name plotAnnoBar
##' @rdname plotAnnoBar-methods
##' @export
setGeneric("plotAnnoBar", 
  function(x,
    xlab="",
    ylab="Percentage(%)",
    title="Feature Distribution",
    ...)
  standardGeneric("plotAnnoBar")
)


##' plotAnnoPie method generics
##'
##'
##' @docType methods
##' @name plotAnnoPie
##' @rdname plotAnnoPie-methods
##' @export
setGeneric("plotAnnoPie", 
  function(x, 
    ndigit=2,
    cex=0.9,
    col=NA,
    legend.position="rightside",
    pie3D=FALSE,
    radius=0.8,
    ...)
  standardGeneric("plotAnnoPie")
)


================================================
FILE: R/ChIPseeker-package.R
================================================
#' @keywords internal
"_PACKAGE"


##' Information Datasets
##' 
##' ucsc genome version, precalcuated data and gsm information
##' 
##' @name info
##' @aliases ucsc_release
##' gsminfo
##' tagMatrixList
##' @docType data
##' @keywords datasets
NULL

##' Name of the ChIPseeker cache environment (internal static variable)
##' @format character vector 
ChIPseekerCache <- "ChIPseekerEnv"


================================================
FILE: R/GEO.R
================================================
########################################
##                                    ##
## data last update: Mar 03, 2015     ##
##                                    ##
########################################


##' accessing species statistics collecting from GEO database
##'
##'
##' @title getGEOspecies
##' @return data.frame
##' @author G Yu
##' @export
getGEOspecies <- function() {
    gsminfo <- get_gsminfo()
    species <- gsminfo$organism
    res <- as.data.frame(table(species))
    return(res)
}

##' get genome version statistics collecting from GEO ChIPseq data
##'
##'
##' @title getGEOgenomeVersion
##' @return data.frame
##' @author G Yu
##' @export
getGEOgenomeVersion <- function() {
    gsminfo <- get_gsminfo()
    gv <- gsminfo[, c("organism",
                      "genomeVersion")]
    genomeVersion <- gv$genomeVersion
    res <- as.data.frame(table(genomeVersion))
    gv <- unique(gv)

    res <- merge(gv, res, by.x="genomeVersion", by.y="genomeVersion", all.y=TRUE)
    res <- res[, c("organism", "genomeVersion", "Freq")]
    return(res)
}

##' get subset of GEO information by genome version keyword
##'
##'
##' @title getGEOInfo
##' @param genome genome version
##' @param simplify simplify result or not
##' @return data.frame
##' @author G Yu
##' @export
getGEOInfo <- function(genome, simplify =TRUE) {
    gsminfo <- get_gsminfo()
    genomeVersion <- NULL ## to satisfy codetools
    res <- subset(gsminfo, subset = genomeVersion == genome)
    if (simplify) {
        res <- res[,c("series_id", "gsm", "organism", "title", "supplementary_file", "genomeVersion", "pubmed_id")]
    }
    return(res)
}

##' download all BED files of a particular genome version
##'
##'
##' @title downloadGEObedFiles
##' @param genome genome version
##' @param destDir destination folder
##' @return NULL
##' @author G Yu
##' @export
downloadGEObedFiles <- function(genome, destDir=getwd()) {
    info <- getGEOInfo(genome)
    downloadGEO.internal(info, destDir)
}

##' download BED supplementary files of a list of GSM accession numbers
##'
##'
##' @title downloadGSMbedFiles
##' @param GSM GSM accession numbers
##' @param destDir destination folder
##' @return NULL
##' @author G Yu
##' @export
downloadGSMbedFiles <- function(GSM, destDir=getwd()) {
    gsminfo <- get_gsminfo()
    info <- gsminfo[gsminfo$gsm %in% GSM,]
    downloadGEO.internal(info, destDir)
}

##' @importFrom utils download.file
downloadGEO.internal <- function(info, destDir) {
    fnames <- as.character(info$supplementary_file)
    destfiles <- sub(".*\\/", paste(destDir, "/", sep=""), fnames)
    names(destfiles) <- NULL

    for (i in seq_along(fnames)) {
        if ( ! file.exists(destfiles[i]) )
            tryCatch(download.file(fnames[i],
                          destfile=destfiles[i],
                          mode="wb"),
                     error = function(e) message(fnames[i], ': file not found and skip'))
    }
}

##' @importFrom utils data
## @importFrom GEOmetadb
## @importFrom RSQLite dbConnect
## @importFrom RSQLite dbGetQuery
prepareGSMInfo <- function() {
    pkg <- "GEOmetadb"
    require(pkg, character.only=TRUE)
    getSQLiteFile <- eval(parse(text="getSQLiteFile"))
    ## get the latest version of sql file
    is.dl <- tryCatch(getSQLiteFile(), error = function(e) NULL)

    if (is.null(is.dl)) {
        url <- 'http://starbuck1.s3.amazonaws.com/sradb/GEOmetadb.sqlite.gz'
        HEAD <- eval(parse(text = "httr::HEAD"))
        hh <- HEAD(url)
        size <- hh$headers[["content-length"]]
        cmd <- paste('wget -c', url)
        while(file.info("GEOmetadb.sqlite.gz")$size < size) {
            system(cmd)
        }
        if (file.exists('GEOmetadb.sqlite') && file.exists('GEOmetadb.sqlite.gz')) {
            file.remove("GEOmetadb.sqlite")
        }
        system('gunzip GEOmetadb.sqlite.gz')
    }

    GEOmetadbFile="GEOmetadb.sqlite"
    file.info(GEOmetadbFile)

    sqlpkg <- "RSQLite"
    require(sqlpkg, character.only=TRUE)
    dbConnect <- eval(parse(text="dbConnect"))
    dbGetQuery <- eval(parse(text="dbGetQuery"))
    SQLite <- eval(parse(text="SQLite"))

    con <- dbConnect(SQLite(),GEOmetadbFile)
    ## dbListTables(con)


    pkg <- "GEOquery"
    require(pkg, character.only=TRUE)
    getGEO <- eval(parse(text="getGEO"))
    Meta <- eval(parse(text="Meta"))

    ## get all GPL IDs
    ## download soft using gpl = getGEO("GPLXXX")
    ## using Meta(gpl) find the technology match sequencing
    ## get all gsm IDs
    ## parse it

    gpl <- dbGetQuery(con, 'select gpl, technology from gpl')
    gpl <- gpl[gpl[,2] == "high-throughput sequencing",1]
    gpl <- gpl[!is.na(gpl)]

    ## save the processedGSM vector that contain all the GSM that have been processed.
    ## next time when preparing GSMInfo, filter those have been processed before.
    load(system.file("extdata/processedGSM.rda", package="ChIPseeker"))
    processedGSM <- get("processedGSM")
    newGSM <- c()

    gpldir <- "GPL"
    if (!file.exists(gpldir)) {
        dir.create(gpldir)
    }

    for (gid in gpl) {
        gg <- tryCatch(getGEO(gid, destdir=gpldir), error=function(e) NULL)
        if (is.null(gg)) {
            next
        }
        gsm <- Meta(gg)$sample_id
        gsm <- gsm[! (gsm %in% processedGSM) ]
        if (length(gsm) == 0) {
            next
        }
        newGSM <- c(newGSM, gsm)

        sf <- batchGetGSMsuppFile(gsm)
        if (!is.null(sf)) {
            save(sf, file=paste(gid, "_sf.rda", sep=""))
        }
    }

    processedGSM <- c(processedGSM, newGSM)
    processedGSM <- unique(processedGSM)
    save(processedGSM, file="../processedGSM.rda", compress="xz")


    sfiles <- list.files(pattern="_sf.rda")
    res <- data.frame(gsm=NULL, remoteFile=NULL)
    for (ff in sfiles) {
        load(ff)
        if (!is.null(sf)) {
            res <- rbind(res, sf)
        }
    }
    colnames(res)[2] <- "supplementary_file"


    GSMInfo <- lapply(unique(as.character(res$gsm)), function(i) {
        dbGetQuery(con,paste("select gsm,series_id,gpl,organism_ch1,title,characteristics_ch1,source_name_ch1,extract_protocol_ch1,description,data_processing,submission_date ",
                             "from gsm where gsm='", i, "'", sep=""))
    })

    GSMInfo <- do.call("rbind", GSMInfo)

    colnames(GSMInfo) <- sub("_ch1", "", colnames(GSMInfo))

    gsminfo <- merge(GSMInfo, res, by.x="gsm", by.y="gsm")

    tryCatch(utils::data("ucsc_release", package="ChIPseeker"))
    ucsc_release <- get("ucsc_release")

    genVer <- lapply(1:nrow(gsminfo), function(i)
                     getGenomicVersion(ucsc_release,
                                       gsminfo[i, "data_processing"],
                                       gsminfo[i, "organism"],
                                       gsminfo[i, "supplementary_file"])
                     )

    gsminfo$genomeVersion <- unlist(genVer)

    gse <- as.character(gsminfo$series_id)
    pubmed <- lapply(gse, function(i) {
        dbGetQuery(con,paste("select gse,pubmed_id ",
                             "from gse where gse='", i, "'", sep=""))
    })
    pm <- do.call(rbind, pubmed)
    pm <- unique(pm)
    gsminfo <- merge(gsminfo, pm, by.x="series_id", by.y="gse", all.x=TRUE)

    ## remove non-ASCII characters
    for(i in 1:ncol(gsminfo)) {
        gsminfo[,i] = iconv(gsminfo[,i], "latin1", "ASCII", sub="")
    }
    gsminfo2 <- gsminfo
    rm(gsminfo)

    utils::data("gsminfo", package="ChIPseeker")
    gsminfo <- get("gsminfo")
    gsminfo <- rbind(gsminfo, gsminfo2)
    gsminfo <- unique(gsminfo)

    save(gsminfo, file="../gsminfo.rda", compress="xz")
}


getGenomicVersion <- function(ucsc_release, data_processing, organism, supplementary_file) {
    data_processing <- as.character(data_processing)
    organism <- as.character(organism)
    supplementary_file <- as.character(supplementary_file)

    species <- NULL
    gs <- subset(ucsc_release, subset = species == organism)
    if (nrow(gs) == 0) return(NA)

    genMatch <- unlist(sapply(gs$ucsc_version, grep, data_processing))
    if (length(genMatch) == 0) {
        genMatch <- unlist(sapply(gs$ucsc_version, grep, supplementary_file))
        if (length(genMatch) == 0) {
            return(NA)
        }
    }

    genVer <- names(genMatch)
    if (length(genVer) > 1) {
        genVer <- genVer[1]
    }

    return(genVer)
}

## getGSE_ENCODE <- function() {
##     encode=readLines("http://www.ncbi.nlm.nih.gov/geo/info/ENCODE.html")
##     encode.chipseq <- encode[grep("ChIP-Seq", encode)]
##     ## require(gsubfn)
##     ## gse <- sapply(encode.chipseq, function(i) {
##     ##     res <- strapply(i, "(GSE\\d+)")
##     ##     unique(unlist(res))
##     ## })
##     gse <- sapply(encode.chipseq, gsub, pattern='.*(GSE\\d+).*', replacement='\\1')
##     names(gse) <- NULL
##     return(gse)
## }

## GSE2GSM <- function(GSE) {
##     info <- getGEO(GSE, GSEMatrix=FALSE)
##     metaInfo <- Meta(info)
##     gsm <- metaInfo$sample_id
##     return(gsm)
## }

##' @importFrom parallel mclapply
##' @importFrom parallel detectCores
batchGetGSMsuppFile <- function(gsm) {
    suppfiles <- mclapply(seq_along(gsm), function(i) {
        cat("processing ", gsm[i], "\t",  i , " of ", length(gsm), "\n")
        tryCatch(getGSMsuppFile(gsm[i]), error=function(e) NULL)
    }, mc.cores=detectCores())

    suppfiles <- suppfiles[!unlist(lapply(suppfiles, is.null))]

    sf <- do.call("rbind", suppfiles)
    return(sf)
}

getGSMsuppFile <- function(GSM) {
    pkg <- "GEOquery"
    require(pkg, character.only=TRUE)
    getGEO <- eval(parse(text="getGEO"))
    Meta <- eval(parse(text="Meta"))

    destdir="geo_soft"
    if (!file.exists(destdir)) {
        dir.create(destdir)
    }
    info <- getGEO(GSM, GSEMatrix=FALSE, destdir=destdir)
    ## http://www.ncbi.nlm.nih.gov/geo/info/soft2.html
    metaInfo <- Meta(info)

    ## suppmentary file names
    fnames <- unlist(metaInfo[grep("supplementary", names(metaInfo))])
    names(fnames) <- NULL
    i <- c(grep("bed.gz", fnames), grep("Peak.gz", fnames), grep("bedGraph.gz", fnames))

    if (length(i) == 0) {
        message("No bed files found")
        return(NULL)
    }
    fnames <- fnames[i]
    res <- data.frame(gsm=GSM, remoteFile = fnames)
    return(res)
}

get_gsminfo <- function() {
    tryCatch(utils::data("gsminfo", package="ChIPseeker"))
    gsminfo <- get("gsminfo")
    return(gsminfo)
}


================================================
FILE: R/addGeneAnno.R
================================================
##' get gene annotation, symbol, gene name etc.
##'
##'
##' @title getGeneAnno
##' @param annoDb annotation package
##' @param geneID query geneID
##' @param type gene ID type
##' @param columns names of columns to be obtained from database
##' @return data.frame
##' @importFrom AnnotationDbi select
##' @author G Yu
getGeneAnno <- function(annoDb, geneID, type, columns){
    kk <- unlist(geneID)
    require(annoDb, character.only = TRUE)
    annoDb <- eval(parse(text=annoDb))

    if (type == "Entrez Gene ID") {
        kt <- "ENTREZID"
    } else if (type =="Ensembl gene ID" || type == "Ensembl Gene ID") {
        kt <- "ENSEMBL"
    } else {
        message("geneID type is not supported...\tPlease report it to developer...\n")
        return(NA)
    }

    i <- which(!is.na(kk))
    kk <- gsub("\\.\\d+$", "", kk)
    ann <- tryCatch(
        suppressWarnings(select(annoDb,
                                keys=unique(kk[i]),
                                keytype=kt,
                                columns=columns)),
        error = function(e) NULL)

    if (is.null(ann)) {
        warning("ID type not matched, gene annotation will not be added...")
        return(NA)
    }
    idx <- getFirstHitIndex(ann[,kt])
    ann <- ann[idx,]

    ## idx <- unlist(sapply(kk, function(x) which(x==ann[,kt])))
    ## res <- matrix(NA, ncol=ncol(ann), nrow=length(kk)) %>% as.data.frame
    ## colnames(res) <- colnames(ann)
    ## res[i,] <- ann[idx,]

    rownames(ann) <- ann[, kt]
    res <- ann[as.character(kk),]

    return(res)
}


addGeneAnno <- function(peak.gr, annoDb, type, columns) {
    geneAnno <- getGeneAnno(annoDb, peak.gr$geneId, type, columns)
    if (! all(is.na(geneAnno))) {
        for(cn in colnames(geneAnno)[-1]) {
            mcols(peak.gr)[[cn]] <- geneAnno[, cn]
        }
    }
    return(peak.gr)
}


================================================
FILE: R/annotatePeak.R
================================================
##' Annotate peaks
##'
##'
##' @title annotatePeak
##' @param peak peak file or GRanges object
##' @param tssRegion Region Range of TSS
##' @param TxDb TxDb or EnsDb annotation object
##' @param level one of transcript and gene
##' @param assignGenomicAnnotation logical, assign peak genomic annotation or not
##' @param genomicAnnotationPriority genomic annotation priority
##' @param annoDb annotation package
##' @param addFlankGeneInfo logical, add flanking gene information from the peaks
##' @param flankDistance distance of flanking sequence
##' @param sameStrand logical, whether find nearest/overlap gene in the same strand
##' @param ignoreOverlap logical, whether ignore overlap of TSS with peak
##' @param ignoreUpstream logical, if True only annotate gene at the 3' of the peak.
##' @param ignoreDownstream logical, if True only annotate gene at the 5' of the peak.
##' @param overlap one of 'TSS' or 'all', if overlap="all", then gene overlap with peak will be reported as nearest gene, no matter the overlap is at TSS region or not.
##' @param verbose print message or not
##' @param columns names of columns to be obtained from database
##' @return data.frame or GRanges object with columns of:
##'
##' all columns provided by input.
##'
##' annotation: genomic feature of the peak, for instance if the peak is
##' located in 5'UTR, it will annotated by 5'UTR. Possible annotation is
##' Promoter-TSS, Exon, 5' UTR, 3' UTR, Intron, and Intergenic.
##'
##' geneChr: Chromosome of the nearest gene
##'
##' geneStart: gene start
##'
##' geneEnd: gene end
##'
##' geneLength: gene length
##'
##' geneStrand: gene strand
##'
##' geneId: entrezgene ID
##'
##' distanceToTSS: distance from peak to gene TSS
##'
##' if annoDb is provided, extra column will be included:
##'
##' ENSEMBL: ensembl ID of the nearest gene
##'
##' SYMBOL: gene symbol
##'
##' GENENAME: full gene name
##' @import BiocGenerics S4Vectors GenomeInfoDb
##' @examples
##' \dontrun{
##' require(TxDb.Hsapiens.UCSC.hg19.knownGene)
##' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
##' peakfile <- system.file("extdata", "sample_peaks.txt", package="ChIPseeker")
##' peakAnno <- annotatePeak(peakfile, tssRegion=c(-3000, 3000), TxDb=txdb)
##' peakAnno
##' }
##' @seealso \code{\link{plotAnnoBar}} \code{\link{plotAnnoPie}} \code{\link{plotDistToTSS}}
##' @export
##' @author G Yu
annotatePeak <- function(peak,
                         tssRegion=c(-3000, 3000),
                         TxDb=NULL,
                         level = "transcript",
                         assignGenomicAnnotation=TRUE,
                         genomicAnnotationPriority = c("Promoter", "5UTR", "3UTR", "Exon", "Intron", "Downstream", "Intergenic"),
                         annoDb=NULL,
                         addFlankGeneInfo=FALSE,
                         flankDistance=5000,
                         sameStrand = FALSE,
                         ignoreOverlap=FALSE,
                         ignoreUpstream=FALSE,
                         ignoreDownstream=FALSE,
                         overlap = "TSS",
                         verbose=TRUE,
                         columns=c("ENTREZID", "ENSEMBL", "SYMBOL", "GENENAME")) {

    is_GRanges_of_TxDb <- FALSE
    if (is(TxDb, "GRanges")) {
        is_GRanges_of_TxDb <- TRUE
        assignGenomicAnnotation <- FALSE
        annoDb <- NULL
        addFlankGeneInfo <- FALSE
        message("#\n#.. 'TxDb' is a self-defined 'GRanges' object...\n#")
        message("#.. Some parameters of 'annotatePeak' will be disable,")
        message("#.. including:")
        message("#..\tlevel, assignGenomicAnnotation, genomicAnnotationPriority,")
        message("#..\tannoDb, addFlankGeneInfo and flankDistance.")
        message("#\n#.. Some plotting functions are designed for visualizing genomic annotation")
        message("#.. and will not be available for the output object.\n#")
    }

    if (is_GRanges_of_TxDb) {
        level <- "USER_DEFINED"
    } else {
        level <- match.arg(level, c("transcript", "gene"))
    }

    if (assignGenomicAnnotation && all(genomicAnnotationPriority %in% c("Promoter", "5UTR", "3UTR", "Exon", "Intron", "Downstream", "Intergenic")) == FALSE) {
        stop('genomicAnnotationPriority should be any order of c("Promoter", "5UTR", "3UTR", "Exon", "Intron", "Downstream", "Intergenic")')
    }

    if ( is(peak, "GRanges") ){
        ## this test will be TRUE
        ## when peak is an instance of class/subclass of "GRanges"
        input <- "gr"
        peak.gr <- peak
    } else {
        input <- "file"
        peak.gr <- loadPeak(peak, verbose)
    }

    peakNum <- length(peak.gr)

    if (verbose)
        cat(">> preparing features information...\t\t",
            format(Sys.time(), "%Y-%m-%d %X"), "\n")

    if (is_GRanges_of_TxDb) {
        features <- TxDb
    } else {
        TxDb <- loadTxDb(TxDb)

        if (level=="transcript") {
            features <- getGene(TxDb, by="transcript")
        } else {
            features <- getGene(TxDb, by="gene")
        }
    }
    if (verbose)
        cat(">> identifying nearest features...\t\t",
            format(Sys.time(), "%Y-%m-%d %X"), "\n")

    ## nearest features
    idx.dist <- getNearestFeatureIndicesAndDistances(peak.gr, features,
                                                     sameStrand, ignoreOverlap,
                                                     ignoreUpstream,ignoreDownstream,
                                                     overlap=overlap)

    if (verbose)
        cat(">> calculating distance from peak to TSS...\t",
            format(Sys.time(), "%Y-%m-%d %X"), "\n")
    ## distance
    distance <- idx.dist$distance

    ## update peak, remove un-map peak if exists.
    peak.gr <- idx.dist$peak

    ## annotation
    if (assignGenomicAnnotation == TRUE) {
        if (verbose)
            cat(">> assigning genomic annotation...\t\t",
                format(Sys.time(), "%Y-%m-%d %X"), "\n")

        anno <- getGenomicAnnotation(peak.gr, distance, tssRegion, TxDb, level, genomicAnnotationPriority, sameStrand=sameStrand)
        annotation <- anno[["annotation"]]
        detailGenomicAnnotation <- anno[["detailGenomicAnnotation"]]
    } else {
        annotation <- NULL
        detailGenomicAnnotation <- NULL
    }

    ## append annotation to peak.gr
    if (!is.null(annotation))
        mcols(peak.gr)[["annotation"]] <- annotation


    has_nearest_idx <- which(idx.dist$index <= length(features))
    nearestFeatures <- features[idx.dist$index[has_nearest_idx]]

    ## duplicated names since more than 1 peak may annotated by only 1 gene
    names(nearestFeatures) <- NULL
    nearestFeatures.df <- as.data.frame(nearestFeatures)
    if (is_GRanges_of_TxDb) {
        colnames(nearestFeatures.df)[1:5] <- c("geneChr", "geneStart", "geneEnd",
                                          "geneLength", "geneStrand")
    } else if (level == "transcript") {
        if (is(TxDb, "EnsDb")) {
            nearestFeatures.df <- nearestFeatures.df[, c("seqnames", "start",
                                                         "end", "width",
                                                         "strand", "gene_id",
                                                         "tx_id", "tx_biotype"),
                                                     drop = FALSE]
            colnames(nearestFeatures.df) <- c(
                "geneChr", "geneStart", "geneEnd", "geneLength", "geneStrand",
                "geneId", "transcriptId", "transcriptBiotype")
        } else {
            colnames(nearestFeatures.df) <- c("geneChr", "geneStart", "geneEnd",
                                              "geneLength", "geneStrand",
                                              "geneId", "transcriptId")
            nearestFeatures.df$geneId <- TXID2EG(
                as.character(nearestFeatures.df$geneId), geneIdOnly=TRUE)
        }
    } else {
        if (is(TxDb, "EnsDb")) {
            nearestFeatures.df <- nearestFeatures.df[, c("seqnames", "start",
                                                         "end", "width",
                                                         "strand", "gene_id",
                                                         "gene_biotype"),
                                                     drop = FALSE]
            colnames(nearestFeatures.df) <- c("geneChr", "geneStart", "geneEnd",
                                              "geneLength", "geneStrand",
                                              "geneId", "geneBiotype")
        } else
            colnames(nearestFeatures.df) <- c("geneChr", "geneStart", "geneEnd",
                                              "geneLength", "geneStrand",
                                              "geneId")
    }

    for(cn in colnames(nearestFeatures.df)) {
        mcols(peak.gr)[[cn]][has_nearest_idx] <- unlist(nearestFeatures.df[, cn])
    }

    mcols(peak.gr)[["distanceToTSS"]] <- distance

    if (!is.null(annoDb)) {
        if (verbose)
            cat(">> adding gene annotation...\t\t\t",
                format(Sys.time(), "%Y-%m-%d %X"), "\n")
        .idtype <- IDType(TxDb)
        if (length(.idtype) == 0 || is.na(.idtype) || is.null(.idtype)) {
            n <- length(peak.gr)
            if (n > 100)
                n <- 100
            sampleID <- peak.gr$geneId[1:n]

            if (all(grepl('^ENS', sampleID))) {
                .idtype <- "Ensembl Gene ID"
            } else if (all(grepl('^\\d+$', sampleID))) {
                .idtype <- "Entrez Gene ID"
            } else {
                warning("Unknown ID type, gene annotation will not be added...")
                .idtype <- NA
            }
        }

        if (!is.na(.idtype)) {
            peak.gr %<>% addGeneAnno(annoDb, .idtype, columns)
        }
    }

    if (addFlankGeneInfo == TRUE) {
        if (verbose)
            cat(">> adding flank feature information from peaks...\t",
                format(Sys.time(), "%Y-%m-%d %X"), "\n")

        flankInfo <- getAllFlankingGene(peak.gr, features, level, flankDistance)

        if (level == "transcript") {
            mcols(peak.gr)[["flank_txIds"]] <- NA
            mcols(peak.gr)[["flank_txIds"]][flankInfo$peakIdx] <- flankInfo$flank_txIds
        }

        mcols(peak.gr)[["flank_geneIds"]] <- NA
        mcols(peak.gr)[["flank_gene_distances"]] <- NA

        mcols(peak.gr)[["flank_geneIds"]][flankInfo$peakIdx] <- flankInfo$flank_geneIds
        mcols(peak.gr)[["flank_gene_distances"]][flankInfo$peakIdx] <- flankInfo$flank_gene_distances

    }

    if (!is_GRanges_of_TxDb) {
        if(verbose)
            cat(">> assigning chromosome lengths\t\t\t",
                format(Sys.time(), "%Y-%m-%d %X"), "\n")

        peak.gr@seqinfo <- seqinfo(TxDb)[names(seqlengths(peak.gr))]
    }

    if(verbose)
        cat(">> done...\t\t\t\t\t",
            format(Sys.time(), "%Y-%m-%d %X"), "\n")

    if (assignGenomicAnnotation) {
        res <- new("csAnno",
                   anno = peak.gr,
                   tssRegion = tssRegion,
                   level=level,
                   hasGenomicAnnotation = TRUE,
                   detailGenomicAnnotation=detailGenomicAnnotation,
                   annoStat=getGenomicAnnoStat(peak.gr),
                   peakNum=peakNum
                   )
    } else {
        res <- new("csAnno",
                   anno = peak.gr,
                   tssRegion = tssRegion,
                   level=level,
                   hasGenomicAnnotation = FALSE,
                   peakNum=peakNum
                   )
    }

    return(res)
}


##' dropAnno
##'
##' drop annotation exceeding distanceToTSS_cutoff
##' @title dropAnno
##' @param csAnno output of annotatePeak
##' @param distanceToTSS_cutoff distance to TSS cutoff
##' @return csAnno object
##' @export
##' @author Guangchuang Yu
dropAnno <- function(csAnno, distanceToTSS_cutoff=10000) {
    idx <- which(abs(mcols(csAnno@anno)[["distanceToTSS"]]) < distanceToTSS_cutoff)
    csAnno@anno <- csAnno@anno[idx]
    csAnno@peakNum <- length(idx)
    if (csAnno@hasGenomicAnnotation) {
        csAnno@annoStat <- getGenomicAnnoStat(csAnno@anno)
        csAnno@detailGenomicAnnotation = csAnno@detailGenomicAnnotation[idx,]
    }
    csAnno
}


================================================
FILE: R/covplot.R
================================================

##' plot peak coverage
##'
##' 
##' @title covplot
##' @param peak peak file or GRanges object
##' @param weightCol weight column of peak
##' @param xlab xlab
##' @param ylab ylab
##' @param title title
##' @param chrs selected chromosomes to plot, all chromosomes by default
##' @param xlim ranges to plot, default is whole chromosome
##' @param lower lower cutoff of coverage signal
##' @param fill_color specify the color/palette for the plot. Order matters
##' @return ggplot2 object
##' @import GenomeInfoDb
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 geom_segment
##' @importFrom ggplot2 geom_blank
##' @importFrom ggplot2 geom_rect
##' @importFrom ggplot2 facet_grid
##' @importFrom ggplot2 theme
##' @importFrom ggplot2 theme_classic
##' @importFrom ggplot2 element_text
##' @importFrom ggplot2 xlab
##' @importFrom ggplot2 ylab
##' @importFrom ggplot2 xlim
##' @importFrom ggplot2 ggtitle
##' @export
##' @author G Yu
covplot <- function(peak, weightCol=NULL,
                    xlab  = "Chromosome Size (bp)",
                    ylab  = "",
                    title = "ChIP Peaks over Chromosomes",
                    chrs  = NULL,
                    xlim  = NULL,
                    lower = 1,
                    fill_color = "black") {
    isList <- is.list(peak)
    if(!isList) {  # Note: don't support data.frame
        tm <- getChrCov(peak = peak, weightCol = weightCol, chrs = chrs, xlim = xlim, lower = lower)
    } else {
        ltm <- lapply(peak, getChrCov, weightCol = weightCol, chrs = chrs, xlim = xlim, lower = lower)
        if (is.null(names(ltm))) {
            nn <- paste0("peak", seq_along(ltm))
            warning("input is not a named list, set the name automatically to ", paste(nn, collapse = ' '))
            names(ltm) <- nn
        }
        tm <- dplyr::bind_rows(ltm, .id = ".id")
        chr.sorted <- sortChrName(as.character(unique(tm$chr)))
        tm$chr <- factor(tm$chr, levels = chr.sorted)
    }
    
    chr <- start <- end <- value <- .id <- NULL
    
    if(length(tm$chr) == 0){
        p <- ggplot(data.frame(x = 1)) + geom_blank()
    } else {
        p <- ggplot(tm, aes(start, value))
        
        ## p <- p + geom_segment(aes(x=start, y=0, xend=end, yend= value))
        if (isList) {
            if (length(fill_color) == length(peak) && all(is_valid_color(fill_color))){
                cols = fill_color
            } else {
                cols = generate_colors(fill_color, n = length(peak))
            }
            p <- p + geom_rect(aes(xmin = start, ymin = 0, xmax = end, ymax = value, fill = .id, color = .id)) +
                scale_color_manual(values = cols) +
                scale_fill_manual(values = cols)
        } else {
            p <- p + geom_rect(aes(xmin = start, ymin = 0, xmax = end, ymax = value), fill = fill_color, color = fill_color)
        }
        
        if(length(unique(tm$chr)) > 1) {
            p <- p + facet_grid(chr ~., scales="free")
        }
        
    }
    
    p <- p + theme_classic()
    p <- p + labs(x = xlab, y = ylab, title = title, fill = NULL, color = NULL)
    p <- p + scale_y_continuous(expand = c(0,0))
    p <- p + theme(strip.text.y=element_text(angle=360))
    p <- p + scale_x_continuous(labels = scales::label_number(scale_cut = scales::cut_si("")))
    
    if (!is.null(xlim) && !all(is.na(xlim)) && is.numeric(xlim) && length(xlim) == 2) {
        p <- p + xlim(xlim)
    }
    
    return(p)
}

##' @import S4Vectors IRanges
##' @importFrom dplyr group_by
##' @importFrom dplyr summarise
##' @importFrom magrittr %>%
getChrCov <- function(peak, weightCol, chrs, xlim, lower=1) {
    if (is(peak, "GRanges")) {
        peak.gr <- peak
    } else if (file.exists(peak)) {
        peak.gr <- readPeakFile(peak, as="GRanges")
    } else {
        stop("peak should be a GRanges object or a peak file...")
    }

    if ( is.null(weightCol)) {
        peak.cov <- coverage(peak.gr)
    } else {
        weight <- mcols(peak.gr)[[weightCol]]
        peak.cov <- coverage(peak.gr, weight=weight)
    }

    cov <- lapply(peak.cov, IRanges::slice, lower=lower)

    get.runValue <- function(x) {
        y <- runValue(x)
        sapply(y@listData, mean)
        ## value <- x@subject@values
        ## value[value != 0]
    }

    chr <- start <- end <- cnt <- NULL
    
    ldf <- lapply(1:length(cov), function(i) {
        x <- cov[[i]]
        if (length(x@ranges) == 0) {
            msg <- paste0(names(cov[i]),
                          " dosen't contain signal higher than ",
                          lower)
            message(msg)
            return(NA)
        }
        data.frame(chr   = names(cov[i]),
                   start = start(x),
                   end   = end(x),
                   cnt   = get.runValue(x)
                                        # the following versions are more slower
                                        # unlist(runValue(x)) 
                                        # sapply(x, runValue)
                   )
    })

    ldf <- ldf[!is.na(ldf)]
    df <- do.call("rbind", ldf)
    
    chr.sorted <- sortChrName(as.character(unique(df$chr)))
    df$chr <- factor(df$chr, levels=chr.sorted)
    if (!is.null(chrs) && !all(is.na(chrs)) && all(chrs %in% chr.sorted)) {
        df <- df[df$chr %in% chrs, ]
    }
    if (!is.null(xlim) && !all(is.na(xlim)) && is.numeric(xlim) && length(xlim) == 2) {
        df <- df[df$start >= xlim[1] & df$end <= xlim[2],]
    }

    df2 <- group_by(df, chr, start, end) %>% summarise(value=sum(cnt), .groups = "drop")
    return(df2)
}

# a simple `stringr::str_sort(numeric=TRUE)` implementation
sortChrName <- function(chr.name, decreasing = FALSE) {
    ## universal sort function, support organisms other than human
    chr_part <- sub("^(\\D*)(\\d*)$", "\\1", chr.name)
    num_part <- as.numeric(sub("^(\\D*)(\\d*)$", "\\2", chr.name))
    chr.name[order(chr_part, num_part, decreasing = decreasing)]
}


================================================
FILE: R/csAnno.R
================================================
##' Class "csAnno"
##' This class represents the output of ChIPseeker Annotation
##'
##'
##' @name csAnno-class
##' @aliases csAnno-class
##' show,csAnno-method vennpie,csAnno-method
##' plotDistToTSS,csAnno-method plotAnnoBar,csAnno-method
##' plotAnnoPie,csAnno-method upsetplot,csAnno-method
##' subset,csAnno-method
##'
##' @docType class
##' @slot anno annotation
##' @slot tssRegion TSS region
##' @slot level transcript or gene
##' @slot hasGenomicAnnotation logical
##' @slot detailGenomicAnnotation Genomic Annotation in detail
##' @slot annoStat annotation statistics
##' @slot peakNum number of peaks
##' @exportClass csAnno
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
##' @seealso \code{\link{annotatePeak}}
##' @keywords classes
setClass("csAnno",
         representation=representation(
             anno = "GRanges",
             tssRegion = "numeric",
             level = "character",
             hasGenomicAnnotation = "logical",
             detailGenomicAnnotation="data.frame",
             annoStat="data.frame",
             peakNum="numeric"
             ))


##' convert csAnno object to GRanges
##'
##'
##' @title as.GRanges
##' @param x csAnno object
##' @return GRanges object
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
##' @export
as.GRanges <- function(x) {
    if (!is(x, "csAnno"))
        stop("not supported...")
    return(x@anno)
}

##' getting status of annotation
##' 
##' 
##' @title getAnnoStat
##' @param x csAnno object
##' @export
getAnnoStat <- function(x) {
    if (!is(x, "csAnno"))
        stop("not supported...")
    return(x@annoStat)
}


##' Combine csAnno Object
##'
##'
##' https://github.com/YuLab-SMU/ChIPseeker/issues/157
##' @title combine_csAnno
##' @param x csAnno object
##' @param ... csAnno objects
##' @return csAnno object
##' @export
combine_csAnno <- function(x, ...){
    z <- list(x, ...)
    
    if(sum(vapply(z, function(x) !is(x, "csAnno"), FUN.VALUE = logical(1))) != 0){
        stop("not supported...")
    }
    
    if(length(z)<2){
        stop("need two or more csAnno object...")
    }
    
    
    if(sum(!duplicated(lapply(z, function(x) x@tssRegion[1]))) != 1 
       && sum(!duplicated(lapply(z, function(x) x@tssRegion[2]))) != 1){
        stop("the tss regions of different csAnno objects should be the same...")
    }
    
    if(sum(!duplicated(lapply(z, function(x) x@level))) != 1){
        stop("the level of different csAnno object should be the same...")
    }
    
    if(sum(!duplicated(lapply(z, function(x) x@hasGenomicAnnotation))) != 1){
        stop("the status of GenomicAnnotation should be the same...")
    }
    
    combine_tssRegion <- x@tssRegion
    combine_level <- x@level
    combine_hasGenomicAnnotation <- x@hasGenomicAnnotation
    
    combine_anno <- x@anno
    for(i in 2:length(z)){
        combine_anno <- c(combine_anno,z[[i]]@anno)
    }
    
    combine_detailGenomicAnnotation <- lapply(z, function(x) x@detailGenomicAnnotation)
    combine_detailGenomicAnnotation <- do.call("rbind",combine_detailGenomicAnnotation)
    
    combine_peakNum <- x@peakNum
    for(i in 2:length(z)){
        combine_peakNum <- combine_peakNum+z[[i]]@peakNum
    }
    
    feature <- x@annoStat$Feature
    for(i in 2:length(z)){
        if(length(feature)<length(z[[i]]@annoStat$Feature)){
            feature_levels <- levels(z[[i]]@annoStat$Feature)
            feature <- c(as.vector(feature),as.vector(z[[i]]@annoStat$Feature))
            feature <- feature[!duplicated(feature)]
            feature <- factor(feature, 
                              levels = feature_levels)
            feature <- sort(feature)
        }else{
            feature_levels <- levels(feature)
            feature <- c(as.vector(feature),as.vector(z[[i]]@annoStat$Feature))
            feature <- feature[!duplicated(feature)]
            feature <- factor(feature, 
                              levels = feature_levels)
            feature <- sort(feature)
        }
    }
    
    combine_annoStat <- data.frame(Feature=feature)
    
    for(i in 1:length(z)){
        combine_annoStat <- merge(combine_annoStat, z[[i]]@annoStat, 
                                  by = "Feature", all = T, sort = F)
        combine_annoStat[is.na(combine_annoStat)] <- 0
        combine_annoStat <- combine_annoStat[order(combine_annoStat$Feature),]
    }
    
    total <- (ncol(combine_annoStat)-1)*100
    combine_annoStat$sum <- rowSums(combine_annoStat[, 2:ncol(combine_annoStat)])
    
    
    for (i in 1:length(combine_annoStat$sum)) {
        combine_annoStat$result[i] <- (combine_annoStat$sum[i]/total)*100
    }
    
    annoStat_result <- data.frame(Feature=combine_annoStat[,1],Frequency=combine_annoStat[,ncol(combine_annoStat)])
    
    res <- new("csAnno",
               anno = combine_anno,
               tssRegion = combine_tssRegion,
               level = combine_level,
               hasGenomicAnnotation = combine_hasGenomicAnnotation,
               detailGenomicAnnotation = combine_detailGenomicAnnotation,
               annoStat = annoStat_result,
               peakNum = combine_peakNum
    )
    
    return(res)
}

##' vennpie method generics
##'
##' @name vennpie
##' @docType methods
##' @rdname vennpie-methods
##' 
##' @title vennpie method
##' @param x A \code{csAnno} instance
##' @param r initial radius
##' @param cex value to adjust legend
##' @param ... additional parameter
##' @return plot
##' @usage vennpie(x, r = 0.2, cex=1.2, ...)
##' @exportMethod vennpie
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
setMethod("vennpie", signature(x="csAnno"),
          function(x, 
                   r = 0.2, 
                   cex = 1.2, 
                   ...) {
            vennpie.csAnno(x, r, cex, ...)
          }
          )


##' upsetplot method generics
##'
##' @name upsetplot
##' @docType methods
##' @rdname upsetplot-methods
##'
##' @title upsetplot method
##' @param x A \code{csAnno} instance
##' @param ... additional parameter
##' @return plot
##' @usage upsetplot(x, ...)
##' @importFrom enrichplot upsetplot
##' @exportMethod upsetplot
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
setMethod("upsetplot", signature(x="csAnno"),
          function(x, ...) {
              upsetplot.csAnno(x, ...)
          }
          )

##' convert csAnno object to data.frame
##'
##'
##' @title as.data.frame.csAnno
##' @param x csAnno object
##' @param row.names row names
##' @param optional should be omitted.
##' @param ... additional parameters
##' @return data.frame
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
##' @method as.data.frame csAnno
##' @export
as.data.frame.csAnno <- function(x, row.names=NULL, optional=FALSE, ...) {
    y <- as.GRanges(x)
    if (!(is.null(row.names) || is.character(row.names)))
        stop("'row.names' must be NULL or a character vector")
    df <- as.data.frame(y)
    rownames(df) <- row.names
    return(df)
}

##' show method for \code{csAnno} instance
##'
##' @name show
##' @docType methods
##' @rdname show-methods
##' @aliases show,csAnno,ANY-method
##' @title show method
##' @param object A \code{csAnno} instance
##' @return message
##' @importFrom methods show
##' @exportMethod show
##' @usage show(object)
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
setMethod("show", signature(object="csAnno"),
          function(object) {
              cat("Annotated peaks generated by ChIPseeker\n")
              cat(paste(length(object@anno), object@peakNum, sep="/"),
                  " peaks were annotated\n")
              if (object@hasGenomicAnnotation) {
                  cat("Genomic Annotation Summary:\n")
                  print(object@annoStat)
              }
          }
          )

##' plotAnnoBar method for list of \code{csAnno} instances
##'
##' @name plotAnnoBar
##' @docType methods
##' @rdname plotAnnoBar-methods
##' @aliases plotAnnoBar,list-method
##' @exportMethod plotAnnoBar
setMethod("plotAnnoBar", signature(x="list"),
          function(x,
                   xlab="",
                   ylab='Percentage(%)',
                   title="Feature Distribution",
                   ...) {
              if (is.null(names(x))) {
                  nn <- paste0("Peak", seq_along(x))
                  warning("input is not a named list, set the name automatically to ", paste(nn, collapse = " "))
                  names(x) <- nn
                  ## stop("input object should be a named list...")
              }
              anno <- lapply(x, getAnnoStat)
              ## anno.df <- ldply(anno)
              anno.df <- list_to_dataframe(anno)
              categoryColumn <- ".id"
              plotAnnoBar.data.frame(anno.df, xlab, ylab, title, categoryColumn)
          })

##' plotAnnoBar method for \code{csAnno} instance
##'
##' @name plotAnnoBar
##' @docType methods
##' @rdname plotAnnoBar-methods
##' @aliases plotAnnoBar,csAnno,ANY-method
##' @title plotAnnoBar method
##' @param x \code{csAnno} instance
##' @param xlab xlab
##' @param ylab ylab
##' @param title title
##' @param ... additional paramter
##' @return plot
##' @exportMethod plotAnnoBar
##' @usage plotAnnoBar(x, xlab="", ylab='Percentage(\%)',title="Feature Distribution", ...)
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
setMethod("plotAnnoBar", signature(x="csAnno"),
          function(x,
                   xlab="",
                   ylab="Percentage(%)",
                   title="Feature Distribution",
                   ...) {
              anno.df <- getAnnoStat(x)
              categoryColumn <- 1
              plotAnnoBar.data.frame(anno.df, xlab, ylab, title, categoryColumn)
          })


##' plotAnnoPie method for \code{csAnno} instance
##'
##' @name plotAnnoPie
##' @docType methods
##' @rdname plotAnnoPie-methods
##' @aliases plotAnnoPie,csAnno,ANY-method
##' @title plotAnnoPie method
##' @param x \code{csAnno} instance
##' @param ndigit number of digit to round
##' @param cex label cex
##' @param col color
##' @param legend.position topright or other.
##' @param pie3D plot in 3D or not
##' @param radius radius of the pie
##' @param ... extra parameter
##' @return plot
##' @exportMethod plotAnnoPie
##' @usage plotAnnoPie(x,ndigit=2,cex=0.9,col=NA,legend.position="rightside",pie3D=FALSE,radius=0.8,...)
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
setMethod("plotAnnoPie", signature(x="csAnno"),
          function(x,
                   ndigit=2,
                   cex=0.9,
                   col=NA,
                   legend.position="rightside",
                   pie3D=FALSE,
                   radius=0.8,
                   ...){
              plotAnnoPie.csAnno(x, ndigit, cex, col, legend.position, pie3D, radius, ...)
          })


##' plotDistToTSS method for list of \code{csAnno} instances
##'
##' @name plotDistToTSS
##' @docType methods
##' @rdname plotDistToTSS-methods
##' @aliases plotDistToTSS,list-method
##' @exportMethod plotDistToTSS
setMethod("plotDistToTSS", signature(x="list"),
          function(x, distanceColumn="distanceToTSS",
                                     xlab="", ylab="Binding sites (%) (5'->3')",
                                     title="Distribution of transcription factor-binding loci relative to TSS",                       
                     distanceBreaks=c(0, 1000, 3000, 5000, 10000, 100000),
                     palette = NULL, ...) {
              if (is.null(names(x))) {
                  nn <- paste0("Peak", seq_along(x))
                  warning("input is not a named list, set the name automatically to ", paste(nn, collapse = " "))
                  names(x) <- nn
                  ## stop("input object should be a named list...")
              }

              peakAnno <- lapply(x, as.data.frame)
              ## peakDist <- ldply(peakAnno)
              peakDist <- list_to_dataframe(peakAnno)
              categoryColumn <- ".id"
              plotDistToTSS.data.frame(peakDist, distanceColumn = distanceColumn,
                                       distanceBreaks = distanceBreaks, palette = palette,
                                       xlab = xlab, ylab = ylab, title = title, categoryColumn = categoryColumn)
          })


##' plotDistToTSS method for \code{csAnno} instance
##'
##' @name plotDistToTSS
##' @docType methods
##' @rdname plotDistToTSS-methods
##' @aliases plotDistToTSS,csAnno,ANY-method
##' @title plotDistToTSS method
##' @param distanceColumn distance column name
##' @param distanceBreaks breaks of distance, default is 'c(0, 1000, 3000, 5000, 10000, 100000)'
##' @param palette palette name for coloring different distances. Run `RColorBrewer::display.brewer.all()` to see all applicable values.
##' @param x \code{csAnno} instance
##' @param xlab xlab
##' @param ylab ylab
##' @param title title
##' @param ... additional parameter
##' @return plot
##' @exportMethod plotDistToTSS
##' @usage plotDistToTSS(x,distanceColumn="distanceToTSS", xlab="",
##' ylab="Binding sites (\%) (5'->3')",
##' title="Distribution of transcription factor-binding loci relative to TSS",...)
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
setMethod("plotDistToTSS", signature(x="csAnno"),
          function(x, distanceColumn="distanceToTSS",
                                     xlab="", ylab="Binding sites (%) (5'->3')",
                                     title="Distribution of transcription factor-binding loci relative to TSS", 
                                     distanceBreaks=c(0, 1000, 3000, 5000, 10000, 100000),
                                     palette = NULL,...) {
              peakDist <- as.data.frame(x)
              categoryColumn <- 1
              plotDistToTSS.data.frame(peakDist, distanceColumn = distanceColumn, distanceBreaks = distanceBreaks, palette = palette,
                                       xlab = xlab, ylab = ylab, title = title, categoryColumn = categoryColumn)
          })


================================================
FILE: R/dplyr-verb.R
================================================
# extend filter to Peak (GRanges class object)
#' @method filter GRanges
#' @importFrom dplyr filter
#' @export
filter.GRanges = function(.data, ..., .by = NULL, .preserve = FALSE) {
  dots = rlang::quos(...)
  as.data.frame(.data) |> 
    dplyr::filter(!!!dots, .by = .by, .preserve = .preserve) |> 
    droplevels() |> 
    GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = TRUE)
}

# extend mutate to Peak (GRanges class object)
#' @method mutate GRanges
#' @importFrom dplyr mutate
#' @export
mutate.GRanges = function(.data, ..., .by = NULL, 
                           .keep = c("all", "used", "unused", "none"),
                           .before = NULL,
                           .after = NULL) {
  dots = rlang::quos(...)
  df = as.data.frame(.data)
  
  if (!is.null(.before) && !is.null(.after)) {
    stop("You can't supply both `.before` and `.after`.")
  }
  
  if (!is.null(.before)) {
    df = df |> 
      dplyr::mutate(!!!dots, .by = .by, .keep = .keep, .before = .before)
  } else if (!is.null(.after)) {
    df = df |> 
      dplyr::mutate(!!!dots, .by = .by, .keep = .keep, .after = .after)
  } else {
    df = df |> dplyr::mutate(!!!dots, .by = .by, .keep = .keep)
  }
  
  df |> 
    GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = TRUE)
}

# S4Vectors::rename
#' @method rename GRanges
#' @importFrom rlang quos
#' @export
rename.GRanges = function(x, ...){
  dots = rlang::quos(...)
  as.data.frame(x) |> 
    dplyr::rename(!!!dots) |> 
    GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = TRUE)
}

#' @method arrange GRanges
#' @importFrom dplyr arrange
#' @export
arrange.GRanges = function(.data, ..., .by_group = FALSE){
  dots = rlang::quos(...)
  as.data.frame(.data) |> 
    dplyr::arrange(!!!dots, .by_group = .by_group) |> 
    GenomicRanges::makeGRangesFromDataFrame(keep.extra.columns = TRUE)
}


================================================
FILE: R/enrichOverlap.R
================================================
##' calcuate overlap significant of ChIP experiments based on their nearest gene annotation
##'
##'
##' @title enrichAnnoOverlap
##' @param queryPeak query bed file
##' @param targetPeak target bed file(s) or folder containing bed files
##' @param TxDb TxDb
##' @param pAdjustMethod pvalue adjustment method
##' @param chainFile chain file for liftOver
##' @param distanceToTSS_cutoff restrict nearest gene annotation by distance cutoff
##' @return data.frame
##' @importFrom stats p.adjust
##' @importFrom stats phyper
##' @export
##' @importFrom rtracklayer import.chain
##' @importFrom rtracklayer liftOver
##' @importFrom yulab.utils get_cache_element
##' @importFrom yulab.utils update_cache_item
##' @author G Yu
enrichAnnoOverlap <- function(queryPeak, targetPeak, TxDb=NULL, pAdjustMethod="BH", chainFile=NULL, distanceToTSS_cutoff=NULL) {

    TxDb <- loadTxDb(TxDb)

    query.anno <- annotatePeak(queryPeak, TxDb=TxDb,
                               assignGenomicAnnotation=FALSE, annoDb=NULL, verbose=FALSE)


    if (is(targetPeak[1], "GRanges") || is(targetPeak[[1]], "GRanges")) {
        target.gr <- targetPeak
        targetFiles <- NULL
    } else {
        targetFiles <- parse_targetPeak_Param(targetPeak)
        target.gr <- lapply(targetFiles, loadPeak)
    }

    if (!is.null(chainFile)) {
        chain <- import.chain(chainFile)
        target.gr <- lapply(target.gr, liftOver, chain=chain)
    }

    target.anno <- lapply(target.gr, annotatePeak, TxDb=TxDb,
                          assignGenomicAnnotation=FALSE, annoDb=NULL, verbose=FALSE)


    if (!is.null(distanceToTSS_cutoff)) {
        query.anno <- dropAnno(query.anno, distanceToTSS_cutoff)
        target.anno <- lapply(target.anno, dropAnno, distanceToTSS_cutoff = distanceToTSS_cutoff)
    }

    # ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)
    features <- get_cache_element(item = ChIPseekerCache, elements = "Transcripts")

    if(is.null(features)){
        features <- transcriptsBy(TxDb)
        features <- unlist(features)
        update_cache_item(item = ChIPseekerCache, list("Transcripts" = features))
    }

    # if ( exists("Transcripts", envir=ChIPseekerEnv, inherits=FALSE) ) {
    #     features <- get("Transcripts", envir=ChIPseekerEnv)
    # } else {
    #     features <- transcriptsBy(TxDb)
    #     features <- unlist(features)
    #     assign("Transcripts", features, envir=ChIPseekerEnv)
    # }

    ol <- lapply(target.anno, function(i) unique(intersect(as.GRanges(query.anno)$geneId, as.GRanges(i)$geneId)))
    oln <- unlist(lapply(ol, length))
    N <- length(features)
    ## white ball
    m <- length(unique(as.GRanges(query.anno)$geneId))
    ## black ball
    n <- N - m
    ## drawn
    k <- unlist(lapply(target.anno, function(i) length(unique(as.GRanges(i)$geneId))))
    p <- phyper(oln, m, n, k, lower.tail=FALSE)


    if (is(queryPeak, "GRanges")) {
        qSample <- "queryPeak"
    } else {
        qSample <- basename(queryPeak)
    }

    if (is.null(targetFiles)) {
        tSample <- names(target.gr)
        if(is.null(tSample)) {
            tSample <- paste0("targetPeak", seq_along(target.gr))
        }
    } else {
        tSample <- basename(targetFiles)
    }

    padj <- p.adjust(p, method=pAdjustMethod)
    res <- data.frame(qSample=qSample,
                      tSample=tSample,
                      qLen=length(unique(as.GRanges(query.anno)$geneId)),
                      tLen=unlist(lapply(target.anno, function(i) length(unique(as.GRanges(i)$geneId)))),
                      N_OL=oln,
                      pvalue=p,
                      p.adjust=padj)
    return(res)
}

##' calculate overlap significant of ChIP experiments based on the genome coordinations
##'
##'
##' @title enrichPeakOverlap
##' @param queryPeak query bed file or GRanges object
##' @param targetPeak target bed file(s) or folder that containing bed files or a list of GRanges objects
##' @param TxDb TxDb
##' @param pAdjustMethod pvalue adjustment method
##' @param nShuffle shuffle numbers
##' @param chainFile chain file for liftOver
##' @param pool logical, whether pool target peaks
##' @param mc.cores number of cores, see \link[parallel]{mclapply}
##' @param verbose logical
##' @return data.frame
##' @export
##' @importFrom rtracklayer import.chain
##' @importFrom rtracklayer liftOver
##' @author G Yu
enrichPeakOverlap <- function(queryPeak, targetPeak, TxDb=NULL, pAdjustMethod="BH", nShuffle=1000,
                              chainFile=NULL, pool=TRUE, mc.cores=detectCores()-1, verbose=TRUE) {
    TxDb <- loadTxDb(TxDb)
    query.gr <- loadPeak(queryPeak)
    if (is(targetPeak[1], "GRanges") || is(targetPeak[[1]], "GRanges")) {
        target.gr <- targetPeak
        targetFiles <- NULL
    } else {
        targetFiles <- parse_targetPeak_Param(targetPeak)
        target.gr <- lapply(targetFiles, loadPeak)
    }

    if (!is.null(chainFile)) {
        chain <- import.chain(chainFile)
        target.gr <- lapply(target.gr, liftOver, chain=chain)
    }

    if (pool) {
        p.ol <- enrichOverlap.peak.internal(query.gr, target.gr, TxDb, nShuffle,
                                            mc.cores=mc.cores,verbose=verbose)
    } else {
        res_list <- lapply(1:length(target.gr), function(i) {
            enrichPeakOverlap(queryPeak = queryPeak,
                              targetPeak = target.gr[i],
                              TxDb = TxDb,
                              pAdjustMethod = pAdjustMethod,
                              nShuffle = nShuffle,
                              chainFile = chainFile,
                              mc.cores = mc.cores,
                              verbose = verbose)
        })
        res <- do.call("rbind", res_list)
        return(res)
    }

    if (is.null(p.ol$pvalue)) {
        p <- padj <- NA
    } else {
        p <- p.ol$pvalue
        padj <- p.adjust(p, method=pAdjustMethod)
    }

    ol <- p.ol$overlap


    if (is(queryPeak, "GRanges")) {
        qSample <- "queryPeak"
    } else {
        ## remove path, only keep file name
        qSample <- basename(queryPeak)
    }

    if (is.null(targetFiles)) {
        tSample <- names(target.gr)
        if(is.null(tSample)) {
            tSample <- paste0("targetPeak", seq_along(target.gr))
        }
    } else {
        tSample <- basename(targetFiles)
    }

    res <- data.frame(qSample=qSample,
                      tSample=tSample,
                      qLen=length(query.gr),
                      tLen=unlist(lapply(target.gr, length)),
                      N_OL=ol,
                      pvalue=p,
                      p.adjust=padj)

    return(res)
}


##' shuffle the position of peak
##'
##'
##' @title shuffle
##' @param peak.gr GRanges object
##' @param TxDb TxDb
##' @return GRanges object
##' @export
##' @author G Yu
shuffle <- function(peak.gr, TxDb) {
    chrLens <- seqlengths(TxDb)[names(seqlengths(peak.gr))]
    nn <- as.vector(seqnames(peak.gr))
    ii <- order(nn)
    w <- width(peak.gr)
    nnt <- table(nn)
    jj <- order(names(nnt))
    nnt <- nnt[jj]
    chrLens <- chrLens[jj]
    ss <- unlist(sapply(1:length(nnt), function(i) sample(chrLens[i],nnt[i])))

    res <- GRanges(seqnames=nn[ii], ranges=IRanges(ss, width=w[ii]), strand="*")
    return(res)
}


##' @import GenomeInfoDb
##' @importFrom utils txtProgressBar
##' @importFrom utils setTxtProgressBar
##' @importFrom parallel mclapply
##' @importFrom parallel detectCores
enrichOverlap.peak.internal <- function(query.gr, target.gr, TxDb, nShuffle=1000, mc.cores=detectCores()-1, verbose=TRUE) {
    if (verbose) {
        cat(">> permutation test of peak overlap...\t\t",
            format(Sys.time(), "%Y-%m-%d %X"), "\n")
    }

    idx <- sample(1:length(target.gr), nShuffle, replace=TRUE)
    len <- unlist(lapply(target.gr, length))

    if(Sys.info()[1] == "Windows") {
        qLen <- lapply(target.gr, function(tt) {
            length(intersect(query.gr, tt))
        })
    } else {
        qLen <- mclapply(target.gr, function(tt) {
            length(intersect(query.gr, tt))
        }, mc.cores=mc.cores
                         )
    }
    qLen <- unlist(qLen)
    ## query ratio
    qr <- qLen/len

    if (nShuffle < 1) {
        res <- list(pvalue=NULL, overlap=qLen)
        return(res)
    }

    if (verbose) {
        pb <- txtProgressBar(min=0, max=nShuffle, style=3)
    }
    if(Sys.info()[1] == "Windows") {
        rr <- lapply(seq_along(idx), function(j) {
            if (verbose) {
                setTxtProgressBar(pb, j)
            }
            i <- idx[j]
            tarShuffle <- shuffle(target.gr[[i]], TxDb)
            length(intersect(query.gr, tarShuffle))/len[i]
        })
    } else {
        rr <- mclapply(seq_along(idx), function(j) {
            if (verbose) {
                setTxtProgressBar(pb, j)
            }
            i <- idx[j]
            tarShuffle <- shuffle(target.gr[[i]], TxDb)
            length(intersect(query.gr, tarShuffle))/len[i]
        }, mc.cores=mc.cores
                       )
    }

    if (verbose) {
        close(pb)
    }

    rr <- unlist(rr) ## random ratio

    ## p <- lapply(qr, function(q) mean(rr>q))
    p <- lapply(qr, function(q) (sum(rr>q)+1)/(length(rr)+1))
    res <- list(pvalue=unlist(p), overlap=qLen)
    return(res)
}


================================================
FILE: R/getFlankingGene.R
================================================

##' @import IRanges
##' @importFrom dplyr mutate
##' @importFrom dplyr group_by
getAllFlankingGene <- function(peak.gr, features, level="transcript", distance=5000) {
    peak.gr2 <- peak.gr
    start(ranges(peak.gr)) = start(ranges(peak.gr)) - distance
    end(ranges(peak.gr)) = end(ranges(peak.gr)) + distance
    hit <- findOverlaps(peak.gr, unstrand(features))
    qh <- queryHits(hit)
    sh <- subjectHits(hit)
    
    featureHit <- features[sh]
    names(featureHit)=NULL
    hitInfo <- as.data.frame(featureHit)

    if (level == "transcript") {
        eg <- TXID2EG(featureHit$tx_id, geneIdOnly=TRUE)
        hitInfo$geneId <- eg
    } else {
        cn <- colnames(hitInfo)
        colnames(hitInfo)[cn == "gene_id"] <- "geneId"
    }


    hitInfo$peakIdx <- qh

    overlapHit <- findOverlaps(peak.gr2, unstrand(featureHit))
    hitInfo$distance <- NA
    hitInfo$distance[subjectHits(overlapHit)] <- 0

    psD <- ifelse(strand(featureHit) == "+",
                  start(peak.gr2[qh]) - start(featureHit),
                  end(featureHit)-end(peak.gr2[qh]))
    
    peD <- ifelse(strand(featureHit) == "+",
                  end(peak.gr2[qh]) - start(featureHit),
                  end(featureHit)-start(peak.gr2[qh]))

    idx <- abs(psD) > abs(peD)
    dd <- psD
    dd[idx] <- peD[idx]

    ii <- is.na(hitInfo$distance)
    hitInfo$distance[ii] <- dd[ii]

    peakIdx <- tx_name <- geneId <- distance <- NULL

    if (level == "transcript") {
        hitInfo2 <- group_by(hitInfo, peakIdx) %>%
            mutate(flank_txIds=paste(tx_name, collapse=";"),
                   flank_geneIds=paste(geneId, collapse=";"),
                   flank_gene_distances=paste(distance, collapse=";"))
        res <- hitInfo2[,c("peakIdx", "flank_txIds", "flank_geneIds", "flank_gene_distances")]
        res$flank_txIds <- as.character(res$flank_txIds)
    } else {
        hitInfo2 <- group_by(hitInfo, peakIdx) %>%
            mutate(flank_geneIds=paste(geneId, collapse=";"),
                   flank_gene_distances=paste(distance, collapse=";"))
        res <- hitInfo2[,c("peakIdx", "flank_geneIds", "flank_gene_distances")]
    }

    res <- unique(res)
    res$flank_geneIds <- as.character(res$flank_geneIds)
    res$flank_gene_distances <- as.character(res$flank_gene_distances)
    
    return(res)    
}


================================================
FILE: R/getGenomicAnnotation.R
================================================
updateGenomicAnnotation <- function(peaks, genomicRegion, type, anno, sameStrand=FALSE) {
    hits <- getGenomicAnnotation.internal(peaks, genomicRegion, type, sameStrand=sameStrand)
    if (length(hits) > 1) {
        hitIndex <- hits$queryIndex
        anno[["annotation"]][hitIndex] <- hits$annotation
        anno[["detailGenomicAnnotation"]][hitIndex, type] <- TRUE
    }
    return(anno)
}


##' get Genomic Annotation of peaks
##'
##'
##' @title getGenomicAnnotation
##' @param peaks peaks in GRanges object
##' @param distance distance of peak to TSS
##' @param tssRegion tssRegion, default is -3kb to +3kb
##' @param TxDb TxDb object
##' @param level one of gene or transcript
##' @param genomicAnnotationPriority genomic Annotation Priority
##' @param sameStrand whether annotate gene in same strand
##' @importFrom GenomicFeatures threeUTRsByTranscript
##' @importFrom GenomicFeatures fiveUTRsByTranscript
##' @importFrom yulab.utils get_cache_element
##' @importFrom yulab.utils update_cache_item
##' @return character vector
##' @author G Yu
getGenomicAnnotation <- function(peaks,
                                 distance,
                                 tssRegion=c(-3000, 3000),
                                 TxDb,
                                 level,
                                 genomicAnnotationPriority,
                                 sameStrand = FALSE
                                 ) {

    ##
    ## since some annotation overlap,
    ## a priority is assign based on *genomicAnnotationPriority*
    ## use the following priority by default:
    ##
    ## 1. Promoter
    ## 2. 5' UTR
    ## 3. 3' UTR
    ## 4. Exon
    ## 5. Intron
    ## 6. Downstream
    ## 7. Intergenic
    ##


    .ChIPseekerEnv(TxDb, item = ChIPseekerCache)
    # ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)

    annotation <- rep(NA, length(distance))

    flag <- rep(FALSE, length(distance))
    detailGenomicAnnotation <- data.frame(
        genic=flag,
        Intergenic=flag,
        Promoter=flag,
        fiveUTR=flag,
        threeUTR=flag,
        Exon=flag,
        Intron=flag,
        downstream=flag,
        distal_intergenic=flag)

    anno <- list(annotation=annotation,
                 detailGenomicAnnotation=detailGenomicAnnotation)

    genomicAnnotationPriority <- rev(genomicAnnotationPriority)
    for (AP in genomicAnnotationPriority) {
        if (AP == "Intron") {
            ## Introns
            # intronList <- get_intronList(ChIPseekerEnv)
            intronList <- get_intronList(item = ChIPseekerCache)
            anno <- updateGenomicAnnotation(peaks, intronList, "Intron", anno, sameStrand=sameStrand)
        } else if (AP == "Exon") {
            ## Exons
            # exonList <- get_exonList(ChIPseekerEnv)
            exonList <- get_exonList(item = ChIPseekerCache)
            anno <- updateGenomicAnnotation(peaks, exonList, "Exon", anno, sameStrand=sameStrand)
        } else if (AP == "3UTR") {
            ## 3' UTR Exons
            threeUTRList <- get_cache_element(item = ChIPseekerCache, elements = "threeUTRList")

            if(is.null(threeUTRList)){
                threeUTRList <- threeUTRsByTranscript(TxDb)
                update_cache_item(item = ChIPseekerCache, list("threeUTRList" = threeUTRList))
            }

            # if ( exists("threeUTRList", envir=ChIPseekerEnv, inherits=FALSE) ) {
            #     threeUTRList <- get("threeUTRList", envir=ChIPseekerEnv)
            # } else {
            #     threeUTRList <- threeUTRsByTranscript(TxDb)
            #     assign("threeUTRList", threeUTRList, envir=ChIPseekerEnv)
            # }
            anno <- updateGenomicAnnotation(peaks, threeUTRList, "threeUTR", anno, sameStrand=sameStrand)
        } else if (AP == "5UTR") {
            ## 5' UTR Exons
            fiveUTRList <- get_cache_element(item = ChIPseekerCache, elements = "fiveUTRList")

            if(is.null(fiveUTRList)){
                fiveUTRList <- fiveUTRsByTranscript(TxDb)
                update_cache_item(item = ChIPseekerCache, list("fiveUTRList" = fiveUTRList))
            }

            # if ( exists("fiveUTRList", envir=ChIPseekerEnv, inherits=FALSE) ) {
            #     fiveUTRList <- get("fiveUTRList", envir=ChIPseekerEnv)
            # } else {
            #     fiveUTRList <- fiveUTRsByTranscript(TxDb)
            #     assign("fiveUTRList", fiveUTRList, envir=ChIPseekerEnv)
            # }
            anno <- updateGenomicAnnotation(peaks, fiveUTRList, "fiveUTR", anno, sameStrand=sameStrand)
        } else if (AP == "Promoter") {
            annotation <- anno[["annotation"]]
            ## detailGenomicAnnotation <- anno[["detailGenomicAnnotation"]]

            ## TSS
            tssIndex <- distance >= tssRegion[1] & distance <= tssRegion[2]
            annotation[tssIndex] <- "Promoter"
            anno$detailGenomicAnnotation[tssIndex, "Promoter"] <- TRUE

            pm <- max(abs(tssRegion))
            if (pm/1000 >= 2) {
                dd <- seq(1:ceiling(pm/1000))*1000
                for (i in 1:length(dd)) {
                    if (i == 1) {
                        lbs <- paste("Promoter", " (<=", dd[i]/1000, "kb)", sep="")
                        annotation[abs(distance) <= dd[i] &
                                   annotation == "Promoter"] <- lbs
                    } else {
                        lbs <- paste("Promoter", " (", dd[i-1]/1000, "-", dd[i]/1000, "kb)", sep="")
                        annotation[abs(distance) <= dd[i] &
                                   abs(distance) > dd[i-1] &
                                   annotation == "Promoter"] <- lbs
                    }
                }
            }
            anno[["annotation"]] <- annotation
        } else {
            ## Intergenic
            annotation[is.na(annotation)] <- "Intergenic"
            anno[["annotation"]] <- annotation
        }
    }

    annotation <- anno[["annotation"]]
    detailGenomicAnnotation <- anno[["detailGenomicAnnotation"]]
    genicIndex <- which(apply(detailGenomicAnnotation[, c("Exon", "Intron")], 1, any))
    detailGenomicAnnotation[-genicIndex, "Intergenic"] <- TRUE
    detailGenomicAnnotation[genicIndex, "genic"] <- TRUE

    ## intergenicIndex <- anno[["annotation"]] == "Intergenic"
    ## anno[["detailGenomicAnnotation"]][intergenicIndex, "Intergenic"] <- TRUE
    ## anno[["detailGenomicAnnotation"]][!intergenicIndex, "genic"] <- TRUE


    features <- getGene(TxDb, by=level)

    ## nearest from gene end
    if (sameStrand) {
        idx <- follow(peaks, features)
    } else {
        idx <- follow(peaks, unstrand(features))
    }
    
    na.idx <- which(is.na(idx))
    if (length(na.idx)) {
        idx <- idx[-na.idx]
        peaks <- peaks[-na.idx]
    }
    
    peF <- features[idx]
    dd <- ifelse(strand(peF) == "+",
		 start(peaks) - end(peF),
		 end(peaks) - start(peF))
    
    if (length(na.idx)) {
        dd2 <- numeric(length(idx) + length(na.idx))
        dd2[-na.idx] <- dd
    } else {
        dd2 <- dd
    }

    dsd <- getOption("ChIPseeker.downstreamDistance")
    if (is.null(dsd))
	    dsd <- 3000 ## downstream 3k by default

    ## downstream within dsd
    if(dsd/1000<=1){
        j <- which(annotation == "Intergenic" & abs(dd2) <= dsd & dd2 != 0)
        if(length(j)>0){
            lbs <- paste("Downstream (<=", dsd, "bp)", sep="")
            annotation[j] <- lbs
        }
    }else{
        
        ## downstream within 0-dsd/1000 kb
        for(i in 1:(dsd/1000)){
            j <- which(annotation == "Intergenic" & abs(dd2) <= i*1000 & dd2 != 0)
            if (length(j) > 0){
                if (i == 1){
                    lbs <- "Downstream (<1kb)"
                }else{
                    lbs <- paste("Downstream (", i-1, "-", i, "kb)", sep="")
                }
		annotation[j] <- lbs
            }
        }
        
        ## downstream (dsd/1000) kb - dsd bp
        z <- which(annotation == "Intergenic" & abs(dd2) <= dsd & dd2 != 0)
        if(length(z)>0){
            lbs <- paste("Downstream (",dsd/1000,"kb-", dsd, "bp)", sep="")
            annotation[z] <- lbs
        }
    }
    annotation[which(annotation == "Intergenic")] = "Distal Intergenic"

    downstreamIndex <- dd2 > 0 & dd2 < dsd
    detailGenomicAnnotation[downstreamIndex, "downstream"] <- TRUE
    detailGenomicAnnotation[which(annotation == "Distal Intergenic"), "distal_intergenic"] <- TRUE
    return(list(annotation=annotation, detailGenomicAnnotation=detailGenomicAnnotation))
}


##' @import BiocGenerics S4Vectors IRanges
getGenomicAnnotation.internal <- function(peaks, genomicRegion, type, sameStrand=FALSE){
    GRegion <- unlist(genomicRegion)
    GRegionLen <- elementNROWS(genomicRegion)

    names(GRegionLen) <- names(genomicRegion)
    GRegion$gene_id <- rep(names(genomicRegion), times=GRegionLen)


    if (type == "Intron") {
        gr2 <- GRegion[!duplicated(GRegion$gene_id)]
        strd <- as.character(strand(gr2))
        len <- GRegionLen[GRegionLen != 0]

        GRegion$intron_rank <- lapply(seq_along(strd), function(i) {
            rank <- seq(1, len[i])
            if (strd[i] == '-')
                rank <- rev(rank)
            return(rank)
        }) %>% unlist
    }

    if (type == "Intron" || type =="Exon") {
        nn <- TXID2EG(names(genomicRegion))
        names(GRegionLen) <- nn
        GRegion$gene_id <- rep(nn, times=GRegionLen)
    }

    ## find overlap
    if (sameStrand) {
        GRegionHit <- findOverlaps(peaks, GRegion)
    } else {
        GRegionHit <- findOverlaps(peaks, unstrand(GRegion))
    }

    if (length(GRegionHit) == 0) {
        return(NA)
    }
    qh <- queryHits(GRegionHit)
    hit.idx <- getFirstHitIndex(qh)
    GRegionHit <- GRegionHit[hit.idx]
    queryIndex <- queryHits(GRegionHit)
    subjectIndex <- subjectHits(GRegionHit)

    hits <- GRegion[subjectIndex]
    geneID <- hits$gene_id

    if (type == "Intron") {
        anno <- paste(type, " (", geneID, ", intron ", hits$intron_rank,
                      " of ", GRegionLen[geneID], ")", sep="")
    } else if (type == "Exon") {
        anno <- paste(type, " (", geneID, ", exon ", hits$exon_rank,
                      " of ", GRegionLen[geneID], ")", sep="")
    } else if (type == "fiveUTR") {
        anno <- "5' UTR"
    } else if (type == "threeUTR") {
        anno <- "3' UTR"
    } else {
        anno <- type
    }
    res <- list(queryIndex=queryIndex, annotation=anno, gene=geneID)
    return(res)
}


================================================
FILE: R/getNearestFeatureIndicesAndDistances.R
================================================
##' get index of features that closest to peak and calculate distance
##'
##'
##' @title getNearestFeatureIndicesAndDistances
##' @param peaks peak in GRanges
##' @param features features in GRanges
##' @param sameStrand logical, whether find nearest gene in the same strand
##' @param ignoreOverlap logical, whether ignore overlap of TSS with peak
##' @param ignoreUpstream logical, if True only annotate gene at the 3' of the peak.
##' @param ignoreDownstream logical, if True only annotate gene at the 5' of the peak.
##' @param overlap one of "TSS" or "all"
##' @return list
##' @import BiocGenerics IRanges GenomicRanges
##' @author G Yu
getNearestFeatureIndicesAndDistances <- function(peaks, features,
                                                 sameStrand = FALSE,
                                                 ignoreOverlap=FALSE,
                                                 ignoreUpstream=FALSE,
                                                 ignoreDownstream=FALSE,
                                                 overlap = "TSS") {

    overlap <- match.arg(overlap, c("TSS", "all"))

    if (!ignoreOverlap && overlap == "all") {
        overlap_hit <- findOverlaps(peaks, unstrand(features))
    }

    ## peaks only conatin all peak records, in GRanges object
    ## feature is the annotation in GRanges object

    ## only keep start position based on strand
    ## start(features) <- end(features) <- ifelse(strand(features) == "+", start(features), end(features))
    features <- resize(features, width=1) # faster

    ## add dummy NA feature for peaks that are at the last or first feature
    ## suggested by Michael Kluge
    features.bak <- features
    seqlevels(features) <- c(seqlevels(features), "chrNA")
    dummy <- GRanges("chrNA", IRanges(1,1))

    ## dummy$tx_id <- -1
    ## dummy$tx_name <- "NA"

    cns <- names(mcols(features))
    for (cn in cns) {
        if (grepl('id', cn)) {
            mcols(dummy)[[cn]] <- -1
        } else {
            mcols(dummy)[[cn]] <- NA
        }
    }

    features <- append(features, dummy)
    dummyID <- length(features)

    if (sameStrand) {
        ## nearest from peak start
        ps.idx <- follow(peaks, features)

        ## nearest from peak end
        pe.idx <- precede(peaks, features)
    } else {
        ps.idx <- follow(peaks, unstrand(features))
        pe.idx <- precede(peaks, unstrand(features))
    }

    na.idx <- is.na(ps.idx) & is.na(pe.idx)
    if (sum(na.idx) > 0) { ## suggested by Thomas Schwarzl
        ps.idx <- ps.idx[!na.idx]
        pe.idx <- pe.idx[!na.idx]
        ##peaks <- peaks[!na.idx]
    }

    # set NA values to dummy value if only one entry is affected
    ps.idx[is.na(ps.idx)] <- dummyID
    pe.idx[is.na(pe.idx)] <- dummyID

    ## features from nearest peak start
    psF <- features[ps.idx]

    ## feature distances from peak start
    psD <- ifelse(strand(psF) == "+", 1, -1) *
        (start(peaks[!na.idx]) - start(psF))
    psD[ps.idx == dummyID] <- Inf # ensure that there is even no match if a seq with name "chrNA" exists

    ## features from nearest peak end
    peF <- features[pe.idx]
    ## feature distances from peak end
    peD <- ifelse(strand(peF) == "+", 1, -1) *
        (end(peaks[!na.idx]) - start(peF))
    peD[pe.idx == dummyID] <- Inf # ensure that there is even no match if a seq with name "chrNA" exists

    ## restore the old feature object
    features <- features.bak

    pse <- data.frame(ps=psD, pe=peD)
    if (ignoreUpstream) {
        j <- rep(2, nrow(pse))
    } else if (ignoreDownstream) {
        j <- rep(1, nrow(pse))
    } else {
        j <- apply(pse, 1, function(i) which.min(abs(i)))
    }

    ## index
    idx <- ps.idx
    idx[j==2] <- pe.idx[j==2]

    ## distance
    dd <- psD
    dd[j==2] <- peD[j==2]

    index <- distanceToTSS <- rep(NA, length(peaks))
    distanceToTSS[!na.idx] <- dd
    index[!na.idx] <- idx

    if (!ignoreOverlap) {
        ## hit <- findOverlaps(peaks, unstrand(features))

        if (overlap == "all") {
            hit <- overlap_hit
            if ( length(hit) != 0 ) {
                qh <- queryHits(hit)
                hit.idx <- getFirstHitIndex(qh)
                hit <- hit[hit.idx]
                peakIdx <- queryHits(hit)
                featureIdx <- subjectHits(hit)

                index[peakIdx] <- featureIdx
                distance_both_end <- data.frame(start=start(peaks[peakIdx]) - start(features[featureIdx]),
                                          end = end(peaks[peakIdx]) - start(features[featureIdx]))
                distance_idx <- apply(distance_both_end, 1, function(i) which.min(abs(i)))
                distance_minimal <- distance_both_end[,1]
                distance_minimal[distance_idx == 2] <- distance_both_end[distance_idx==2, 2]

                distanceToTSS[peakIdx] <- distance_minimal * ifelse(strand(features[featureIdx]) == "+", 1, -1)

            }
        }

        hit <- findOverlaps(peaks, unstrand(features))

        if ( length(hit) != 0 ) {
            qh <- queryHits(hit)
            hit.idx <- getFirstHitIndex(qh)
            hit <- hit[hit.idx]
            peakIdx <- queryHits(hit)
            featureIdx <- subjectHits(hit)

            index[peakIdx] <- featureIdx
            distanceToTSS[peakIdx] <- 0
        }

    }

    j <- is.na(distanceToTSS) | is.na(index)

    res <- list(index=index[!j],
                distance=distanceToTSS[!j],
                peak=peaks[!j])

    return(res)
}

isPeakFeatureOverlap <- function(peak, feature) {
    peakRange <- ranges(peak)
    featureRange <- ranges(feature)
    x <- intersect(peakRange, featureRange)
    return(length(x) != 0)
}


================================================
FILE: R/plotAnno.R
================================================
##' plot feature distribution based on their chromosome region
##'
##' plot chromosome region features
##' @title plotAnnoBar.data.frame
##' @rdname plotAnnoBar
##' @param anno.df annotation stats
##' @param xlab xlab
##' @param ylab ylab
##' @param title plot title
##' @param categoryColumn category column
##' @return bar plot that summarize genomic features of peaks
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 aes_string
##' @importFrom ggplot2 geom_bar
##' @importFrom ggplot2 coord_flip
##' @importFrom ggplot2 theme_bw
##' @importFrom ggplot2 scale_x_continuous
##' @importFrom ggplot2 scale_fill_manual
##' @importFrom ggplot2 xlab
##' @importFrom ggplot2 ylab
##' @importFrom ggplot2 ggtitle
##' @importFrom ggplot2 guide_legend
##' @seealso \code{\link{annotatePeak}} \code{\link{plotAnnoPie}}
##' @author Guangchuang Yu \url{https://yulab-smu.top}
plotAnnoBar.data.frame <- function(anno.df,
                                   xlab="",
                                   ylab="Percentage(%)",
                                   title="Feature Distribution",
                                   categoryColumn) {


    anno.df$Feature <- factor(anno.df$Feature, levels = rev(levels(anno.df$Feature)))

    p <- ggplot(anno.df, aes_string(x = categoryColumn,
                                    fill = "Feature",
                                    y = "Frequency"))

    p <- p + geom_bar(stat="identity") + coord_flip() + theme_bw()
    p <- p + ylab(ylab) + xlab(xlab) + ggtitle(title)

    if (categoryColumn == 1) {
        p <- p + scale_x_continuous(breaks=NULL)
        p <- p+scale_fill_manual(values=rev(getCols(nrow(anno.df))), guide=guide_legend(reverse=TRUE))
    } else {
        p <- p+scale_fill_manual(values=rev(getCols(length(unique(anno.df$Feature)))), guide=guide_legend(reverse=TRUE))
    }

    return(p)
}

##' pieplot from peak genomic annotation
##'
##'
##' @title plotAnnoPie
##' @rdname plotAnnoPie
##' @param x csAnno object
##' @param ndigit number of digit to round
##' @param cex label cex
##' @param col color
##' @param legend.position topright or other.
##' @param pie3D plot in 3D or not
##' @param radius radius of Pie
##' @param ... extra parameter
##' @return pie plot of peak genomic feature annotation
##' @examples
##' \dontrun{
##' require(TxDb.Hsapiens.UCSC.hg19.knownGene)
##' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
##' peakfile <- system.file("extdata", "sample_peaks.txt", package="chipseeker")
##' peakAnno <- annotatePeak(peakfile, TxDb=txdb)
##' plotAnnoPie(peakAnno)
##' }
##' @seealso \code{\link{annotatePeak}} \code{\link{plotAnnoBar}}
##' @export
##' @author Guangchuang Yu \url{https://yulab-smu.top}
plotAnnoPie.csAnno <- function(x,
                        ndigit=2,
                        cex=0.8,
                        col=NA,
                        legend.position="rightside",
                        pie3D=FALSE,
                        radius=0.8,
                        ...){

    anno.df <- getAnnoStat(x)
    if (is.na(col[1])) {
        col <- getCols(nrow(anno.df))
    }

    if (pie3D)
        annoPie3D(anno.df, ndigit=ndigit, cex=cex, col=col, ...)

    annoPie(anno.df, ndigit=ndigit, cex=cex, col=col, legend.position=legend.position, radius=radius, ...)
 }

##' @importFrom grDevices colorRampPalette
##' @importFrom graphics par
##' @importFrom graphics layout
##' @importFrom graphics pie
##' @importFrom graphics legend
##' @importFrom graphics plot.new
annoPie <- function(anno.df, ndigit=2, cex=0.8, col=NA, legend.position, radius=0.8, ...) {
    if ( ! all(c("Feature", "Frequency") %in% colnames(anno.df))) {
        stop("check your input...")
    }

    if (legend.position == "rightside") {
        labels=paste(anno.df$Feature, " (",
            round(anno.df$Frequency/sum(anno.df$Frequency)*100, ndigit),
            "%)", sep="")

        par(mai = c(0,0,0,0))
        layout(matrix(c(1,2), ncol=2), widths=c(0.6,0.4))
        pie(anno.df$Frequency, labels=NA, cex=cex, col=col, ...)
        plot.new()
        legend("center", legend = labels, fill = col, bty = "n", cex = cex)
    } else {
        par(mai = c(0,0,0,0))
        pie(anno.df$Frequency,
            ##     ## labels=paste(round(anno.df$Frequency/sum(anno.df$Frequency)*100, 2), "%", sep=""),
            labels=paste(anno.df$Feature, " (",
                round(anno.df$Frequency/sum(anno.df$Frequency)*100, ndigit),
                "%)", sep=""),
            cex=cex,
            col=col,
            radius=radius,
            ...
            )
    }
}

## @param ndigit ndigit
## @param radius the radius of the pie
## @param explode the amount to "explode" the pie
## @param labelcex label font size
## @importFrom plotrix pie3D
annoPie3D <- function(anno.df,
                      ndigit=2,
                      cex=1,
                      ...){

    ## anno.df <- getGenomicAnnoStat(peakAnno)

    pkg <- "plotrix"
    require(pkg, character.only=TRUE)
    pie3D <- eval(parse(text="pie3D"))
    pie3D(anno.df$Frequency,
          labels=paste(
              anno.df$Feature,
              "(",
              paste(round(anno.df$Frequency, ndigit), "%", sep=""),
              ")",
              sep=""),
          labelcex=cex,
          col=col,
          ...)
}

getGenomicAnnoStat <- function(peakAnno) {
    if(inherits(peakAnno,"GRanges"))
        peakAnno <- as.data.frame(peakAnno)
    anno <- peakAnno$annotation
    ## anno <- sub(" \\(.+", "", anno)

    e1 <- getOption("ChIPseeker.ignore_1st_exon")
    i1 <- getOption("ChIPseeker.ignore_1st_intron")
    ids <- getOption("ChIPseeker.ignore_downstream")

    if (is.null(e1) || !e1) {
        e1lab <- "1st Exon"
        anno[grep("exon 1 of", anno)] <- e1lab
        exonlab <- "Other Exon"
    } else {
        e1lab <- NULL
        exonlab <- "Exon"
    }

    if (is.null(i1) || !i1) {
        i1lab <- "1st Intron"
        anno[grep("intron 1 of", anno)] <- i1lab
        intronlab <- "Other Intron"
    } else {
        i1lab <- NULL
        intronlab <- "Intron"
    }

    anno[grep("Exon \\(", anno)] <- exonlab
    anno[grep("Intron \\(", anno)] <- intronlab

    if (is.null(ids) || !ids) {
        dsd <- getOption("ChIPseeker.downstreamDistance")
        if (is.null(dsd))
            dsd <- 3000 ## downstream 3k by default
        if (dsd > 1000) {
            dsd <- round(dsd/1000, 1)
            dsd <- paste0(dsd, "kb")
        }
        dslab <- paste0("Downstream (<=", dsd, ")")

        anno[grep("Downstream", anno)] <- dslab
        iglab <- "Distal Intergenic"
    } else {
        dslab <- NULL
        iglab <- "Intergenic"
        anno[grep("Downstream", anno)] <- iglab
    }
    anno[grep("^Distal", anno)] <- iglab

    lvs <- c(
        "5' UTR",
        "3' UTR",
        e1lab,
        exonlab,
        i1lab,
        intronlab,
        dslab,
        iglab
    )

    promoter <- unique(anno[grep("Promoter", anno)])
    ip <- getOption("ChIPseeker.ignore_promoter_subcategory")
    if ((is.null(ip) || !ip) && (length(promoter) > 0)) {
        plab <- sort(as.character(promoter))
    } else {
        plab <- "Promoter"
        anno[grep("^Promoter", anno)] <- plab
    }
    lvs <- c(plab, lvs)

    ## count frequency
    anno.table <- table(anno)

    ## calculate ratio
    anno.ratio <- anno.table/ sum(anno.table) * 100
    anno.df <- as.data.frame(anno.ratio)
    colnames(anno.df) <- c("Feature", "Frequency")

    anno.df$Feature <- factor(anno.df$Feature, levels=lvs[lvs %in% anno.df$Feature])
    anno.df <- anno.df[order(anno.df$Feature),]
    return(anno.df)
}


================================================
FILE: R/plotDistToTSS.R
================================================
merge_two_si = function(x1, x2){
  if (length(unique(gsub("^[0-9]+","",c(x1, x2)))) == 1){
    return(paste0(gsub("[^0-9]*$","",x1), "-", x2))
  } else {
    return(paste0(x1, "-", x2))
  }
}

generate_break_lbs = function(breaks) {
  lbs = c()
  
  # break labels
  break_labels = scales::label_number(scale_cut = scales::cut_si(unit = "b"))(breaks)
  break_labels = gsub(" b$"," bp", break_labels)

  # category labels
  for (i in 2:length(breaks)) {
    if (i == length(breaks)) {
      lbs = c(lbs, paste0(">", break_labels[i-1]))
    } else {
      lbs = c(lbs, merge_two_si(break_labels[i-1], break_labels[i]))
    }
  }
  
  return(lbs)
}

generate_colors = function(palette = NULL, n) {
  # old color in version <= 1.41.1
  old_color = c("#9ecae1", "#3182bd", "#C7A76C", "#86B875", "#39BEB1", "#CD99D8")
  if (is.null(palette)){
    brewer_cols = old_color
  } else if (length(palette) == 1 && is_valid_palette(palette)){
    brewer_cols = RColorBrewer::brewer.pal(
      name = palette, 
      n = RColorBrewer::brewer.pal.info[palette, "maxcolors"]
    ) |> rev()     
  } else if (all(is_valid_color(palette))){
    brewer_cols = palette
  }
  else {
    warning("Your palette is non-valid, switching to default...")
    brewer_cols = old_color
  }
  
  if (length(brewer_cols) >= n) {
    cols = brewer_cols[1:length(brewer_cols)]
  } else {
    cols = grDevices::colorRampPalette(brewer_cols)(n)
  }
  
  return(cols)
}

is_valid_palette = function(palette){
  palette %in% rownames(RColorBrewer::brewer.pal.info)
}

is_valid_color = function(color){
  tryCatch({
    grDevices::col2rgb(color)
    TRUE
  }, error = function(e) {
    FALSE
  })
}

##' plot feature distribution based on the distances to the TSS
##'
##'
##' @title plotDistToTSS.data.frame
##' @param peakDist peak annotation
##' @param distanceColumn column name of the distance from peak to nearest gene
##' @param distanceBreaks default is 'c(0, 1000, 3000, 5000, 10000, 100000)'
##' @param palette palette name for coloring different distances. Run `RColorBrewer::display.brewer.all()` to see all applicable values.
##' @param xlab x label
##' @param ylab y lable
##' @param title figure title
##' @param categoryColumn category column, default is ".id"
##' @return bar plot that summarize distance from peak to
##' TSS of the nearest gene.
##' @importFrom magrittr %<>%
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 aes
##' @importFrom ggplot2 aes_string
##' @importFrom ggplot2 geom_bar
##' @importFrom ggplot2 geom_hline
##' @importFrom ggplot2 theme_bw
##' @importFrom ggplot2 coord_flip
##' @importFrom ggplot2 xlab
##' @importFrom ggplot2 ylab
##' @importFrom ggplot2 ggtitle
##' @importFrom ggplot2 geom_hline
##' @importFrom ggplot2 scale_y_continuous
##' @importFrom ggplot2 scale_x_continuous
##' @importFrom ggplot2 scale_fill_brewer
##' @importFrom ggplot2 scale_fill_hue
##' @importFrom ggplot2 scale_fill_manual
##' @importFrom ggplot2 geom_text
##' @importFrom rlang .data
##' @examples
##' \dontrun{
##' require(TxDb.Hsapiens.UCSC.hg19.knownGene)
##' txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
##' peakfile <- system.file("extdata", "sample_peaks.txt", package="ChIPseeker")
##' peakAnno <- annotatePeak(peakfile, TxDb=txdb)
##' plotDistToTSS(peakAnno)
##' }
##' @seealso \code{\link{annotatePeak}}
##' @author Guangchuang Yu \url{https://guangchuangyu.github.io}
plotDistToTSS.data.frame <- function(peakDist,
                                     distanceColumn="distanceToTSS",
                                     distanceBreaks=c(0, 1000, 3000, 5000, 10000, 100000),
                                     palette = NULL,
                                     xlab="",
                                     ylab="Binding sites (%) (5'->3')",
                                     title="Distribution of transcription factor-binding loci relative to TSS",
                                     categoryColumn = ".id") {

    distanceBreaks = sort(distanceBreaks)
    hasZero = sum(distanceBreaks == 0)
    if (!hasZero) distanceBreaks = c(0, distanceBreaks)
    hasInf = sum(is.infinite(distanceBreaks))
    if (!hasInf) distanceBreaks = c(distanceBreaks, Inf)
    lbs = generate_break_lbs(distanceBreaks)
    peakDist$Feature = cut(abs(peakDist[[distanceColumn]]), 
                           breaks = distanceBreaks,
                           labels = lbs,
                           include.lowest = TRUE)

    ## sign containing -1 and 1 for upstream and downstream
    peakDist$sign <- sign(peakDist[,distanceColumn])

    ## count frequencies
    if (categoryColumn == 1) {
      peakDist = peakDist |> 
        summarise(freq = length(.data$Feature), .by = c("Feature", "sign")) |> 
        mutate(freq = .data$freq/sum(.data$freq) * 100)
    } else {
      peakDist = peakDist |> 
        summarise(freq = length(.data$Feature), .by = c(categoryColumn, "Feature", "sign")) |> 
        mutate(freq = .data$freq/sum(.data$freq) * 100, .by = categoryColumn)
    }

    if (any(peakDist$sign == 0)) {
        zeroDist <- peakDist[peakDist$sign == 0,]
        zeroDist$freq <- zeroDist$freq/2
        zeroDist$sign <- -1
        peakDist[peakDist$sign == 0,] <- zeroDist
        zeroDist$sign <- 1
        peakDist <- rbind(peakDist, zeroDist)
    }

    if (categoryColumn == 1) {
        peakDist %<>% group_by(.data$Feature, .data$sign) %>%
            summarise(freq = sum(.data$freq))

        totalFreq <- peakDist %>% group_by(.data$sign) %>%
            summarise(total = sum(.data$freq))
    } else {
        peakDist %<>% group_by(.data$.id, .data$Feature, .data$sign) %>%
            summarise(freq = sum(.data$freq))
        totalFreq <- peakDist %>% group_by(.data$.id, .data$sign) %>%
            summarise(total = sum(.data$freq))
    }


    ## preparing ylim and y tick labels
    ds = max(totalFreq$total[totalFreq$sign == 1])
    dslim = ceiling(ds/10) * 10
    us = max(totalFreq$total[totalFreq$sign == -1])
    uslim = ceiling(us/10) * 10
    ybreaks <- seq(-uslim, dslim, by=10)
    ylbs <- abs(ybreaks)
    ylbs[ylbs == 0] <- "TSS"

    peakDist$Feature <- factor(peakDist$Feature, levels=rev(levels(peakDist$Feature)))
    if (categoryColumn == 1) {
        p <- ggplot(peakDist, aes(x=1, fill=.data$Feature))
    } else {
        p <- ggplot(peakDist, aes(x=.data[[categoryColumn]], fill=.data$Feature))
    }

    p <- p + geom_bar(data=subset(peakDist, sign==1), aes(y=.data$freq), stat="identity") +
        geom_bar(data=subset(peakDist, sign==-1), aes(y=-.data$freq), stat="identity")

    p <- p + geom_hline(yintercept = 0, colour = "black") +
        coord_flip() + theme_bw() +
            scale_y_continuous(breaks=ybreaks,labels=ylbs)

    p <- p + ylab(ylab) + xlab(xlab) + ggtitle(title)

    if (categoryColumn == 1) {
        p <- p + scale_x_continuous(breaks=NULL)
    }

    cols <- generate_colors(palette = palette, n = length(lbs))
    p <- p + scale_fill_manual(values=rev(cols), guide=guide_legend(reverse=TRUE))

    return(p)
}


================================================
FILE: R/plotTagMatrix.R
================================================
##' plot the profile of peaks
##'`
##' \code{plotPeakProf_MultiWindows()} is almost the same as \code{plotPeakProf2()}, having
##' the main difference of accepting two or more granges objects. Accepting more
##' granges objects can help compare the same peaks in different windows.
##' 
##' \code{TxDb} parameter can accept txdb object.
##' But many regions can not be obtained by txdb object. In this case,
##' Users can provide self-made granges served the same role 
##' as txdb object and pass to \code{TxDb} object.
##' 
##' \code{by} the features of interest. 
##' 
##' (1) if users use \code{txdb}, \code{by} can be one of 'gene', 'transcript', 'exon', 
##' 'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.
##' 
##' (2) if users use self-made granges object, \code{by} can be everything. Because this \code{by}
##' will not pass to functions to get features, which is different from the case of using 
##' txdb object. This \code{by} is only used to made labels showed in picture.
##' 
##' \code{type} means the property of the region. one of the "start site",
##' "end site" and "body".
##' 
##' \code{upstream} and \code{downstream} parameter have different usages:
##' 
##' (1) if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
##' the flank of body region.
##' 
##' (2) if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} refer to
##' the upstream and downstream of the start_site or the end_site.
##' 
##' \code{weightCol} refers to column in peak file. This column acts as a weight value. Details
##' see \url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}
##' 
##' \code{nbin} refers to the number of bins. \code{getTagMatrix()} provide a binning method
##' to get the tag matrix.
##' 
##' There are two ways input a list of window.
##' 
##' (1) Users can input a list of self-made granges objects
##' 
##' (2) Users can input a list of \code{by} and only one \code{type}. In this way, 
##' \code{plotPeakProf_MultiWindows()} can made a list of window from txdb object based on \code{by} and \code{type}.
##' 
##' Warning: 
##' 
##' (1) All of these window should be the same type. It means users can only
##' compare a list of "start site"/"end site"/"body region" with the same upstream
##' and downstream.
##' 
##' (2) So it will be only one \code{type} and several \code{by}.
##' 
##' (3) Users can make window by txdb object or self-made granges object. Users can only
##' choose one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR' or 'UTR' in the
##' way of using txdb object. User can input any \code{by} in the way of using 
##' self-made granges object.
##' 
##' (4) Users can mingle the \code{by} designed for the two ways. \code{plotPeakProf_MultiWindows} can
##' accpet the hybrid \code{by}. But the above rules should be followed.
##' 
##' \url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}
##'
##' @title plotPeakProf_MultiWindows
##' 
##' @param tagMatrix tagMatrix or a list of tagMatrix
##' @param peak peak file or GRanges object
##' @param weightCol column name of weight
##' @param TxDb TxDb object or self-made granges objects
##' @param upstream upstream position
##' @param downstream downstream position
##' @param by feature of interest
##' @param type one of "start_site", "end_site", "body"
##' @param windows_name the name for each window, which will also be showed in the picture as labels
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param verbose print message or not
##' @param nbin the amount of bines 
##' @param ignore_strand ignore the strand information or not
##' @param ... additional parameter
##' @return ggplot object
##' @importFrom methods is
##' @importFrom methods as
##' @importFrom methods missingArg
##' @importFrom methods new
##' @export
plotPeakProf <- function(tagMatrix = NULL,
                         peak,
                         upstream,
                         downstream,
                         conf,
                         by,
                         type,
                         windows_name = NULL,
                         weightCol = NULL,
                         TxDb = NULL,
                         xlab = "Genomic Region (5'->3')",
                         ylab = "Peak Count Frequency",
                         facet = "row",
                         free_y = TRUE,
                         verbose = TRUE,
                         nbin = NULL,
                         ignore_strand = FALSE,
                         ...){
  
  if(is.null(tagMatrix)){
    
    conf <- if(missingArg(conf)) NA else conf
    upstream <- if(missingArg(upstream)) NULL else upstream
    downstream <- if(missingArg(downstream)) NULL else downstream
    
    if(length(by) == 1){
      
      plotPeakProf2(peak = peak, 
                    upstream = upstream, 
                    downstream = downstream,
                    conf = conf,
                    by = by,
                    type = type,
                    weightCol = weightCol, 
                    TxDb = TxDb,
                    xlab = xlab,
                    ylab = ylab,
                    facet = facet,
                    free_y = free_y,
                    verbose = verbose, 
                    nbin = nbin,
                    ignore_strand = ignore_strand,
                    ...)
      
    }else{
      
      if(is.null(windows_name) && !is.null(names(TxDb)))
        windows_name <- names(TxDb)
      
      plotPeakProf_MultiWindows(peak = peak,
                                upstream = upstream,
                                downstream = downstream,
                                conf = conf,
                                by = by,
                                type = type,
                                windows_name = windows_name,
                                weightCol = weightCol,
                                TxDb = TxDb,
                                xlab = xlab,
                                ylab = ylab,
                                facet = facet,
                                free_y = free_y,
                                verbose = verbose,
                                nbin = nbin,
                                ignore_strand = ignore_strand,
                                ...)
      
    }
    
  }else{
    
    if(is(tagMatrix, "list")){
      upstream <- attr(tagMatrix[[1]], 'upstream')
      downstream <- attr(tagMatrix[[1]], 'downstream')
      label <- attr(tagMatrix[[1]], 'label')
      attr(tagMatrix, 'type') <- attr(tagMatrix[[1]], 'type')
      attr(tagMatrix, 'is.binning') <- attr(tagMatrix[[1]], 'is.binning')
      
    }else{
      upstream <- attr(tagMatrix, 'upstream')
      downstream <- attr(tagMatrix, 'downstream')
      label <- attr(tagMatrix, 'label')
    }
    
    
    if(attr(tagMatrix, 'is.binning')){
      
      if (!(missingArg(conf) || is.na(conf))){
        
        plotAvgProf.binning(tagMatrix = tagMatrix, 
                            xlab = xlab,
                            ylab = ylab,
                            conf = conf,
                            facet = facet, 
                            free_y = free_y,
                            upstream = upstream,
                            downstream = downstream,
                            label = label,
                            ...)
        
      }else{
        
        plotAvgProf.binning(tagMatrix = tagMatrix, 
                            xlab = xlab,
                            ylab = ylab,
                            facet = facet, 
                            free_y = free_y,
                            upstream = upstream,
                            downstream = downstream,
                            label = label,
                            ...)
        
      }
      
      
    }else{
      
      xlim <- c(-upstream, downstream)
      
      if (!(missingArg(conf) || is.na(conf))){
        
        plotAvgProf (tagMatrix = tagMatrix, 
                     xlim = xlim,
                     xlab = xlab,
                     ylab = ylab,
                     conf = conf,
                     facet = facet, 
                     free_y = free_y,
                     origin_label = label,
                     ...)
        
      }else{
        
        plotAvgProf (tagMatrix = tagMatrix, 
                     xlim = xlim,
                     xlab = xlab,
                     ylab = ylab,
                     facet = facet, 
                     free_y = free_y,
                     origin_label = label,
                     ...)
        
      }
      
      
    }
  }
  
}


##' plot the profile of peaks
##'
##'
##' @title plotAvgProf
##' @param tagMatrix tagMatrix or a list of tagMatrix
##' @param xlim xlim
##' @param xlab x label
##' @param ylab y label
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param origin_label label of the center
##' @param verbose print message or not
##' @param ... additional parameter
##' @return ggplot object
##' @author G Yu; Y Yan
##' @export
plotAvgProf <- function(tagMatrix, xlim,
                        xlab="Genomic Region (5'->3')",
                        ylab = "Peak Count Frequency",
                        conf,
                        facet="none", 
                        free_y = TRUE, 
                        origin_label = "TSS",
                        verbose = TRUE,
                        ...) {
  
  ## S4Vectors change the behavior of ifelse
  ## see https://support.bioconductor.org/p/70871/
  ##
  ## conf <- ifelse(missingArg(conf), NA, conf)

  if (verbose) {
      cat(">> plotting figure...\t\t\t",
          format(Sys.time(), "%Y-%m-%d %X"), "\n")
  }
  
  conf <- if(missingArg(conf)) NA else conf
  
  if (!(missingArg(conf) || is.na(conf))){
    p <- plotAvgProf.internal(tagMatrix = tagMatrix, 
                              conf = conf, 
                              xlim = xlim,
                              xlab = xlab, 
                              ylab = ylab,
                              facet = facet, 
                              free_y = free_y, 
                              origin_label = origin_label,
                              ...)
  } else {
    p <- plotAvgProf.internal(tagMatrix, 
                              xlim = xlim,
                              xlab = xlab, 
                              ylab = ylab,
                              facet = facet, 
                              free_y = free_y, 
                              origin_label = origin_label,
                              ...)
  }
  return(p)
}


##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 geom_line
##' @importFrom ggplot2 geom_vline
##' @importFrom ggplot2 geom_ribbon
##' @importFrom ggplot2 scale_x_continuous
##' @importFrom ggplot2 scale_color_manual
##' @importFrom ggplot2 xlab
##' @importFrom ggplot2 ylab
##' @importFrom ggplot2 theme_bw
##' @importFrom ggplot2 theme
##' @importFrom ggplot2 element_blank
##' @importFrom ggplot2 facet_grid
plotAvgProf.internal <- function(tagMatrix, conf,
                                 xlim = c(-3000,3000),
                                 xlab = "Genomic Region (5'->3')",
                                 ylab = "Peak Count Frequency",
                                 facet="none", 
                                 free_y = TRUE,
                                 origin_label, 
                                 ...) {
  
  listFlag <- FALSE
  if (is(tagMatrix, "list")) {
    if ( is.null(names(tagMatrix)) ) {
      nn <- paste0("peak", seq_along(tagMatrix))
      warning("input is not a named list, set the name automatically to ", paste(nn, collapse=' '))
      names(tagMatrix) <- nn
      ## stop("tagMatrix should be a named list...")
    }
    listFlag <- TRUE
  }
  
  if ( listFlag ) {
    facet <- match.arg(facet, c("none", "row", "column"))
    if ( (xlim[2]-xlim[1]+1) != ncol(tagMatrix[[1]]) ) {
      stop("please specify appropreate xcoordinations...")
    }
  } else {
    if ( (xlim[2]-xlim[1]+1) != ncol(tagMatrix) ) {
      stop("please specify appropreate xcoordinations...")
    }
  }
  
  ## S4Vectors change the behavior of ifelse
  ## see https://support.bioconductor.org/p/70871/
  ##
  ## conf <- ifelse(missingArg(conf), NA, conf)
  ##
  conf <- if(missingArg(conf)) NA else conf
  
  pos <- value <- .id <- Lower <- Upper <- NULL
  
  if ( listFlag ) {
    tagCount <- lapply(tagMatrix, function(x) getTagCount(x, xlim = xlim, conf = conf, ...))
    tagCount <- list_to_dataframe(tagCount)
    tagCount$.id <- factor(tagCount$.id, levels=names(tagMatrix))
    p <- ggplot(tagCount, aes(pos, group=.id, color=.id))
    if (!(is.na(conf))) {
      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = .id),
                           linetype = 0, alpha = 0.2)
    }
  } else {
    tagCount <- getTagCount(tagMatrix, xlim = xlim, conf = conf, ...)
    p <- ggplot(tagCount, aes(pos))
    if (!(is.na(conf))) {
      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper),
                           linetype = 0, alpha = 0.2)
    }
  }
  
  p <- p + geom_line(aes(y = value))
  
  if ( 0 > xlim[1] && 0 < xlim[2] ) {
    p <- p + geom_vline(xintercept=0,
                        linetype="longdash")
    p <- p + scale_x_continuous(breaks=c(xlim[1], floor(xlim[1]/2),
                                         0,
                                         floor(xlim[2]/2), xlim[2]),
                                labels=c(paste0(xlim[1],"bp"), paste0(floor(xlim[1]/2),"bp"),
                                         origin_label, 
                                         paste0(floor(xlim[2]/2),"bp"), paste0(xlim[2], "bp")))
  }
  
  if (listFlag) {
    cols <- getCols(length(tagMatrix))
    p <- p + scale_color_manual(values=cols)
    if (facet == "row") {
      if (free_y) {
        p <- p + facet_grid(.id ~ ., scales = "free_y")
      } else {
        p <- p + facet_grid(.id ~ .)
      }
    } else if (facet == "column") {
      if (free_y) {
        p <-  p + facet_grid(. ~ .id, scales = "free_y")
      } else {
        p <-  p + facet_grid(. ~ .id)
      }
    }
  }
  p <- p+xlab(xlab)+ylab(ylab)
  p <- p + theme_bw() + theme(legend.title=element_blank())
  if(facet != "none") {
    p <- p + theme(legend.position="none")
  }
  return(p)
}

##' plot the profile of peaks that align to flank sequences of TSS
##'
##' This function is the old function of \code{plotPeakProf2}. It can
##' only plot the start site region of gene.
##'
##' @title plotAvgProf
##' @param peak peak file or GRanges object
##' @param weightCol column name of weight
##' @param TxDb TxDb object
##' @param upstream upstream position
##' @param downstream downstream position
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param verbose print message or not
##' @param ignore_strand ignore the strand information or not
##' @param ... additional parameter
##' @return ggplot object
##' @export
##' @author G Yu, Ming L
plotAvgProf2 <- function(peak, weightCol = NULL, TxDb = NULL,
                         upstream = 1000, downstream = 1000,
                         xlab = "Genomic Region (5'->3')",
                         ylab = "Peak Count Frequency",
                         conf,
                         facet = "none",
                         free_y = TRUE,
                         verbose = TRUE, 
                         ignore_strand = FALSE,
                         ...) {
  
  plotPeakProf2(peak = peak, 
                upstream = upstream, 
                downstream = downstream,
                conf,
                by = "gene",
                type = "start_site",
                weightCol = weightCol, 
                TxDb = TxDb,
                xlab = xlab,
                ylab = ylab,
                facet = facet,
                free_y = free_y,
                verbose = verbose, 
                ignore_strand = ignore_strand,
                ...)
  
}

##' plot the profile of peaks  by binning
##' 
##' 
##' @title plotAvgProf.binning
##' @param tagMatrix tagMatrix or a list of tagMatrix
##' @param xlab x label
##' @param ylab y label 
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled 
##' @param upstream rel object reflects the percentage of flank extension, e.g rel(0.2)
##'                 integer reflects the actual length of flank extension or TSS region
##'                 NULL reflects the gene body with no extension
##' @param downstream rel object reflects the percentage of flank extension, e.g rel(0.2)
##'                   integer reflects the actual length of flank extension or TSS region
##'                   NULL reflects the gene body with no extension
##' @param label label
##' @param ... additional parameter
##' @return ggplot object
##' @importFrom ggplot2 rel
plotAvgProf.binning <- function(tagMatrix, 
                                xlab = "Genomic Region (5'->3')",
                                ylab = "Peak Count Frequency",
                                conf,
                                facet ="none", 
                                free_y = TRUE,
                                upstream = NULL,
                                downstream = NULL,
                                label,
                                ...) {
  
  ## S4Vectors change the behavior of ifelse
  ## see https://support.bioconductor.org/p/70871/
  ##
  ## conf <- ifelse(missingArg(conf), NA, conf)
  conf <- if(missingArg(conf)) NA else conf
  
  if (!(missingArg(conf) || is.na(conf))){
    p <- plotAvgProf.binning.internal(tagMatrix , 
                                      conf = conf, 
                                      xlab = xlab, 
                                      ylab = ylab,
                                      facet = facet, 
                                      free_y = free_y,
                                      upstream = upstream,
                                      downstream = downstream,
                                      label = label,
                                      ...)
  } else {
    p <- plotAvgProf.binning.internal(tagMatrix , 
                                      xlab = xlab, 
                                      ylab = ylab,
                                      facet = facet, 
                                      free_y = free_y, 
                                      upstream = upstream,
                                      downstream = downstream,
                                      label = label,
                                      ...)
  }
  return(p)
}


##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 geom_line
##' @importFrom ggplot2 geom_vline
##' @importFrom ggplot2 geom_ribbon
##' @importFrom ggplot2 scale_x_continuous
##' @importFrom ggplot2 scale_color_manual
##' @importFrom ggplot2 xlab
##' @importFrom ggplot2 ylab
##' @importFrom ggplot2 theme_bw
##' @importFrom ggplot2 theme
##' @importFrom ggplot2 element_blank
##' @importFrom ggplot2 facet_grid
##' @importFrom ggplot2 rel
plotAvgProf.binning.internal <- function(tagMatrix, 
                                         conf,
                                         xlab = "Genomic Region (5'->3')",
                                         ylab = "Peak Count Frequency",
                                         facet="none", 
                                         free_y = TRUE,
                                         upstream = NULL,
                                         downstream = NULL,
                                         label,
                                         ...) {
  
  listFlag <- FALSE
  if (is(tagMatrix, "list")) {
    if ( is.null(names(tagMatrix )) ) {
      nn <- paste0("peak", seq_along(tagMatrix ))
      warning("input is not a named list, set the name automatically to ", paste(nn, collapse=' '))
      names(tagMatrix) <- nn
      ## stop("tagMatrix should be a named list...")
    }
    listFlag <- TRUE
  }
  
  if(listFlag){
    nbin <- dim(tagMatrix[[1]])[2]
  }else{
    nbin <- dim(tagMatrix)[2]
  }
  xlim <- c(1,nbin)
  
  if ( listFlag ) {
    facet <- match.arg(facet, c("none", "row", "column"))
  }
  
  ## S4Vectors change the behavior of ifelse
  ## see https://support.bioconductor.org/p/70871/
  ##
  ## conf <- ifelse(missingArg(conf), NA, conf)
  ##
  conf <- if(missingArg(conf)) NA else conf
  
  pos <- value <- .id <- Lower <- Upper <- NULL
  
  if ( listFlag ) {
    tagCount <- lapply(tagMatrix , function(x) getTagCount(x, xlim = xlim, conf = conf, ...))
    tagCount <- list_to_dataframe(tagCount)
    tagCount$.id <- factor(tagCount$.id, levels=names(tagMatrix ))
    p <- ggplot(tagCount, aes(pos, group=.id, color=.id))
    if (!(is.na(conf))) {
      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = .id),
                           linetype = 0, alpha = 0.2)
    }
  } else {
    tagCount <- getTagCount(tagMatrix , xlim = xlim, conf = conf, ...)
    p <- ggplot(tagCount, aes(pos))
    if (!(is.na(conf))) {
      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper),
                           linetype = 0, alpha = 0.2)
    }
  }
  
  p <- p + geom_line(aes(y = value))
  
  ## x_scale for genebody
  if(attr(tagMatrix, 'type') == 'body'){
    ## x_scale for gene body with no flank extension
    if(is.null(upstream)){
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*0.25),
                                           floor(nbin*0.5),
                                           floor(nbin*0.75),
                                           nbin),
                                  labels=c(label[1], 
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2]))
    }
    
    
    ## x_scale for flank extension by relative value
    if(inherits(upstream, 'rel')){
      
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+25)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+50)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+75)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           nbin),
                                  labels=c(paste0("-",as.numeric(upstream)*100,"%"), 
                                           label[1],
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2],
                                           paste0("+",as.numeric(downstream)*100,"%")))
      p <- p + geom_vline(xintercept=floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                          linetype="longdash")
      
      p <- p + geom_vline(xintercept=floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                          linetype="longdash")
    }
    
    ## x_scale for flank extension by absolute value
    if(!is.null(upstream) & !inherits(upstream, 'rel')){
      
      upstreamPer <- floor(upstream/1000)*0.1
      downstreamPer <- floor(downstream/1000)*0.1
      
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.25)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.5)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.75)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),
                                           nbin),
                                  labels=c(paste0("-",upstream,"bp"), 
                                           label[1],
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2],
                                           paste0(downstream,"bp")))
      p <- p + geom_vline(xintercept=floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),
                          linetype="longdash")
      
      p <- p + geom_vline(xintercept=floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),
                          linetype="longdash")
    }
  }
  
  
  ## x_scale for start region
  if(attr(tagMatrix, 'type') != 'body'){
    
    p <- p + scale_x_continuous(breaks=c(1, 
                                         floor(nbin*0.25),
                                         floor(nbin*0.5),
                                         floor(nbin*0.75),
                                         nbin),
                                labels=c(paste0("-",upstream,"bp"), 
                                         paste0("-",floor(upstream*0.5),"bp"),
                                         label,
                                         paste0(floor(downstream*0.5),"bp"),
                                         paste0(downstream,"bp")))
    
    p <- p + geom_vline(xintercept=floor(nbin*0.5),
                        linetype="longdash")
  }
  
  
  if (listFlag) {
    cols <- getCols(length(tagMatrix))
    p <- p + scale_color_manual(values=cols)
    if (facet == "row") {
      if (free_y) {
        p <- p + facet_grid(.id ~ ., scales = "free_y")
      } else {
        p <- p + facet_grid(.id ~ .)
      }
    } else if (facet == "column") {
      if (free_y) {
        p <-  p + facet_grid(. ~ .id, scales = "free_y")
      } else {
        p <-  p + facet_grid(. ~ .id)
      }
    }
  }
  p <- p+xlab(xlab)+ylab(ylab)
  p <- p + theme_bw() + theme(legend.title=element_blank())
  if(facet != "none") {
    p <- p + theme(legend.position="none")
  }
  return(p)
}


##' plot the profile of peaks automatically
##'
##' \code{peak} stands for the peak file. 
##' 
##' \code{by} the features of interest. 
##' 
##' (1) if users use \code{txdb}, \code{by} can be one of 'gene', 'transcript', 'exon', 
##' 'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.
##' 
##' (2) if users use self-made granges object, \code{by} can be everything. Because this \code{by}
##' will not pass to functions to get features, which is different from the case of using 
##' txdb object. This \code{by} is only used to made labels showed in picture.
##' 
##' \code{type} means the property of the region. one of the "start site",
##' "end site" and "body".
##' 
##' \code{upstream} and \code{downstream} parameter have different usages:
##' 
##' (1) if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
##' the flank of body region.
##' 
##' (2) if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} refer to
##' the upstream and downstream of the start_site or the end_site.
##' 
##' \code{weightCol} refers to column in peak file. This column acts as a weight vaule. Details
##' see \url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}
##' 
##' \code{nbin} refers to the number of bins, providing a binning method
##' to get the tag matrix.
##' 
##' \code{TxDb} parameter can accept txdb object.
##' But many regions can not be obtained by txdb object. In this case,
##' Users can provide self-made granges served the same role 
##' as txdb object and pass to \code{TxDb} object.
##' 
##' \code{plotPeakProf2()} is different from the \code{plotPeakProf()}. \code{plotPeakProf2()} do not
##' need to provide \code{window} parameter, which means \code{plotPeakProf2()} will call relevent
##' functions to make \code{window} automatically.
##'
##' @title plotPeakProf2
##' @param peak peak file or GRanges object
##' @param weightCol column name of weight
##' @param TxDb TxDb object, or self-made granges object
##' @param upstream upstream position
##' @param downstream downstream position
##' @param by e.g. 'gene', 'transcript', 'exon' or features of interest(e.g. "enhancer")
##' @param type one of "start_site", "end_site", "body"
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param verbose print message or not
##' @param nbin the amount of nbines 
##' @param ignore_strand ignore the strand information or not
##' @param ... additional parameter
##' @return ggplot object
##' @export
##' @author G Yu, Ming Li
plotPeakProf2 <- function(peak, 
                          upstream, 
                          downstream,
                          conf,
                          by,
                          type,
                          weightCol = NULL, 
                          TxDb = NULL,
                          xlab = "Genomic Region (5'->3')",
                          ylab = "Peak Count Frequency",
                          facet = "none",
                          free_y = TRUE,
                          verbose = TRUE, 
                          nbin = NULL,
                          ignore_strand = FALSE,
                          ...){
  
  conf <- if(missingArg(conf)) NA else conf
  upstream <- if(missingArg(upstream)) NULL else upstream
  downstream <- if(missingArg(downstream)) NULL else downstream
  
  if ( is(peak, "list") ) {
    tagMatrix <- lapply(peak, getTagMatrix, 
                        upstream = upstream,
                        downstream = downstream, 
                        type = type,
                        TxDb = TxDb,
                        by = by,
                        weightCol = weightCol, 
                        nbin = nbin,
                        verbose = verbose,
                        ignore_strand = ignore_strand)
  } else {
    tagMatrix <- getTagMatrix(peak = peak, 
                              upstream = upstream,
                              downstream = downstream, 
                              type = type,
                              by = by,
                              TxDb = TxDb,
                              weightCol = weightCol, 
                              nbin = nbin,
                              verbose = verbose,
                              ignore_strand = ignore_strand)
  }
  
  
  if (!(missingArg(conf) || is.na(conf))){
    p <- plotPeakProf(tagMatrix = tagMatrix,
                      conf = conf,
                      xlab = xlab,
                      ylab = ylab,
                      facet = facet, 
                      free_y = free_y,
                      ...)
    
  } else {
    p <- plotPeakProf(tagMatrix = tagMatrix,
                      xlab = xlab,
                      ylab = ylab,
                      facet= facet, 
                      free_y = free_y,
                      ...)
  }
  return(p)
  
}


##' plot the profile of peaks in two or more windows
##'
##'
##' This function comes from \url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}
##'`
##' \code{plotPeakProf_MultiWindows()} is almost the same as \code{plotPeakProf2()}, having
##' the main difference of accepting two or more granges objects. Accepting more
##' granges objects can help compare the same peaks in different windows.
##' 
##' \code{TxDb} parameter can accept txdb object.
##' But many regions can not be obtained by txdb object. In this case,
##' Users can provide self-made granges served the same role 
##' as txdb object and pass to \code{TxDb} object.
##' 
##' \code{by} the features of interest. 
##' 
##' (1) if users use \code{txdb}, \code{by} can be one of 'gene', 'transcript', 'exon', 
##' 'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.
##' 
##' (2) if users use self-made granges object, \code{by} can be everything. Because this \code{by}
##' will not pass to functions to get features, which is different from the case of using 
##' txdb object. This \code{by} is only used to made labels showed in picture.
##' 
##' \code{type} means the property of the region. one of the "start site",
##' "end site" and "body".
##' 
##' \code{upstream} and \code{downstream} parameter have different usages:
##' 
##' (1) if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
##' the flank of body region.
##' 
##' (2) if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} refer to
##' the upstream and downstream of the start_site or the end_site.
##' 
##' \code{weightCol} refers to column in peak file. This column acts as a weight value. Details
##' see \url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}
##' 
##' \code{nbin} refers to the number of bins. \code{getTagMatrix()} provide a binning method
##' to get the tag matrix.
##' 
##' There are two ways input a list of window.
##' 
##' (1) Users can input a list of self-made granges objects
##' 
##' (2) Users can input a list of \code{by} and only one \code{type}. In this way, 
##' \code{plotPeakProf_MultiWindows()} can made a list of window from txdb object based on \code{by} and \code{type}.
##' 
##' Warning: 
##' 
##' (1) All of these window should be the same type. It means users can only
##' compare a list of "start site"/"end site"/"body region" with the same upstream
##' and downstream.
##' 
##' (2) So it will be only one \code{type} and several \code{by}.
##' 
##' (3) Users can make window by txdb object or self-made granges object. Users can only
##' choose one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR' or 'UTR' in the
##' way of using txdb object. User can input any \code{by} in the way of using 
##' self-made granges object.
##' 
##' (4) Users can mingle the \code{by} designed for the two ways. \code{plotPeakProf_MultiWindows} can
##' accpet the hybrid \code{by}. But the above rules should be followed.
##' 
##'
##' @title plotPeakProf_MultiWindows
##' @param peak peak file or GRanges object
##' @param weightCol column name of weight
##' @param TxDb TxDb object or self-made granges objects
##' @param upstream upstream position
##' @param downstream downstream position
##' @param by feature of interest
##' @param type one of "start_site", "end_site", "body"
##' @param windows_name the name for each window, which will also be showed in the picture as labels
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param verbose print message or not
##' @param nbin the amount of bines 
##' @param ignore_strand ignore the strand information or not
##' @param ... additional parameter
##' @return ggplot object
plotPeakProf_MultiWindows <- function(peak,
                                      upstream,
                                      downstream,
                                      conf,
                                      by,
                                      type,
                                      windows_name = NULL,
                                      weightCol = NULL,
                                      TxDb = NULL,
                                      xlab = "Genomic Region (5'->3')",
                                      ylab = "Peak Count Frequency",
                                      facet = "row",
                                      free_y = TRUE,
                                      verbose = TRUE,
                                      nbin = NULL,
                                      ignore_strand = FALSE,
                                      ...){
  
  conf <- if(missingArg(conf)) NA else conf
  upstream <- if(missingArg(upstream)) NULL else upstream
  downstream <- if(missingArg(downstream)) NULL else downstream
  
  ## check type
  if(length(type) != 1){
    stop("It should be only one type...")
  }
  
  ## make the window name
  if (is.null(windows_name)) {
    nn <- by
    warning("set the name automatically to ", paste(nn, collapse=' '))
    windows_name <- nn
  }else{
    if (length(windows_name) != length(by)) {
      stop("the length of the window name and the by should be equal...")
    }
  }
  
  
  if ( is(peak, "list") ) {
    tagMatrix <- lapply(peak, getTagMatrix2,
                        upstream=upstream,
                        downstream=downstream,
                        windows_name=windows_name,
                        type=type,
                        by=by,
                        TxDb=TxDb,
                        weightCol = weightCol, 
                        nbin = nbin,
                        verbose = verbose,
                        ignore_strand= ignore_strand)
  } else {
    tagMatrix <- getTagMatrix2(peak=peak, 
                               upstream=upstream,
                               downstream=downstream,
                               windows_name=windows_name,
                               type=type,
                               by=by,
                               TxDb=TxDb,
                               weightCol = weightCol, 
                               nbin = nbin,
                               verbose = verbose,
                               ignore_strand= ignore_strand)
  }
  
  if (!(missingArg(conf) || is.na(conf))){
    p <- plotMultiProf(tagMatrix = tagMatrix,
                       conf = conf,
                       xlab = xlab,
                       ylab = ylab,
                       facet = facet, 
                       free_y = free_y,
                       ...)
    
  } else {
    p <- plotMultiProf(tagMatrix = tagMatrix,
                       xlab = xlab,
                       ylab = ylab,
                       facet= facet, 
                       free_y = free_y,
                       ...)
  }
  return(p)
  
}


##' internal function for plotPeakProf_MultiWindows
##' 
##' @param tagMatrix tagMatrix
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param ... additional parameter
plotMultiProf <- function(tagMatrix,
                          conf,
                          xlab="Genomic Region (5'->3')",
                          ylab = "Peak Count Frequency",
                          facet="none", 
                          free_y = TRUE,
                          ...){
  
  
  if(is(tagMatrix[[1]][[1]],"matrix")){
    upstream <- attr(tagMatrix[[1]][[1]], 'upstream')
    downstream <- attr(tagMatrix[[1]][[1]], 'downstream')
    # attr(tagMatrix, 'type') <- attr(tagMatrix[[1]][[1]], 'type')
    # attr(tagMatrix, 'is.binning') <- attr(tagMatrix[[1]][[1]], 'is.binning')
    binFlag <- attr(tagMatrix[[1]][[1]], 'is.binning')
    type <- attr(tagMatrix[[1]][[1]], 'type')
    
  }else{
    upstream <- attr(tagMatrix[[1]], 'upstream')
    downstream <- attr(tagMatrix[[1]], 'downstream')
    binFlag <- attr(tagMatrix[[1]], 'is.binning')
    type <- attr(tagMatrix[[1]], 'type')
  }
  
  if(type == "body"){
    
    label <- c("SS","TS")
    
  }else if(type == "start_site"){
    
    label <- "SS"
    
  }else{
    
    label <- "TS"
    
  }
  
  
  if(binFlag){
    
    if (!(missingArg(conf) || is.na(conf))){
      
      plotMultiProf.binning(tagMatrix = tagMatrix, 
                            xlab = xlab,
                            ylab = ylab,
                            conf = conf,
                            facet = facet, 
                            free_y = free_y,
                            upstream = upstream,
                            downstream = downstream,
                            label = label,
                            ...)
      
    }else{
      
      plotMultiProf.binning(tagMatrix = tagMatrix, 
                            xlab = xlab,
                            ylab = ylab,
                            facet = facet, 
                            free_y = free_y,
                            upstream = upstream,
                            downstream = downstream,
                            label = label,
                            ...)
    }
    
    
  }else{
    
    xlim <- c(-upstream, downstream)
    
    if (!(missingArg(conf) || is.na(conf))){
      
      plotMultiProf.normal(tagMatrix = tagMatrix, 
                           xlim = xlim,
                           xlab = xlab,
                           ylab = ylab,
                           conf = conf,
                           facet = facet, 
                           free_y = free_y,
                           origin_label = label,
                           ...)
      
    }else{
      
      plotMultiProf.normal(tagMatrix = tagMatrix, 
                           xlim = xlim,
                           xlab = xlab,
                           ylab = ylab,
                           facet = facet, 
                           free_y = free_y,
                           origin_label = label,
                           ...)
    }
  }
  
}

##' internal function
##' 
##' @param tagMatrix tagMatrix
##' @param xlim xlim
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param origin_label the label of the center
##' @param verbose print message or not
##' @param ... additional parameter
plotMultiProf.normal <- function(tagMatrix, xlim,
                                 xlab="Genomic Region (5'->3')",
                                 ylab = "Peak Count Frequency",
                                 conf,
                                 facet="none", 
                                 free_y = TRUE, 
                                 origin_label = "TSS",
                                 verbose = TRUE,
                                 ...) {
  
  ## S4Vectors change the behavior of ifelse
  ## see https://support.bioconductor.org/p/70871/
  ##
  ## conf <- ifelse(missingArg(conf), NA, conf)
  
  if (verbose) {
    cat(">> plotting figure...\t\t\t",
        format(Sys.time(), "%Y-%m-%d %X"), "\n")
  }
  
  conf <- if(missingArg(conf)) NA else conf
  
  if (!(missingArg(conf) || is.na(conf))){
    
    p <- plotMultiProf.normal.internal(tagMatrix = tagMatrix, 
                                       conf = conf, 
                                       xlim = xlim,
                                       xlab = xlab, 
                                       ylab = ylab,
                                       facet = facet, 
                                       free_y = free_y, 
                                       origin_label = origin_label,
                                       ...)
    
    
  } else {
    
    p <- plotMultiProf.normal.internal(tagMatrix, 
                                       xlim = xlim,
                                       xlab = xlab, 
                                       ylab = ylab,
                                       facet = facet, 
                                       free_y = free_y, 
                                       origin_label = origin_label,
                                       ...)
  }
  return(p)
}

##' internal function
##' 
##' 
##' @param tagMatrix tagMatrix
##' @param xlim xlim
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param origin_label the label of the center
##' @param ... additional parameter
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 geom_line
##' @importFrom ggplot2 geom_vline
##' @importFrom ggplot2 geom_ribbon
##' @importFrom ggplot2 scale_x_continuous
##' @importFrom ggplot2 scale_color_manual
##' @importFrom ggplot2 xlab
##' @importFrom ggplot2 ylab
##' @importFrom ggplot2 theme_bw
##' @importFrom ggplot2 theme
##' @importFrom ggplot2 element_blank
##' @importFrom ggplot2 facet_grid
plotMultiProf.normal.internal <- function(tagMatrix, conf,
                                          xlim = c(-3000,3000),
                                          xlab = "Genomic Region (5'->3')",
                                          ylab = "Peak Count Frequency",
                                          facet="row", 
                                          free_y = TRUE,
                                          origin_label, 
                                          ...) {
  
  listFlag <- FALSE
  if (is.null(attr(tagMatrix[[1]],'upstream'))) {
    if ( is.null(names(tagMatrix)) ) {
      nn <- paste0("peak", seq_along(tagMatrix))
      warning("input is not a named list, set the name automatically to ", paste(nn, collapse=' '))
      names(tagMatrix) <- nn
      ## stop("tagMatrix should be a named list...")
    }
    listFlag <- TRUE
  }
  
  if ( listFlag ) {
    facet <- match.arg(facet, c("none", "row", "column"))
    if ( (xlim[2]-xlim[1]+1) != ncol(tagMatrix[[1]][[1]]) ) {
      stop("please specify appropreate xcoordinations...")
    }
  } else {
    if ( (xlim[2]-xlim[1]+1) != ncol(tagMatrix[[1]]) ) {
      stop("please specify appropreate xcoordinations...")
    }
  }
  
  ## S4Vectors change the behavior of ifelse
  ## see https://support.bioconductor.org/p/70871/
  ##
  ## conf <- ifelse(missingArg(conf), NA, conf)
  ##
  conf <- if(missingArg(conf)) NA else conf
  
  pos <- value <- .id <- Lower <- Upper <- NULL
  
  if ( listFlag ) {
    
    tagCount <- lapply(as.list(names(tagMatrix)), function(x){
      
      tmp <- tagMatrix[[x]]
      tagCount_tmp <- lapply(as.list(names(tmp)),function(x){
        result <- getTagCount(tmp[[x]], xlim = xlim, conf = conf, ...)
        result$type <- x
        
        return(result)
      })
      tagCount_tmp <- list_to_dataframe(tagCount_tmp)
      return(tagCount_tmp)
      
    })
    
    names(tagCount) <- names(tagMatrix)
    tagCount <- list_to_dataframe(tagCount)
    tagCount$.id <- factor(tagCount$.id, levels=names(tagMatrix))
    p <- ggplot(tagCount, aes(pos, group=type, color=type))
    if (!(is.na(conf))) {
      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = type),
                           linetype = 0, alpha = 0.2)
    }
    
  } else {
    
    tagCount <- lapply(as.list(names(tagMatrix)), function(x){
      
      result <- getTagCount(tagMatrix[[x]], xlim = xlim, conf = conf, ...)
      result$type <- x
      
      return(result)
    })
    
    tagCount <- do.call("rbind",tagCount)
    
    p <- ggplot(tagCount, aes(x = pos))
    if (!(is.na(conf))) {
      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper,fill = type),
                           linetype = 0, alpha = 0.2)
    }
  }
  
  p <- p + geom_line(aes(y = value,color = type))
  
  if ( 0 > xlim[1] && 0 < xlim[2] ) {
    p <- p + geom_vline(xintercept=0,
                        linetype="longdash")
    p <- p + scale_x_continuous(breaks=c(xlim[1], floor(xlim[1]/2),
                                         0,
                                         floor(xlim[2]/2), xlim[2]),
                                labels=c(paste0(xlim[1],"bp"), paste0(floor(xlim[1]/2),"bp"),
                                         origin_label, 
                                         paste0(floor(xlim[2]/2),"bp"), paste0(xlim[2], "bp")))
  }
  
  if (listFlag) {
    # cols <- getCols(length(tagMatrix[[1]]))
    # p <- p + scale_color_manual(values=cols)
    if (facet == "row") {
      if (free_y) {
        p <- p + facet_grid(.id ~ ., scales = "free_y")
      } else {
        p <- p + facet_grid(.id ~ .)
      }
    } else if (facet == "column") {
      if (free_y) {
        p <-  p + facet_grid(. ~ .id, scales = "free_y")
      } else {
        p <-  p + facet_grid(. ~ .id)
      }
    }
  }
  
  p <- p+xlab(xlab)+ylab(ylab)
  p <- p + theme_bw() + theme(legend.title=element_blank())

  # if(facet != "none") {
  #   p <- p + theme(legend.position="none")
  # }
  
  return(p)
}

##' internal function
##' 
##' @param tagMatrix tagMatrix
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param upstream the upstream extension
##' @param downstream the downstream extension
##' @param label the label of the center
##' @param ... additional parameter
plotMultiProf.binning <- function(tagMatrix, 
                                  xlab = "Genomic Region (5'->3')",
                                  ylab = "Peak Count Frequency",
                                  conf,
                                  facet ="none", 
                                  free_y = TRUE,
                                  upstream = NULL,
                                  downstream = NULL,
                                  label,
                                  ...) {
  
  ## S4Vectors change the behavior of ifelse
  ## see https://support.bioconductor.org/p/70871/
  ##
  ## conf <- ifelse(missingArg(conf), NA, conf)
  conf <- if(missingArg(conf)) NA else conf
  
  if (!(missingArg(conf) || is.na(conf))){
    p <- plotMultiProf.binning.internal(tagMatrix , 
                                        conf = conf, 
                                        xlab = xlab, 
                                        ylab = ylab,
                                        facet = facet, 
                                        free_y = free_y,
                                        upstream = upstream,
                                        downstream = downstream,
                                        label = label,
                                        ...)
  } else {
    p <- plotMultiProf.binning.internal(tagMatrix , 
                                        xlab = xlab, 
                                        ylab = ylab,
                                        facet = facet, 
                                        free_y = free_y, 
                                        upstream = upstream,
                                        downstream = downstream,
                                        label = label,
                                        ...)
  }
  return(p)
}

##' internal function
##' 
##' @param tagMatrix tagMatrix
##' @param xlab xlab
##' @param ylab ylab
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param upstream the upstream extension
##' @param downstream the downstream extension
##' @param label the label of the center
##' @param ... additional parameter
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 geom_line
##' @importFrom ggplot2 geom_vline
##' @importFrom ggplot2 geom_ribbon
##' @importFrom ggplot2 scale_x_continuous
##' @importFrom ggplot2 scale_color_manual
##' @importFrom ggplot2 xlab
##' @importFrom ggplot2 ylab
##' @importFrom ggplot2 theme_bw
##' @importFrom ggplot2 theme
##' @importFrom ggplot2 element_blank
##' @importFrom ggplot2 facet_grid
##' @importFrom ggplot2 rel
plotMultiProf.binning.internal <- function(tagMatrix, 
                                           conf,
                                           xlab = "Genomic Region (5'->3')",
                                           ylab = "Peak Count Frequency",
                                           facet="none", 
                                           free_y = TRUE,
                                           upstream = NULL,
                                           downstream = NULL,
                                           label,
                                           ...) {
  
  listFlag <- FALSE
  if (is(tagMatrix[[1]][[1]],"matrix")) {
    if ( is.null(names(tagMatrix)) ) {
      nn <- paste0("peak", seq_along(tagMatrix))
      warning("input is not a named list, set the name automatically to ", paste(nn, collapse=' '))
      names(tagMatrix) <- nn
      ## stop("tagMatrix should be a named list...")
    }
    listFlag <- TRUE
  }
  
  if(listFlag){
    nbin <- dim(tagMatrix[[1]][[1]])[2]
    type <- attr(tagMatrix[[1]][[1]], 'type')
  }else{
    nbin <- dim(tagMatrix[[1]])[2]
    type <- attr(tagMatrix[[1]], 'type')
  }
  xlim <- c(1,nbin)
  
  if ( listFlag ) {
    facet <- match.arg(facet, c("none", "row", "column"))
  }
  
  ## S4Vectors change the behavior of ifelse
  ## see https://support.bioconductor.org/p/70871/
  ##
  ## conf <- ifelse(missingArg(conf), NA, conf)
  ##
  conf <- if(missingArg(conf)) NA else conf
  
  pos <- value <- .id <- Lower <- Upper <- NULL
  
  if ( listFlag ) {
    
    tagCount <- lapply(as.list(names(tagMatrix)), function(x){
      
      tmp <- tagMatrix[[x]]
      tagCount_tmp <- lapply(as.list(names(tmp)),function(x){
        result <- getTagCount(tmp[[x]], xlim = xlim, conf = conf, ...)
        result$type <- x
        
        return(result)
      })
      tagCount_tmp <- list_to_dataframe(tagCount_tmp)
      return(tagCount_tmp)
      
    })
    
    names(tagCount) <- names(tagMatrix)
    tagCount <- list_to_dataframe(tagCount)
    tagCount$.id <- factor(tagCount$.id, levels=names(tagMatrix))
    p <- ggplot(tagCount, aes(pos, group=type, color=type))
    if (!(is.na(conf))) {
      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper, fill = type),
                           linetype = 0, alpha = 0.2)
    }
    
  } else {
    
    tagCount <- lapply(as.list(names(tagMatrix)), function(x){
      
      result <- getTagCount(tagMatrix[[x]], xlim = xlim, conf = conf, ...)
      result$type <- x
      
      return(result)
    })
    
    tagCount <- do.call("rbind",tagCount)
    
    p <- ggplot(tagCount, aes(pos,group=type,color=type))
    if (!(is.na(conf))) {
      p <- p + geom_ribbon(aes(ymin = Lower, ymax = Upper,fill = type),
                           linetype = 0, alpha = 0.2)
    }
  }
  
  p <- p + geom_line(aes(y = value,color = type))
  
  ## x_scale for genebody
  if(type == 'body'){
    ## x_scale for gene body with no flank extension
    if(is.null(upstream)){
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*0.25),
                                           floor(nbin*0.5),
                                           floor(nbin*0.75),
                                           nbin),
                                  labels=c(label[1], 
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2]))
    }
    
    
    ## x_scale for flank extension by relative value
    if(inherits(upstream, 'rel')){
      
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+25)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+50)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+75)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           nbin),
                                  labels=c(paste0("-",as.numeric(upstream)*100,"%"), 
                                           label[1],
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2],
                                           paste0("+",as.numeric(downstream)*100,"%")))
      p <- p + geom_vline(xintercept=floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                          linetype="longdash")
      
      p <- p + geom_vline(xintercept=floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                          linetype="longdash")
    }
    
    ## x_scale for flank extension by absolute value
    if(!is.null(upstream) & !inherits(upstream, 'rel')){
      
      upstreamPer <- floor(upstream/1000)*0.1
      downstreamPer <- floor(downstream/1000)*0.1
      
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.25)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.5)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.75)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),
                                           nbin),
                                  labels=c(paste0("-",upstream,"bp"), 
                                           label[1],
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2],
                                           paste0(downstream,"bp")))
      p <- p + geom_vline(xintercept=floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),
                          linetype="longdash")
      
      p <- p + geom_vline(xintercept=floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),
                          linetype="longdash")
    }
  }
  
  
  ## x_scale for start region
  if(type != 'body'){
    
    p <- p + scale_x_continuous(breaks=c(1, 
                                         floor(nbin*0.25),
                                         floor(nbin*0.5),
                                         floor(nbin*0.75),
                                         nbin),
                                labels=c(paste0("-",upstream,"bp"), 
                                         paste0("-",floor(upstream*0.5),"bp"),
                                         label,
                                         paste0(floor(downstream*0.5),"bp"),
                                         paste0(downstream,"bp")))
    
    p <- p + geom_vline(xintercept=floor(nbin*0.5),
                        linetype="longdash")
  }
  
  
  if (listFlag) {
    
    if (facet == "row") {
      if (free_y) {
        p <- p + facet_grid(.id ~ ., scales = "free_y")
      } else {
        p <- p + facet_grid(.id ~ .)
      }
    } else if (facet == "column") {
      if (free_y) {
        p <-  p + facet_grid(. ~ .id, scales = "free_y")
      } else {
        p <-  p + facet_grid(. ~ .id)
      }
    }
  }
  p <- p+xlab(xlab)+ylab(ylab)
  p <- p + theme_bw() + theme(legend.title=element_blank())
  # if(facet != "none") {
  #   p <- p + theme(legend.position="none")
  # }
  return(p)
}


##' plot the heatmap of tagMatrix
##'
##'
##' @title tagHeatmap
##' @param tagMatrix tagMatrix or a list of tagMatrix
##' @param xlab xlab
##' @param ylab ylab
##' @param title title
##' @param palette palette to be filled in,details see \link[ggplot2]{scale_colour_brewer}
##' @param nrow the nrow of plotting a list of peak
##' @param ncol the ncol of plotting a list of peak
##' @return figure
##' @export
##' @author G Yu
tagHeatmap <- function(tagMatrix, 
                       xlab="", 
                       ylab="", 
                       title=NULL, 
                       palette="RdBu",
                       nrow = NULL,
                       ncol = NULL) {
  listFlag <- FALSE
  if (is(tagMatrix, "list")) {
    listFlag <- TRUE
  }
  peakHeatmap.internal2(tagMatrix = tagMatrix, 
                        listFlag = listFlag, 
                        palette = palette, 
                        xlab = xlab, 
                        ylab = ylab, 
                        title = title,
                        ncol = ncol,
                        nrow = nrow)
}

##' plot the heatmap of peaks 
##'
##'
##' @title peakHeatmap
##' @param peak peak file or GRanges object
##' @param weightCol column name of weight
##' @param TxDb TxDb object
##' @param upstream upstream position
##' @param downstream downstream position
##' @param xlab xlab
##' @param ylab ylab
##' @param title title
##' @param palette palette to be filled in,details see \link[ggplot2]{scale_colour_brewer}
##' @param verbose print message or not
##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'
##' @param type one of "start_site", "end_site", "body"
##' @param nbin the amount of nbines 
##' @param ignore_strand ignore the strand information or not
##' @param windows a collection of region
##' @param nrow the nrow of plotting a list of peak
##' @param ncol the ncol of plotting a list of peak
##' @return figure
##' @export
##' @author G Yu
peakHeatmap <- function(peak, weightCol=NULL, TxDb=NULL,
                        upstream=1000, downstream=1000,
                        xlab="", ylab="", title=NULL,
                        palette=NULL, verbose=TRUE,
                        by="gene", type="start_site",
                        nbin = NULL,ignore_strand = FALSE,
                        windows,ncol = NULL, nrow = NULL) {
  listFlag <- FALSE
  if ( is(peak, "list") ) {
    listFlag <- TRUE
    if (is.null(names(peak)))
      stop("peak should be a peak file or a name list of peak files...")
  }
  
  if (verbose) {
    cat(">> preparing promoter regions...\t",
        format(Sys.time(), "%Y-%m-%d %X"), "\n")
  }
  
  if (verbose) {
    cat(">> preparing tag matrix...\t\t",
        format(Sys.time(), "%Y-%m-%d %X"), "\n")
  }
  
  if(missing(windows)){
    windows <- getBioRegion(TxDb=TxDb,
                            upstream=upstream,
                            downstream=downstream,
                            by=by,
                            type=type)
  }
  
  
  if (listFlag) {
    tagMatrix <- lapply(peak, getTagMatrix, 
                        weightCol=weightCol, 
                        windows = windows,
                        upstream=upstream,
                        downstream=downstream,
                        TxDb = TxDb,
                        nbin = nbin,
                        verbose = verbose,
                        ignore_strand= ignore_strand)
    
    names(tagMatrix) <- names(peak)
    
  } else {
    tagMatrix <- getTagMatrix(peak, 
                              weightCol=weightCol, 
                              windows = windows,
                              TxDb = TxDb,
                              upstream=upstream,
                              downstream=downstream,
                              nbin = nbin,
                              verbose = verbose,
                              ignore_strand= ignore_strand)
  }
  
  if (verbose) {
    cat(">> generating figure...\t\t",
        format(Sys.time(), "%Y-%m-%d %X"), "\n")
  }
  
  xlim <- NULL
  
  p <- peakHeatmap.internal2(tagMatrix = tagMatrix,
                             listFlag = listFlag, 
                             palette = palette, 
                             xlab = xlab,
                             ylab = ylab, 
                             title = title,
                             nrow = nrow,
                             ncol = ncol)
  
  if (verbose) {
    cat(">> done...\t\t\t",
        format(Sys.time(), "%Y-%m-%d %X"), "\n")
  }
  invisible(tagMatrix)
  p
}

##' @importFrom aplot plot_list
peakHeatmap.internal2 <- function(tagMatrix, 
                                  listFlag, 
                                  palette, 
                                  xlab, 
                                  ylab, 
                                  title,
                                  nrow,
                                  ncol) {
  if ( is.null(xlab) || is.na(xlab))
    xlab <- ""
  if ( is.null(ylab) || is.na(ylab))
    ylab <- ""
  
  if (listFlag) {
    nc <- length(tagMatrix)
    if ( is.null(palette) || is.na(palette) ) {
      palette <- getPalette(nc)
    } else if (length(palette) != nc) {
      palette <- rep(palette[1], nc)
    } else {
      palette <- palette
    }
    
    if (is.null(title) || is.na(title))
      title <- names(tagMatrix)
    if (length(xlab) != nc) {
      xlab <- rep(xlab[1], nc)
    }
    if (length(ylab) != nc) {
      ylab <- rep(ylab[1], nc)
    }
    if (length(title) != nc) {
      title <- rep(title[1], nc)
    }
    
    tmp <- list()
    
    for (i in 1:nc) {
      
      p <- peakHeatmap.internal(tagMatrix = tagMatrix[[i]], 
                                palette = palette[i], 
                                xlab = xlab[i], 
                                ylab = ylab[i], 
                                title= title[i])
      
      p <- p + theme(plot.title = element_text(hjust = 0.5))
      
      tmp[[i]] <- p
    }
    
    if(is.null(nrow) && is.null(ncol))
      nrow <- 1
    
    p <- plot_list(gglist = tmp,
                   ncol = ncol,
                   nrow = nrow)
    return(p)
    
  } else {
    if (is.null(palette) || is.na(palette))
      palette <- "RdBu"
    if (is.null(title) || is.na(title))
      title <- ""
    peakHeatmap.internal(tagMatrix = tagMatrix, 
                         palette = palette, 
                         xlab = xlab, 
                         ylab = ylab, 
                         title = title)
  }
}


##' @import BiocGenerics
##' @importFrom yulab.utils mat2df
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 aes
##' @importFrom ggplot2 geom_tile
##' @importFrom ggplot2 scale_fill_distiller
##' @importFrom ggplot2 theme
##' @importFrom ggplot2 element_blank
##' @importFrom ggplot2 labs
##' @importFrom ggplot2 scale_x_continuous
peakHeatmap.internal <- function(tagMatrix, 
                                 palette="RdBu", 
                                 xlab="", 
                                 ylab="",
                                 title="") {
  
  upstream <- attr(tagMatrix, "upstream")
  downstream <- attr(tagMatrix, "downstream")
  binning_Flag <- attr(tagMatrix,"is.binning")
  type <- attr(tagMatrix,"type")
  
  body_Flag <- FALSE
  if(type == "body"){
    body_Flag <- TRUE
    label <- attr(tagMatrix,"label")
  }
  
  if(binning_Flag){
    nbin <- dim(tagMatrix)[2]
  }
  
  tagMatrix <- t(apply(tagMatrix, 1, function(x) x/max(x)))
  ii <- order(rowSums(tagMatrix))
  tagMatrix <- tagMatrix[ii,]
  
  colnames(tagMatrix) <- seq_len(dim(tagMatrix)[2])
  rownames(tagMatrix) <- seq_len(dim(tagMatrix)[1])
  
  tagMatrix <- mat2df(tagMatrix)
  colnames(tagMatrix) <- c("values","sample_ID","coordinate")

  sample_ID <- coordinate <- NULL
  
  p <- ggplot(tagMatrix, aes(x = coordinate,y = sample_ID)) + 
    geom_tile(aes(fill = values)) +
    scale_fill_distiller(palette = palette)  +
    theme(axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          axis.line.y = element_blank(),
          panel.grid=element_blank(),
          panel.background = element_blank()) +
    labs(x = xlab, y = ylab, title = title)

  if(body_Flag){
    
    if(inherits(upstream, 'rel')){
      
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+25)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+50)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+75)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           nbin),
                                  labels=c(paste0("-",as.numeric(upstream)*100,"%"), 
                                           label[1],
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2],
                                           paste0("+",as.numeric(downstream)*100,"%")))
    }
    
    if(is.null(upstream)){
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*0.25),
                                           floor(nbin*0.5),
                                           floor(nbin*0.75),
                                           nbin),
                                  labels=c(label[1], 
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2]))
    }
    
    if(!is.null(upstream) && !inherits(upstream, 'rel')){
      
      upstreamPer <- floor(upstream/1000)*0.1
      downstreamPer <- floor(downstream/1000)*0.1
      
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.25)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.5)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.75)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),
                                           nbin),
                                  labels=c(paste0("-",upstream,"bp"), 
                                           label[1],
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2],
                                           paste0(downstream,"bp")))
    }
    
    p <- p + scale_y_continuous(expand = c(0,0))
    return(p)
    
  }
  
  if(binning_Flag){
    
    p <- p + scale_x_continuous(breaks = c(1,
                                           floor(nbin*(downstream*0.5/(downstream+upstream))),
                                           floor(nbin*(downstream/(downstream+upstream))),
                                           floor(nbin*((downstream + upstream*0.5)/(downstream+upstream))),
                                           nbin),
                                labels = c((-1*downstream),
                                           floor(-1*downstream*0.5),
                                           0,
                                           floor(upstream*0.5),
                                           upstream))
  }else{
    
    p <- p + scale_x_continuous(labels = function(x) x - upstream)    
    
  }
  
  p <- p + scale_y_continuous(expand = c(0,0))
  
  p
}

##' plot the heatmap of peaks align to a sets of regions
##'
##'
##' @title peakHeatmap
##' @param peak peak file or GRanges object
##' @param weightCol column name of weight
##' @param TxDb TxDb object
##' @param upstream upstream position
##' @param downstream downstream position
##' @param xlab xlab
##' @param ylab ylab
##' @param title title
##' @param palette palette to be filled in,details see \link[ggplot2]{scale_colour_brewer}
##' @param verbose print message or not
##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'
##' @param type one of "start_site", "end_site", "body"
##' @param nbin the amount of nbines 
##' @param ignore_strand ignore the strand information or not
##' @param windows_name the name for each window, which will also be showed in the picture as labels
##' @param nrow the nrow of plotting a list of peak
##' @param ncol the ncol of plotting a list of peak
##' @param facet_label_text_size the size of facet label text
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 aes
##' @importFrom ggplot2 geom_tile
##' @importFrom ggplot2 scale_fill_distiller
##' @importFrom ggplot2 theme
##' @importFrom ggplot2 element_blank
##' @importFrom ggplot2 labs
##' @importFrom ggplot2 scale_x_continuous
##' @return figure
##' @export
peakHeatmap_multiple_Sets <- function(peak, 
                                      weightCol=NULL,
                                      TxDb=NULL,
                                      upstream=1000, 
                                      downstream=1000,
                                      xlab="", 
                                      ylab="", 
                                      title=NULL,
                                      palette=NULL, 
                                      verbose=TRUE,
                                      by="gene", 
                                      type="start_site",
                                      nbin = NULL,
                                      ignore_strand = FALSE,
                                      windows_name = NULL,
                                      ncol = NULL,
                                      nrow = NULL,
                                      facet_label_text_size = 12){
  listFlag <- FALSE
  if ( is(peak, "list") ) {
    listFlag <- TRUE
    if (is.null(names(peak)))
      stop("peak should be a peak file or a name list of peak files...")
  }
  
  if (verbose) {
    cat(">> preparing promoter regions...\t",
        format(Sys.time(), "%Y-%m-%d %X"), "\n")
  }
  
  
  ## check type
  if(length(type) != 1){
    stop("It should be only one type...")
  }
  
  if(is.null(windows_name) && !is.null(names(TxDb)))
    windows_name <- names(TxDb)
  
  ## make the window name
  if (is.null(windows_name)) {
    nn <- by
    warning("set the name automatically to ", paste(nn, collapse=' '))
    windows_name <- nn
  }else{
    if (length(windows_name) != length(by)) {
      stop("the length of the window name and the by should be equal...")
    }
  }
  
  if ( is(peak, "list") ) {
    tagMatrix <- lapply(peak, getTagMatrix2,
                        upstream=upstream,
                        downstream=downstream,
                        windows_name=windows_name,
                        type=type,
                        by=by,
                        TxDb=TxDb,
                        weightCol = weightCol, 
                        nbin = nbin,
                        verbose = verbose,
                        ignore_strand= ignore_strand)
  } else {
    tagMatrix <- getTagMatrix2(peak=peak, 
                               upstream=upstream,
                               downstream=downstream,
                               windows_name=windows_name,
                               type=type,
                               by=by,
                               TxDb=TxDb,
                               weightCol = weightCol, 
                               nbin = nbin,
                               verbose = verbose,
                               ignore_strand= ignore_strand)
  }
  
  if(listFlag){
    
    nc <- length(tagMatrix)
    if ( is.null(palette) || is.na(palette) ) {
      palette <- getPalette(nc)
    } else if (length(palette) != nc) {
      palette <- rep(palette[1], nc)
    } else {
      palette <- palette
    }
    
    if (is.null(title) || is.na(title))
      title <- names(tagMatrix)
    if (length(xlab) != nc) {
      xlab <- rep(xlab[1], nc)
    }
    if (length(ylab) != nc) {
      ylab <- rep(ylab[1], nc)
    }
    if (length(title) != nc) {
      title <- rep(title[1], nc)
    }
    
    tmp <- list()
    
    for (i in 1:nc) {
      
      p <- peakHeatmap_multiple_Sets.internal(tagMatrix = tagMatrix[[i]],
                                              upstream=upstream, 
                                              downstream=downstream,
                                              xlab=xlab[[i]], 
                                              ylab=ylab[[i]], 
                                              title=title[[i]],
                                              palette=palette[[i]], 
                                              ncol = ncol,
                                              nrow = nrow,
                                              facet_label_text_size = facet_label_text_size)
      
      p <- p + theme(plot.title = element_text(hjust = 0.5))
      
      tmp[[i]] <- p
    }
    
    if(is.null(nrow) && is.null(ncol))
      nrow <- 1
    
    p <- plot_list(gglist = tmp,
                   ncol = ncol,
                   nrow = nrow)
    
  }else{
    
    if (is.null(palette) || is.na(palette))
      palette <- "RdBu"
    if (is.null(title) || is.na(title))
      title <- ""
    
    p <- peakHeatmap_multiple_Sets.internal(tagMatrix = tagMatrix,
                                            upstream=upstream, 
                                            downstream=downstream,
                                            xlab=xlab, 
                                            ylab=ylab, 
                                            title=title,
                                            palette=palette, 
                                            ncol = ncol,
                                            nrow = nrow,
                                            facet_label_text_size = facet_label_text_size)
    
  }
  
  return(p)
  
}


##' @importFrom yulab.utils mat2df
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 aes
##' @importFrom ggplot2 geom_tile
##' @importFrom ggplot2 scale_fill_distiller
##' @importFrom ggplot2 theme
##' @importFrom ggplot2 element_blank
##' @importFrom ggplot2 labs
##' @importFrom ggplot2 scale_x_continuous
##' @importFrom ggplot2 facet_grid
##' @importFrom ggplot2 element_text
##' @importFrom ggplot2 element_blank
peakHeatmap_multiple_Sets.internal <- function(tagMatrix,
                                               upstream=1000, 
                                               downstream=1000,
                                               xlab="", 
                                               ylab="", 
                                               title=NULL,
                                               palette=NULL, 
                                               ncol = NULL,
                                               nrow = NULL,
                                               facet_label_text_size = 12){

  binning_Flag <- attr(tagMatrix[[1]],"is.binning")
  if(binning_Flag) nbin <- dim(tagMatrix[[1]])[2]
  
  type <- attr(tagMatrix,"type")
  body_Flag <- FALSE
  if(attr(tagMatrix[[1]],"type") == "body"){
    body_Flag <- TRUE
    label <- attr(tagMatrix,"label")
  }
  
  name_of_list <- as.list(names(tagMatrix))
  
  peak_list <- lapply(name_of_list,function(x){
    
    tagMatrix[[x]] <- t(apply(tagMatrix[[x]], 1, function(x) x/max(x)))
    ii <- order(rowSums(tagMatrix[[x]]))
    tagMatrix[[x]] <- tagMatrix[[x]][ii,]
    
    colnames(tagMatrix[[x]]) <- seq_len(dim(tagMatrix[[x]])[2])
    rownames(tagMatrix[[x]]) <- seq_len(dim(tagMatrix[[x]])[1])
    
    tagMatrix[[x]] <- mat2df(tagMatrix[[x]])
    colnames(tagMatrix[[x]]) <- c("values","sample_ID","coordinate")
    
    tagMatrix[[x]]$sample <- x
    return(tagMatrix[[x]])
  })
  
  peak_df <- list_to_dataframe(peak_list)
  
  sample_ID <- coordinate <- NULL
  
  p <- ggplot(peak_df, aes(x = coordinate,y = sample_ID)) + 
    geom_tile(aes(fill = values)) +
    scale_fill_distiller(palette = palette)  +
    theme(axis.text.y=element_blank(),
          axis.ticks.y=element_blank(),
          axis.line.y = element_blank(),
          panel.grid=element_blank(),
          panel.background = element_blank()) +
    labs(x = xlab, y = ylab, title = title)
  
  if(body_Flag){
    
    if(inherits(upstream, 'rel')){
      
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*(as.numeric(upstream)*100/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+25)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+50)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+75)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           floor(nbin*((as.numeric(upstream)*100+100)/(100+(as.numeric(upstream)+as.numeric(downstream))*100))),
                                           nbin),
                                  labels=c(paste0("-",as.numeric(upstream)*100,"%"), 
                                           label[1],
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2],
                                           paste0("+",as.numeric(downstream)*100,"%")))
    }
    
    if(is.null(upstream)){
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*0.25),
                                           floor(nbin*0.5),
                                           floor(nbin*0.75),
                                           nbin),
                                  labels=c(label[1], 
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2]))
    }
    
    if(!is.null(upstream) && !inherits(upstream, 'rel')){
      
      upstreamPer <- floor(upstream/1000)*0.1
      downstreamPer <- floor(downstream/1000)*0.1
      
      p <- p + scale_x_continuous(breaks=c(1, 
                                           floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.25)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.5)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+0.75)/(1+upstreamPer+downstreamPer))),
                                           floor(nbin*((upstreamPer+1)/(1+upstreamPer+downstreamPer))),
                                           nbin),
                                  labels=c(paste0("-",upstream,"bp"), 
                                           label[1],
                                           "25%",
                                           "50%",
                                           "75%",
                                           label[2],
                                           paste0(downstream,"bp")))
    }
    
    p <-  p + facet_grid(sample ~ .,switch = "y",space = "free_y",scales = "free_y") +
      theme(strip.text.y.left = element_text(color = "black",face = "bold",
                                             size = facet_label_text_size),
            strip.background = element_blank())
    
    return(p)
    
  }
  
  if(binning_Flag){
    
    p <- p + scale_x_continuous(breaks = c(1,
                                           floor(nbin*(downstream*0.5/(downstream+upstream))),
                                           floor(nbin*(downstream/(downstream+upstream))),
                                           floor(nbin*((downstream + upstream*0.5)/(downstream+upstream))),
                                           nbin),
                                labels = c((-1*downstream),
                                           floor(-1*downstream*0.5),
                                           0,
                                           floor(upstream*0.5),
                                           upstream))
  }else{
    
    p <- p + scale_x_continuous(breaks = c(1,
                                           floor(downstream*0.5),
                                           (downstream + 1),
                                           (downstream + 1 + floor(upstream * 0.5)), 
                                           upstream+downstream+1),
                                labels = c((-1*downstream),
                                           floor(-1*downstream*0.5),
                                           0,
                                           floor(upstream*0.5),
                                           upstream))    
    
  }
  
  p <-  p + facet_grid(sample ~ .,switch = "y",scales = "free_y",space = "free") +
    theme(strip.text.y.left = element_text(color = "black",face = "bold",
                                           size = facet_label_text_size),
          strip.background = element_blank()) +
    scale_y_continuous(expand = c(0,0))
  
  return(p)
  
}


##' plot peak heatmap and profile in a picture
##' 
##' 
##' @title peak_Profile_Heatmap
##' @param peak peak file or GRanges object
##' @param weightCol column name of weight
##' @param TxDb TxDb object
##' @param upstream upstream position
##' @param downstream downstream position
##' @param xlab xlab
##' @param ylab ylab
##' @param title title
##' @param palette palette to be filled in,details see \link[ggplot2]{scale_colour_brewer}
##' @param verbose print message or not
##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'
##' @param type one of "start_site", "end_site", "body"
##' @param nbin the amount of nbines 
##' @param ignore_strand ignore the strand information or not
##' @param windows_name the name for each window, which will also be showed in the picture as labels
##' @param nrow the nrow of plotting a list of peak
##' @param ncol the ncol of plotting a list of peak
##' @param facet_label_text_size the size of facet label text
##' @param conf confidence interval
##' @param facet one of 'none', 'row' and 'column'
##' @param free_y if TRUE, y will be scaled by AvgProf
##' @param height_proportion the proportion of profiling picture and heatmap
##' @importFrom aplot insert_bottom
##' @importFrom aplot plot_list
##' @export
peak_Profile_Heatmap <- function(peak, 
                                 weightCol=NULL,
                                 TxDb=NULL,
                                 upstream=1000, 
                                 downstream=1000,
                                 xlab="", 
                                 ylab="", 
                                 title=NULL,
                                 palette=NULL, 
                                 verbose=TRUE,
                                 by="gene", 
                                 type="start_site",
                                 nbin = NULL,
                                 ignore_strand = FALSE,
                                 windows_name = NULL,
                                 ncol = NULL,
                                 nrow = NULL,
                                 facet_label_text_size = 12,
                                 conf,
                                 facet = "row",
                                 free_y = TRUE,
                                 height_proportion = 4){
  
  conf <- if(missingArg(conf)) NA else conf
  
  if(is(peak, "list")){
    
    nc <- length(peak)
    
    tmp <- list()
    
    if ( is.null(names(peak)) ) {
      nn <- paste0("peak", seq_along(peak))
      warning("input is not a named list, set the name automatically to ", paste(nn, collapse=' '))
      names(peak) <- nn
      ## stop("tagMatrix should be a named list...")
    }
    
    if(is.null(palette)) palette <- getPalette(nc)
    
    if(is.null(title)) title_of_plot <- names(peak)
    
    for (i in 1:nc) {
      peak_profile <- plotPeakProf(peak = peak[[i]],
                                   upstream = upstream,
                                   downstream = downstream,
                                   conf = conf,
                                   by = by,
                                   type = type,
                                   windows_name = windows_name,
                                   weightCol = weightCol,
                                   TxDb = TxDb,
                                   xlab = xlab,
                                   ylab = ylab,
                                   facet = facet,
                                   free_y = free_y,
                                   verbose = verbose,
                                   nbin = nbin,
                                   ignore_strand = ignore_strand)
      
      peak_profile <- peak_profile + labs(title = title_of_plot[i]) +
        theme(plot.title = element_text(hjust = 0.5))
      
      if(length(by) != 1){
        peak_heatmap <- peakHeatmap_multiple_Sets(peak = peak[[i]], 
                                                  weightCol=weightCol,
                                                  TxDb=TxDb,
                                                  upstream=upstream, 
                                                  downstream=downstream,
                                                  xlab=xlab, 
                                                  ylab=ylab, 
                                                  title=title,
                                                  palette=palette[[i]], 
                                                  verbose=verbose,
                                                  by=by, 
                                                  type=type,
                                                  nbin = nbin,
                                                  ignore_strand = ignore_strand,
                                                  windows_name = windows_name,
                                                  ncol = ncol,
                                                  nrow = nrow,
                                                  facet_label_text_size = facet_label_text_size)
      }else{
        
        peak_heatmap <- peakHeatmap(peak[[i]], 
                                    weightCol=weightCol, 
                                    TxDb=TxDb,
                                    upstream=upstream, 
                                    downstream=downstream,
                                    xlab=xlab, 
                                    ylab=ylab, 
                                    title=title,
                                    palette=palette[[i]], 
                                    verbose=verbose,
                                    by=by, 
                                    type=type,
                                    nbin = nbin,
                                    ignore_strand = ignore_strand,
                                    ncol = ncol,
                                    nrow = nrow)
        
      }
      
      p <- peak_profile %>% 
        insert_bottom(peak_heatmap,height = height_proportion)
      
      tmp[[i]] <- p
    }
    
    if (is.null(ncol) && is.null(nrow))
      nrow <- 1
    
    p <- plot_list(gglist = tmp,
                   ncol = ncol,
                   nrow = nrow)
    
    return(p)
    
  }
  
  peak_profile <- plotPeakProf(peak = peak,
                               upstream = upstream,
                               downstream = downstream,
                               conf = conf,
                               by = by,
                               type = type,
                               windows_name = windows_name,
                               weightCol = weightCol,
                               TxDb = TxDb,
                               xlab = xlab,
                               ylab = ylab,
                               facet = facet,
                               free_y = free_y,
                               verbose = verbose,
                               nbin = nbin,
                               ignore_strand = ignore_strand)
  
  
  if(length(by) != 1){
    peak_heatmap <- peakHeatmap_multiple_Sets(peak = peak, 
                                              weightCol=weightCol,
                                              TxDb=TxDb,
                                              upstream=upstream, 
                                              downstream=downstream,
                                              xlab=xlab, 
                                              ylab=ylab, 
                                              title=title,
                                              palette=palette, 
                                              verbose=verbose,
                                              by=by, 
                                              type=type,
                                              nbin = nbin,
                                              ignore_strand = ignore_strand,
                                              windows_name = windows_name,
                                              ncol = ncol,
                                              nrow = nrow,
                                              facet_label_text_size = facet_label_text_size)
  }else{
    
    peak_heatmap <- peakHeatmap(peak = peak, 
                                weightCol=weightCol, 
                                TxDb=TxDb,
                                upstream=upstream, 
                                downstream=downstream,
                                xlab=xlab, 
                                ylab=ylab, 
                                title=title,
                                palette=palette, 
                                verbose=verbose,
                                by=by, 
                                type=type,
                                nbin = nbin,
                                ignore_strand = ignore_strand,
                                ncol = ncol,
                                nrow = nrow)
    
  }
  
  p <- peak_profile %>% 
    insert_bottom(peak_heatmap,height = height_proportion)
  

  return(p)
}

================================================
FILE: R/readPeakFile.R
================================================
##' read peak file and store in data.frame or GRanges object
##'
##' 
##' @title readPeakFile
##' @param peakfile peak file
##' @param as output format, one of GRanges or data.frame
##' @param ... additional parameter (pass to `utils::read.delim()`)
##' @return peak information, in GRanges or data.frame object
##' @import IRanges GenomicRanges
##' @export
##' @examples
##' peakfile <- system.file("extdata", "sample_peaks.txt", package="ChIPseeker")
##' peak.gr <- readPeakFile(peakfile, as="GRanges")
##' peak.gr
##' @author G Yu
readPeakFile <- function(peakfile, as="GRanges", ...) {
    as <- match.arg(as, c("GRanges", "data.frame"))
    peak.df <- peak2DF(peakfile, ...)
    if (as == "data.frame")
        return(peak.df)
    peak.gr <- peakDF2GRanges(peak.df)
    return(peak.gr)
}

peakDF2GRanges <- function(peak.df) {
    peak.gr=GRanges(seqnames=peak.df[,1],
        ranges=IRanges(peak.df[,2], peak.df[,3]))
    cn <- colnames(peak.df)
    if (length(cn) > 3) {
        for (i in 4:length(cn)) {
            mcols(peak.gr)[[cn[i]]] <- peak.df[, cn[i]]
        }
    }
    return(peak.gr)
}

##' @importFrom utils read.delim
peak2DF <- function(peakfile, header, ...) {
    if (missing(header)) {
        ## determine file format
        if (isBedFile(peakfile)) {
            header <- FALSE
        } else {
            header <- TRUE
        }
    }
    peak.df <- read.delim(peakfile, header=header, comment.char="#", ...)
    ## coordinate system in BED file is start at 0
    ## refer to http://asia.ensembl.org/info/website/upload/bed.html?redirect=no
    ## The chromEnd base is not included in the display of the feature.
    ## For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100,
    ## and span the bases numbered 0-99.
    ## so chromEnd, peak.df[,3], is not needed to +1
    peak.df[,2] <- peak.df[,2] + 1
    return(peak.df)
}

isBedFile <- function(peakfile) {
    ## peakfile is a peak file name
    grepl("\\.bed$", peakfile) || grepl("\\.bed.gz$", peakfile) || 
    grepl("\\Peak.gz$", peakfile) || grepl("\\.bedGraph.gz$", peakfile) || 
    grepl("\\.narrowPeak$", peakfile) || grepl("\\.broadPeak$",peakfile) ||
    grepl("\\.gappedPeak$", peakfile)
}


================================================
FILE: R/seq2gene.R
================================================
##' annotate genomic regions to genes in many-to-many mapping
##'
##' This funciton associates genomic regions with coding genes in a many-to-many mapping. It first maps genomic regions to host genes (either located in exon or intron), proximal genes (located in promoter regions) and flanking genes (located in upstream and downstream within user specify distance).
##' @title seq2gene
##' @param seq genomic regions in GRanges object
##' @param tssRegion TSS region
##' @param flankDistance flanking search radius
##' @param TxDb TranscriptDb object
##' @param sameStrand logical whether find nearest/overlap gene in the same strand
##' @return gene vector
##' @export
##' @examples
##' \dontrun{
##' library(TxDb.Hsapiens.UCSC.hg19.knownGene)
##' TxDb <- TxDb.Hsapiens.UCSC.hg19.knownGene
##' file <- getSampleFiles()[[1]] # a bed file
##' gr <- readPeakFile(file)
##' genes <- seq2gene(gr, tssRegion=c(-1000, 1000), flankDistance = 3000, TxDb) 
##' }
##' @importFrom yulab.utils get_cache_element
##' @importFrom yulab.utils update_cache_item
##' @author Guangchuang Yu
seq2gene <- function(seq, tssRegion, flankDistance, TxDb, sameStrand=FALSE) {
    .ChIPseekerEnv(TxDb, item = ChIPseekerCache)
    # ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)
    
    ## Exons
    exonList <- get_cache_element(item = ChIPseekerCache, elements = "exonList")
    if(is.null(exonList)){
        exonList <- exonsBy(TxDb)
        update_cache_item(item = ChIPseekerCache, list("exonList" = exonList))
    }

    # if ( exists("exonList", envir=ChIPseekerEnv, inherits=FALSE) ) {
    #     exonList <- get("exonList", envir=ChIPseekerEnv)
    # } else {
    #     exonList <- exonsBy(TxDb)
    #     assign("exonList", exonList, envir=ChIPseekerEnv)
    # }
    exons <- getGenomicAnnotation.internal(seq, exonList, type = "Exon", sameStrand=sameStrand)
    
    ## Introns
    intronList <- get_cache_element(item = ChIPseekerCache, elements = "intronList")

    if(is.null(intronList)){
        intronList <- intronsByTranscript(TxDb)
        update_cache_item(item = ChIPseekerCache, list("intronList" = intronList))
    }

    # if ( exists("intronList", envir=ChIPseekerEnv, inherits=FALSE) ) {
    #     intronList <- get("intronList", envir=ChIPseekerEnv)
    # } else {
    #     intronList <- intronsByTranscript(TxDb)
    #     assign("intronList", intronList, envir=ChIPseekerEnv)
    # }
    introns <- getGenomicAnnotation.internal(seq, intronList, type="Intron", sameStrand=sameStrand)
    
    genes <- c(exons$gene, introns$gene)
    ## > head(genes)
    ## [1] "uc001aed.3/126789"    "uc001aka.3/440556"    "uc001ako.3/49856"    
    ## [4] "uc001alg.3/100133612" "uc009vly.2/390992"    "uc001awv.2/79814"   
    genes <- gsub("\\w+\\.*\\d*/(\\d+)", "\\1", genes)
    ## > head(genes)
    ## [1] "126789"    "440556"    "49856"     "100133612" "390992"    "79814"   

    features <- getGene(TxDb, by="gene")
    idx.dist <- getNearestFeatureIndicesAndDistances(seq, features, sameStrand=sameStrand)
    nearestFeatures <- features[idx.dist$index] 
    
    distance <- idx.dist$distance

    pi <- distance > tssRegion[1] & distance < tssRegion[2]
    promoters <- mcols(nearestFeatures[pi])[["gene_id"]]

    nearest_genes <- mcols(nearestFeatures[!pi][abs(distance[!pi]) < flankDistance])[["gene_id"]]

    genes <- c(genes, promoters, nearest_genes)
    return(unique(genes))
}


================================================
FILE: R/subset.R
================================================
##' @importFrom S4Vectors subset
##' @importFrom BiocGenerics start
##' @importFrom BiocGenerics end
##' @method subset csAnno
##' @export
subset.csAnno <- function(x, ... ){
  
  index <- paste(seqnames(x@anno),start(x@anno),end(x@anno), sep = "_")
  # subset the GRanges
  x@anno <- subset(x@anno, ...)
  index2 <- paste(seqnames(x@anno),start(x@anno),end(x@anno), sep = "_")
  
  # the tssRgion, level, hsaGenomicAnnotation keep unchanged
  
  # change the detailGenomicAnnotation
  x@detailGenomicAnnotation <- x@detailGenomicAnnotation[index %in% index2,]
  
  # change the annotation stat 
  x@annoStat <- getGenomicAnnoStat(x@anno)
  
  # change peak number
  x@peakNum <-  length(x@anno)
  
  return(x)
  
}


================================================
FILE: R/tagMatrix.R
================================================
##' prepare the promoter regions
##'
##'
##' @title getPromoters
##' @param TxDb TxDb
##' @param upstream upstream from TSS site
##' @param downstream downstream from TSS site
##' @param by one of gene or transcript
##' @return GRanges object
##' @export
getPromoters <- function(TxDb=NULL,
                         upstream=1000,
                         downstream=1000,
                         by = "gene") {
  
  getBioRegion(TxDb = TxDb,
               upstream = upstream,
               downstream = downstream,
               by = by,
               type = "start_site")
}


##' prepare a bioregion of selected feature
##' 
##' this function combined previous functions getPromoters(), getBioRegion() and getGeneBody() in order
##' to solve the following issues.
##' 
##' (1) \url{https://github.com/GuangchuangYu/ChIPseeker/issues/16}
##' 
##' (2) \url{https://github.com/GuangchuangYu/ChIPseeker/issues/87}
##' 
##' The getBioRegion() function can prevoid a region of interest from
##' \code{txdb} object. There are three kinds of regions, \code{start_site},
##' \code{end_site} and \code{body}. 
##' 
##' We take transcript region to expain the differences of these three regions.
##' tx: chr1 1000 1400. 
##' 
##' \code{body} region refers to the 1000-1400bp.
##' 
##' \code{start_site} region with \code{upstream = 100, downstream = 100} refers to 900-1100bp. 
##' 
##' \code{end_site} region with \code{upstream = 100, downstream = 100} refers to 1300-1500bp.
##'
##' @title getBioRegion
##' @param TxDb TxDb
##' @param upstream upstream from start site or end site
##' @param downstream downstream from start site or end site
##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'
##' @param type one of "start_site", "end_site", "body"
##' @return GRanges object
##' @import BiocGenerics IRanges GenomicRanges
##' @importFrom yulab.utils get_cache_item
##' @author Guangchuang Yu, Ming L
##' @export
getBioRegion <- function(TxDb=NULL,
                         upstream=1000,
                         downstream=1000,
                         by="gene",
                         type="start_site"){
  
  by <- match.arg(by, c('gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR','UTR'))
  type <- match.arg(type, c("start_site", "end_site", "body"))
  
  TxDb <- loadTxDb(TxDb)
  .ChIPseekerEnv(TxDb, item = ChIPseekerCache)
  # ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)
  
  label <- make_label(type = type, by = by)
  
  
  if(by == 'gene' || by == 'transcript'){
    regions <- getGene(TxDb, by)
  }
  
  if (by == "exon") {
    # exonList <- get_exonList(ChIPseekerEnv)
    exonList <- get_exonList(item = ChIPseekerCache)
    regions <-  unlist(exonList)
  }
  
  if (by == "intron") {
    # intronList <- get_intronList(ChIPseekerEnv)
    intronList <- get_intronList(item = ChIPseekerCache)
    regions <- unlist(intronList)
  }
  
  if (by == "3UTR") {
    threeUTRList <- threeUTRsByTranscript(TxDb)
    regions <- unlist(threeUTRList)
  }
  
  if (by == "5UTR") {
    fiveUTRList <- fiveUTRsByTranscript(TxDb)
    regions <- unlist(fiveUTRList)
  }
  
  if (by == 'UTR'){
    three_URT <- threeUTRsByTranscript(TxDb)
    three_UTR_regions <- unlist(three_URT)
    five_UTR <- fiveUTRsByTranscript(TxDb)
    five_UTR_regions <- unlist(five_UTR)
    regions <- c(three_UTR_regions,five_UTR_regions)
  }
  
  if(type == "start_site"){
    coordinate<- ifelse(strand(regions) == "+", start(regions), end(regions))
  }else if(type == "end_site"){
    coordinate<- ifelse(strand(regions) == "+", end(regions), start(regions))
  }else{
    ## assign attribute 
    attr(regions, 'type') = type
    attr(regions, 'by') = by
    attr(regions, 'label') = label
    
    return(regions)
  }
  
  ## issue and code obtained from Chen Ting(NIH/NCI)
  start_site <- ifelse(strand(regions) == "+",coordinate-upstream, coordinate-downstream)
  end_site <- ifelse(strand(regions) == "+", coordinate+downstream, coordinate+upstream)
  
  bioRegion <- GRanges(seqnames=seqnames(regions),
                       ranges=IRanges(start_site, end_site),
                       strand=strand(regions))
  bioRegion <- unique(bioRegion)
  
  ## assign attribute 
  attr(bioRegion, 'type') = type
  attr(bioRegion, 'by') = by
  
  ## different region have different label to be added to the figures
  ## so we attach label to the Granges object
  attr(bioRegion, 'label') = label
  
  attr(bioRegion, 'upstream') = upstream
  attr(bioRegion, 'downstream') = downstream
  
  return(bioRegion)
}

##' make windows from granges object
##' 
##' \code{makeBioRegionFromGranges()} function can make bioregion from granges object.
##' 
##' The differences between \code{makeBioRegionFromGranges()} and \code{getBioRegion()} is that
##' \code{getBioRegion()} get the region object from \code{txdb} object but
##' \code{makeBioRegionFromGranges()} get the region from the granges object provided by users.
##' For example, \code{txdb} object do not contain insulator or enhancer regions. Users can
##' provide these regions through self-made granges object
##' 
##' There are three kinds of regions, \code{start_site}, \code{end_site} and \code{body}. 
##' 
##' We take enhancer region to explain the differences of these three regions.
##' enhancer: chr1 1000 1400. 
##' 
##' \code{body} region refers to the 1000-1400bp.
##' 
##' \code{start_site} region with \code{upstream = 100, downstream = 100} refers to 900-1100bp. 
##' 
##' \code{end_site} region with \code{upstream = 100, downstream = 100} refers to 1300-1500bp.
##'
##' In \code{makeBioRegionFromGranges()}, \code{upstream} and \code{downstream} can be
##' \code{NULL} if the \code{type == 'body'}. \code{by} should be specified by users and 
##' can not be omitted. \code{by} parameter will be used to made labels. \code{type} should also
##' be specified.
##' 
##' \url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}
##' 
##' @title makeBioRegionFromGranges
##' 
##' @param gr a grange object contain region of interest
##' @param upstream upstream from start site or end site, can be NULL if the type == 'body'
##' @param downstream downstream from start site or end site, can be NULL if the type == 'body'
##' @param by specify be users, e.g. gene, insulator, enhancer
##' @param type one of "start_site", "end_site", "body"
##' @return GRanges object
##' @import BiocGenerics IRanges GenomicRanges
##' @export
makeBioRegionFromGranges <- function(gr,
                                     by,
                                     type,
                                     upstream=1000,
                                     downstream=1000){
  
  if (!is(gr, "GRanges")) {
    stop("windows should be a GRanges object...")
  }
  
  type <- match.arg(type, c("start_site", "end_site", "body"))
  
  label <- make_label(type = type, by = by)
  regions <- gr
  
  if(type == "start_site"){
    coordinate<- ifelse(strand(regions) == "+", start(regions), end(regions))
  }else if(type == "end_site"){
    coordinate<- ifelse(strand(regions) == "+", end(regions), start(regions))
  }else{
    ## assign attribute 
    attr(regions, 'type') = type
    attr(regions, 'by') = by
    attr(regions, 'label') = label
    
    return(regions)
  }
  
  ## issue and code obtained from Chen Ting(NIH/NCI)
  start_site <- ifelse(strand(regions) == "+",coordinate-upstream, coordinate-downstream)
  end_site <- ifelse(strand(regions) == "+", coordinate+downstream, coordinate+upstream)
  
  bioRegion <- GRanges(seqnames=seqnames(regions),
                       ranges=IRanges(start_site, end_site),
                       strand=strand(regions))
  bioRegion <- unique(bioRegion)
  
  ## assign attribute 
  attr(bioRegion, 'type') = type
  attr(bioRegion, 'by') = by
  attr(bioRegion, 'label') = label
  attr(bioRegion, 'upstream') = upstream
  attr(bioRegion, 'downstream') = downstream
  
  return(bioRegion)
  
}


##' calculate the tag matrix
##' 
##' \code{getTagMatrix()} function can produce the matrix for visualization.
##' \code{peak} stands for the peak file. 
##' \code{window} stands for a collection of regions that users want to look into. 
##' Users can use \code{window} to capture the peak of interest.
##' There are two ways to input \code{window}. 
##' 
##' The first way is that users can use
##' \code{getPromoters()/getBioRegion()/makeBioRegionFromGranges()} to 
##' get \code{window} and put it into \code{getTagMatrix()}. 
##' 
##' The second way is that users can use \code{getTagMatrix()} to
##' call \code{getPromoters()/getBioRegion()/makeBioRegionFromGranges()}. In this way
##' users do not need to input \code{window} parameter but they need to input
##' \code{txdb}. 
##' 
##' \code{txdb} is a set of packages contained annotation 
##' of regions of different genomes. Users can
##' get the regions of interest through specific functions. These specific functions
##' are built in \code{getPromoters()/getBioRegion()}. Many regions can not be gain
##' through \code{txdb}, like insulator and enhancer regions. 
##' Users can provide these regions in the form of granges object. 
##' These self-made granges object will be passed to \code{TxDb} parameter and they will
##' be passed to \code{makeBioRegionFromGranges()} to produce the \code{window}.
##' In a word, \code{TxDb} parameter is a reference information. Users can
##' pass \code{txdb object} or self-made granges into it.
##' 
##' Details see \code{\link{getPromoters}},\code{\link{getBioRegion}} and \code{\link{makeBioRegionFromGranges}}
##' 
##' \code{upstream} and \code{downstream} parameter have different usages:
##' 
##' (1) \code{window} parameter is provided, 
##' 
##' if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
##' the flank of body region.
##' 
##' if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} do not
##' play a role in \code{getTagMatrix()} function.
##' 
##' (2) \code{window} parameter is missing,
##' 
##' if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
##' the flank of body region.
##' 
##' if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} refer to
##' the upstream and downstream of the start_site or the end_site.
##' 
##' \code{weightCol} refers to column in peak file. This column acts as a weight vaule. Details
##' see \url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}
##' 
##' \code{nbin} refers to the number of bins. \code{getTagMatrix()} provide a binning method
##' to get the tag matrix.
##' 
##' @title getTagMatrix
##'
##' @param peak peak peak file or GRanges object
##' @param upstream the distance of upstream extension
##' @param downstream the distance of downstream extension
##' @param windows a collection of region
##' @param type one of "start_site", "end_site", "body"
##' @param by one of 'gene', 'transcript', 'exon', 'intron', '3UTR' , '5UTR', or specified by users
##' @param TxDb TxDb or self-made granges object, served as txdb
##' @param weightCol column name of weight, default is NULL
##' @param nbin the amount of nbines 
##' @param verbose print message or not
##' @param ignore_strand ignore the strand information or not
##' @return tagMatrix
##' @importFrom ggplot2 rel
##' @export
getTagMatrix <- function(peak, 
                         upstream,
                         downstream, 
                         windows,
                         type,
                         by,
                         TxDb=NULL,
                         weightCol = NULL, 
                         nbin = NULL,
                         verbose = TRUE,
                         ignore_strand= FALSE){
  
  is_GRanges_of_TxDb <- FALSE
  if (is(TxDb, "GRanges")) {
    is_GRanges_of_TxDb <- TRUE
    message("#\n#.. 'TxDb' is a self-defined 'GRanges' object...\n#")
  }
  
  if(missingArg(windows)){
    
    if(is_GRanges_of_TxDb){
      
      ## make windows from self-made granges object
      windows <- makeBioRegionFromGranges(gr=TxDb,
                                          by=by,
                                          type=type,
                                          upstream=upstream,
                                          downstream=downstream)
      
    }else{
      
      ## make windows from txdb object
      windows <- getBioRegion(TxDb=TxDb,
                              upstream=upstream,
                              downstream=downstream,
                              by=by,
                              type=type)
      
      
    }
    
  }else{
    
    if (!is(windows, "GRanges")) {
      stop("windows should be a GRanges object...")
    }
    
    if(is.null(attr(windows,'type'))){
      stop("windows should be made from getPromoters()/getBioRegion()/makeBioRegionFromGranges()")
    }
    
    type <- attr(windows, 'type')
    by <- attr(windows, 'by')
    
  }
  
  # check the upstream and downstream parameter
  if(type == "body"){
    if(missingArg(upstream)){
      upstream <- NULL
    }
    
    if(missingArg(downstream)){
      downstream <- NULL
    }
    
  }else{
    upstream <- attr(windows, 'upstream')
    downstream <- attr(windows, 'downstream')
  }
  
  ## check upstream and downstream parameter
  check_upstream_and_downstream(upstream = upstream, downstream = downstream)
  
  if(type != 'body'){
    if(inherits(upstream, 'rel') || is.null(upstream)){
      stop("upstream and downstream for site region should be actual number...")
    }
  }
  
  ## check nbin parameters
  if(!is.null(nbin) && !is.numeric(nbin)){
    stop('nbin should be NULL or numeric...')
  }
  
  if(type == 'body' && is.null(nbin)){
    stop('plotting body region should set the nbin parameter...')
  }
  
  ## check nbin parameter
  if(!is.null(nbin)){
    cat(">> binning method is used...",
        format(Sys.time(), "%Y-%m-%d %X"), "\n",sep = "")
    
    is.binning <- TRUE
  }else{
    
    is.binning <- FALSE
  }
  
  if (verbose) {
    cat(">> preparing ",type," regions"," by ",by,"... ",
        format(Sys.time(), "%Y-%m-%d %X"), "\n",sep = "")
  }
  
  
  if(is.binning){
    
    if (verbose) {
      cat(">> preparing tag matrix by binning... ",
          format(Sys.time(), "%Y-%m-%d %X"), "\n")
    }
    
    tagMatrix <- getTagMatrix.binning.internal(peak = peak, 
                                               weightCol = weightCol, 
                                               windows = windows, 
                                               nbin = nbin,
                                               upstream = upstream,
                                               downstream = downstream,
                                               ignore_strand = ignore_strand)
  }else{
    
    if (verbose) {
      cat(">> preparing tag matrix... ",
          format(Sys.time(), "%Y-%m-%d %X"), "\n")
    }
    
    tagMatrix <- getTagMatrix.internal(peak=peak, 
                                       weightCol=weightCol, 
                                       windows=windows, 
                                       ignore_strand=ignore_strand)
  }
  
  ## assign attribute 
  attr(tagMatrix, 'upstream') = upstream
  attr(tagMatrix, 'downstream') = downstream
  attr(tagMatrix, 'type') = attr(windows, 'type')
  attr(tagMatrix, 'label') = attr(windows, 'label')
  attr(tagMatrix, "is.binning") <- is.binning
  
  return(tagMatrix)
}


##' calculate the tag matrix
##'
##'
##' @title getTagMatrix.internal
##' @param peak peak file or GRanges object
##' @param weightCol column name of weight, default is NULL
##' @param windows a collection of region with equal size, eg. promoter region.
##' @param ignore_strand ignore the strand information or not
##' @return tagMatrix
##' @import BiocGenerics S4Vectors IRanges GenomeInfoDb GenomicRanges
##' @author G Yu
getTagMatrix.internal <- function(peak, 
                                  weightCol=NULL, 
                                  windows, 
                                  ignore_strand= FALSE) {
  peak.gr <- loadPeak(peak)
  
  if (! is(windows, "GRanges")) {
    stop("windows should be a GRanges object...")
  }
  if (length(unique(width(windows))) != 1) {
    stop("width of windows should be equal...")
  }
  
  ## if (!exists("ChIPseekerEnv", envir = .GlobalEnv)) {
  ##     assign("ChIPseekerEnv", new.env(), .GlobalEnv)
  ## }
  ## ChIPseekerEnv <- get("ChIPseekerEnv", envir = .GlobalEnv)
  
  ## if (exists("peak", envir=ChIPseekerEnv, inherits=FALSE) &&
  ##     exists("promoters", envir=ChIPseekerEnv, inherits=FALSE) &&
  ##     exists("weightCol", envir=ChIPseekerEnv, inherits=FALSE) &&
  ##     exists("tagMatrix", envir=ChIPseekerEnv, inherits=FALSE) ) {
  
  ##     pp <- get("peak", envir=ChIPseekerEnv)
  ##     promoters <- get("promoters", envir=ChIPseekerEnv)
  ##     w <- get("weightCol", envir=ChIPseekerEnv)
  
  ##     if (all(pp == peak)) {
  ##         if (all(windows == promoters)) {
  ##             if ( (is.null(w) && is.null(weightCol)) ||
  ##                 (!is.null(w) && !is.null(weightCol) && w == weightCol)) {
  ##                 tagMatrix <- get("tagMatrix", envir=ChIPseekerEnv)
  ##                 return(tagMatrix)
  ##             } else {
  ##                 assign("weightCol", weightCol, envir=ChIPseekerEnv)
  ##             }
  ##         } else {
  ##             assign("promoters", windows)
  ##             ## make sure it is not conflict with getPromoters
  ##             if ( exists("upstream", envir=ChIPseekerEnv, inherits=FALSE))
  ##                 rm("upstream", envir=ChIPseekerEnv)
  ##         }
  ##     } else {
  ##         assign("peak", peak, envir=ChIPseekerEnv)
  ##     }
  
  ## }
  
  ## if ( !exists("peak", envir=ChIPseekerEnv, inherits=FALSE)) {
  ##     assign("peak", peak, envir=ChIPseekerEnv)
  ## }
  
  ## if ( !exists("promoters", envir=ChIPseekerEnv, inherits=FALSE)) {
  ##     assign("promoters", windows, envir=ChIPseekerEnv)
  ## }
  
  ## if (!exists("weightCol", envir=ChIPseekerEnv, inherits=FALSE)) {
  ##     assign("weightCol", weightCol, envir=ChIPseekerEnv)
  ## }
  if (is.null(weightCol)) {
    peak.cov <- coverage(peak.gr)
  } else {
    weight <- mcols(peak.gr)[[weightCol]]
    peak.cov <- coverage(peak.gr, weight=weight)
  }
  cov.len <- elementNROWS(peak.cov)
  cov.width <- GRanges(seqnames=names(cov.len),
                       IRanges(start=rep(1, length(cov.len)),
                               end=cov.len))
  windows <- subsetByOverlaps(windows, cov.width,
                              type="within", ignore.strand=FALSE)
  
  chr.idx <- intersect(names(peak.cov),
                       unique(as.character(seqnames(windows))))
  
  peakView <- Views(peak.cov[chr.idx], as(windows, "IntegerRangesList")[chr.idx])
  tagMatrixList <- lapply(peakView, function(x) t(viewApply(x, as.vector)))
  tagMatrix <- do.call("rbind", tagMatrixList)
  
  ## get the index of windows, that are reorganized by as(windows, "IntegerRangesList")
  idx.list <- split(1:length(windows),  as.factor(seqnames(windows)))
  idx <- do.call("c", idx.list)
  
  rownames(tagMatrix) <- idx
  tagMatrix <- tagMatrix[order(idx),]
  
  ## minus strand
  if (!ignore_strand) {
    minus.idx <- which(as.character(strand(windows)) == "-")
    tagMatrix[minus.idx,] <- tagMatrix[minus.idx, ncol(tagMatrix):1]
  }
  
  tagMatrix <- tagMatrix[rowSums(tagMatrix)!=0,]
  ## assign("tagMatrix", tagMatrix, envir=ChIPseekerEnv)
  return(tagMatrix)
}


##' calculate the tagMatrix by binning
##' the idea was derived from the function of deeptools
##' https://deeptools.readthedocs.io/en/develop/content/tools/computeMatrix.html 
##' 
##' @title getTagMatrix.binning.internal
##' @param peak peak peak file or GRanges object
##' @param weightCol weightCol column name of weight, default is NULL
##' @param windows windows a collection of region with equal or not equal size, eg. promoter region, gene region.
##' @param nbin the amount of nbines needed to be splited and it should not be more than min_body_length
##' @param upstream rel object, NULL or actual number
##' @param downstream rel object, NULL or actual number
##' @param ignore_strand ignore the strand information or not
##' @import BiocGenerics S4Vectors IRanges GenomeInfoDb GenomicRanges 
##' @importFrom ggplot2 rel
##' @return tagMatrix 
getTagMatrix.binning.internal <- function(peak, 
                                          weightCol = NULL, 
                                          windows, 
                                          nbin = 800,
                                          upstream = NULL,
                                          downstream = NULL,
                                          ignore_strand = FALSE){
  
  min_body_length <- filter_length <- nbin
  peak.gr <- loadPeak(peak)
  type <- attr(windows, 'type')
  
  
  if (!is(windows, "GRanges")) {
    stop("windows should be a GRanges object...")
  }
  
  if (is.null(weightCol)) {
    peak.cov <- coverage(peak.gr)
  } else {
    weight <- mcols(peak.gr)[[weightCol]]
    peak.cov <- coverage(peak.gr, weight=weight)
  }
  
  
  cov.len <- elementNROWS(peak.cov)
  cov.width <- GRanges(seqnames=names(cov.len),
                       IRanges(start=rep(1, length(cov.len)),
                               end=cov.len))
  
  windows <- subsetByOverlaps(windows, 
                              cov.width,
                              type="within", 
                              ignore.strand=FALSE)
  
  ## extend the windows by rel object
  if(inherits(upstream, 'rel')){
    
    windows1 <- windows
    
    if(!ignore_strand){
      
      positive_index <- which(as.character(strand(windows1)) == "+")
      negative_index <- which(as.character(strand(windows1)) == "-")
      start(windows1)[positive_index] <- suppressWarnings(start(windows1)[positive_index] - floor(width(windows)[positive_index]*as.numeric(upstream)))
      end(windows1)[positive_index] <- suppressWarnings(end(windows1)[positive_index] + floor(width(windows)[positive_index]*as.numeric(downstream)))
      
      start(windows1)[negative_index] <- suppressWarnings(start(windows1)[negative_index] - floor(width(windows)[negative_index]*as.numeric(downstream)))
      end(windows1)[negative_index] <- suppressWarnings(end(windows1)[negative_index] + floor(width(windows)[negative_index]*as.numeric(upstream)))
      
    }else{
      
      start(windows1) <- suppressWarnings(start(windows1) - floor(width(windows)*as.numeric(upstream)))
      end(windows1) <- suppressWarnings(end(windows1) + floor(width(windows)*as.numeric(downstream)))
      
    }
   
    windows <- windows1
    nbin <- floor(nbin*(1+as.numeric(downstream)+as.numeric(upstream)))
    min_body_length <- min_body_length*(1+as.numeric(upstream)+as.numeric(downstream))
    
    cat(">> preparing matrix with extension from (",attr(windows,'label')[1],"-",
        100*as.numeric(upstream),"%)~(",attr(windows,'label')[2],"+",
        100*as.numeric(downstream),"%)... ",
        format(Sys.time(), "%Y-%m-%d %X"),"\n",sep = "")
  }
  
  ## do not extend
  if(is.null(upstream)){
    if(attr(windows, 'type') == 'body'){
      cat(">> preparing matrix for ",attr(windows, 'type')," region with no flank extension... ",
          format(Sys.time(), "%Y-%m-%d %X"),"\n",sep = "")
    }else{
      cat(">> preparing matrix for ",attr(windows,'type')," region... ",
          format(Sys.time(), "%Y-%m-%d %X"),"\n",sep = "")
    }
  }
  
  ## extend the windows by actual number 
  if(!is.null(upstream) && !inherits(upstream, 'rel') && attr(windows, 'type')== 'body'){
    
    windows1 <- windows
    
    if(!ignore_strand){
      
      positive_index <- which(as.character(strand(windows1)) == "+")
      negative_index <- which(as.character(strand(windows1)) == "-")
      
      start(windows1)[positive_index] <- suppressWarnings(start(windows1)[positive_index] - upstream)
      end(windows1)[positive_index] <- suppressWarnings(end(windows1)[positive_index] + downstream)
      
      start(windows1)[negative_index] <- suppressWarnings(start(windows1)[negative_index] - downstream)
      end(windows1)[negative_index] <- suppressWarnings(end(windows1)[negative_index] + upstream)
      
    }else{
      
      start(windows1) <- suppressWarnings(start(windows1) - upstream)
      end(windows1) <- suppressWarnings(end(windows1) + downstream)
      
    }
    
    windows <- windows1
    upstreamPer <- floor(upstream/1000)*0.1
    downstreamPer <- floor(downstream/1000)*0.1
    nbin <- floor(nbin*(1+upstreamPer+downstreamPer))
    min_body_length <- min_body_length+upstream+downstream
    
    cat(">> preparing matrix with flank extension from (",attr(windows,'label')[1],"-",
        upstream,"bp)~(",attr(windows,'label')[2],"+",downstream,"bp)... ",
        format(Sys.time(), "%Y-%m-%d %X"),"\n",sep = "")
  }
  
  chr.idx <- intersect(names(peak.cov),
                       unique(as.character(seqnames(windows))))
  
  windows <- as(windows, "IntegerRangesList")[chr.idx]
  attr(windows,'type') <- type
  
  peakView <- Views(peak.cov[chr.idx], 
                    windows)
  
  ## remove the gene that has no binding proteins
  for (i in 1:length(peakView)) {
    
    index <- viewSums(peakView[[i]])!= 0
    peakView[[i]] <- peakView[[i]][index]
    windows[[i]] <- windows[[i]][index]
  } 
  
  tagMatrixList <- lapply(peakView, function(x) viewApply(x, as.vector))
  
  if(!attr(windows, 'type') == 'body'){
    
    tagMatrixList <- lapply(tagMatrixList, function(x) t(x))
    
    # to remove the chromosome that do not bind protein
    index <- vapply(tagMatrixList, function(x) length(x)>0, FUN.VALUE = logical(1))
    tagMatrixList <- tagMatrixList[index]
    windows <- windows[index]
    
    ## create a matrix to receive binning results
    tagMatrix <- list()
    
    ## this circulation is to deal with different chromosomes
    for (i in 1:length(tagMatrixList)) {
      
      tagMatrix[[i]] <- matrix(nrow = nrow(tagMatrixList[[i]]),ncol = nbin)
      
      ## this circulation is to deal with different genes
      for (j in 1:nrow(tagMatrixList[[i]])) {
        
        ## seq is the distance between different bins
        seq <- floor(length(tagMatrixList[[i]][j,])/nbin)
        
        ## cursor record the position of calculation
        cursor <- 1
        
        ## the third circulation is to calculate the binding strength
        ## it has two parts
        ## the first part is to for the nbin(1:nbin-1)
        ## because the seq is not derived from exact division
        ## the second part is to compensate the loss of non-exact-division
        
        ## this the first part for 1:(nbin-1)
        for (k in 1:(nbin-1)) {
          
          read <- 0
          
          for (z in cursor:(cursor+seq-1)) {
            read <- read + tagMatrixList[[i]][j,z]
          }
          
          tagMatrix[[i]][j,k] <- read/seq
          
          cursor <- cursor+seq
        }
        
        ## this the second part to to compensate the loss of non-exact-division
        read <- 0
        for (z in cursor:length(tagMatrixList[[i]][j,])) {
          read <- read+tagMatrixList[[i]][j,z]
        }
        
        tagMatrix[[i]][j,nbin] <- read/(length(tagMatrixList[[i]][j,])-cursor+1)
      }
      
      if(!ignore_strand){
        minus.idx <- which(as.character(mcols(windows[[i]])[["strand"]]) == "-")
        tagMatrix[[i]][minus.idx,] <- tagMatrix[[i]][minus.idx, ncol(tagMatrix[[i]]):1]
      }
    }
    
  }else{
    
    ## extend genebody by atual number
    if(!is.null(upstream) & !inherits(upstream, 'rel')){
      
      for (i in 1:length(tagMatrixList)) {
        if (length(class(tagMatrixList[[i]])) != 1) {
          sample <- tagMatrixList[[i]]
          tagMatrixList[[i]] <- lapply(seq_len(ncol(sample)), function(i) sample[,i])  
        }
      }
      
      index <- vapply(tagMatrixList, function(x) length(x)>0, FUN.VALUE = logical(1))
      tagMatrixList <- tagMatrixList[index]
      windows <- windows[index]
      
      ## count the amount before filtering
      pre_amount <- 0
      for(i in 1:length(tagMatrixList)){
        pre_amount <- pre_amount+length(tagMatrixList[[i]])
      }
      
      for (i in 1:length(tagMatrixList)) {
        
        index <- vapply(tagMatrixList[[i]], function(y) length(y)>min_body_length,FUN.VALUE = logical(1))
        tagMatrixList[[i]] <- tagMatrixList[[i]][index]
        windows[[i]] <- windows[[i]][index]
      }
      
      ## count the amount after filtering
      amount <- 0
      for(i in 1:length(tagMatrixList)){
        amount <- amount+length(tagMatrixList[[i]])
      }
      
      cat(">> ",pre_amount-amount," peaks(",100*((pre_amount-amount)/pre_amount),
          "%), having lengths smaller than ",filter_length,"bp, are filtered... ",
          format(Sys.time(), "%Y-%m-%d %X"),"\n",sep = "")
      
      upstreamnbin <- floor(nbin*(upstreamPer/(1+upstreamPer+downstreamPer)))
      bodynbin <- floor(nbin*(1/(1+upstreamPer+downstreamPer)))
      downstreamnbin <- floor(nbin*(downstreamPer/(1+upstreamPer+downstreamPer)))
      
      tagMatrix <- list()
      
      for (i in 1:length(tagMatrixList)) {
        
        tagMatrix[[i]] <- matrix(nrow = length(tagMatrixList[[i]]),ncol = nbin)
        
        ## count the upstream 
        for (j in 1:length(tagMatrixList[[i]])) {
          
          seq <- floor(upstream/upstreamnbin)
          cursor <- 1
          
          for (k in 1:(upstreamnbin-1)) {
            
            read <- 0
            
            for (z in cursor:(cursor+seq-1)) {
              read <- read + tagMatrixList[[i]][[j]][z]
            }
            
            tagMatrix[[i]][j,k] <- read/seq
            
            cursor <- cursor+seq
          }
          
          
          read <- 0
          for (z in cursor:upstream) {
            read <- read+tagMatrixList[[i]][[j]][z]
          }
          
          tagMatrix[[i]][j,upstreamnbin] <- read/(upstream-cursor)
          
        }
        
        ## count genebody
        for (j in 1:length(tagMatrixList[[i]])) {
          
          seq <- floor((length(tagMatrixList[[i]][[j]])-upstream-downstream)/bodynbin)
          cursor <- upstream+1
          
          for (k in (upstreamnbin+1):(upstreamnbin+bodynbin-1)) {
            
            read <- 0
            
            for (z in cursor:(cursor+seq-1)) {
              read <- read + tagMatrixList[[i]][[j]][z]
            }
            
            tagMatrix[[i]][j,k] <- read/seq
            
            cursor <- cursor+seq
          }
          
          read <- 0
          for (z in cursor:(length(tagMatrixList[[i]][[j]])-downstream)) {
            read <- read+tagMatrixList[[i]][[j]][z]
          }
          
          tagMatrix[[i]][j,bodynbin+upstreamnbin] <- read/(length(tagMatrixList[[i]][[j]])-downstream-cursor)
        }
        
        ## count downstream
        for (j in 1:length(tagMatrixList[[i]])) {
          
          seq <- floor(downstream/downstreamnbin)
          cursor <- length(tagMatrixList[[i]][[j]])-downstream+1
          
          for (k in (upstreamnbin+bodynbin+1):(nbin-1)) {
            
            read <- 0
            
            for (z in cursor:(cursor+seq-1)) {
              read <- read + tagMatrixList[[i]][[j]][z]
            }
            
            tagMatrix[[i]][j,k] <- read/seq
            
            cursor <- cursor+seq
          }
          
          read <- 0
          for (z in cursor:length(tagMatrixList[[i]][[j]])) {
            read <- read+tagMatrixList[[i]][[j]][z]
          }
          
          tagMatrix[[i]][j,nbin] <- read/(length(tagMatrixList[[i]][[j]])-cursor+1)
        }
        
        if(!ignore_strand){
          minus.idx <- which(as.character(mcols(windows[[i]])[["strand"]]) == "-")
          tagMatrix[[i]][minus.idx,] <- tagMatrix[[i]][minus.idx, ncol(tagMatrix[[i]]):1]
        }
        
      }
      
    }else{
      
      for (i in 1:length(tagMatrixList)) {
        if (length(class(tagMatrixList[[i]])) != 1) {
          sample <- tagMatrixList[[i]]
          tagMatrixList[[i]] <- lapply(seq_len(ncol(sample)), function(i) sample[,i])  
        }
      }
      
      index <- vapply(tagMatrixList, function(x) length(x)>0, FUN.VALUE = logical(1))
      tagMatrixList <- tagMatrixList[index]
      windows <- windows[index]
      
      ## count the amount before filtering
      pre_amount <- 0
      for(i in 1:length(tagMatrixList)){
        pre_amount <- pre_amount+length(tagMatrixList[[i]])
      }
      
      for (i in 1:length(tagMatrixList)) {
        
        index <- vapply(tagMatrixList[[i]], function(y) length(y)>min_body_length,FUN.VALUE = logical(1))
        tagMatrixList[[i]] <- tagMatrixList[[i]][index]
        windows[[i]] <- windows[[i]][index]
      }
      
      ## count the amount after filtering
      amount <- 0
      for(i in 1:length(tagMatrixList)){
        amount <- amount+length(tagMatrixList[[i]])
      }
      
      cat(">> ",pre_amount-amount," peaks(",100*((pre_amount-amount)/pre_amount),
          "%), having lengths smaller than ",filter_length,"bp, are filtered... ",
          format(Sys.time(), "%Y-%m-%d %X"),"\n",sep = "")
  
      tagMatrix <- list()
      
      for (i in 1:length(tagMatrixList)) {
        
        tagMatrix[[i]] <- matrix(nrow = length(tagMatrixList[[i]]),ncol = nbin)
        
        for (j in 1:length(tagMatrixList[[i]])) {
          
          seq <- floor(length(tagMatrixList[[i]][[j]])/nbin)
          cursor <- 1
          
          for (k in 1:(nbin-1)) {
            
            read <- 0
            
            for (z in cursor:(cursor+seq-1)) {
              read <- read + tagMatrixList[[i]][[j]][z]
            }
            
            tagMatrix[[i]][j,k] <- read/seq
            
            cursor <- cursor+seq
          }
          
          read <- 0
          for (z in cursor:length(tagMatrixList[[i]][[j]])) {
            read <- read+tagMatrixList[[i]][[j]][z]
          }
          
          tagMatrix[[i]][j,nbin] <- read/(length(tagMatrixList[[i]][[j]])-cursor+1)
          
        }
        
        if(!ignore_strand){
          minus.idx <- which(as.character(mcols(windows[[i]])[["strand"]]) == "-")
          tagMatrix[[i]][minus.idx,] <- tagMatrix[[i]][minus.idx, ncol(tagMatrix[[i]]):1]
        }
      }
      
    }
    
  }
  
  ## combine the results
  tagMatrix <- do.call("rbind",tagMatrix)
  
  return(tagMatrix)
}


##' Nested function for getTagMatrix() to deal with multiple windows
##' 
##' This is an internal function.
##' @title getTagMatrix2
##'
##' @param peak peak peak file or GRanges object
##' @param upstream the distance of upstream extension
##' @param downstream the distance of downstream extension
##' @param windows_name the names of windows
##' @param type one of "start_site", "end_site", "body"
##' @param by one of 'gene', 'transcript', 'exon', 'intron', '3UTR' , '5UTR', or specified by users
##' @param TxDb TxDb or self-made granges object, served as txdb
##' @param weightCol column name of weight, default is NULL
##' @param nbin the amount of nbines 
##' @param verbose print message or not
##' @param ignore_strand ignore the strand information or not
##' @return tagMatrix
##' @importFrom ggplot2 rel
getTagMatrix2 <- function(peak, 
                          upstream,
                          downstream,
                          windows_name,
                          type,
                          by,
                          TxDb=NULL,
                          weightCol = NULL, 
                          nbin = NULL,
                          verbose = TRUE,
                          ignore_strand= FALSE){
  
  names(TxDb) <- by
  
  windows <- lapply(as.list(by), function(x){
    
    if(x %in% c('gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR')){
      
      result <- getBioRegion(TxDb=TxDb[[x]],
                             upstream=upstream,
                             downstream=downstream,
                             by=x,
                             type=type)
    }else{
      
      result <- makeBioRegionFromGranges(gr=TxDb[[x]],
                                         by=x,
                                         type=type,
                                         upstream=upstream,
                                         downstream=downstream)
      
    }
    
    return(result)
    
  })
  
  names(windows) <- windows_name
  
  # check the upstream and downstream parameter for body
  if(type == "body"){
    if(missingArg(upstream)){
      upstream <- NULL
    }
    
    if(missingArg(downstream)){
      downstream <- NULL
    }
    
  }else{
    upstream <- attr(windows[[1]], 'upstream')
    downstream <- attr(windows[[1]], 'downstream')
  }
  
  ## check upstream and downstream parameter
  check_upstream_and_downstream(upstream = upstream, downstream = downstream)
  
  if(type != 'body'){
    if(inherits(upstream, 'rel') || is.null(upstream)){
      stop("upstream and downstream for site region should be actual number...")
    }
  }
  
  ## check nbin parameters
  if(!is.null(nbin) && !is.numeric(nbin)){
    stop('nbin should be NULL or numeric...')
  }
  
  if(type == 'body' && is.null(nbin)){
    stop('plotting body region should set the nbin parameter...')
  }
  
  ## check nbin parameter
  if(!is.null(nbin)){
    cat(">> binning method is used...",
        format(Sys.time(), "%Y-%m-%d %X"), "\n",sep = "")
    
    is.binning <- TRUE
  }else{
    
    is.binning <- FALSE
  }
  
  if (verbose) {
    cat(">> preparing ",type," regions"," by ",paste(by,collapse = " "),"... ",
        format(Sys.time(), "%Y-%m-%d %X"), "\n",sep = "")
  }
  
  
  if(is.binning){
    
    if (verbose) {
      cat(">> preparing tag matrix by binning... ",
          format(Sys.time(), "%Y-%m-%d %X"), "\n")
    }
    
    tagMatrix <- getTagMatrix2.binning.internal(peak = peak, 
                                                weightCol = weightCol, 
                                                windows = windows, 
                                                windows_name=windows_name,
                                                nbin = nbin,
                                                upstream = upstream,
                                                downstream = downstream,
                                                ignore_strand = ignore_strand)
  }else{
    
    if (verbose) {
      cat(">> preparing tag matrix... ",
          format(Sys.time(), "%Y-%m-%d %X"), "\n")
    }
    
    tagMatrix <- getTagMatrix2.internal(peak=peak, 
                                        weightCol=weightCol,
                                        windows=windows,
                                        windows_name=windows_name,
                                        ignore_strand=ignore_strand)
  }
  
  names(tagMatrix) <- windows_name
  
  ## assign attribute 
  tagMatrix <- lapply(tagMatrix, function(x){
    attr(x, 'upstream') = upstream
    attr(x, 'downstream') = downstream
    attr(x, 'type') = attr(windows[[1]], 'type')
    attr(x, 'label') = attr(windows[[1]], 'label')
    attr(x, "is.binning") <- is.binning
    return(x)
  })
  
  return(tagMatrix)
  
}

##' @title getTagMatrix2.internal
##'
##' @param peak peak peak file or GRanges object
##' @param windows a collection of region
##' @param windows_name the name of windows
##' @param weightCol column name of weight, default is NULL
##' @param ignore_strand ignore the strand information or not
getTagMatrix2.internal <- function(peak, 
                                   weightCol=NULL,
                                   windows,
                                   windows_name,
                                   ignore_strand= FALSE) {
  
  mt_list <- lapply(windows_name, function(x){
    
    windows_tmp <- windows[[x]]
    
    mt <- getTagMatrix.internal(peak=peak, 
                                weightCol=weightCol, 
                                windows=windows_tmp, 
                                ignore_strand=ignore_strand)
    
    return(mt)
  })
  
  return(mt_list)
}

##' internal function
##' 
##' @param peak peak peak file or GRanges object
##' @param upstream the distance of upstream extension
##' @param downstream the distance of downstream extension
##' @param windows a collection of region
##' @param windows_name the name of windows
##' @param weightCol column name of weight, default is NULL
##' @param nbin the amount of nbines 
##' @param ignore_strand ignore the strand information or not
getTagMatrix2.binning.internal <- function(peak, 
                                           weightCol = NULL, 
                                           windows, 
                                           windows_name,
                                           nbin = 800,
                                           upstream = NULL,
                                           downstream = NULL,
                                           ignore_strand = FALSE){
  
  mt_list <- lapply(windows_name, function(x){
    
    windows_tmp <- windows[[x]]
    
    mt <- getTagMatrix.binning.internal(peak = peak, 
                                        weightCol = weightCol, 
                                        windows = windows_tmp, 
                                        nbin = nbin,
                                        upstream = upstream,
                                        downstream = downstream,
                                        ignore_strand = ignore_strand)
    
    return(mt)
  })
  
  return(mt_list)
  
}

================================================
FILE: R/upsetplot.R
================================================
## @importFrom UpSetR upset
## @importFrom grid viewport
## @importFrom grid pushViewport
## @importFrom grid popViewport
## @importFrom gridBase gridPLT
## @importFrom graphics plot.new
##' @importFrom ggplot2 coord_fixed
##' @importFrom ggplot2 ggplot
##' @importFrom ggplot2 aes_
##' @importFrom ggplot2 geom_bar
##' @importFrom ggplot2 xlab
##' @importFrom ggplot2 ylab
##' @importFrom ggplot2 theme_minimal
##' @author Guangchuang Yu
upsetplot.csAnno <- function(x, order_by = "freq", vennpie=FALSE, vp = list(x=.6, y=.7, width=.8, height=.8)) {
    y <- x@detailGenomicAnnotation
    nn <- names(y)
    y <- as.matrix(y)

    res <- tibble::tibble(anno = lapply(1:nrow(y), function(i) nn[y[i,]]))
    g <- ggplot(res, aes_(x = ~anno)) + geom_bar() +
        xlab(NULL) + ylab(NULL) + theme_minimal() +
        ggupset::scale_x_upset(n_intersections = 20, order_by = order_by) 

    if (!vennpie) return(g)

    f <- function() vennpie(x, cex = .9)

    p <- ggplotify::as.ggplot(f) + coord_fixed() 

    ggplotify::as.ggplot(g) +
        ggimage::geom_subview(subview = p, x = vp$x, y = vp$y, width = vp$width, height = vp$height)


    ## y[y] <- 1
    ## y <- as.data.frame(y)
    ## ## cn <- colnames(y)
    ## ## cn[cn == "fiveUTR"] <- "5 UTR"
    ## ## cn[cn == "threeUTR"] <- "3 UTR"
    ## ## colnames(y) <- cn

    ## if (is.null(sets)) {
    ##     sets <- c("distal_intergenic", "downstream",
    ##               "threeUTR", "fiveUTR", "Intron",
    ##               "Exon", "Promoter")
    ##     if (vennpie && is.null(sets.bar.color)) {
    ##         sets.bar.color <- c("#d95f0e", "#fee0d2", "#98D277",
    ##                             "#6F9E4C", "#fc9272", "#9ecae1", "#ffeda0")
    ##     }
    ## }

    ## if (is.null(sets.bar.color)) {
    ##     sets.bar.color <- "black"
    ## }

    ## if (vennpie) {
    ##     plot.new()
    ##     # grid.rect(gp = gpar(fill="white"))
    ##     upset(y, sets=sets, sets.bar.color=sets.bar.color,
    ##           order.by = order.by, ...)
    ##     pushViewport(vp)
    ##     ##par(plt=gridPLT(), new=TRUE)
    ##     vennpie(x)
    ##     popViewport()
    ## } else {
    ##     upset(y, sets=sets,sets.bar.color=sets.bar.color,
    ##           order.by = order.by, ...)
    ## }
}


================================================
FILE: R/utilities.R
================================================
#' @title env function for ChIPseeker
#' @param TxDb txdb object
#' @param item item name
#' @param force force to update txdb item in cache or not.
#' @importFrom yulab.utils get_cache_item
#' @importFrom yulab.utils update_cache_item
#' @importFrom yulab.utils rm_cache_item
#' @importFrom yulab.utils initial_cache_item
#' @importFrom S4Vectors metadata
.ChIPseekerEnv <- function(TxDb, item = "ChIPseekerEnv", force = FALSE) {
    
    # get cache item
    # it will create a list if there is no a cache item
    cache_item <- get_cache_item(item)

    # if there is no TXDB cached, write in cache
    if (is.null(cache_item$TXDB)) {
        update_cache_item(item = item, list(TXDB = TxDb))
        cat(">> Using Genome:", get_env_genome(),"...\n")
        return(invisible(NULL))
    }

    # force to update item
    if(force){
        cat(">> Force to update txdb in cache...\n")
        rm_cache_item(item)           
        initial_cache_item(item)      
        update_cache_item(item, list(TXDB = TxDb))  
        cat(">> Using Genome:", get_env_genome(),"...\n")
    }

    # if exist TXDB
    TXDB <- cache_item$TXDB
    m1 <- tryCatch(unlist(metadata(TXDB)), error = function(e) NULL)
    m2 <- tryCatch(unlist(metadata(TxDb)),  error = function(e) NULL)
    if (!is.null(m1)) m1 <- m1[!is.na(m1)]
    if (!is.null(m2)) m2 <- m2[!is.na(m2)]

    txdb_flag <- is.character(all.equal(TXDB, TxDb))

    if (is.null(m1) || is.null(m2) || length(m1) != length(m2) || any(m1 != m2) || txdb_flag) {
        cat(">> Update txdb in cache...\n")
        rm_cache_item(item)           
        initial_cache_item(item)      
        update_cache_item(item, list(TXDB = TxDb))  
    }

    cat(">> Using Genome:", get_env_genome(),"...\n")

    invisible(NULL)

    # pos <- 1
    # envir <- as.environment(pos)
    # if (!exists("ChIPseekerEnv", envir=.GlobalEnv)) {
    #     assign("ChIPseekerEnv", new.env(), envir = envir)
    # }

    # ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)
    # if (!exists("TXDB", envir=ChIPseekerEnv, inherits=FALSE)) {
    #     ## first run
    #     assign("TXDB", TxDb, envir=ChIPseekerEnv)
    # } else {
    #     TXDB <- get("TXDB", envir=ChIPseekerEnv)
    #     m1 <- tryCatch(unlist(metadata(TXDB)), error=function(e) NULL)

    #     m2 <- unlist(metadata(TxDb))

    #     if (!is.null(m1)) {
    #         m1 <- m1[!is.na(m1)]
    #     }
    #     m2 <- m2[!is.na(m2)]

    #     if ( is.null(m1) || length(m1) != length(m2) || any(m1 != m2) ) {
    #         rm(ChIPseekerEnv)
    #         assign("ChIPseekerEnv", new.env(), envir = envir)
    #         ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)
    #         assign("TXDB", TxDb, envir=ChIPseekerEnv)
    #     }
    # }
}


##' @importFrom GenomicFeatures exonsBy
##' @importFrom yulab.utils get_cache_element
##' @importFrom yulab.utils update_cache_item
get_exonList <- function(item = "ChIPseekerEnv") {
    # TxDb <- get("TXDB", envir=ChIPseekerEnv)
    TxDb <- get_cache_element(item = item, elements = "TXDB")

    exonList <- get_cache_element(item = item, elements = "exonList")

    if(is.null(exonList)){
        exonList <- exonsBy(TxDb)
        update_cache_item(item = item, list("exonList" = exonList))
    }

    # if ( exists("exonList", envir=ChIPseekerEnv, inherits=FALSE) ) {
    #     exonList <- get("exonList", envir=ChIPseekerEnv)
    # } else {
    #     exonList <- exonsBy(TxDb)
    #     assign("exonList", exonList, envir=ChIPseekerEnv)
    # }
    return(exonList)
}

##' @importFrom GenomicFeatures intronsByTranscript
##' @importFrom yulab.utils get_cache_element
##' @importFrom yulab.utils update_cache_item
get_intronList <- function(item = "ChIPseekerEnv") {

    # TxDb <- get("TXDB", envir=ChIPseekerEnv)
    TxDb <- get_cache_element(item = item, elements = "TXDB")

    intronList <- get_cache_element(item = item, elements = "intronList")

    if(is.null(intronList)){
        intronList <- intronsByTranscript(TxDb)
        update_cache_item(item = item, list("intronList" = intronList))
    }

    # if ( exists("intronList", envir=ChIPseekerEnv, inherits=FALSE) ) {
    #     intronList <- get("intronList", envir=ChIPseekerEnv)
    # } else {
    #     intronList <- intronsByTranscript(TxDb)
    #     assign("intronList", intronList, envir=ChIPseekerEnv)
    # }
    return(intronList)
}


getCols <- function(n) {
    col <- c("#8dd3c7", "#ffffb3", "#bebada",
             "#fb8072", "#80b1d3", "#fdb462",
             "#b3de69", "#fccde5", "#d9d9d9",
             "#bc80bd", "#ccebc5", "#ffed6f")

    col2 <- c("#1f78b4", "#ffff33", "#c2a5cf",
             "#ff7f00", "#810f7c", "#a6cee3",
             "#006d2c", "#4d4d4d", "#8c510a",
             "#d73027", "#78c679", "#7f0000",
             "#41b6c4", "#e7298a", "#54278f")

    col3 <- c("#a6cee3", "#1f78b4", "#b2df8a",
              "#33a02c", "#fb9a99", "#e31a1c",
              "#fdbf6f", "#ff7f00", "#cab2d6",
              "#6a3d9a", "#ffff99", "#b15928")

    ## colorRampPalette(brewer.pal(12, "Set3"))(n)
    col3[1:n]
}

getPalette <- function(n){
  
  palette <- c("RdBu", "RdYlGn", "Spectral",
               "RdYlBu", "PiYG", "PRGn",
               "PuOr", "BrBG", "RdGy")
  
  palette[1:n]
  
}

getSgn <- function(data, idx){
    d <- data[idx, ]
    ss <- colSums(d)
    ss <- ss / sum(ss)
    return(ss)
}
parseBootCiPerc <- function(bootCiPerc){
    bootCiPerc <- bootCiPerc$percent
    tmp <- length(bootCiPerc)
    ciLo <- bootCiPerc[tmp - 1]
    ciUp <- bootCiPerc[tmp]
    return(c(ciLo, ciUp))
}

## estimate CI using bootstraping
##' @importFrom boot boot
##' @importFrom boot boot.ci
##' @importFrom parallel detectCores
getTagCiMatrix <- function(tagMatrix, conf = 0.95, resample=500, ncpus=detectCores()-1){
    RESAMPLE_TIME <- resample
    trackLen <- ncol(tagMatrix)
    if (Sys.info()[1] == "Windows") {
        tagMxBoot <- boot(data = tagMatrix, statistic = getSgn, R = RESAMPLE_TIME)
    } else {
        tagMxBoot <- boot(data = tagMatrix, statistic = getSgn, R = RESAMPLE_TIME,
                          parallel = "multicore", ncpus = ncpus)
    }
    cat(">> Running bootstrapping for tag matrix...\t\t",
        format(Sys.time(), "%Y-%m-%d %X"), "\n")
    tagMxBootCi <- sapply(seq_len(trackLen), function(i) {
                        bootCiToken <- boot.ci(tagMxBoot, type = "perc", index = i)
                        ## parse boot.ci results
                        return(parseBootCiPerc(bootCiToken))
                        }
                    )
    row.names(tagMxBootCi) <- c("Lower", "Upper")
    return(tagMxBootCi)
}

getTagCount <- function(tagMatrix, xlim, conf, ...) {
    ss <- colSums(tagMatrix)
    ss <- ss/sum(ss)
    ## plot(1:length(ss), ss, type="l", xlab=xlab, ylab=ylab)
    pos <- value <- NULL
    dd <- data.frame(pos=c(xlim[1]:xlim[2]), value=ss)
    if (!(missingArg(conf) || is.na(conf))){
        tagCiMx <- getTagCiMatrix(tagMatrix, conf = conf, ...)
        dd$Lower <- tagCiMx["Lower", ]
        dd$Upper <- tagCiMx["Upper", ]
    }
    return(dd)
}


TXID2EG <- function(txid, geneIdOnly=FALSE) {
    txid <- as.character(txid)
    if (geneIdOnly == TRUE) {
        res <- TXID2EGID(txid)
    } else {
        res <- TXID2TXEG(txid)
    }
    return(res)
}

##' @importFrom GenomicFeatures transcripts
##' @importFrom yulab.utils get_cache_element
##' @importFrom yulab.utils update_cache_item
TXID2TXEG <- function(txid) {
    # ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)

    txid2geneid <- get_cache_element(item = ChIPseekerCache, elements = "txid2geneid")

    if(is.null(txid2geneid)){
        txdb <- get_cache_element(item = ChIPseekerCache, elements = "TXDB")
        txidinfo <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"))
        idx <- which(sapply(txidinfo$gene_id, length) == 0)
        txidinfo[idx,]$gene_id <- txidinfo[idx,]$tx_name
        txid2geneid <- paste(mcols(txidinfo)[["tx_name"]],
                             mcols(txidinfo)[["gene_id"]],
                             sep="/")
        txid2geneid <- sub("/NA", "", txid2geneid)

        names(txid2geneid) <- mcols(txidinfo)[["tx_id"]]
        update_cache_item(item = ChIPseekerCache, list("txid2geneid" = txid2geneid))
    }

    # if (exists("txid2geneid", envir=ChIPseekerEnv, inherits=FALSE)) {
    #     txid2geneid <- get("txid2geneid", envir=ChIPseekerEnv)
    # } else {
    #     txdb <- get("TXDB", envir=ChIPseekerEnv)
    #     txidinfo <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"))
    #     idx <- which(sapply(txidinfo$gene_id, length) == 0)
    #     txidinfo[idx,]$gene_id <- txidinfo[idx,]$tx_name
    #     txid2geneid <- paste(mcols(txidinfo)[["tx_name"]],
    #                          mcols(txidinfo)[["gene_id"]],
    #                          sep="/")
    #     txid2geneid <- sub("/NA", "", txid2geneid)

    #     names(txid2geneid) <- mcols(txidinfo)[["tx_id"]]
    #     assign("txid2geneid", txid2geneid, envir=ChIPseekerEnv)
    # }
    return(as.character(txid2geneid[txid]))
}

##' @importFrom yulab.utils get_cache_element
##' @importFrom yulab.utils update_cache_item
TXID2EGID <- function(txid) {
    # ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)

    txid2geneid <- get_cache_element(item = ChIPseekerCache, elements = "txid2eg")

    if(is.null(txid2geneid)){
        txdb <- get_cache_element(item = ChIPseekerCache, elements = "TXDB")
        txidinfo <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"))
        idx <- which(sapply(txidinfo$gene_id, length) == 0)
        txidinfo[idx,]$gene_id <- txidinfo[idx,]$tx_name
        txid2geneid <- as.character(mcols(txidinfo)[["gene_id"]])

        names(txid2geneid) <- mcols(txidinfo)[["tx_id"]]
        update_cache_item(item = ChIPseekerCache, list("txid2eg" = txid2geneid))
    }

    # if (exists("txid2eg", envir=ChIPseekerEnv, inherits=FALSE)) {
    #     txid2geneid <- get("txid2eg", envir=ChIPseekerEnv)
    # } else {
    #     txdb <- get("TXDB", envir=ChIPseekerEnv)
    #     txidinfo <- transcripts(txdb, columns=c("tx_id", "tx_name", "gene_id"))
    #     idx <- which(sapply(txidinfo$gene_id, length) == 0)
    #     txidinfo[idx,]$gene_id <- txidinfo[idx,]$tx_name
    #     txid2geneid <- as.character(mcols(txidinfo)[["gene_id"]])

    #     names(txid2geneid) <- mcols(txidinfo)[["tx_id"]]
    #     assign("txid2eg", txid2geneid, envir=ChIPseekerEnv)
    # }
    return(as.character(txid2geneid[txid]))
}

## according to: https://support.bioconductor.org/p/70432/#70545
## contributed by Hervé Pagès
getFirstHitIndex <- function(x) {
    ## sapply(unique(x), function(i) which(x == i)[1])
    which(!duplicated(x))
}

##' calculate the overlap matrix, which is useful for vennplot
##'
##'
##' @title overlap
##' @param Sets a list of objects
##' @return data.frame
##' @importFrom gtools permutations
##' @export
##' @author G Yu
overlap <- function(Sets) {
    ## this function is very generic.
    ## it call the getIntersectLength function to calculate
    ## the number of the intersection.
    ## if it fail, take a look at the object type were supported by getIntersectLength function.

    nn <- names(Sets)
    w <- t(apply(permutations(2,length(Sets),0:1, repeats.allowed=TRUE), 1 , rev))
    rs <- rowSums(w)
    wd <- as.data.frame(w)
    wd$n <- NA
    for (i in length(nn):0) {
        idx <- which(rs == i)
        if (i == length(nn)) {
            len <- getIntersectLength(Sets, as.logical(w[idx,]))
            wd$n[idx] <- len
        } else if (i == 0) {
            wd$n[idx] <- 0
        } else {
            for (ii in idx) {
                ##print(ii)
                len <- getIntersectLength(Sets, as.logical(w[ii,]))
                ww = w[ii,]
                jj <- which(ww == 0)
                pp <- permutations(2, length(jj), 0:1, repeats.allowed=TRUE)

                for (aa in 2:nrow(pp)) {
                    ## 1st row is all 0, abondoned
                    xx <- jj[as.logical(pp[aa,])]
                    ww[xx] =ww[xx] +1
                    bb <-  t(apply(w, 1, function(i) i == ww))
                    wd$n[rowSums(bb) == length(ww) ]
                         ww <- w[ii,]
                    len <- len - wd$n[rowSums(bb) == length(ww) ]
                    ww <- w[ii,]
                }
                wd$n[ii] <- len
            }
        }
    }
    colnames(wd) = c(names(Sets), "Weight")
    return(wd)
}


getIntersectLength <- function(Sets, idx) {
    ## only use intersect and length methods in this function
    ## works fine with GRanges object
    ## and easy to extend to other objects.
    ss= Sets[idx]
    ol <- ss[[1]]

    if (sum(idx) == 1) {
        return(length(ol))
    }

    for (j in 2:length(ss)) {
        ol <-  intersect(ol, ss[[j]])
    }
    return(length(ol))
}

loadPeak <- function(peak, verbose=FALSE) {
    if (is(peak, "GRanges")) {
        peak.gr <- peak
    } else if (file.exists(peak)) {
        if (verbose)
            cat(">> loading peak file...\t\t\t\t",
                format(Sys.time(), "%Y-%m-%d %X"), "\n")
        peak.gr <- readPeakFile(peak, as="GRanges")
    } else {
        stop("peak should be GRanges object or a peak file...")
    }
    return(peak.gr)
}

##' @importFrom TxDb.Hsapiens.UCSC.hg19.knownGene TxDb.Hsapiens.UCSC.hg19.knownGene
loadTxDb <- function(TxDb) {
    if ( is.null(TxDb) ) {
        warning(">> TxDb is not specified, use 'TxDb.Hsapiens.UCSC.hg19.knownGene' by default...")
        TxDb <- TxDb.Hsapiens.UCSC.hg19.knownGene
    }
    return(TxDb)
}

##' @importFrom AnnotationDbi get
##' @importFrom GenomicFeatures genes
##' @importFrom GenomicFeatures transcriptsBy
##' @importFrom yulab.utils get_cache_element
##' @importFrom yulab.utils update_cache_item
getGene <- function(TxDb, by="gene") {
    .ChIPseekerEnv(TxDb, item = ChIPseekerCache)
    # ChIPseekerEnv <- get("ChIPseekerEnv", envir=.GlobalEnv)

    by <- match.arg(by, c("gene", "transcript"))

    if (by == "gene") {

        features <- get_cache_element(item = ChIPseekerCache, elements = "Genes")

        if(is.null(features)){
            features <- suppressMessages(genes(TxDb))
            update_cache_item(item = ChIPseekerCache, list("Genes" = features))
        }

        # if ( exists("Genes", envir=ChIPseekerEnv, inherits=FALSE) ) {
        #     features <- get("Genes", envir=ChIPseekerEnv)
        # } else {
        #     features <- suppressMessages(genes(TxDb))
        #     assign("Genes", features, envir=ChIPseekerEnv)
        # }
    } else {

        features <- get_cache_element(item = ChIPseekerCache, elements = "Transcripts")

        if(is.null(features)){
            features <- transcriptsBy(TxDb)
            features <- unlist(features)
            update_cache_item(item = ChIPseekerCache, list("Transcripts" = features))
        }

        # if ( exists("Transcripts", envir=ChIPseekerEnv, inherits=FALSE) ) {
        #     features <- get("Transcripts", envir=ChIPseekerEnv)
        # } else {
        #     features <- transcriptsBy(TxDb)
        #     features <- unlist(features)
        #     assign("Transcripts", features, envir=ChIPseekerEnv)
        # }
    }

    return(features)
}


##' get filenames of sample files
##'
##'
##' @title getSampleFiles
##' @return list of file names
##' @export
##' @author G Yu
getSampleFiles <- function() {
    dir <- system.file("extdata", "GEO_sample_data", package="ChIPseeker")
    files <- list.files(dir)
    ## protein <- sub("GSM\\d+_", "", files)
    ## protein <- sub("_.+", "", protein)
    protein <- gsub(pattern='GSM\\d+_(\\w+_\\w+)_.*', replacement='\\1',files)
    protein <- sub("_Chip.+", "", protein)
    res <- paste(dir, files, sep="/")
    res <- as.list(res)
    names(res) <- protein
    return(res)
}
## @importFrom RCurl getURL
## getDirListing <- function (url) {
##     ## from GEOquery
##     print(url)
##     a <- getURL(url)
##     b <- textConnection(a)
##     d <- read.table(b, header = FALSE)
##     close(b)
##     return(d)
## }


is.dir <- function(dir) {
    if (file.exists(dir) == FALSE)
        return(FALSE)
    return(file.info(dir)$isdir)
}


parse_targetPeak_Param <- function(targetPeak) {
    if (length(targetPeak) == 1) {
        if (is.dir(targetPeak)) {
            files <- list.files(path=targetPeak)
            idx <- unlist(sapply(c("bed", "bedGraph", "Peak"), grep, x=files))
            idx <- sort(unique(idx))
            files <- files[idx]
            targetPeak <- sub("/$", "", targetPeak)
            res <- paste(targetPeak, files, sep="/")
        } else {
            if (!file.exists(targetPeak)) {
                stop("bed file is not exists...")
            } else {
                res <- targetPeak
            }
        }
    } else {
        if (is.dir(targetPeak[1])) {
            stop("targetPeak should be a vector of bed file names or a folder containing bed files...")
        } else {
            res <- targetPeak[file.exists(targetPeak)]
            if (length(res) == 0) {
                stop("targetPeak file not exists...")
            }
        }
    }
    return(res)
}


IDType <- function(TxDb) {
    ##
    ## IDType <- metadata(TxDb)[8,2]
    ##
    ## update: 2015-10-27
    ## now IDType change from metadata(TxDb)[8,2] to metadata(TxDb)[9,2]
    ## it may change in future too
    ##
    ## it's safe to extract via grep

    md <- metadata(TxDb)
    md[grep("Type of Gene ID", md[,1]), 2]
}

list_to_dataframe <- function(dataList) {
    if (is.null(names(dataList)))
        return(do.call('rbind', dataList))

    cn <- lapply(dataList, colnames) %>% unlist %>% unique
    cn <- c('.id', cn)
    dataList2 <- lapply(seq_along(dataList), function(i) {
        data = dataList[[i]]
        data$.id = names(dataList)[i]
        idx <- ! cn %in% colnames(data)
        if (sum(idx) > 0) {
            for (i in cn[idx]) {
                data[, i] <- NA
            }
        }
        return(data[,cn])
    })
    res <- do.call('rbind', dataList2)
    res$.id <- factor(res$.id, levels=rev(names(dataList)))
    return(res)
}

##' @importFrom GenomicRanges GRangesList
##' @export
GenomicRanges::GRangesList

## . function was from plyr package
##' capture name of variable
##'
##' @rdname dotFun
##' @export
##' @title .
##' @param ... expression
##' @param .env environment
##' @return expression
##' @examples
##' x <- 1
##' eval(.(x)[[1]])
. <- function (..., .env = parent.frame()) {
    structure(as.list(match.call()[-1]), env = .env, class = "quoted")
}


##' check upstream and downstream parameter
##' 
##' 
##' check_upstream_and_downstream
##'
##' @param upstream upstream
##' @param downstream downstream
##' @importFrom ggplot2 rel
check_upstream_and_downstream <- function(upstream, downstream){
    
    ## upstream and downstream should be the same type
    if(class(upstream) != class(downstream)){
        stop("the type of upstream and downstream should be the same...")
    }
    
    ## downstream and upstream parameter should be numeric or NULL
    if(!is.numeric(upstream) && !is.null(upstream)){
        stop("upstream and downstream parameter should be numeric or NULL...")
    }
    
    ## the value of rel object should be in (0,1)
    if(inherits(upstream, 'rel')){
        if(as.numeric(upstream) < 0 || as.numeric(upstream) >1 ){
            stop('the value of rel object should be in (0,1)...')
        }
    }
    
    ## check actual number
    if(is.numeric(upstream) && !inherits(upstream, 'rel')){
        if(upstream < 1 | downstream < 1){
            stop('if upstream or downstream is integer, the value of it should be greater than 1...')
        }
    }
}


##' @importFrom ggplot2 rel
##' 
##' @export
ggplot2::rel


##' make label for figures
##' @param by one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'
##' @param type one of "start_site", "end_site", "body"
make_label <- function(type, by){
    
    if(type == 'body'){
        if(by %in% c('gene', 'transcript', 'exon', 'intron')){
            label_SS <- paste0("T","SS")
            label_TS <- paste0("T","TS")
            label <- c(label_SS,label_TS)
        }else{
            label_SS <- paste0(by,"_SS")
            label_TS <- paste0(by,"_TS")
            label <- c(label_SS,label_TS)
        }
        
    }else if(type == "start_site"){
        if(by %in% c('gene', 'transcript', 'exon', 'intron')){
            label <- paste0("T","SS")
        }else{
            label <- paste0(by,"_SS") 
        }
        
    }else{
        if(by %in% c('gene', 'transcript', 'exon', 'intron')){
            label <- paste0("T","TS")
        }else{
            label <- paste0(by,"_TS")
        }
    }
    
    return(label)
}

##' @importFrom yulab.utils get_cache_item
get_env_genome <- function(){

    current_env <- get_cache_item(item = ChIPseekerCache)

    env_txdb <- current_env$TXDB
    env_txdb_meta <- S4Vectors::metadata(env_txdb)
    env_txdb_version <- env_txdb_meta[grep("Genome",env_txdb_meta[,1]),2]

    return(env_txdb_version)
}

================================================
FILE: R/vennpie.R
================================================
##' @importFrom plotrix floating.pie
vennpie.csAnno <- function(x, 
                           r = 0.2, 
                           cex = 1.2,
                           col = NULL) {
    detailGenomicAnnotation <- x@detailGenomicAnnotation

    distance <- as.data.frame(x)$distanceToTSS
    total <- nrow(detailGenomicAnnotation)
    Genic <- sum(detailGenomicAnnotation$genic)

    Intergenic <- total-Genic
    Distal_Intergenic <- sum(detailGenomicAnnotation$distal_intergenic)
    Intron <- sum(detailGenomicAnnotation$Intron)
    Exon <- sum(detailGenomicAnnotation$Exon)
    Upstream <- sum(detailGenomicAnnotation$Promoter & distance < 0)

    ## fiveUTR <- sum(detailGenomicAnnotation$fiveUTR)
    ## threeUTR <- sum(detailGenomicAnnotation$threeUTR)
    Downstream <- sum(detailGenomicAnnotation$downstream)

    ## fiveUTR='#e5f5e0',threeUTR='#a1d99b',
    cols <- c(NO='white', Genic='#3182bd', Intergenic='#fec44f',
              Intron='#fc9272', Exon='#9ecae1', Upstream='#ffeda0',
              Downstream='#fee0d2', Distal_Intergenic='#d95f0e')
    
    cols[names(col)] <- col


    ##par(mai = c(0,0,0,0))
    ##layout(matrix(c(1,2), ncol=2), widths=c(0.7,0.3))
    pie(1, radius=r, init.angle=90, col="white", border=NA, labels='')

    ## https://www.biostars.org/p/326456/
    ## if count is 0, floating pie will ignore it
    ## and the color will mismatch with the category
    ## fixed by adding pseudo-count +1
    floating.pie(0,0, c(Exon,
                        Genic-Exon,
                        Distal_Intergenic,
                        Downstream,
                        Intergenic-Distal_Intergenic-Downstream
                        ) + 1,
                 radius=4*r,
                 startpos=pi/2,
                 col=cols[c("Exon", "NO", "NO", "Downstream", "NO")],
                 border=NA)

    floating.pie(0,0, c(Genic-Intron,
                        Intron,
                        Distal_Intergenic,
                        Intergenic-Upstream-Distal_Intergenic,
                        Upstream) +1 ,
                 radius=3*r,
                 startpos=pi/2,
                 col=cols[c("NO", "Intron", "Distal_Intergenic",
                     "NO", "Upstream")],
                 border=NA)

    floating.pie(0, 0, c(Genic, Intergenic) +1,
                 radius=2*r,
                 startpos=pi/2,
                 col=cols[c("Genic", "Intergenic")],
                 border=NA)
    ##plot.new()
    ##legend(center), legend=names(cols)[-1], fill=cols[-1], bty="n")
    legend(3*r, 3*r, legend=sub("_", " ", names(cols)[-1]),
           fill=cols[-1], bty="n", cex=cex)
}


================================================
FILE: R/vennplot.R
================================================
##' plot the overlap of a list of object
##'
##'
##' There are two ways to plot, which users can specify through `by`.
##' 
##' The first way is to use `gplots` packages, by setting `by = gplots`. This method
##' is default method. The venn plot produced through this way has no color.
##' 
##' The second way is to use `ggVennDiagram` packages, by setting `by = ggVennDiagram`. 
##' The venn plot produced through this way has colors which can be defined by users using
##' ggplot2 grammar e.g.(scale_fill_distiller()). And users can specify any details, like digital number,
##' text size and showing percentage or not, by inputting `...` extra parameters.
##' 
##' @title vennplot
##' @param Sets a list of object, can be vector or GRanges object
##' @param by one of gplots, ggVennDiagram or Vennerable
##' @param ... extra parameters using ggVennDiagram. Details see \link[ggVennDiagram]{ggVennDiagram}
##' @return venn plot that summarize the overlap of peaks
##' from different experiments or gene annotation from
##' different peak files.
##' @importFrom gplots plot.venn
## @importFrom ggVennDiagram ggVennDiagram
## @importFrom Vennerable Venn
## @importFrom grid grid.newpage
##' @examples
##' ## example not run
##' ## require(TxDb.Hsapiens.UCSC.hg19.knownGene)
##' ## txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
##' ## peakfiles <- getSampleFiles()
##' ## peakAnnoList <- lapply(peakfiles, annotatePeak)
##' ## names(peakAnnoList) <- names(peakfiles)
##' ## genes= lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
##' ## vennplot(genes)
##' @export
##' @author G Yu
vennplot <- function(Sets, by="gplots",...) {
    if (is.null(names(Sets))) {
        nn <- paste0("Set", seq_along(Sets))
        warning("input is not a named list, set the name automatically to ", paste(nn, collapse = " "))
        names(Sets) <- nn
        ## stop("input object should be a named list...")
    }

    overlapDF <- overlap(Sets)
    if (by == "Vennerable") {
        ## setRepositories(ind=7)
        ## install.package("Vennerable")
        ## OR
        ## install.packages("Vennerable", repos="http://R-Forge.R-project.org")
        pkg <- "Vennerable"
        require(pkg, character.only=TRUE)
        Venn <- eval(parse(text="Venn"))
        v <- Venn(SetNames=names(Sets), Weight=overlapDF$Weight)
        plotVenn <- eval(parse(text="Vennerable:::plotVenn"))
        plotVenn(v)
    } else if (by == "gplots") {
        n <- ncol(overlapDF)
        colnames(overlapDF)[n] <- "num"
        overlapDF <- overlapDF[, c(n, 1:(n-1))]
        rownames(overlapDF)=apply(overlapDF, 1, function(i) paste(i[-1], sep="", collapse=""))
        vennCount <- as.matrix(overlapDF)
        class(vennCount) <- "venn"
        plot.venn(vennCount)
    } else if(by == "ggVennDiagram"){
	    ggVennDiagram::ggVennDiagram(Sets, ...)
    } else {
        stop("not supported...")
    }
}

##' vennplot for peak files
##'
##'
##' @title vennplot.peakfile
##' @param files peak files
##' @param labels labels for peak files
##' @return figure
##' @export
##' @author G Yu
vennplot.peakfile <- function(files, labels=NULL) {
    peak.Sets <- lapply(files, readPeakFile)
    if (is.null(labels)) {
        ## remove .xls or .bed of the file names as labels
        labels <- sub("\\.\\w+$", "", files)
    }
    names(peak.Sets) <- labels
    vennplot(peak.Sets)
}


================================================
FILE: R/zzz.R
================================================
##' @importFrom yulab.utils yulab_msg
.onAttach <- function(libname, pkgname) {
  packageStartupMessage(yulab_msg(pkgname))

  options(ChIPseeker.downstreamDistance = 300)
  options(ChIPseeker.ignore_1st_exon = FALSE)
  options(ChIPseeker.ignore_1st_intron = FALSE)
  options(ChIPseeker.ignore_downstream = FALSE)
  options(ChIPseeker.ignore_promoter_subcategory= FALSE)
  
  options(aplot_align = 'y')

}


================================================
FILE: README.Rmd
================================================
---
output:
  md_document:
    variant: gfm
html_preview: false
---


```{r echo=FALSE, results="hide", message=FALSE}
#library("txtplot")
library("badger")
library("ypages")
library("yulab.utils")
```


# ChIPseeker: ChIP peak Annotation, Comparison, and Visualization

<img src="https://raw.githubusercontent.com/Bioconductor/BiocStickers/master/ChIPseeker/ChIPseeker.png" height="200" align="right" />


`r badge_bioc_release("ChIPseeker", "green")`
`r badge_devel("guangchuangyu/ChIPseeker", "green")`
[![Bioc](http://www.bioconductor.org/shields/years-in-bioc/ChIPseeker.svg)](https://www.bioconductor.org/packages/devel/bioc/html/ChIPseeker.html#since)
[![Say Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/GuangchuangYu)


[![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
[![codecov](https://codecov.io/gh/GuangchuangYu/ChIPseeker/branch/master/graph/badge.svg)](https://codecov.io/gh/GuangchuangYu/ChIPseeker/)
[![Last-changedate](https://img.shields.io/badge/last%20change-`r gsub('-', '--', Sys.Date())`-green.svg)](https://github.com/GuangchuangYu/ChIPseeker/commits/master)
[![GitHub forks](https://img.shields.io/github/forks/GuangchuangYu/ChIPseeker.svg)](https://github.com/GuangchuangYu/ChIPseeker/network)
[![GitHub stars](https://img.shields.io/github/stars/GuangchuangYu/ChIPseeker.svg)](https://github.com/GuangchuangYu/ChIPseeker/stargazers)


[![platform](http://www.bioconductor.org/shields/availability/devel/ChIPseeker.svg)](https://www.bioconductor.org/packages/devel/bioc/html/ChIPseeker.html#archives)
[![Build Status](http://www.bioconductor.org/shields/build/devel/bioc/ChIPseeker.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/ChIPseeker/)
[![Linux/Mac Travis Build Status](https://img.shields.io/travis/GuangchuangYu/ChIPseeker/master.svg?label=Mac%20OSX%20%26%20Linux)](https://travis-ci.org/GuangchuangYu/ChIPseeker)
[![AppVeyor Build Status](https://img.shields.io/appveyor/ci/Guangchuangyu/ChIPseeker/master.svg?label=Windows)](https://ci.appveyor.com/project/GuangchuangYu/ChIPseeker)


```{r comment="", echo=FALSE, results='asis'}
cat(packageDescription('ChIPseeker')$Description)
```


## :writing_hand: Authors

Guangchuang YU 

School of Basic Medical Sciences, Southern Medical University

<https://yulab-smu.top>

If you use `r Biocpkg('ChIPseeker')` in published research, please cite:


+ Q Wang<sup>#</sup>, M Li<sup>#</sup>, T Wu, L Zhan, L Li, M Chen, W Xie, Z Xie, E Hu, S Xu, __G Yu__<sup>\*</sup>. [Exploring epigenomic datasets by ChIPseeker](https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585). __*Current Protocols*__, 2022, 2(10): e585. 
+ __G Yu__<sup>\*</sup>, LG Wang, QY He<sup>\*</sup>. [ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparision and visualization](http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btv145). __*Bioinformatics*__. 2015, 31(14):2382-2383. 


## :arrow_double_down: Installation

Get the released version from Bioconductor:

```r
## try http:// if https:// URLs are not supported
if (!requireNamespace("BiocManager", quietly=TRUE))
    install.packages("BiocManager")
## BiocManager::install("BiocUpgrade") ## you may need this
BiocManager::install("ChIPseeker")
```

Or the development version from github:

```r
## install.packages("devtools")
devtools::install_github("YuLab-SMU/ChIPseeker")
```


## Contributing

We welcome any contributions! By participating in this project you agree to
abide by the terms outlined in the [Contributor Code of Conduct](CONDUCT.md).


================================================
FILE: README.md
================================================
# ChIPseeker: ChIP peak Annotation, Comparison, and Visualization

<img src="https://raw.githubusercontent.com/Bioconductor/BiocStickers/master/ChIPseeker/ChIPseeker.png" height="200" align="right" />

[![](https://img.shields.io/badge/release%20version-1.32.1-green.svg)](https://www.bioconductor.org/packages/ChIPseeker)
[![](https://img.shields.io/badge/devel%20version-1.33.4-green.svg)](https://github.com/guangchuangyu/ChIPseeker)
[![Bioc](http://www.bioconductor.org/shields/years-in-bioc/ChIPseeker.svg)](https://www.bioconductor.org/packages/devel/bioc/html/ChIPseeker.html#since)
[![Say
Thanks!](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg)](https://saythanks.io/to/GuangchuangYu)

[![Project Status: Active - The project has reached a stable, usable
state and is being actively
developed.](http://www.repostatus.org/badges/latest/active.svg)](http://www.repostatus.org/#active)
[![codecov](https://codecov.io/gh/GuangchuangYu/ChIPseeker/branch/master/graph/badge.svg)](https://codecov.io/gh/GuangchuangYu/ChIPseeker/)
[![Last-changedate](https://img.shields.io/badge/last%20change-2022--10--29-green.svg)](https://github.com/GuangchuangYu/ChIPseeker/commits/master)
[![GitHub
forks](https://img.shields.io/github/forks/GuangchuangYu/ChIPseeker.svg)](https://github.com/GuangchuangYu/ChIPseeker/network)
[![GitHub
stars](https://img.shields.io/github/stars/GuangchuangYu/ChIPseeker.svg)](https://github.com/GuangchuangYu/ChIPseeker/stargazers)

[![platform](http://www.bioconductor.org/shields/availability/devel/ChIPseeker.svg)](https://www.bioconductor.org/packages/devel/bioc/html/ChIPseeker.html#archives)
[![Build
Status](http://www.bioconductor.org/shields/build/devel/bioc/ChIPseeker.svg)](https://bioconductor.org/checkResults/devel/bioc-LATEST/ChIPseeker/)
[![Linux/Mac Travis Build
Status](https://img.shields.io/travis/GuangchuangYu/ChIPseeker/master.svg?label=Mac%20OSX%20%26%20Linux)](https://travis-ci.org/GuangchuangYu/ChIPseeker)
[![AppVeyor Build
Status](https://img.shields.io/appveyor/ci/Guangchuangyu/ChIPseeker/master.svg?label=Windows)](https://ci.appveyor.com/project/GuangchuangYu/ChIPseeker)

This package implements functions to retrieve the nearest genes around
the peak, annotate genomic region of the peak, statstical methods for
estimate the significance of overlap among ChIP peak data sets, and
incorporate GEO database for user to compare the own dataset with those
deposited in database. The comparison can be used to infer cooperative
regulation and thus can be used to generate hypotheses. Several
visualization functions are implemented to summarize the coverage of the
peak experiment, average profile and heatmap of peaks binding to TSS
regions, genomic annotation, distance to TSS, and overlap of peaks or
genes.

## :writing_hand: Authors

Guangchuang YU

School of Basic Medical Sciences, Southern Medical University

<https://yulab-smu.top>

If you use [ChIPseeker](http://bioconductor.org/packages/ChIPseeker) in
published research, please cite:

-   Q Wang<sup>\#</sup>, M Li<sup>\#</sup>, T Wu, L Zhan, L Li, M Chen,
    W Xie, Z Xie, E Hu, S Xu, **G Yu**<sup>\*</sup>. [Exploring
    epigenomic datasets by
    ChIPseeker](https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585).
    ***Current Protocols***, 2022, 2(10): e585.
-   **G Yu**<sup>\*</sup>, LG Wang, QY He<sup>\*</sup>. [ChIPseeker: an
    R/Bioconductor package for ChIP peak annotation, comparision and
    visualization](http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btv145).
    ***Bioinformatics***. 2015, 31(14):2382-2383.

## :arrow_double_down: Installation

Get the released version from Bioconductor:

``` r
## try http:// if https:// URLs are not supported
if (!requireNamespace("BiocManager", quietly=TRUE))
    install.packages("BiocManager")
## BiocManager::install("BiocUpgrade") ## you may need this
BiocManager::install("ChIPseeker")
```

Or the development version from github:

``` r
## install.packages("devtools")
devtools::install_github("YuLab-SMU/ChIPseeker")
```

## Contributing

We welcome any contributions! By participating in this project you agree
to abide by the terms outlined in the [Contributor Code of
Conduct](CONDUCT.md).


================================================
FILE: appveyor.yml
================================================
environment:
  matrix:
    - R_VERSION: devel
      R_ARCH: x64
      USE_RTOOLS: true
  _R_CHECK_FORCE_SUGGESTS_: false

# DO NOT CHANGE the "init" and "install" sections below

# Download script file from GitHub
init:
  ps: |
        $ErrorActionPreference = "Stop"
        Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
        Import-Module '..\appveyor-tool.ps1'

install:
  ps: Bootstrap

# Adapt as necessary starting from here

build_script:
  - travis-tool.sh install_bioc BiocStyle IRanges graphite ReactomePA AnnotationDbi DO.db DOSE org.Hs.eg.db TxDb.Hsapiens.UCSC.hg19.knownGene clusterProfiler
  - travis-tool.sh install_deps

test_script:
  - travis-tool.sh run_tests

on_failure:
  - 7z a failure.zip *.Rcheck\*
  - appveyor PushArtifact failure.zip

artifacts:
  - path: '*.Rcheck\**\*.log'
    name: Logs

  - path: '*.Rcheck\**\*.out'
    name: Logs

  - path: '*.Rcheck\**\*.fail'
    name: Logs

  - path: '*.Rcheck\**\*.Rout'
    name: Logs

  - path: '\*_*.tar.gz'
    name: Linux Package

  - path: '\*_*.zip'
    name: Windows Package

notifications:
- provider: Email
  to:
  - gcyu@connect.hku.hk
  on_build_success: false


================================================
FILE: inst/CITATION
================================================
citHeader("Please cite Q. Wang (2022) or G. Yu (2015) for using ChIPseeker. In addition, please cite clusterProfiler/DOSE/ReactomePA when using functional enrichment analyses.")


citEntry(entry  ="ARTICLE",
         title  = "Exploring epigenomic datasets by ChIPseeker",
         author = c(
             person("Qianwen", "Wang"),
             person("Ming", "Li"),
             person("Tianzhi", "Wu"),
             person("Li", "Zhan"),
             person("Lin", "Li"),
             person("Meijun", "Chen"),
             person("Wenqin", "Xie"),
             person("Zijing", "Xie"),
             person("Erqiang", "Hu"),
             person("Shuangbin", "Xu"),
             person("Guangchuang", "Yu", email = "guangchuangyu@gmail.com")
             ),
         journal = "Current Protocols",
         year    = "2022",
         volume  = "2",
         number  = "10",
         pages   = "e585",
         PMID    = "36286622",
         doi     = "10.1002/cpz1.585",
         url     = "https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585",
         textVersion = paste("Qianwen Wang, Ming Li, Tianzhi Wu, Li Zhan, Lin Li, Meijun Chen, Wenqin Xie, Zijing Xie, Erqiang Hu, Shuangbin Xu, Guangchuang Yu.",
         "Exploring epigenomic datasets by ChIPseeker.",
         "Current Protocols 2022, 2(10): e585")
         )


citEntry(entry  ="ARTICLE",
         title  = "ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization",
         author = personList(
             as.person("Guangchuang Yu"),
             as.person("Li-Gen Wang"),
             as.person("Qing-Yu He")
             ),
         journal = "Bioinformatics",
         year    = "2015",
         volume  = "31",
         number  = "14",
         pages   = "2382-2383",
         PMID    = "25765347",
         doi     = "10.1093/bioinformatics/btv145",
         textVersion = paste("Guangchuang Yu, LiGen Wang, and QingYu He.",
             "ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization.",
             "Bioinformatics 2015, 31(14):23822383")
         )


================================================
FILE: inst/extdata/sample_peaks.txt
================================================
chr	start	end	length	summit	tags	X.10.log10.pvalue.	fold_enrichment	FDR...
chr10	105137980	105138593	614	174	7	52.8	15.32	NA
chr10	42644416	42645383	968	669	27	77.15	9.13	0.79
chr6	162188189	162188742	554	174	8	59.88	17.82	NA
chr4	2307246	2307622	377	188	9	85.25	26.62	1.03
chr13	51791808	51792240	433	267	8	65.32	15.08	0.74
chr19	58731678	58732653	976	472	15	55	8.04	NA
chr16	47717334	47717718	385	193	8	75.05	18.38	NA
chr5	133997712	133998281	570	216	11	59.14	10.48	NA
chr17	21730549	21731638	1090	685	15	64.79	9.19	NA
chr1	108313793	108314347	555	189	7	58.81	18.16	0.78
chr7	66713702	66714183	482	285	7	50.52	14.54	0.97
chr6	102911566	102912390	825	476	17	53.89	6.13	NA
chr7	99693575	99694953	1379	285	19	54.14	6.93	0.92
chr14	21930630	21931094	465	289	7	57.91	11.83	0.78
chr7	140460467	140461015	549	221	10	51.62	9.66	0.98
chr1	204827779	204828392	614	236	10	54.94	11.73	0.85
chr22	31055016	31055701	686	486	9	68.84	24.51	NA
chr12	121813018	121813753	736	507	12	63.05	13.41	0.77
chr9	34128226	34128829	604	266	9	62.93	15.08	0.77
chr8	51449160	51449457	298	150	7	63.71	9.59	NA
chr7	152266697	152267502	806	218	13	66.03	10.72	NA
chr7	5200045	5200648	604	217	10	51.25	9.72	NA
chr20	32330916	32331273	358	179	7	51.33	14.66	0.98
chr13	53041893	53042341	449	256	7	65.63	21.79	0.74
chr4	188271660	188272266	607	340	10	51.25	11.16	NA
chr11	57336216	57336780	565	167	8	55.92	15.08	0.85
chr7	147992515	147993057	543	199	8	70.42	18.16	0.7
chr9	126707435	126708020	586	198	10	89.63	28.73	1.04
chr8	146099083	146099655	573	205	8	65.26	18.38	NA
chr3	18887631	18888098	468	210	10	80.46	19.36	NA
chr15	75296138	75296986	849	382	14	62.33	7.8	NA
chr7	76678195	76678851	657	217	12	68.53	10.06	0.62
chr10	72271427	72271869	443	225	8	50.24	14.25	NA
chr3	179584381	179584987	607	177	11	50.71	9.19	NA
chr4	163405047	163405425	379	189	7	64.2	22	0.77
chr7	72657437	72658247	811	241	19	81.36	8.15	0.91
chr17	47813522	47813967	446	184	7	50.24	12.3	NA
chr19	17019713	17020403	691	501	10	51.25	11.12	NA
chr2	112229859	112230262	404	203	8	56.88	14.85	NA
chr10	69646478	69646970	493	313	7	56.06	17.74	0.84
chr22	21699160	21699710	551	365	7	58.69	17.96	0.79
chrX	119427920	119428427	508	197	7	50.24	14.01	NA
chr11	12358590	12358928	339	169	7	53.91	11.93	0.91
chr10	23293727	23294379	653	446	8	50.61	9.19	NA
chr16	19008911	19009452	542	178	8	53.74	10.45	NA
chr11	71999828	72000241	414	242	8	79.93	25.14	0.83
chr17	38168330	38169125	796	363	11	57.63	15.91	NA
chr7	56124436	56124960	525	314	11	88.99	20.68	NA
chr22	26998729	26999306	578	216	10	72.04	17.51	NA
chr11	108369506	108370050	545	349	7	50.52	13.7	0.97
chr1	247012833	247013232	400	200	9	69.75	18.1	0.61
chr4	57030170	57030662	493	181	7	50.52	14.87	0.97
chr2	85878694	85879282	589	288	7	54.39	16.76	0.86
chr8	57064347	57064991	645	474	10	80.87	22.7	0.83
chr18	60283518	60284080	563	167	10	51.62	9.91	0.98
chr14	19009999	19010720	722	547	118	90.28	6.43	1
chr4	123592232	123592745	514	168	8	59.23	15.08	0.79
chr17	6492901	6493361	461	211	7	50.24	10.59	NA
chr1	201432452	201432945	494	338	8	60.21	12.42	0.74
chr5	25353549	25354499	951	468	23	91.86	10.49	NA
chr21	41376711	41377162	452	162	8	60.21	13.63	0.74
chr10	135193549	135194110	562	159	8	51.28	8.74	0.98
chr2	165659676	165660060	385	192	10	106.7	36.32	1.94
chr1	18733754	18734314	561	405	7	50.52	14.1	0.97
chr3	181501405	181501917	513	333	11	91.78	18.05	1.01
chr15	44748091	44748538	448	246	9	74.71	17.6	0.72
chr17	64850164	64850608	445	267	8	50.18	11.73	0.97
chr3	50293565	50294504	940	683	16	95.98	16.76	1.24
chr1	6780373	6780997	625	429	11	70.72	16.09	0.68
chr11	60722310	60723055	746	403	14	55.49	6.89	NA
chr16	47690030	47690397	368	185	7	84.74	28.08	NA
chr3	42881399	42881811	413	207	8	64.41	18.38	NA
chr17	73738304	73739104	801	267	11	54.12	13.13	NA
chr7	72886499	72887134	636	426	14	55.22	8.77	NA
chr19	479390	480356	967	608	15	62.85	9.19	NA
chr1	223554052	223554581	530	360	7	50.24	12.18	NA
chr19	55642648	55643120	473	316	7	55.56	11.17	0.86
chr2	160150507	160150819	313	156	10	55.46	7.87	0.85
chr6	108379579	108380048	470	218	12	135.66	40.11	NA
chr7	66659861	66660350	490	334	7	60.23	20.11	0.79
chr21	45158341	45159044	704	246	12	57.42	9.19	NA
chr17	12142123	12142576	454	256	10	95.08	28.28	1.31
chr12	111535820	111536662	843	508	13	55.22	10.33	NA
chr4	74053898	74054391	494	220	7	50.24	13.62	NA
chr17	41107403	41107930	528	362	12	82.37	13.13	NA
chr5	37022195	37022565	371	186	9	90.79	22.98	NA
chr14	89037667	89038455	789	465	11	91.33	25.38	NA
chr19	55988686	55989638	953	457	17	103.53	17.24	NA
chrX	73187260	73187705	446	145	7	65.84	18.16	0.71
chr16	68160728	68161185	458	216	7	70.84	24.07	NA
chr1	72383088	72383669	582	167	8	54.89	12.57	0.85
chr3	65290871	65291616	746	399	12	101.35	21.79	1.55
chr17	4396192	4396640	449	265	7	57.21	16.76	0.81
chr7	129241408	129242063	656	201	10	66.64	15.32	NA
chr8	105375364	105376057	694	485	11	92.72	22.98	NA
chr8	48921895	48922221	327	164	7	60.45	11.09	NA
chr22	47339473	47339954	482	276	8	59.45	15.32	NA
chr7	5545814	5546438	625	216	11	52.8	12.42	NA
chr13	77761699	77762182	484	158	7	63.2	18.16	0.78
chr9	6582021	6582849	829	425	15	106.11	17.6	1.87
chr5	1430193	1430994	802	506	13	51.97	8.04	NA
chr11	64532961	64533474	514	199	7	50.52	12.92	0.95
chr21	42055751	42056309	559	218	7	50.24	15.42	NA
chr12	107211163	107211704	542	359	10	80.87	19.07	0.83
chr3	161238645	161239047	403	201	7	60.41	19.55	0.8
chr17	3680122	3680663	542	281	14	67.79	10.11	NA
chr17	20455343	20455801	459	191	10	51.62	9.23	0.97
chr2	196378101	196378811	711	519	11	59.14	14.71	NA
chr3	194245019	194245449	431	268	7	55.17	12.57	0.84
chr16	14626541	14627285	745	509	11	54.27	10.72	NA
chr2	236773772	236774222	451	251	8	59.88	14.51	NA
chr11	67551724	67552171	448	168	7	50.24	12.35	NA
chr10	54421011	54421536	526	249	7	60.53	14.53	0.8
chr11	121028043	121028655	613	198	10	51.25	11.27	NA
chr10	27600469	27601208	740	354	12	82.79	17.6	0.92
chr12	83275694	83276252	559	296	8	69.37	21.79	0.65
chr14	63851416	63851824	409	209	9	70.34	16.45	0.64
chr4	185478067	185478514	448	299	7	58.8	16.22	NA
chr14	51818623	51819136	514	341	7	50.52	12.92	0.95
chr3	179394999	179395365	367	183	7	68.9	25.42	0.61
chr6	90496197	90496532	336	168	7	53.32	11.73	0.94
chr3	48664669	48665218	550	301	8	50.25	11.49	NA
chr16	17028641	17029305	665	407	12	107.4	29.05	2.05
chr14	101874985	101876019	1035	571	13	74.1	13.79	NA
chr12	42519772	42520228	457	260	7	53.33	15.08	0.96
chr9	135276277	135277082	806	298	12	73.73	16	0.72
chr14	104160631	104161383	753	182	11	60.5	11.6	0.8
chr3	85070344	85071044	701	413	8	59.88	19.33	NA
chr11	107900936	107901419	484	287	7	51.54	12.57	0.97
chr12	130291640	130292042	403	201	7	50.3	14.18	0.97
chr21	41159981	41160669	689	191	9	59.49	18.79	100
chr6	118954241	118954912	672	336	10	80.87	23.65	0.83
chr10	70228566	70229168	603	226	10	72.72	17.6	0.69
chr22	46914023	46914630	608	424	9	51.27	11.17	0.98
chr14	61232829	61233495	667	481	10	80.46	21.46	NA
chr4	74134806	74135475	670	418	9	58.57	13.79	NA
chr10	7869332	7869993	662	176	10	78.36	15.71	0.76
chr5	120189408	120189950	543	325	8	56.98	16.09	NA
chr1	11542794	11543378	585	312	8	59.88	16.13	NA
chr1	9413700	9414272	573	159	8	55.43	12.57	0.85
chr12	131218246	131219043	798	602	12	58.77	10.72	NA
chr1	29032216	29033010	795	333	11	59.42	14.71	NA
chr2	187827920	187828577	658	291	11	78.27	20.68	NA
chr11	28235164	28235576	413	214	10	71.56	15.08	0.64
chr4	173579304	173579706	403	201	9	78.66	22.62	0.77
chr12	80194469	80194868	400	200	8	81.06	29.05	0.84
chr11	26134912	26135581	670	472	12	103.04	23.58	1.57
chr9	88771969	88772469	501	371	8	50.17	13.79	NA
chr8	146115421	146115783	363	182	7	65.63	22.99	NA
chr6	140093330	140093770	441	169	7	54.45	15.08	0.86


================================================
FILE: inst/test-plot/test-plotPeakProf.R
================================================
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)

context("test plotPeakProf() for a list of windows")

peak <- getSampleFiles()[[4]]
peak_list <- getSampleFiles()[4:5]
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene

## self-made enhancer region in the form of granges object
enhancer <- transcripts(txdb)[1:5000,]

## self-made non-enhancer region in the form of granges object
non_enhancer <- unlist(fiveUTRsByTranscript(txdb))[1:5000]

gr <- list(enhancer,non_enhancer)

test_that("input two self-made granges object",{
  
  p <- plotPeakProf(peak = peak,
                    conf = 0.95,
                    by = c("enhancer","non-enhancer"),
                    windows_name = c("enhancer","non-enhancer"),
                    weightCol = "V5",
                    type = "start_site",
                    upstream = 1000,
                    downstream = 1000,
                    TxDb = list(enhancer,non_enhancer))
  
  expect_is(p,"gg")
  
})

test_that("input a list of peaks",{
  
  p <- plotPeakProf(peak = peak_list,
                    TxDb = list(enhancer,non_enhancer),
                    conf = 0.95,
                    by = c("enhancer","non-enhancer"),
                    windows_name = c("enhancer","non-enhancer"),
                    weightCol = "V5",
                    type = "start_site",
                    upstream = 1000,
                    downstream = 1000)
  
  expect_is(p,"gg")
})


test_that("input gr and txdb input",{
  
  p <- plotPeakProf(peak = peak,
                    TxDb = list(enhancer,txdb),
                    conf = 0.95,
                    by = c("enhancer","gene"),
                    windows_name = c("enhancer","gene"),
                    weightCol = "V5",
                    type = "start_site",
                    upstream = 1000,
                    downstream = 1000)
  
  expect_is(p,"gg")
})


test_that("check body region",{
  
  p <- plotPeakProf(peak = peak,
                    TxDb = list(enhancer,txdb),
                    conf = 0.95,
                    by = c("enhancer","gene"),
                    windows_name = c("enhancer","gene"),
                    weightCol = "V5",
                    type = "body",
                    upstream = 1000,
                    downstream = 1000,
                    nbin = 800)
  
  expect_is(p,"gg")
  
  p <- plotPeakProf(peak = peak,
                    TxDb = list(enhancer,txdb),
                    conf = 0.95,
                    by = c("enhancer","gene"),
                    windows_name = c("enhancer","gene"),
                    weightCol = "V5",
                    type = "body",
                    nbin = 800)
  
  expect_is(p,"gg")
  
  p <- plotPeakProf(peak = peak,
                    TxDb = list(enhancer,txdb),
                    conf = 0.95,
                    by = c("enhancer","gene"),
                    windows_name = c("enhancer","gene"),
                    weightCol = "V5",
                    type = "body",
                    upstream = rel(0.2),
                    downstream = rel(0.2),
                    nbin = 800)
  
  expect_is(p,"gg")
  
  p <- plotPeakProf(peak = peak_list,
                    TxDb = list(enhancer,txdb),
                    conf = 0.95,
                    by = c("enhancer","gene"),
                    windows_name = c("enhancer","gene"),
                    weightCol = "V5",
                    type = "body",
                    upstream = rel(0.2),
                    downstream = rel(0.2),
                    nbin = 800)
  
  expect_is(p,"gg")
})

================================================
FILE: inst/test-plot/test-plotTagMatrix.R
================================================
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)

context("test plotTagMatrix() and related functions")

test_that("test plotPeakProf2 function use txdb",{
  
  peak <- getSampleFiles()[[4]]
  peak_list <- getSampleFiles()[4:5]
  
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  
  # test single peak file
  p1_1 <- plotPeakProf(peak = peak,
                       upstream = 1000,
                       downstream = 1000,
                       by = "gene",
                       type = "start_site",
                       TxDb = txdb,
                       nbin = 800)
  
  expect_is(p1_1, "gg")
  
  # test a list of peak files
  p1_2 <- plotPeakProf(peak = peak_list,
                       upstream = 1000,
                       downstream = 1000,
                       by = "gene",
                       type = "start_site",
                       TxDb = txdb,
                       nbin = 800)
  
  expect_is(p1_2, "gg")
  
  # test body region
  # without extension
  p2_1 <- plotPeakProf(peak = peak_list,
                       by = "gene",
                       type = "body",
                       TxDb = txdb,
                       nbin = 800)
  
  expect_is(p2_1, "gg")
  
  # extend with rel object
  p2_2 <- plotPeakProf(peak = peak_list,
                       by = "gene",
                       type = "body",
                       TxDb = txdb,
                       upstream = rel(0.2),
                       downstream = rel(0.2),
                       nbin = 800)
  
  expect_is(p2_2, "gg")
  
  # extend with actual number
  p2_3 <- plotPeakProf(peak = peak_list,
                       by = "gene",
                       type = "body",
                       TxDb = txdb,
                       upstream = 1000,
                       downstream = 1000,
                       nbin = 800)
  
  expect_is(p2_3, "gg")
  
})


test_that("test plotPeakProf2 function use self-made granges",{
  
  peak <- getSampleFiles()[[4]]
  peak_list <- getSampleFiles()[4:5]
  
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  
  # we consider transcript region as enhancer region
  # and make self-made granges object
  # they can be the same in the form of granges object
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  enhancer <- transcripts(txdb)[1:5000,]
  
  # test single peak file
  p1_1 <- plotPeakProf(peak = peak,
                       upstream = 1000,
                       downstream = 1000,
                       by = "gene",
                       type = "start_site",
                       TxDb = enhancer,
                       nbin = 800)
  
  expect_is(p1_1, "gg")
  
  # test a list of peak files
  p1_2 <- plotPeakProf(peak = peak_list,
                       upstream = 1000,
                       downstream = 1000,
                       by = "gene",
                       type = "start_site",
                       TxDb = enhancer,
                       nbin = 800)
  
  expect_is(p1_2, "gg")
  
  # test body region
  # without extension
  p2_1 <- plotPeakProf(peak = peak_list,
                       by = "gene",
                       type = "body",
                       TxDb = enhancer,
                       nbin = 800)
  
  expect_is(p2_1, "gg")
  
  # extend with rel object
  p2_2 <- plotPeakProf(peak = peak_list,
                       by = "gene",
                       type = "body",
                       TxDb = enhancer,
                       upstream = rel(0.2),
                       downstream = rel(0.2),
                       nbin = 800)
  
  expect_is(p2_2, "gg")
  
  # extend with actual number
  p2_3 <- plotPeakProf(peak = peak_list,
                       by = "gene",
                       type = "body",
                       TxDb = enhancer,
                       upstream = 1000,
                       downstream = 1000,
                       nbin = 800)
  
  expect_is(p2_3, "gg")
  
})

================================================
FILE: man/ChIPseeker-package.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ChIPseeker-package.R
\docType{package}
\name{ChIPseeker-package}
\alias{ChIPseeker}
\alias{ChIPseeker-package}
\title{ChIPseeker: ChIPseeker for ChIP peak Annotation, Comparison, and Visualization}
\description{
This package implements functions to retrieve the nearest genes around the peak, annotate genomic region of the peak, statstical methods for estimate the significance of overlap among ChIP peak data sets, and incorporate GEO database for user to compare the own dataset with those deposited in database. The comparison can be used to infer cooperative regulation and thus can be used to generate hypotheses. Several visualization functions are implemented to summarize the coverage of the peak experiment, average profile and heatmap of peaks binding to TSS regions, genomic annotation, distance to TSS, and overlap of peaks or genes.
}
\seealso{
Useful links:
\itemize{
  \item \url{https://yulab-smu.top/contribution-knowledge-mining/}
  \item Report bugs at \url{https://github.com/YuLab-SMU/ChIPseeker/issues}
}

}
\author{
\strong{Maintainer}: Guangchuang Yu \email{guangchuangyu@gmail.com} (\href{https://orcid.org/0000-0002-6485-8781}{ORCID})

Other contributors:
\itemize{
  \item Ming Li \email{limiang929@gmail.com} [contributor]
  \item Qianwen Wang \email{treywea@gmail.com} [contributor]
  \item Yun Yan \email{youryanyun@gmail.com} [contributor]
  \item Hervé Pagès \email{hpages.on.github@gmail.com} [contributor]
  \item Michael Kluge \email{michael.kluge@bio.ifi.lmu.de} [contributor]
  \item Thomas Schwarzl \email{schwarzl@embl.de} [contributor]
  \item Zhougeng Xu \email{xuzhougeng@163.com} [contributor]
  \item Chun-Hui Gao \email{gaospecial@gmail.com} (\href{https://orcid.org/0000-0002-1445-7939}{ORCID}) [contributor]
}

}
\keyword{internal}


================================================
FILE: man/ChIPseekerCache.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ChIPseeker-package.R
\docType{data}
\name{ChIPseekerCache}
\alias{ChIPseekerCache}
\title{Name of the ChIPseeker cache environment (internal static variable)}
\format{
character vector
}
\usage{
ChIPseekerCache
}
\description{
Name of the ChIPseeker cache environment (internal static variable)
}
\keyword{datasets}


================================================
FILE: man/annotatePeak.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/annotatePeak.R
\name{annotatePeak}
\alias{annotatePeak}
\title{annotatePeak}
\usage{
annotatePeak(
  peak,
  tssRegion = c(-3000, 3000),
  TxDb = NULL,
  level = "transcript",
  assignGenomicAnnotation = TRUE,
  genomicAnnotationPriority = c("Promoter", "5UTR", "3UTR", "Exon", "Intron",
    "Downstream", "Intergenic"),
  annoDb = NULL,
  addFlankGeneInfo = FALSE,
  flankDistance = 5000,
  sameStrand = FALSE,
  ignoreOverlap = FALSE,
  ignoreUpstream = FALSE,
  ignoreDownstream = FALSE,
  overlap = "TSS",
  verbose = TRUE,
  columns = c("ENTREZID", "ENSEMBL", "SYMBOL", "GENENAME")
)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{tssRegion}{Region Range of TSS}

\item{TxDb}{TxDb or EnsDb annotation object}

\item{level}{one of transcript and gene}

\item{assignGenomicAnnotation}{logical, assign peak genomic annotation or not}

\item{genomicAnnotationPriority}{genomic annotation priority}

\item{annoDb}{annotation package}

\item{addFlankGeneInfo}{logical, add flanking gene information from the peaks}

\item{flankDistance}{distance of flanking sequence}

\item{sameStrand}{logical, whether find nearest/overlap gene in the same strand}

\item{ignoreOverlap}{logical, whether ignore overlap of TSS with peak}

\item{ignoreUpstream}{logical, if True only annotate gene at the 3' of the peak.}

\item{ignoreDownstream}{logical, if True only annotate gene at the 5' of the peak.}

\item{overlap}{one of 'TSS' or 'all', if overlap="all", then gene overlap with peak will be reported as nearest gene, no matter the overlap is at TSS region or not.}

\item{verbose}{print message or not}

\item{columns}{names of columns to be obtained from database}
}
\value{
data.frame or GRanges object with columns of:

all columns provided by input.

annotation: genomic feature of the peak, for instance if the peak is
located in 5'UTR, it will annotated by 5'UTR. Possible annotation is
Promoter-TSS, Exon, 5' UTR, 3' UTR, Intron, and Intergenic.

geneChr: Chromosome of the nearest gene

geneStart: gene start

geneEnd: gene end

geneLength: gene length

geneStrand: gene strand

geneId: entrezgene ID

distanceToTSS: distance from peak to gene TSS

if annoDb is provided, extra column will be included:

ENSEMBL: ensembl ID of the nearest gene

SYMBOL: gene symbol

GENENAME: full gene name
}
\description{
Annotate peaks
}
\examples{
\dontrun{
require(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
peakfile <- system.file("extdata", "sample_peaks.txt", package="ChIPseeker")
peakAnno <- annotatePeak(peakfile, tssRegion=c(-3000, 3000), TxDb=txdb)
peakAnno
}
}
\seealso{
\code{\link{plotAnnoBar}} \code{\link{plotAnnoPie}} \code{\link{plotDistToTSS}}
}
\author{
G Yu
}


================================================
FILE: man/as.GRanges.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csAnno.R
\name{as.GRanges}
\alias{as.GRanges}
\title{as.GRanges}
\usage{
as.GRanges(x)
}
\arguments{
\item{x}{csAnno object}
}
\value{
GRanges object
}
\description{
convert csAnno object to GRanges
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/as.data.frame.csAnno.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csAnno.R
\name{as.data.frame.csAnno}
\alias{as.data.frame.csAnno}
\title{as.data.frame.csAnno}
\usage{
\method{as.data.frame}{csAnno}(x, row.names = NULL, optional = FALSE, ...)
}
\arguments{
\item{x}{csAnno object}

\item{row.names}{row names}

\item{optional}{should be omitted.}

\item{...}{additional parameters}
}
\value{
data.frame
}
\description{
convert csAnno object to data.frame
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/check_upstream_and_downstream.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{check_upstream_and_downstream}
\alias{check_upstream_and_downstream}
\title{check upstream and downstream parameter}
\usage{
check_upstream_and_downstream(upstream, downstream)
}
\arguments{
\item{upstream}{upstream}

\item{downstream}{downstream}
}
\description{
check_upstream_and_downstream
}


================================================
FILE: man/combine_csAnno.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csAnno.R
\name{combine_csAnno}
\alias{combine_csAnno}
\title{combine_csAnno}
\usage{
combine_csAnno(x, ...)
}
\arguments{
\item{x}{csAnno object}

\item{...}{csAnno objects}
}
\value{
csAnno object
}
\description{
Combine csAnno Object
}
\details{
https://github.com/YuLab-SMU/ChIPseeker/issues/157
}


================================================
FILE: man/covplot.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/covplot.R
\name{covplot}
\alias{covplot}
\title{covplot}
\usage{
covplot(
  peak,
  weightCol = NULL,
  xlab = "Chromosome Size (bp)",
  ylab = "",
  title = "ChIP Peaks over Chromosomes",
  chrs = NULL,
  xlim = NULL,
  lower = 1,
  fill_color = "black"
)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{weightCol}{weight column of peak}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{title}{title}

\item{chrs}{selected chromosomes to plot, all chromosomes by default}

\item{xlim}{ranges to plot, default is whole chromosome}

\item{lower}{lower cutoff of coverage signal}

\item{fill_color}{specify the color/palette for the plot. Order matters}
}
\value{
ggplot2 object
}
\description{
plot peak coverage
}
\author{
G Yu
}


================================================
FILE: man/csAnno-class.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csAnno.R
\docType{class}
\name{csAnno-class}
\alias{csAnno-class}
\alias{show,csAnno-method}
\alias{vennpie,csAnno-method}
\alias{plotDistToTSS,csAnno-method}
\alias{plotAnnoBar,csAnno-method}
\alias{plotAnnoPie,csAnno-method}
\alias{upsetplot,csAnno-method}
\alias{subset,csAnno-method}
\title{Class "csAnno"
This class represents the output of ChIPseeker Annotation}
\description{
Class "csAnno"
This class represents the output of ChIPseeker Annotation
}
\section{Slots}{

\describe{
\item{\code{anno}}{annotation}

\item{\code{tssRegion}}{TSS region}

\item{\code{level}}{transcript or gene}

\item{\code{hasGenomicAnnotation}}{logical}

\item{\code{detailGenomicAnnotation}}{Genomic Annotation in detail}

\item{\code{annoStat}}{annotation statistics}

\item{\code{peakNum}}{number of peaks}
}}

\seealso{
\code{\link{annotatePeak}}
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}
\keyword{classes}


================================================
FILE: man/dot-ChIPseekerEnv.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{.ChIPseekerEnv}
\alias{.ChIPseekerEnv}
\title{env function for ChIPseeker}
\usage{
.ChIPseekerEnv(TxDb, item = "ChIPseekerEnv", force = FALSE)
}
\arguments{
\item{TxDb}{txdb object}

\item{item}{item name}

\item{force}{force to update txdb item in cache or not.}
}
\description{
env function for ChIPseeker
}


================================================
FILE: man/dotFun.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{.}
\alias{.}
\title{.}
\usage{
.(..., .env = parent.frame())
}
\arguments{
\item{...}{expression}

\item{.env}{environment}
}
\value{
expression
}
\description{
capture name of variable
}
\examples{
x <- 1
eval(.(x)[[1]])
}


================================================
FILE: man/downloadGEObedFiles.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GEO.R
\name{downloadGEObedFiles}
\alias{downloadGEObedFiles}
\title{downloadGEObedFiles}
\usage{
downloadGEObedFiles(genome, destDir = getwd())
}
\arguments{
\item{genome}{genome version}

\item{destDir}{destination folder}
}
\description{
download all BED files of a particular genome version
}
\author{
G Yu
}


================================================
FILE: man/downloadGSMbedFiles.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GEO.R
\name{downloadGSMbedFiles}
\alias{downloadGSMbedFiles}
\title{downloadGSMbedFiles}
\usage{
downloadGSMbedFiles(GSM, destDir = getwd())
}
\arguments{
\item{GSM}{GSM accession numbers}

\item{destDir}{destination folder}
}
\description{
download BED supplementary files of a list of GSM accession numbers
}
\author{
G Yu
}


================================================
FILE: man/dropAnno.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/annotatePeak.R
\name{dropAnno}
\alias{dropAnno}
\title{dropAnno}
\usage{
dropAnno(csAnno, distanceToTSS_cutoff = 10000)
}
\arguments{
\item{csAnno}{output of annotatePeak}

\item{distanceToTSS_cutoff}{distance to TSS cutoff}
}
\value{
csAnno object
}
\description{
dropAnno
}
\details{
drop annotation exceeding distanceToTSS_cutoff
}
\author{
Guangchuang Yu
}


================================================
FILE: man/enrichAnnoOverlap.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/enrichOverlap.R
\name{enrichAnnoOverlap}
\alias{enrichAnnoOverlap}
\title{enrichAnnoOverlap}
\usage{
enrichAnnoOverlap(
  queryPeak,
  targetPeak,
  TxDb = NULL,
  pAdjustMethod = "BH",
  chainFile = NULL,
  distanceToTSS_cutoff = NULL
)
}
\arguments{
\item{queryPeak}{query bed file}

\item{targetPeak}{target bed file(s) or folder containing bed files}

\item{TxDb}{TxDb}

\item{pAdjustMethod}{pvalue adjustment method}

\item{chainFile}{chain file for liftOver}

\item{distanceToTSS_cutoff}{restrict nearest gene annotation by distance cutoff}
}
\value{
data.frame
}
\description{
calcuate overlap significant of ChIP experiments based on their nearest gene annotation
}
\author{
G Yu
}


================================================
FILE: man/enrichPeakOverlap.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/enrichOverlap.R
\name{enrichPeakOverlap}
\alias{enrichPeakOverlap}
\title{enrichPeakOverlap}
\usage{
enrichPeakOverlap(
  queryPeak,
  targetPeak,
  TxDb = NULL,
  pAdjustMethod = "BH",
  nShuffle = 1000,
  chainFile = NULL,
  pool = TRUE,
  mc.cores = detectCores() - 1,
  verbose = TRUE
)
}
\arguments{
\item{queryPeak}{query bed file or GRanges object}

\item{targetPeak}{target bed file(s) or folder that containing bed files or a list of GRanges objects}

\item{TxDb}{TxDb}

\item{pAdjustMethod}{pvalue adjustment method}

\item{nShuffle}{shuffle numbers}

\item{chainFile}{chain file for liftOver}

\item{pool}{logical, whether pool target peaks}

\item{mc.cores}{number of cores, see \link[parallel]{mclapply}}

\item{verbose}{logical}
}
\value{
data.frame
}
\description{
calculate overlap significant of ChIP experiments based on the genome coordinations
}
\author{
G Yu
}


================================================
FILE: man/getAnnoStat.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csAnno.R
\name{getAnnoStat}
\alias{getAnnoStat}
\title{getAnnoStat}
\usage{
getAnnoStat(x)
}
\arguments{
\item{x}{csAnno object}
}
\description{
getting status of annotation
}


================================================
FILE: man/getBioRegion.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{getBioRegion}
\alias{getBioRegion}
\title{getBioRegion}
\usage{
getBioRegion(
  TxDb = NULL,
  upstream = 1000,
  downstream = 1000,
  by = "gene",
  type = "start_site"
)
}
\arguments{
\item{TxDb}{TxDb}

\item{upstream}{upstream from start site or end site}

\item{downstream}{downstream from start site or end site}

\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}

\item{type}{one of "start_site", "end_site", "body"}
}
\value{
GRanges object
}
\description{
prepare a bioregion of selected feature
}
\details{
this function combined previous functions getPromoters(), getBioRegion() and getGeneBody() in order
to solve the following issues.

(1) \url{https://github.com/GuangchuangYu/ChIPseeker/issues/16}

(2) \url{https://github.com/GuangchuangYu/ChIPseeker/issues/87}

The getBioRegion() function can prevoid a region of interest from
\code{txdb} object. There are three kinds of regions, \code{start_site},
\code{end_site} and \code{body}. 

We take transcript region to expain the differences of these three regions.
tx: chr1 1000 1400. 

\code{body} region refers to the 1000-1400bp.

\code{start_site} region with \code{upstream = 100, downstream = 100} refers to 900-1100bp. 

\code{end_site} region with \code{upstream = 100, downstream = 100} refers to 1300-1500bp.
}
\author{
Guangchuang Yu, Ming L
}


================================================
FILE: man/getGEOInfo.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GEO.R
\name{getGEOInfo}
\alias{getGEOInfo}
\title{getGEOInfo}
\usage{
getGEOInfo(genome, simplify = TRUE)
}
\arguments{
\item{genome}{genome version}

\item{simplify}{simplify result or not}
}
\value{
data.frame
}
\description{
get subset of GEO information by genome version keyword
}
\author{
G Yu
}


================================================
FILE: man/getGEOgenomeVersion.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GEO.R
\name{getGEOgenomeVersion}
\alias{getGEOgenomeVersion}
\title{getGEOgenomeVersion}
\usage{
getGEOgenomeVersion()
}
\value{
data.frame
}
\description{
get genome version statistics collecting from GEO ChIPseq data
}
\author{
G Yu
}


================================================
FILE: man/getGEOspecies.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/GEO.R
\name{getGEOspecies}
\alias{getGEOspecies}
\title{getGEOspecies}
\usage{
getGEOspecies()
}
\value{
data.frame
}
\description{
accessing species statistics collecting from GEO database
}
\author{
G Yu
}


================================================
FILE: man/getGeneAnno.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/addGeneAnno.R
\name{getGeneAnno}
\alias{getGeneAnno}
\title{getGeneAnno}
\usage{
getGeneAnno(annoDb, geneID, type, columns)
}
\arguments{
\item{annoDb}{annotation package}

\item{geneID}{query geneID}

\item{type}{gene ID type}

\item{columns}{names of columns to be obtained from database}
}
\value{
data.frame
}
\description{
get gene annotation, symbol, gene name etc.
}
\author{
G Yu
}


================================================
FILE: man/getGenomicAnnotation.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getGenomicAnnotation.R
\name{getGenomicAnnotation}
\alias{getGenomicAnnotation}
\title{getGenomicAnnotation}
\usage{
getGenomicAnnotation(
  peaks,
  distance,
  tssRegion = c(-3000, 3000),
  TxDb,
  level,
  genomicAnnotationPriority,
  sameStrand = FALSE
)
}
\arguments{
\item{peaks}{peaks in GRanges object}

\item{distance}{distance of peak to TSS}

\item{tssRegion}{tssRegion, default is -3kb to +3kb}

\item{TxDb}{TxDb object}

\item{level}{one of gene or transcript}

\item{genomicAnnotationPriority}{genomic Annotation Priority}

\item{sameStrand}{whether annotate gene in same strand}
}
\value{
character vector
}
\description{
get Genomic Annotation of peaks
}
\author{
G Yu
}


================================================
FILE: man/getNearestFeatureIndicesAndDistances.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/getNearestFeatureIndicesAndDistances.R
\name{getNearestFeatureIndicesAndDistances}
\alias{getNearestFeatureIndicesAndDistances}
\title{getNearestFeatureIndicesAndDistances}
\usage{
getNearestFeatureIndicesAndDistances(
  peaks,
  features,
  sameStrand = FALSE,
  ignoreOverlap = FALSE,
  ignoreUpstream = FALSE,
  ignoreDownstream = FALSE,
  overlap = "TSS"
)
}
\arguments{
\item{peaks}{peak in GRanges}

\item{features}{features in GRanges}

\item{sameStrand}{logical, whether find nearest gene in the same strand}

\item{ignoreOverlap}{logical, whether ignore overlap of TSS with peak}

\item{ignoreUpstream}{logical, if True only annotate gene at the 3' of the peak.}

\item{ignoreDownstream}{logical, if True only annotate gene at the 5' of the peak.}

\item{overlap}{one of "TSS" or "all"}
}
\value{
list
}
\description{
get index of features that closest to peak and calculate distance
}
\author{
G Yu
}


================================================
FILE: man/getPromoters.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{getPromoters}
\alias{getPromoters}
\title{getPromoters}
\usage{
getPromoters(TxDb = NULL, upstream = 1000, downstream = 1000, by = "gene")
}
\arguments{
\item{TxDb}{TxDb}

\item{upstream}{upstream from TSS site}

\item{downstream}{downstream from TSS site}

\item{by}{one of gene or transcript}
}
\value{
GRanges object
}
\description{
prepare the promoter regions
}


================================================
FILE: man/getSampleFiles.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{getSampleFiles}
\alias{getSampleFiles}
\title{getSampleFiles}
\usage{
getSampleFiles()
}
\value{
list of file names
}
\description{
get filenames of sample files
}
\author{
G Yu
}


================================================
FILE: man/getTagMatrix.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{getTagMatrix}
\alias{getTagMatrix}
\title{getTagMatrix}
\usage{
getTagMatrix(
  peak,
  upstream,
  downstream,
  windows,
  type,
  by,
  TxDb = NULL,
  weightCol = NULL,
  nbin = NULL,
  verbose = TRUE,
  ignore_strand = FALSE
)
}
\arguments{
\item{peak}{peak peak file or GRanges object}

\item{upstream}{the distance of upstream extension}

\item{downstream}{the distance of downstream extension}

\item{windows}{a collection of region}

\item{type}{one of "start_site", "end_site", "body"}

\item{by}{one of 'gene', 'transcript', 'exon', 'intron', '3UTR' , '5UTR', or specified by users}

\item{TxDb}{TxDb or self-made granges object, served as txdb}

\item{weightCol}{column name of weight, default is NULL}

\item{nbin}{the amount of nbines}

\item{verbose}{print message or not}

\item{ignore_strand}{ignore the strand information or not}
}
\value{
tagMatrix
}
\description{
calculate the tag matrix
}
\details{
\code{getTagMatrix()} function can produce the matrix for visualization.
\code{peak} stands for the peak file. 
\code{window} stands for a collection of regions that users want to look into. 
Users can use \code{window} to capture the peak of interest.
There are two ways to input \code{window}. 

The first way is that users can use
\code{getPromoters()/getBioRegion()/makeBioRegionFromGranges()} to 
get \code{window} and put it into \code{getTagMatrix()}. 

The second way is that users can use \code{getTagMatrix()} to
call \code{getPromoters()/getBioRegion()/makeBioRegionFromGranges()}. In this way
users do not need to input \code{window} parameter but they need to input
\code{txdb}. 

\code{txdb} is a set of packages contained annotation 
of regions of different genomes. Users can
get the regions of interest through specific functions. These specific functions
are built in \code{getPromoters()/getBioRegion()}. Many regions can not be gain
through \code{txdb}, like insulator and enhancer regions. 
Users can provide these regions in the form of granges object. 
These self-made granges object will be passed to \code{TxDb} parameter and they will
be passed to \code{makeBioRegionFromGranges()} to produce the \code{window}.
In a word, \code{TxDb} parameter is a reference information. Users can
pass \code{txdb object} or self-made granges into it.

Details see \code{\link{getPromoters}},\code{\link{getBioRegion}} and \code{\link{makeBioRegionFromGranges}}

\code{upstream} and \code{downstream} parameter have different usages:

(1) \code{window} parameter is provided, 

if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
the flank of body region.

if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} do not
play a role in \code{getTagMatrix()} function.

(2) \code{window} parameter is missing,

if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
the flank of body region.

if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} refer to
the upstream and downstream of the start_site or the end_site.

\code{weightCol} refers to column in peak file. This column acts as a weight vaule. Details
see \url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}

\code{nbin} refers to the number of bins. \code{getTagMatrix()} provide a binning method
to get the tag matrix.
}


================================================
FILE: man/getTagMatrix.binning.internal.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{getTagMatrix.binning.internal}
\alias{getTagMatrix.binning.internal}
\title{getTagMatrix.binning.internal}
\usage{
getTagMatrix.binning.internal(
  peak,
  weightCol = NULL,
  windows,
  nbin = 800,
  upstream = NULL,
  downstream = NULL,
  ignore_strand = FALSE
)
}
\arguments{
\item{peak}{peak peak file or GRanges object}

\item{weightCol}{weightCol column name of weight, default is NULL}

\item{windows}{windows a collection of region with equal or not equal size, eg. promoter region, gene region.}

\item{nbin}{the amount of nbines needed to be splited and it should not be more than min_body_length}

\item{upstream}{rel object, NULL or actual number}

\item{downstream}{rel object, NULL or actual number}

\item{ignore_strand}{ignore the strand information or not}
}
\value{
tagMatrix
}
\description{
calculate the tagMatrix by binning
the idea was derived from the function of deeptools
https://deeptools.readthedocs.io/en/develop/content/tools/computeMatrix.html
}


================================================
FILE: man/getTagMatrix.internal.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{getTagMatrix.internal}
\alias{getTagMatrix.internal}
\title{getTagMatrix.internal}
\usage{
getTagMatrix.internal(peak, weightCol = NULL, windows, ignore_strand = FALSE)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{weightCol}{column name of weight, default is NULL}

\item{windows}{a collection of region with equal size, eg. promoter region.}

\item{ignore_strand}{ignore the strand information or not}
}
\value{
tagMatrix
}
\description{
calculate the tag matrix
}
\author{
G Yu
}


================================================
FILE: man/getTagMatrix2.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{getTagMatrix2}
\alias{getTagMatrix2}
\title{getTagMatrix2}
\usage{
getTagMatrix2(
  peak,
  upstream,
  downstream,
  windows_name,
  type,
  by,
  TxDb = NULL,
  weightCol = NULL,
  nbin = NULL,
  verbose = TRUE,
  ignore_strand = FALSE
)
}
\arguments{
\item{peak}{peak peak file or GRanges object}

\item{upstream}{the distance of upstream extension}

\item{downstream}{the distance of downstream extension}

\item{windows_name}{the names of windows}

\item{type}{one of "start_site", "end_site", "body"}

\item{by}{one of 'gene', 'transcript', 'exon', 'intron', '3UTR' , '5UTR', or specified by users}

\item{TxDb}{TxDb or self-made granges object, served as txdb}

\item{weightCol}{column name of weight, default is NULL}

\item{nbin}{the amount of nbines}

\item{verbose}{print message or not}

\item{ignore_strand}{ignore the strand information or not}
}
\value{
tagMatrix
}
\description{
Nested function for getTagMatrix() to deal with multiple windows
}
\details{
This is an internal function.
}


================================================
FILE: man/getTagMatrix2.binning.internal.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{getTagMatrix2.binning.internal}
\alias{getTagMatrix2.binning.internal}
\title{internal function}
\usage{
getTagMatrix2.binning.internal(
  peak,
  weightCol = NULL,
  windows,
  windows_name,
  nbin = 800,
  upstream = NULL,
  downstream = NULL,
  ignore_strand = FALSE
)
}
\arguments{
\item{peak}{peak peak file or GRanges object}

\item{weightCol}{column name of weight, default is NULL}

\item{windows}{a collection of region}

\item{windows_name}{the name of windows}

\item{nbin}{the amount of nbines}

\item{upstream}{the distance of upstream extension}

\item{downstream}{the distance of downstream extension}

\item{ignore_strand}{ignore the strand information or not}
}
\description{
internal function
}


================================================
FILE: man/getTagMatrix2.internal.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{getTagMatrix2.internal}
\alias{getTagMatrix2.internal}
\title{getTagMatrix2.internal}
\usage{
getTagMatrix2.internal(
  peak,
  weightCol = NULL,
  windows,
  windows_name,
  ignore_strand = FALSE
)
}
\arguments{
\item{peak}{peak peak file or GRanges object}

\item{weightCol}{column name of weight, default is NULL}

\item{windows}{a collection of region}

\item{windows_name}{the name of windows}

\item{ignore_strand}{ignore the strand information or not}
}
\description{
getTagMatrix2.internal
}


================================================
FILE: man/info.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ChIPseeker-package.R
\docType{data}
\name{info}
\alias{info}
\alias{ucsc_release}
\alias{gsminfo}
\alias{tagMatrixList}
\title{Information Datasets}
\description{
ucsc genome version, precalcuated data and gsm information
}
\keyword{datasets}


================================================
FILE: man/makeBioRegionFromGranges.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tagMatrix.R
\name{makeBioRegionFromGranges}
\alias{makeBioRegionFromGranges}
\title{makeBioRegionFromGranges}
\usage{
makeBioRegionFromGranges(gr, by, type, upstream = 1000, downstream = 1000)
}
\arguments{
\item{gr}{a grange object contain region of interest}

\item{by}{specify be users, e.g. gene, insulator, enhancer}

\item{type}{one of "start_site", "end_site", "body"}

\item{upstream}{upstream from start site or end site, can be NULL if the type == 'body'}

\item{downstream}{downstream from start site or end site, can be NULL if the type == 'body'}
}
\value{
GRanges object
}
\description{
make windows from granges object
}
\details{
\code{makeBioRegionFromGranges()} function can make bioregion from granges object.

The differences between \code{makeBioRegionFromGranges()} and \code{getBioRegion()} is that
\code{getBioRegion()} get the region object from \code{txdb} object but
\code{makeBioRegionFromGranges()} get the region from the granges object provided by users.
For example, \code{txdb} object do not contain insulator or enhancer regions. Users can
provide these regions through self-made granges object

There are three kinds of regions, \code{start_site}, \code{end_site} and \code{body}. 

We take enhancer region to explain the differences of these three regions.
enhancer: chr1 1000 1400. 

\code{body} region refers to the 1000-1400bp.

\code{start_site} region with \code{upstream = 100, downstream = 100} refers to 900-1100bp. 

\code{end_site} region with \code{upstream = 100, downstream = 100} refers to 1300-1500bp.

In \code{makeBioRegionFromGranges()}, \code{upstream} and \code{downstream} can be
\code{NULL} if the \code{type == 'body'}. \code{by} should be specified by users and 
can not be omitted. \code{by} parameter will be used to made labels. \code{type} should also
be specified.

\url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}
}


================================================
FILE: man/make_label.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{make_label}
\alias{make_label}
\title{make label for figures}
\usage{
make_label(type, by)
}
\arguments{
\item{type}{one of "start_site", "end_site", "body"}

\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}
}
\description{
make label for figures
}


================================================
FILE: man/overlap.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{overlap}
\alias{overlap}
\title{overlap}
\usage{
overlap(Sets)
}
\arguments{
\item{Sets}{a list of objects}
}
\value{
data.frame
}
\description{
calculate the overlap matrix, which is useful for vennplot
}
\author{
G Yu
}


================================================
FILE: man/peakHeatmap.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{peakHeatmap}
\alias{peakHeatmap}
\title{peakHeatmap}
\usage{
peakHeatmap(
  peak,
  weightCol = NULL,
  TxDb = NULL,
  upstream = 1000,
  downstream = 1000,
  xlab = "",
  ylab = "",
  title = NULL,
  palette = NULL,
  verbose = TRUE,
  by = "gene",
  type = "start_site",
  nbin = NULL,
  ignore_strand = FALSE,
  windows,
  ncol = NULL,
  nrow = NULL
)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{weightCol}{column name of weight}

\item{TxDb}{TxDb object}

\item{upstream}{upstream position}

\item{downstream}{downstream position}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{title}{title}

\item{palette}{palette to be filled in,details see \link[ggplot2]{scale_colour_brewer}}

\item{verbose}{print message or not}

\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}

\item{type}{one of "start_site", "end_site", "body"}

\item{nbin}{the amount of nbines}

\item{ignore_strand}{ignore the strand information or not}

\item{windows}{a collection of region}

\item{ncol}{the ncol of plotting a list of peak}

\item{nrow}{the nrow of plotting a list of peak}
}
\value{
figure
}
\description{
plot the heatmap of peaks
}
\author{
G Yu
}


================================================
FILE: man/peakHeatmap_multiple_Sets.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{peakHeatmap_multiple_Sets}
\alias{peakHeatmap_multiple_Sets}
\title{peakHeatmap}
\usage{
peakHeatmap_multiple_Sets(
  peak,
  weightCol = NULL,
  TxDb = NULL,
  upstream = 1000,
  downstream = 1000,
  xlab = "",
  ylab = "",
  title = NULL,
  palette = NULL,
  verbose = TRUE,
  by = "gene",
  type = "start_site",
  nbin = NULL,
  ignore_strand = FALSE,
  windows_name = NULL,
  ncol = NULL,
  nrow = NULL,
  facet_label_text_size = 12
)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{weightCol}{column name of weight}

\item{TxDb}{TxDb object}

\item{upstream}{upstream position}

\item{downstream}{downstream position}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{title}{title}

\item{palette}{palette to be filled in,details see \link[ggplot2]{scale_colour_brewer}}

\item{verbose}{print message or not}

\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}

\item{type}{one of "start_site", "end_site", "body"}

\item{nbin}{the amount of nbines}

\item{ignore_strand}{ignore the strand information or not}

\item{windows_name}{the name for each window, which will also be showed in the picture as labels}

\item{ncol}{the ncol of plotting a list of peak}

\item{nrow}{the nrow of plotting a list of peak}

\item{facet_label_text_size}{the size of facet label text}
}
\value{
figure
}
\description{
plot the heatmap of peaks align to a sets of regions
}


================================================
FILE: man/peak_Profile_Heatmap.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{peak_Profile_Heatmap}
\alias{peak_Profile_Heatmap}
\title{peak_Profile_Heatmap}
\usage{
peak_Profile_Heatmap(
  peak,
  weightCol = NULL,
  TxDb = NULL,
  upstream = 1000,
  downstream = 1000,
  xlab = "",
  ylab = "",
  title = NULL,
  palette = NULL,
  verbose = TRUE,
  by = "gene",
  type = "start_site",
  nbin = NULL,
  ignore_strand = FALSE,
  windows_name = NULL,
  ncol = NULL,
  nrow = NULL,
  facet_label_text_size = 12,
  conf,
  facet = "row",
  free_y = TRUE,
  height_proportion = 4
)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{weightCol}{column name of weight}

\item{TxDb}{TxDb object}

\item{upstream}{upstream position}

\item{downstream}{downstream position}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{title}{title}

\item{palette}{palette to be filled in,details see \link[ggplot2]{scale_colour_brewer}}

\item{verbose}{print message or not}

\item{by}{one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR', 'UTR'}

\item{type}{one of "start_site", "end_site", "body"}

\item{nbin}{the amount of nbines}

\item{ignore_strand}{ignore the strand information or not}

\item{windows_name}{the name for each window, which will also be showed in the picture as labels}

\item{ncol}{the ncol of plotting a list of peak}

\item{nrow}{the nrow of plotting a list of peak}

\item{facet_label_text_size}{the size of facet label text}

\item{conf}{confidence interval}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{height_proportion}{the proportion of profiling picture and heatmap}
}
\description{
plot peak heatmap and profile in a picture
}


================================================
FILE: man/plotAnnoBar-methods.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/csAnno.R
\docType{methods}
\name{plotAnnoBar}
\alias{plotAnnoBar}
\alias{plotAnnoBar,list-method}
\alias{plotAnnoBar,csAnno,ANY-method}
\title{plotAnnoBar method generics}
\usage{
plotAnnoBar(
  x,
  xlab = "",
  ylab = "Percentage(\%)",
  title = "Feature Distribution",
  ...
)

\S4method{plotAnnoBar}{list}(
  x,
  xlab = "",
  ylab = "Percentage(\%)",
  title = "Feature Distribution",
  ...
)

plotAnnoBar(x, xlab="", ylab='Percentage(\%)',title="Feature Distribution", ...)
}
\arguments{
\item{x}{\code{csAnno} instance}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{title}{title}

\item{...}{additional paramter}
}
\value{
plot
}
\description{
plotAnnoBar method for \code{csAnno} instance
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/plotAnnoBar.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotAnno.R
\name{plotAnnoBar.data.frame}
\alias{plotAnnoBar.data.frame}
\title{plotAnnoBar.data.frame}
\usage{
plotAnnoBar.data.frame(
  anno.df,
  xlab = "",
  ylab = "Percentage(\%)",
  title = "Feature Distribution",
  categoryColumn
)
}
\arguments{
\item{anno.df}{annotation stats}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{title}{plot title}

\item{categoryColumn}{category column}
}
\value{
bar plot that summarize genomic features of peaks
}
\description{
plot feature distribution based on their chromosome region
}
\details{
plot chromosome region features
}
\seealso{
\code{\link{annotatePeak}} \code{\link{plotAnnoPie}}
}
\author{
Guangchuang Yu \url{https://yulab-smu.top}
}


================================================
FILE: man/plotAnnoPie-methods.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/csAnno.R
\docType{methods}
\name{plotAnnoPie}
\alias{plotAnnoPie}
\alias{plotAnnoPie,csAnno,ANY-method}
\title{plotAnnoPie method generics}
\usage{
plotAnnoPie(
  x,
  ndigit = 2,
  cex = 0.9,
  col = NA,
  legend.position = "rightside",
  pie3D = FALSE,
  radius = 0.8,
  ...
)

plotAnnoPie(x,ndigit=2,cex=0.9,col=NA,legend.position="rightside",pie3D=FALSE,radius=0.8,...)
}
\arguments{
\item{x}{\code{csAnno} instance}

\item{ndigit}{number of digit to round}

\item{cex}{label cex}

\item{col}{color}

\item{legend.position}{topright or other.}

\item{pie3D}{plot in 3D or not}

\item{radius}{radius of the pie}

\item{...}{extra parameter}
}
\value{
plot
}
\description{
plotAnnoPie method for \code{csAnno} instance
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/plotAnnoPie.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotAnno.R
\name{plotAnnoPie.csAnno}
\alias{plotAnnoPie.csAnno}
\title{plotAnnoPie}
\usage{
plotAnnoPie.csAnno(
  x,
  ndigit = 2,
  cex = 0.8,
  col = NA,
  legend.position = "rightside",
  pie3D = FALSE,
  radius = 0.8,
  ...
)
}
\arguments{
\item{x}{csAnno object}

\item{ndigit}{number of digit to round}

\item{cex}{label cex}

\item{col}{color}

\item{legend.position}{topright or other.}

\item{pie3D}{plot in 3D or not}

\item{radius}{radius of Pie}

\item{...}{extra parameter}
}
\value{
pie plot of peak genomic feature annotation
}
\description{
pieplot from peak genomic annotation
}
\examples{
\dontrun{
require(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
peakfile <- system.file("extdata", "sample_peaks.txt", package="chipseeker")
peakAnno <- annotatePeak(peakfile, TxDb=txdb)
plotAnnoPie(peakAnno)
}
}
\seealso{
\code{\link{annotatePeak}} \code{\link{plotAnnoBar}}
}
\author{
Guangchuang Yu \url{https://yulab-smu.top}
}


================================================
FILE: man/plotAvgProf.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotAvgProf}
\alias{plotAvgProf}
\title{plotAvgProf}
\usage{
plotAvgProf(
  tagMatrix,
  xlim,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  conf,
  facet = "none",
  free_y = TRUE,
  origin_label = "TSS",
  verbose = TRUE,
  ...
)
}
\arguments{
\item{tagMatrix}{tagMatrix or a list of tagMatrix}

\item{xlim}{xlim}

\item{xlab}{x label}

\item{ylab}{y label}

\item{conf}{confidence interval}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{origin_label}{label of the center}

\item{verbose}{print message or not}

\item{...}{additional parameter}
}
\value{
ggplot object
}
\description{
plot the profile of peaks
}
\author{
G Yu; Y Yan
}


================================================
FILE: man/plotAvgProf.binning.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotAvgProf.binning}
\alias{plotAvgProf.binning}
\title{plotAvgProf.binning}
\usage{
plotAvgProf.binning(
  tagMatrix,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  conf,
  facet = "none",
  free_y = TRUE,
  upstream = NULL,
  downstream = NULL,
  label,
  ...
)
}
\arguments{
\item{tagMatrix}{tagMatrix or a list of tagMatrix}

\item{xlab}{x label}

\item{ylab}{y label}

\item{conf}{confidence interval}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled}

\item{upstream}{rel object reflects the percentage of flank extension, e.g rel(0.2)
integer reflects the actual length of flank extension or TSS region
NULL reflects the gene body with no extension}

\item{downstream}{rel object reflects the percentage of flank extension, e.g rel(0.2)
integer reflects the actual length of flank extension or TSS region
NULL reflects the gene body with no extension}

\item{label}{label}

\item{...}{additional parameter}
}
\value{
ggplot object
}
\description{
plot the profile of peaks  by binning
}


================================================
FILE: man/plotAvgProf2.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotAvgProf2}
\alias{plotAvgProf2}
\title{plotAvgProf}
\usage{
plotAvgProf2(
  peak,
  weightCol = NULL,
  TxDb = NULL,
  upstream = 1000,
  downstream = 1000,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  conf,
  facet = "none",
  free_y = TRUE,
  verbose = TRUE,
  ignore_strand = FALSE,
  ...
)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{weightCol}{column name of weight}

\item{TxDb}{TxDb object}

\item{upstream}{upstream position}

\item{downstream}{downstream position}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{conf}{confidence interval}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{verbose}{print message or not}

\item{ignore_strand}{ignore the strand information or not}

\item{...}{additional parameter}
}
\value{
ggplot object
}
\description{
plot the profile of peaks that align to flank sequences of TSS
}
\details{
This function is the old function of \code{plotPeakProf2}. It can
only plot the start site region of gene.
}
\author{
G Yu, Ming L
}


================================================
FILE: man/plotDistToTSS-methods.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/csAnno.R
\docType{methods}
\name{plotDistToTSS}
\alias{plotDistToTSS}
\alias{plotDistToTSS,list-method}
\alias{plotDistToTSS,csAnno,ANY-method}
\title{plotDistToTSS method generics}
\usage{
plotDistToTSS(
  x,
  distanceColumn = "distanceToTSS",
  xlab = "",
  ylab = "Binding sites (\%) (5'->3')",
  title = "Distribution of transcription factor-binding loci relative to TSS",
  ...
)

\S4method{plotDistToTSS}{list}(
  x,
  distanceColumn = "distanceToTSS",
  xlab = "",
  ylab = "Binding sites (\%) (5'->3')",
  title = "Distribution of transcription factor-binding loci relative to TSS",
  distanceBreaks = c(0, 1000, 3000, 5000, 10000, 1e+05),
  palette = NULL,
  ...
)

plotDistToTSS(x,distanceColumn="distanceToTSS", xlab="",
ylab="Binding sites (\%) (5'->3')",
title="Distribution of transcription factor-binding loci relative to TSS",...)
}
\arguments{
\item{x}{\code{csAnno} instance}

\item{distanceColumn}{distance column name}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{title}{title}

\item{...}{additional parameter}

\item{distanceBreaks}{breaks of distance, default is 'c(0, 1000, 3000, 5000, 10000, 100000)'}

\item{palette}{palette name for coloring different distances. Run `RColorBrewer::display.brewer.all()` to see all applicable values.}
}
\value{
plot
}
\description{
plotDistToTSS method for \code{csAnno} instance
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/plotDistToTSS.data.frame.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotDistToTSS.R
\name{plotDistToTSS.data.frame}
\alias{plotDistToTSS.data.frame}
\title{plotDistToTSS.data.frame}
\usage{
plotDistToTSS.data.frame(
  peakDist,
  distanceColumn = "distanceToTSS",
  distanceBreaks = c(0, 1000, 3000, 5000, 10000, 1e+05),
  palette = NULL,
  xlab = "",
  ylab = "Binding sites (\%) (5'->3')",
  title = "Distribution of transcription factor-binding loci relative to TSS",
  categoryColumn = ".id"
)
}
\arguments{
\item{peakDist}{peak annotation}

\item{distanceColumn}{column name of the distance from peak to nearest gene}

\item{distanceBreaks}{default is 'c(0, 1000, 3000, 5000, 10000, 100000)'}

\item{palette}{palette name for coloring different distances. Run `RColorBrewer::display.brewer.all()` to see all applicable values.}

\item{xlab}{x label}

\item{ylab}{y lable}

\item{title}{figure title}

\item{categoryColumn}{category column, default is ".id"}
}
\value{
bar plot that summarize distance from peak to
TSS of the nearest gene.
}
\description{
plot feature distribution based on the distances to the TSS
}
\examples{
\dontrun{
require(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
peakfile <- system.file("extdata", "sample_peaks.txt", package="ChIPseeker")
peakAnno <- annotatePeak(peakfile, TxDb=txdb)
plotDistToTSS(peakAnno)
}
}
\seealso{
\code{\link{annotatePeak}}
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/plotMultiProf.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotMultiProf}
\alias{plotMultiProf}
\title{internal function for plotPeakProf_MultiWindows}
\usage{
plotMultiProf(
  tagMatrix,
  conf,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  facet = "none",
  free_y = TRUE,
  ...
)
}
\arguments{
\item{tagMatrix}{tagMatrix}

\item{conf}{confidence interval}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{...}{additional parameter}
}
\description{
internal function for plotPeakProf_MultiWindows
}


================================================
FILE: man/plotMultiProf.binning.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotMultiProf.binning}
\alias{plotMultiProf.binning}
\title{internal function}
\usage{
plotMultiProf.binning(
  tagMatrix,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  conf,
  facet = "none",
  free_y = TRUE,
  upstream = NULL,
  downstream = NULL,
  label,
  ...
)
}
\arguments{
\item{tagMatrix}{tagMatrix}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{conf}{confidence interval}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{upstream}{the upstream extension}

\item{downstream}{the downstream extension}

\item{label}{the label of the center}

\item{...}{additional parameter}
}
\description{
internal function
}


================================================
FILE: man/plotMultiProf.binning.internal.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotMultiProf.binning.internal}
\alias{plotMultiProf.binning.internal}
\title{internal function}
\usage{
plotMultiProf.binning.internal(
  tagMatrix,
  conf,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  facet = "none",
  free_y = TRUE,
  upstream = NULL,
  downstream = NULL,
  label,
  ...
)
}
\arguments{
\item{tagMatrix}{tagMatrix}

\item{conf}{confidence interval}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{upstream}{the upstream extension}

\item{downstream}{the downstream extension}

\item{label}{the label of the center}

\item{...}{additional parameter}
}
\description{
internal function
}


================================================
FILE: man/plotMultiProf.normal.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotMultiProf.normal}
\alias{plotMultiProf.normal}
\title{internal function}
\usage{
plotMultiProf.normal(
  tagMatrix,
  xlim,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  conf,
  facet = "none",
  free_y = TRUE,
  origin_label = "TSS",
  verbose = TRUE,
  ...
)
}
\arguments{
\item{tagMatrix}{tagMatrix}

\item{xlim}{xlim}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{conf}{confidence interval}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{origin_label}{the label of the center}

\item{verbose}{print message or not}

\item{...}{additional parameter}
}
\description{
internal function
}


================================================
FILE: man/plotMultiProf.normal.internal.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotMultiProf.normal.internal}
\alias{plotMultiProf.normal.internal}
\title{internal function}
\usage{
plotMultiProf.normal.internal(
  tagMatrix,
  conf,
  xlim = c(-3000, 3000),
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  facet = "row",
  free_y = TRUE,
  origin_label,
  ...
)
}
\arguments{
\item{tagMatrix}{tagMatrix}

\item{conf}{confidence interval}

\item{xlim}{xlim}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{origin_label}{the label of the center}

\item{...}{additional parameter}
}
\description{
internal function
}


================================================
FILE: man/plotPeakProf.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotPeakProf}
\alias{plotPeakProf}
\title{plotPeakProf_MultiWindows}
\usage{
plotPeakProf(
  tagMatrix = NULL,
  peak,
  upstream,
  downstream,
  conf,
  by,
  type,
  windows_name = NULL,
  weightCol = NULL,
  TxDb = NULL,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  facet = "row",
  free_y = TRUE,
  verbose = TRUE,
  nbin = NULL,
  ignore_strand = FALSE,
  ...
)
}
\arguments{
\item{tagMatrix}{tagMatrix or a list of tagMatrix}

\item{peak}{peak file or GRanges object}

\item{upstream}{upstream position}

\item{downstream}{downstream position}

\item{conf}{confidence interval}

\item{by}{feature of interest}

\item{type}{one of "start_site", "end_site", "body"}

\item{windows_name}{the name for each window, which will also be showed in the picture as labels}

\item{weightCol}{column name of weight}

\item{TxDb}{TxDb object or self-made granges objects}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{verbose}{print message or not}

\item{nbin}{the amount of bines}

\item{ignore_strand}{ignore the strand information or not}

\item{...}{additional parameter}
}
\value{
ggplot object
}
\description{
plot the profile of peaks
`
\code{plotPeakProf_MultiWindows()} is almost the same as \code{plotPeakProf2()}, having
the main difference of accepting two or more granges objects. Accepting more
granges objects can help compare the same peaks in different windows.
}
\details{
\code{TxDb} parameter can accept txdb object.
But many regions can not be obtained by txdb object. In this case,
Users can provide self-made granges served the same role 
as txdb object and pass to \code{TxDb} object.

\code{by} the features of interest. 

(1) if users use \code{txdb}, \code{by} can be one of 'gene', 'transcript', 'exon', 
'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.

(2) if users use self-made granges object, \code{by} can be everything. Because this \code{by}
will not pass to functions to get features, which is different from the case of using 
txdb object. This \code{by} is only used to made labels showed in picture.

\code{type} means the property of the region. one of the "start site",
"end site" and "body".

\code{upstream} and \code{downstream} parameter have different usages:

(1) if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
the flank of body region.

(2) if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} refer to
the upstream and downstream of the start_site or the end_site.

\code{weightCol} refers to column in peak file. This column acts as a weight value. Details
see \url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}

\code{nbin} refers to the number of bins. \code{getTagMatrix()} provide a binning method
to get the tag matrix.

There are two ways input a list of window.

(1) Users can input a list of self-made granges objects

(2) Users can input a list of \code{by} and only one \code{type}. In this way, 
\code{plotPeakProf_MultiWindows()} can made a list of window from txdb object based on \code{by} and \code{type}.

Warning: 

(1) All of these window should be the same type. It means users can only
compare a list of "start site"/"end site"/"body region" with the same upstream
and downstream.

(2) So it will be only one \code{type} and several \code{by}.

(3) Users can make window by txdb object or self-made granges object. Users can only
choose one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR' or 'UTR' in the
way of using txdb object. User can input any \code{by} in the way of using 
self-made granges object.

(4) Users can mingle the \code{by} designed for the two ways. \code{plotPeakProf_MultiWindows} can
accpet the hybrid \code{by}. But the above rules should be followed.

\url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}
}


================================================
FILE: man/plotPeakProf2.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotPeakProf2}
\alias{plotPeakProf2}
\title{plotPeakProf2}
\usage{
plotPeakProf2(
  peak,
  upstream,
  downstream,
  conf,
  by,
  type,
  weightCol = NULL,
  TxDb = NULL,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  facet = "none",
  free_y = TRUE,
  verbose = TRUE,
  nbin = NULL,
  ignore_strand = FALSE,
  ...
)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{upstream}{upstream position}

\item{downstream}{downstream position}

\item{conf}{confidence interval}

\item{by}{e.g. 'gene', 'transcript', 'exon' or features of interest(e.g. "enhancer")}

\item{type}{one of "start_site", "end_site", "body"}

\item{weightCol}{column name of weight}

\item{TxDb}{TxDb object, or self-made granges object}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{verbose}{print message or not}

\item{nbin}{the amount of nbines}

\item{ignore_strand}{ignore the strand information or not}

\item{...}{additional parameter}
}
\value{
ggplot object
}
\description{
plot the profile of peaks automatically
}
\details{
\code{peak} stands for the peak file. 

\code{by} the features of interest. 

(1) if users use \code{txdb}, \code{by} can be one of 'gene', 'transcript', 'exon', 
'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.

(2) if users use self-made granges object, \code{by} can be everything. Because this \code{by}
will not pass to functions to get features, which is different from the case of using 
txdb object. This \code{by} is only used to made labels showed in picture.

\code{type} means the property of the region. one of the "start site",
"end site" and "body".

\code{upstream} and \code{downstream} parameter have different usages:

(1) if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
the flank of body region.

(2) if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} refer to
the upstream and downstream of the start_site or the end_site.

\code{weightCol} refers to column in peak file. This column acts as a weight vaule. Details
see \url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}

\code{nbin} refers to the number of bins, providing a binning method
to get the tag matrix.

\code{TxDb} parameter can accept txdb object.
But many regions can not be obtained by txdb object. In this case,
Users can provide self-made granges served the same role 
as txdb object and pass to \code{TxDb} object.

\code{plotPeakProf2()} is different from the \code{plotPeakProf()}. \code{plotPeakProf2()} do not
need to provide \code{window} parameter, which means \code{plotPeakProf2()} will call relevent
functions to make \code{window} automatically.
}
\author{
G Yu, Ming Li
}


================================================
FILE: man/plotPeakProf_MultiWindows.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{plotPeakProf_MultiWindows}
\alias{plotPeakProf_MultiWindows}
\title{plotPeakProf_MultiWindows}
\usage{
plotPeakProf_MultiWindows(
  peak,
  upstream,
  downstream,
  conf,
  by,
  type,
  windows_name = NULL,
  weightCol = NULL,
  TxDb = NULL,
  xlab = "Genomic Region (5'->3')",
  ylab = "Peak Count Frequency",
  facet = "row",
  free_y = TRUE,
  verbose = TRUE,
  nbin = NULL,
  ignore_strand = FALSE,
  ...
)
}
\arguments{
\item{peak}{peak file or GRanges object}

\item{upstream}{upstream position}

\item{downstream}{downstream position}

\item{conf}{confidence interval}

\item{by}{feature of interest}

\item{type}{one of "start_site", "end_site", "body"}

\item{windows_name}{the name for each window, which will also be showed in the picture as labels}

\item{weightCol}{column name of weight}

\item{TxDb}{TxDb object or self-made granges objects}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{facet}{one of 'none', 'row' and 'column'}

\item{free_y}{if TRUE, y will be scaled by AvgProf}

\item{verbose}{print message or not}

\item{nbin}{the amount of bines}

\item{ignore_strand}{ignore the strand information or not}

\item{...}{additional parameter}
}
\value{
ggplot object
}
\description{
plot the profile of peaks in two or more windows
}
\details{
This function comes from \url{https://github.com/YuLab-SMU/ChIPseeker/issues/189}
`
\code{plotPeakProf_MultiWindows()} is almost the same as \code{plotPeakProf2()}, having
the main difference of accepting two or more granges objects. Accepting more
granges objects can help compare the same peaks in different windows.

\code{TxDb} parameter can accept txdb object.
But many regions can not be obtained by txdb object. In this case,
Users can provide self-made granges served the same role 
as txdb object and pass to \code{TxDb} object.

\code{by} the features of interest. 

(1) if users use \code{txdb}, \code{by} can be one of 'gene', 'transcript', 'exon', 
'intron' , '3UTR' , '5UTR', 'UTR'. These features can be obtained by functions from txdb object.

(2) if users use self-made granges object, \code{by} can be everything. Because this \code{by}
will not pass to functions to get features, which is different from the case of using 
txdb object. This \code{by} is only used to made labels showed in picture.

\code{type} means the property of the region. one of the "start site",
"end site" and "body".

\code{upstream} and \code{downstream} parameter have different usages:

(1) if \code{type == 'body'}, \code{upstream} and \code{downstream} can use to extend 
the flank of body region.

(2) if \code{type == 'start_site'/'end_site'}, \code{upstream} and \code{downstream} refer to
the upstream and downstream of the start_site or the end_site.

\code{weightCol} refers to column in peak file. This column acts as a weight value. Details
see \url{https://github.com/YuLab-SMU/ChIPseeker/issues/15}

\code{nbin} refers to the number of bins. \code{getTagMatrix()} provide a binning method
to get the tag matrix.

There are two ways input a list of window.

(1) Users can input a list of self-made granges objects

(2) Users can input a list of \code{by} and only one \code{type}. In this way, 
\code{plotPeakProf_MultiWindows()} can made a list of window from txdb object based on \code{by} and \code{type}.

Warning: 

(1) All of these window should be the same type. It means users can only
compare a list of "start site"/"end site"/"body region" with the same upstream
and downstream.

(2) So it will be only one \code{type} and several \code{by}.

(3) Users can make window by txdb object or self-made granges object. Users can only
choose one of 'gene', 'transcript', 'exon', 'intron' , '3UTR' , '5UTR' or 'UTR' in the
way of using txdb object. User can input any \code{by} in the way of using 
self-made granges object.

(4) Users can mingle the \code{by} designed for the two ways. \code{plotPeakProf_MultiWindows} can
accpet the hybrid \code{by}. But the above rules should be followed.
}


================================================
FILE: man/readPeakFile.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/readPeakFile.R
\name{readPeakFile}
\alias{readPeakFile}
\title{readPeakFile}
\usage{
readPeakFile(peakfile, as = "GRanges", ...)
}
\arguments{
\item{peakfile}{peak file}

\item{as}{output format, one of GRanges or data.frame}

\item{...}{additional parameter (pass to `utils::read.delim()`)}
}
\value{
peak information, in GRanges or data.frame object
}
\description{
read peak file and store in data.frame or GRanges object
}
\examples{
peakfile <- system.file("extdata", "sample_peaks.txt", package="ChIPseeker")
peak.gr <- readPeakFile(peakfile, as="GRanges")
peak.gr
}
\author{
G Yu
}


================================================
FILE: man/reexports.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\docType{import}
\name{reexports}
\alias{reexports}
\alias{GRangesList}
\alias{rel}
\title{Objects exported from other packages}
\keyword{internal}
\description{
These objects are imported from other packages. Follow the links
below to see their documentation.

\describe{
  \item{GenomicRanges}{\code{\link[GenomicRanges:GRangesList-class]{GRangesList}}}

  \item{ggplot2}{\code{\link[ggplot2:element]{rel}}}
}}


================================================
FILE: man/seq2gene.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/seq2gene.R
\name{seq2gene}
\alias{seq2gene}
\title{seq2gene}
\usage{
seq2gene(seq, tssRegion, flankDistance, TxDb, sameStrand = FALSE)
}
\arguments{
\item{seq}{genomic regions in GRanges object}

\item{tssRegion}{TSS region}

\item{flankDistance}{flanking search radius}

\item{TxDb}{TranscriptDb object}

\item{sameStrand}{logical whether find nearest/overlap gene in the same strand}
}
\value{
gene vector
}
\description{
annotate genomic regions to genes in many-to-many mapping
}
\details{
This funciton associates genomic regions with coding genes in a many-to-many mapping. It first maps genomic regions to host genes (either located in exon or intron), proximal genes (located in promoter regions) and flanking genes (located in upstream and downstream within user specify distance).
}
\examples{
\dontrun{
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
TxDb <- TxDb.Hsapiens.UCSC.hg19.knownGene
file <- getSampleFiles()[[1]] # a bed file
gr <- readPeakFile(file)
genes <- seq2gene(gr, tssRegion=c(-1000, 1000), flankDistance = 3000, TxDb) 
}
}
\author{
Guangchuang Yu
}


================================================
FILE: man/show-methods.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csAnno.R
\docType{methods}
\name{show}
\alias{show}
\alias{show,csAnno,ANY-method}
\title{show method}
\usage{
show(object)
}
\arguments{
\item{object}{A \code{csAnno} instance}
}
\value{
message
}
\description{
show method for \code{csAnno} instance
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/shuffle.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/enrichOverlap.R
\name{shuffle}
\alias{shuffle}
\title{shuffle}
\usage{
shuffle(peak.gr, TxDb)
}
\arguments{
\item{peak.gr}{GRanges object}

\item{TxDb}{TxDb}
}
\value{
GRanges object
}
\description{
shuffle the position of peak
}
\author{
G Yu
}


================================================
FILE: man/tagHeatmap.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotTagMatrix.R
\name{tagHeatmap}
\alias{tagHeatmap}
\title{tagHeatmap}
\usage{
tagHeatmap(
  tagMatrix,
  xlab = "",
  ylab = "",
  title = NULL,
  palette = "RdBu",
  nrow = NULL,
  ncol = NULL
)
}
\arguments{
\item{tagMatrix}{tagMatrix or a list of tagMatrix}

\item{xlab}{xlab}

\item{ylab}{ylab}

\item{title}{title}

\item{palette}{palette to be filled in,details see \link[ggplot2]{scale_colour_brewer}}

\item{nrow}{the nrow of plotting a list of peak}

\item{ncol}{the ncol of plotting a list of peak}
}
\value{
figure
}
\description{
plot the heatmap of tagMatrix
}
\author{
G Yu
}


================================================
FILE: man/upsetplot-methods.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csAnno.R
\docType{methods}
\name{upsetplot}
\alias{upsetplot}
\title{upsetplot method}
\usage{
upsetplot(x, ...)
}
\arguments{
\item{x}{A \code{csAnno} instance}

\item{...}{additional parameter}
}
\value{
plot
}
\description{
upsetplot method generics
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/vennpie-methods.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/AllGenerics.R, R/csAnno.R
\docType{methods}
\name{vennpie}
\alias{vennpie}
\title{vennpie method generics}
\usage{
vennpie(x, r = 0.2, cex = 1.2, ...)

vennpie(x, r = 0.2, cex=1.2, ...)
}
\arguments{
\item{x}{A \code{csAnno} instance}

\item{r}{initial radius}

\item{cex}{value to adjust legend}

\item{...}{additional parameter}
}
\value{
plot
}
\description{
vennpie method generics
}
\author{
Guangchuang Yu \url{https://guangchuangyu.github.io}
}


================================================
FILE: man/vennplot.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/vennplot.R
\name{vennplot}
\alias{vennplot}
\title{vennplot}
\usage{
vennplot(Sets, by = "gplots", ...)
}
\arguments{
\item{Sets}{a list of object, can be vector or GRanges object}

\item{by}{one of gplots, ggVennDiagram or Vennerable}

\item{...}{extra parameters using ggVennDiagram. Details see \link[ggVennDiagram]{ggVennDiagram}}
}
\value{
venn plot that summarize the overlap of peaks
from different experiments or gene annotation from
different peak files.
}
\description{
plot the overlap of a list of object
}
\details{
There are two ways to plot, which users can specify through `by`.

The first way is to use `gplots` packages, by setting `by = gplots`. This method
is default method. The venn plot produced through this way has no color.

The second way is to use `ggVennDiagram` packages, by setting `by = ggVennDiagram`. 
The venn plot produced through this way has colors which can be defined by users using
ggplot2 grammar e.g.(scale_fill_distiller()). And users can specify any details, like digital number,
text size and showing percentage or not, by inputting `...` extra parameters.
}
\examples{
## example not run
## require(TxDb.Hsapiens.UCSC.hg19.knownGene)
## txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
## peakfiles <- getSampleFiles()
## peakAnnoList <- lapply(peakfiles, annotatePeak)
## names(peakAnnoList) <- names(peakfiles)
## genes= lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
## vennplot(genes)
}
\author{
G Yu
}


================================================
FILE: man/vennplot.peakfile.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/vennplot.R
\name{vennplot.peakfile}
\alias{vennplot.peakfile}
\title{vennplot.peakfile}
\usage{
vennplot.peakfile(files, labels = NULL)
}
\arguments{
\item{files}{peak files}

\item{labels}{labels for peak files}
}
\value{
figure
}
\description{
vennplot for peak files
}
\author{
G Yu
}


================================================
FILE: tests/testthat/test-bed.R
================================================
library(ChIPseeker)

context("bed file")

test_that("parse bed file", {
    files <- getSampleFiles()
    for (i in seq_along(files)) {
        expect_true(is(readPeakFile(files[[i]]), "GRanges"))
    }
})


================================================
FILE: tests/testthat/test-getTagMatrix.R
================================================
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)

context("test getTagMatrix() and related functions")

test_that("getBioRegion function", {
  
  # test three kinds of regions derived from getBioRegion()
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  
  gene_start <- getBioRegion(TxDb = txdb,
                             upstream = 1000,
                             downstream = 1000,
                             by = 'gene',
                             type = "start_site")
  expect_is(gene_start,"GRanges")
  
  gene_end <- getBioRegion(TxDb = txdb,
                           upstream = 1000,
                           downstream = 1000,
                           by = 'gene',
                           type = "end_site")
  expect_is(gene_end,"GRanges")
  
  gene_body <- getBioRegion(TxDb = txdb,
                            upstream = 1000,
                            downstream = 1000,
                            by = 'gene',
                            type = "body")
  expect_is(gene_body,"GRanges")
  
  })

test_that("getPromoters functions",{
  
  # test two kinds of regions derived from getPromoters
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  
  gene <- getPromoters(TxDb=txdb,
                       upstream=1000,
                       downstream=1000,
                       by = "gene")
  
  transcript <- getPromoters(TxDb=txdb,
                             upstream=1000,
                             downstream=1000,
                             by = "transcript")
  
  expect_is(gene,"GRanges")
  expect_is(transcript,"GRanges")
  
})

test_that("makeBioRegionFromGranges function",{
  
  # we consider transcript region as enhancer region
  # and make self-made granges object
  # they can be the same in the form of granges object
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  enhancer <- transcripts(txdb)[1:5000,]
  
  ## we test three kinds of region, start_site, end_site and body
  enhancer_body <- makeBioRegionFromGranges(gr = enhancer,
                                            by = "enhancer",
                                            type = "body")
  
  enhancer_start <- makeBioRegionFromGranges(gr = enhancer,
                                             by = "enhancer",
                                             type = "start_site",
                                             upstream = 1000,
                                             downstream = 1000)
  
  enhancer_end <- makeBioRegionFromGranges(gr = enhancer,
                                           by = "enhancer",
                                           type = "end_site",
                                           upstream = 1000,
                                           downstream = 1000)
  
  expect_is(enhancer_body,"GRanges")
  expect_is(enhancer_start,"GRanges")
  expect_is(enhancer_end,"GRanges")
  
  ## test the label
  expect_equal(attr(enhancer_body,'label'),c("enhancer_SS","enhancer_TS"))
  expect_equal(attr(enhancer_start,'label'),"enhancer_SS")
  expect_equal(attr(enhancer_end,'label'),"enhancer_TS")
  
})

test_that("getTagMatrix function for single peak file",{
  
  peak <- getSampleFiles()[[4]]
  txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
  
  # make the window by getBioRegion()
  gene_start <- getBioRegion(TxDb = txdb,
                             upstream = 1000,
                             downstream = 1000,
                             by = 'gene',
                             type = "start_site")
  
  # make the window by makeBioRegionFromGranges()
  enhancer <- transcripts(txdb)[1:5000,]
  
  enhancer_body <- makeBioRegionFromGranges(gr = enhancer,
                                            by = "enhancer",
                                            type = "body")
  
  # test input window parameter
  mt1 <- getTagMatrix(peak = peak,
                      windows = gene_start,
                      weightCol = "V5")
  
  expect_is(mt1, "matrix")
  
  # without extending flank
  mt2_1 <- getTagMatrix(peak = peak,
                        windows = enhancer_body,
                        weightCol = "V5",
                        nbin = 800)
  
  expect_is(mt2_1, "matrix")
  
  # extend flank by rel object
  mt2_2 <- getTagMatrix(peak = peak,
                        windows = enhancer_body,
                        weightCol = "V5",
                        upstream = rel(0.2),
                        downstream = rel(0.2),
                        nbin = 800)
  
  expect_is(mt2_2, "matrix")
  
  # extend flank by actual number
  mt2_3 <- getTagMatrix(peak = peak,
                        windows = enhancer_body,
                        weightCol = "V5",
                        upstream = 1000,
                        downstream = 1000,
                        nbin = 800)
  
  expect_is(mt2_3, "matrix")
  
  # test input without window parameter 
  
  # make window through txdb object
  mt3 <- getTagMatrix(peak = peak,
                      weightCol = "V5",
                      TxDb = txdb,
                      by = "gene",
                      type = "start_site",
                      upstream = 3000,
                      downstream = 3000)
  
  expect_is(mt3, "matrix")
  
  # make window through self-made grange object
  mt4 <- getTagMatrix(peak = peak,
                      weightCol = "V5",
                      TxDb = enhancer,
                      by = "gene",
                      type = "start_site",
                      upstream = 1000,
                      downstream = 1000)
  
  expect_is(mt4, "matrix")
  
  # without extending flank
  mt5_1 <- getTagMatrix(peak = peak,
                        weightCol = "V5",
                        TxDb = txdb,
                        by = "gene",
                        type = "body",
                        nbin = 800)
  
  expect_is(mt5_1, "matrix")
  
  # extend flank by rel object
  mt5_2 <- getTagMatrix(peak = peak,
                        TxDb = enhancer,
                        weightCol = "V5",
                        by = "enhancer",
                        type = "body",
                        upstream = rel(0.2),
                        downstream = rel(0.2),
                        nbin = 800)
  
  expect_is(mt5_2, "matrix")
  
  # extend flank by actual number
  mt5_3 <- getTagMatrix(peak = peak,
                        TxDb = txdb,
                        weightCol = "V5",
                        by = "gene",
                        type = "body",
                        upstream = 1000,
                        downstream = 1000,
                        nbin = 800)
  
  expect_is(mt5_3, "matrix")
  
})


================================================
FILE: tests/testthat/test-txdb.R
================================================
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
library(ChIPseeker)
library(yulab.utils)

context("TXDB")

test_that("Update txdb", {
    hg19_txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
    ChIPseeker:::.ChIPseekerEnv(hg19_txdb)
    expect_equal(ChIPseeker:::get_env_genome(), "hg19")

    hg38_txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene
    ChIPseeker:::.ChIPseekerEnv(hg38_txdb)
    expect_equal(ChIPseeker:::get_env_genome(), "hg38")
})


test_that("txdb", {
    txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
    ChIPseeker:::.ChIPseekerEnv(txdb)
    expect_equal(ChIPseeker:::IDType(txdb), "Entrez Gene ID")

    if (packageVersion("TxDb.Hsapiens.UCSC.hg19.knownGene") > "3.22") {
        expect_equal(
            ChIPseeker:::TXID2EG("70455"),
            "ENST00000487630.1_3/ENST00000487630.1_3"
        )
        expect_equal(
            ChIPseeker:::TXID2EG("70455", geneIdOnly = TRUE),
            "ENST00000487630.1_3"
        )
    } else {
        expect_equal(ChIPseeker:::TXID2EG("70455"), "uc002qsd.4/1")
        expect_equal(ChIPseeker:::TXID2EG("70455", geneIdOnly = TRUE), "1")
    }
})


================================================
FILE: tests/testthat.R
================================================
library(testthat)
library(ChIPseeker)

test_check("ChIPseeker")


================================================
FILE: vignettes/ChIPseeker.Rmd
================================================
---
title: "ChIPseeker: an R package for ChIP peak Annotation, Comparison and Visualization"
author: "Guangchuang Yu\\

        School of Basic Medical Sciences, Southern Medical University"
date: "`r Sys.Date()`"
bibliography: ChIPseeker.bib
biblio-style: apalike
output:
  prettydoc::html_pretty:
    toc: true
    theme: cayman
    highlight: github
  pdf_document:
    toc: true
vignette: >
  %\VignetteIndexEntry{ChIPseeker: an R package for ChIP peak Annotation, Comparison and Visualization}
  %\VignetteEngine{knitr::rmarkdown}
  %\usepackage[utf8]{inputenc}
  %\VignetteEncoding{UTF-8}
---


```{r style, echo=FALSE, results='asis', message=FALSE}
knitr::opts_chunk$set(tidy         = FALSE,
                      warning      = FALSE,
                      message      = FALSE)

library(yulab.utils)
Biocannopkg <- yulab.utils::Biocpkg
```

```{r echo=FALSE, results='hide', message=FALSE}
library(GenomicFeatures)
library(GenomicRanges)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(org.Hs.eg.db)
library(ggplot2)
library(clusterProfiler)
library(ReactomePA)
library(ChIPseeker)
```

# Abstract

ChIPseeker is an R package for annotating ChIP-seq data analysis. It supports annotating ChIP peaks and provides functions to visualize ChIP peaks coverage over chromosomes and profiles of peaks binding to TSS regions. Comparison of ChIP peak profiles and annotation are also supported. Moreover, it supports evaluating significant overlap among ChIP-seq datasets. Currently, ChIPseeker contains 17,000 bed file information from GEO database. These datasets can be downloaded and compare with user's own data to explore significant overlap datasets for inferring co-regulation or transcription factor complex for further investigation.


# Citation

If you use `r Biocpkg("ChIPseeker")`[@yu_chipseeker_2015] in published research, please cite:

+ Q Wang<sup>#</sup>, M Li<sup>#</sup>, T Wu, L Zhan, L Li, M Chen, W Xie, Z Xie, E Hu, S Xu, __G Yu__<sup>\*</sup>. [Exploring epigenomic datasets by ChIPseeker](https://onlinelibrary.wiley.com/share/author/GYJGUBYCTRMYJFN2JFZZ?target=10.1002/cpz1.585). __*Current Protocols*__, 2022, 2(10): e585. 
+ __G Yu__<sup>\*</sup>, LG Wang, QY He<sup>\*</sup>. [ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparision and visualization](http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btv145). __*Bioinformatics*__. 2015, 31(14):2382-2383. 


# Introduction

Chromatin immunoprecipitation followed by high-throughput sequencing (ChIP-seq) has become standard technologies for genome wide identification of DNA-binding protein target sites. After read mappings and peak callings, the peak should be annotated to answer the biological questions. Annotation also create the possibility of integrating expression profile data to predict gene expression regulation. `r Biocpkg("ChIPseeker")`[@yu_chipseeker_2015] was developed for annotating nearest genes and genomic features to peaks.

ChIP peak data set comparison is also very important. We can use it as an index to estimate how well biological replications are. Even more important is applying to infer cooperative regulation. If two ChIP seq data, obtained by two different binding proteins, overlap significantly, these two proteins may form a complex or have interaction in regulation chromosome remodelling or gene expression. `r Biocpkg("ChIPseeker")`[@yu_chipseeker_2015] support statistical testing of significant overlap among ChIP seq data sets, and incorporate open access database GEO for users to compare their own dataset to those deposited in database. Protein interaction hypothesis can be generated by mining data deposited in database. Converting genome coordinations from one genome version to another is also supported, making this comparison available for different genome version and different species.

Several visualization functions are implemented to visualize the coverage of the ChIP seq data, peak annotation, average profile and heatmap of peaks binding to TSS region.

Functional enrichment analysis of the peaks can be performed by my Bioconductor packages `r Biocpkg("DOSE")`[@yu_dose_2015], `r Biocpkg("ReactomePA")`[@yu_reactomepa_2016], `r Biocpkg("clusterProfiler")`[@yu_clusterprofiler_2012].

```{r}
## loading packages
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
library(clusterProfiler)
```

# ChIP profiling
The datasets _CBX6_ and _CBX7_ in this vignettes were downloaded from _GEO (GSE40740)_[@pemberton_genome-wide_2014] while _ARmo\_0M_, _ARmo\_1nM_ and _ARmo\_100nM_ were downloaded from _GEO (GSE48308)_[@urbanucci_overexpression_2012] . `r Biocpkg("ChIPseeker")` provides `readPeakFile` to load the peak and store in `GRanges` object.

```{r}
files <- getSampleFiles()
print(files)
peak <- readPeakFile(files[[4]])
peak
```

## ChIP peaks coverage plot

After peak calling, we would like to know the peak locations over the whole genome, `covplot` function calculates the coverage of peak regions over chromosomes and generate a figure to visualize. [GRangesList](https://guangchuangyu.github.io/2016/02/covplot-supports-grangeslist) is also supported and can be used to compare coverage of multiple bed files.


```{r fig.height=8, fig.width=10}
covplot(peak, weightCol="V5")
```

```{r fig.height=4, fig.width=10}
covplot(peak, weightCol="V5", chrs=c("chr17", "chr18"), xlim=c(4.5e7, 5e7))
```

When `peak` is a `GRangsList` object, user can set the colors directly or by passing a palette to `fill_color`.

```{r fig.height=8, fig.width=10}
peaks = lapply(files[4:5], readPeakFile)
covplot(peaks, weightCol = "V5", fill_color = c("red","blue")) +
  theme(legend.position = "inside",
        legend.position.inside = c(0.8,0.2))
```


## Profile of ChIP peaks binding to TSS regions

First of all, for calculating the profile of ChIP peaks binding to TSS regions, we should prepare the TSS regions, which are defined as the flanking sequence of the TSS sites. Then align the peaks that are mapping to these regions, and generate the tagMatrix.


```{r}
## promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
## tagMatrix <- getTagMatrix(peak, windows=promoter)
##
## to speed up the compilation of this vignettes, we use a precalculated tagMatrix
data("tagMatrixList")
tagMatrix <- tagMatrixList[[4]]
```

In the above code, you should notice that tagMatrix is not restricted to TSS regions. The regions can be other types that defined by the user. `r Biocpkg("ChIPseeker")` expanded the scope of region. Users can input the `type` and `by` parameters to get the regions they want.

### Heatmap of ChIP binding to TSS regions

```{r fig.cap="Heatmap of ChIP peaks binding to TSS regions", fig.align="center", fig.height=9, fig.width=6}
tagHeatmap(tagMatrix)
```

`r Biocpkg("ChIPseeker")` provide a one step function to generate this figure from bed file. The following function will generate the same figure as above.

```{r eval=FALSE}
peakHeatmap(files[[4]], TxDb=txdb, upstream=3000, downstream=3000)
```

Users can use `nbin` parameter to speed up.
```{r eval=FALSE}
peakHeatmap(files[[4]],TxDb = txdb,nbin = 800,upstream=3000, downstream=3000)

```

Users can also use ggplot method to change the details of the figures.
```{r eval=FALSE}
peakHeatmap(files[[4]],TxDb = txdb,nbin = 800,upstream=3000, downstream=3000) +
  scale_fill_distiller(palette = "RdYlGn")
```

Users can also profile genebody regions with `peakHeatmap()`.
```{r fig.cap="Heatmap of genebody regions", fig.align="center", fig.height=9, fig.width=6,results='hide'}
peakHeatmap(peak = files[[4]],
            TxDb = txdb,
            upstream = rel(0.2),
            downstream = rel(0.2),
            by = "gene",
            type = "body",
            nbin = 800)
```

Sometimes there will be a need to explore the comparison of the peak heatmap over two regions, for example, the following picture is the peak over two gene sets. One possible scenery of using this method is to compare the peak heatmap over up-regulating genes and down-regulating genes. Here `txdb1` and `txdb2` is the simulated gene sets obtain from `TxDb.Hsapiens.UCSC.hg19.knownGene`. Using `peakHeatmap_multiple_Sets()`, accepting `list` object containing different regions information. The length of each part is correlated to the amount of regions.
```{r fig.cap="Heatmap of over two regions", fig.align="center", fig.height=9, fig.width=6,results='hide'}
txdb1 <- transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb2 <- unlist(fiveUTRsByTranscript(TxDb.Hsapiens.UCSC.hg19.knownGene))[1:10000,]

region_list <- list(geneX = txdb1, geneY = txdb2)
peakHeatmap_multiple_Sets(peak = files[[4]],
                          upstream = 1000,downstream = 1000,
                          by = c("geneX","geneY"),
                          type = "start_site",
                          TxDb = region_list,nbin = 800)
```

We also meet the need of ploting heatmap and peak profiling together.
```{r fig.cap="Combination of heatmap and peak profiling", fig.align="center", fig.height=9, fig.width=6,results='hide'}
peak_Profile_Heatmap(peak = files[[4]],
                     upstream = 1000,
                     downstream = 1000,
                     by = "gene",
                     type = "start_site",
                     TxDb = txdb,
                     nbin = 800)
```

Exploring several regions with heatmap and peak profiling is also supported.
```{r fig.cap="Combination of heatmap and peak profiling over several regions", fig.align="center", fig.height=12, fig.width=6,results='hide'}
txdb1 <- transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb2 <- unlist(fiveUTRsByTranscript(TxDb.Hsapiens.UCSC.hg19.knownGene))[1:10000,]

region_list <- list(geneX = txdb1, geneY = txdb2)
peak_Profile_Heatmap(peak = files[[4]],
                     upstream = 1000,
                     downstream = 1000,
                     by = c("geneX","geneY"),
                     type = "start_site",
                     TxDb = region_list,nbin = 800)
```

### Average Profile of ChIP peaks binding to TSS region

```{r eval=TRUE, fig.cap="Average Profile of ChIP peaks binding to TSS region", fig.align="center", fig.height=4, fig.width=7}
plotAvgProf(tagMatrix, xlim=c(-3000, 3000),
            xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency")
```


The function `plotAvgProf2` provide a one step from bed file to average profile plot. The following command will generate the same figure as shown above.

```{r eval=FALSE}
plotAvgProf2(files[[4]], TxDb=txdb, upstream=3000, downstream=3000,
             xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency")
```

Confidence interval estimated by bootstrap method is also supported for characterizing ChIP binding profiles.

```{r fig.cap="Average Profile of ChIP peaks binding to TSS region", fig.align="center", fig.height=4, fig.width=7, eval=F}
plotAvgProf(tagMatrix, xlim=c(-3000, 3000), conf = 0.95, resample = 1000)
```

![](figures/plotAvgProf_boot.png)

## Profile of ChIP peaks binding to different regions

Referring to the [issue #16](https://github.com/GuangchuangYu/ChIPseeker/issues/16) , we developed and improved several functions support start site region, end site region and body region of Gene/Transcript/Exon/Intron/3UTR/5UTR. `getBioRegion` can prepare the different regions for ChIP peaks to bind. `getTagMatrix` can accept `type`, `by`, `upstream` and `downstream` parameters to get tagmatrix according to different needs. `plotPeakProf` and `plotPeakProf2` supports the plotting of profiles of peaks binding to different regions.Users can also create heatmap or average profile of ChIP peaks binding to these regions.

In order to plot body regions, a new methond `binning`,was introduced to `getTagMatrix`. The idea of `binning` was derived from [deeptools](https://deeptools.readthedocs.io/en/develop/content/tools/computeMatrix.html)[@ramirez2016deeptools2]. `binning` scaled the regions having different lengths to the equal length by deviding the regions into the same amounts of boxes. Because the amount of boxes is equal, the regions can be thought of scaling to equal length.`binning` method can speed up the `getTagMatrix` by changing the precision from bp to box(several bps).

There are three ways to plot these regions. First, users can use `getBioRegion` to prepare the regions. Then align the peaks that are mapping to these regions, and generate the tagMatrix by `getTagMatrix`. At Last, plot the figures by `plotPeakProf`. Second, users can input `type` and `by` parameters to `getTagMatrix` to get the tagMatrix and plot the figures. Third, users can use `plotPeakProf2` to do everything in one step.

### Binning method for profile of ChIP peaks binding to TSS regions
Here uses the method of inputting `type` and `by` parameters. `type = "start_site"` means the start site region. `by = "gene"` means that it is the start site region of gene(TSS regions). If users want to use binning method, the `nbin` method must be set.
```{r eval=F}
## The results of binning method and normal method are nearly the same. 
tagMatrix_binning <- getTagMatrix(peak = peak, TxDb = txdb, 
                                  upstream = 3000, downstream = 3000, 
                                  type = "start_site", by = "gene", 
                                  weightCol = "V5", nbin = 800)
```

### Profile of ChIP peaks binding to body regions 
We improved and developed several functions to plot body region of Gene/Transcript/Exon/Intron/3UTR/5UTR. If users want to get more information from the body region, we added `upstream` and `downstream` parameters to functions in order to get flank extension of body regions. `upstream` and `downstream` can be NULL(default), rel object and actual numbers. NULL(default) reflects body regions with no flank extension. Rel object reflects the percentage of total length of body regions. Actual numbers reflects the actual length of flank extension. 

```{r eval=F}
## Here uses `plotPeakProf2` to do all things in one step.
## Gene body regions having lengths smaller than nbin will be filtered
## A message will be given to warning users about that.
## >> 9 peaks(0.872093%), having lengths smaller than 800bp, are filtered...

## the ignore_strand is FALSE in default. We put here to emphasize that.
## We will not show it again in the below example
plotPeakProf2(peak = peak, upstream = rel(0.2), downstream = rel(0.2),
              conf = 0.95, by = "gene", type = "body", nbin = 800,
              TxDb = txdb, weightCol = "V5",ignore_strand = F)
```
![](figures/plotPeakProf2_body_extend.png)

Users can also get the profile ChIP peaks binding to gene body regions with no flank extension or flank extension decided by actual length.
```{r eval=F}
## The first method using getBioRegion(), getTagMatrix() and plotPeakProf() to plot in three steps.
genebody <- getBioRegion(TxDb = txdb,
                         by = "gene",
                         type = "body")

matrix_no_flankextension <- getTagMatrix(peak,windows = genebody, nbin = 800)

plotPeakProf(matrix_no_flankextension,conf = 0.95)

## The second method of using getTagMatrix() and plotPeakProf() to plot in two steps
matrix_actual_extension <- getTagMatrix(peak,windows = genebody, nbin = 800,
                                        upstream = 1000,downstream = 1000)
plotPeakProf(matrix_actual_extension,conf = 0.95)

```

Users can also get the body region of 5UTR/3UTR.
```{r eval=F}
five_UTR_body <- getTagMatrix(peak = peak, 
                              TxDb = txdb,
                              upstream = rel(0.2),
                              downstream = rel(0.2), 
                              type = "body",
                              by = "5UTR",
                              weightCol = "V5",
                              nbin = 50)

plotPeakProf(tagMatrix = five_UTR_body, conf = 0.95)
```

### Profile of ChIP peaks binding to TTS regions 
```{r eval=F}
TTS_matrix <- getTagMatrix(peak = peak, 
                           TxDb = txdb,
                           upstream = 3000,
                           downstream = 3000, 
                           type = "end_site",
                           by = "gene",
                           weightCol = "V5")

plotPeakProf(tagMatrix = TTS_matrix, conf = 0.95)
```

# Peak Annotation
```{r}
peakAnno <- annotatePeak(files[[4]], tssRegion=c(-3000, 3000),
                         TxDb=txdb, annoDb="org.Hs.eg.db")
```

Note that it would also be possible to use Ensembl-based `EnsDb` annotation 
databases created by the `r Biocpkg("ensembldb")` package for the
peak annotations by providing it with the `TxDb` parameter. Since UCSC-style 
chromosome names are used we have to change the style of the chromosome names
from *Ensembl* to *UCSC* in the example below.

```{r, eval = FALSE}
library(EnsDb.Hsapiens.v75)
edb <- EnsDb.Hsapiens.v75
seqlevelsStyle(edb) <- "UCSC"

peakAnno.edb <- annotatePeak(files[[4]], tssRegion=c(-3000, 3000),
                             TxDb=edb, annoDb="org.Hs.eg.db")
```

Peak Annotation is performed by `annotatePeak`. User can define TSS (transcription start site) region, by default TSS is defined from -3kb to +3kb. The output of `annotatePeak` is `csAnno` instance. `r Biocpkg("ChIPseeker")` provides `as.GRanges` to convert `csAnno` to `GRanges` instance, and `as.data.frame` to convert `csAnno` to `data.frame` which can be exported to file by `write.table`.

`TxDb` object contained transcript-related features of a particular genome. Bioconductor provides several package that containing `TxDb` object of model organisms with multiple commonly used genome version, for instance `r Biocannopkg("TxDb.Hsapiens.UCSC.hg38.knownGene")`, `r Biocannopkg("TxDb.Hsapiens.UCSC.hg19.knownGene")` for human genome hg38 and hg19, `r Biocannopkg("TxDb.Mmusculus.UCSC.mm10.knownGene")` and `r Biocannopkg("TxDb.Mmusculus.UCSC.mm9.knownGene")` for mouse genome mm10 and mm9, etc. User can also prepare their own `TxDb` object by retrieving information from UCSC Genome Bioinformatics and BioMart data resources by R function `makeTxDbFromBiomart` and `makeTxDbFromUCSC`. `TxDb` object should be passed for peak annotation.

All the peak information contained in peakfile will be retained in the output of `annotatePeak`. The position and strand information of nearest genes are reported. The distance from peak to the TSS of its nearest gene is also reported. The genomic region of the peak is reported in annotation column. Since some annotation may overlap, `r Biocpkg("ChIPseeker")` adopted the following priority in genomic annotation.

* Promoter
* 5' UTR
* 3' UTR
* Exon
* Intron
* Downstream
* Intergenic


_Downstream_ is defined as the downstream of gene end.

`r Biocpkg("ChIPseeker")` also provides parameter _genomicAnnotationPriority_ for user to prioritize this hierachy.

`annotatePeak` report detail information when the annotation is Exon or Intron, for instance "Exon (uc002sbe.3/9736, exon 69 of 80)", means that the peak is overlap with an Exon of transcript uc002sbe.3, and the corresponding Entrez gene ID is 9736 (Transcripts that belong to the same gene ID may differ in splice events), and this overlaped exon is the 69th exon of the 80 exons that this transcript uc002sbe.3 prossess.

Parameter annoDb is optional, if provided, extra columns including SYMBOL, GENENAME, ENSEMBL/ENTREZID will be added. The geneId column in annotation output will be consistent with the geneID in TxDb. If it is ENTREZID, ENSEMBL will be added if annoDb is provided, while if it is ENSEMBL ID, ENTREZID will be added.

## Visualize Genomic Annotation

To annotate the location of a given peak in terms of genomic features, `annotatePeak` assigns peaks to genomic annotation in "annotation" column of the output, which includes whether a peak is in the TSS, Exon, 5' UTR, 3' UTR, Intronic or Intergenic. Many researchers are very interesting in these annotations. TSS region can be defined by user and `annotatePeak` output in details of which exon/intron of which genes as illustrated in previous section.

Pie and Bar plot are supported to visualize the genomic annotation.
```{r fig.cap="Genomic Annotation by pieplot", fig.align="center", fig.height=6, fig.width=8}
plotAnnoPie(peakAnno)
```

```{r fig.cap="Genomic Annotation by barplot", fig.align="center", fig.height=4, fig.width=10}
plotAnnoBar(peakAnno)
```

Since some annotation overlap, user may interested to view the full annotation with their overlap, which can be partially resolved by `vennpie` function.

```{r fig.cap="Genomic Annotation by vennpie", fig.align="center", fig.height=8, fig.width=11}
vennpie(peakAnno)
```

We extend `r CRANpkg("UpSetR")` to view full annotation overlap. User can user `upsetplot` function.

```{r eval=F, fig.cap="Genomic Annotation by upsetplot", fig.align="center", fig.height=8, fig.width=12}
upsetplot(peakAnno)
```
![](figures/upset.png)

We can combine `vennpie` with `upsetplot` by setting *vennpie = TRUE*.
```{r eval=F, fig.cap="Genomic Annotation by upsetplot", fig.align="center", fig.height=8, fig.width=12}
upsetplot(peakAnno, vennpie=TRUE)
```
![](figures/upset_vennpie.png)


## Visualize distribution of TF-binding loci relative to TSS

The distance from the peak (binding site) to the TSS of the nearest gene is calculated by `annotatePeak` and reported in the output. We provide `plotDistToTSS` to calculate the percentage of binding sites upstream and downstream from the TSS of the nearest genes, and visualize the distribution.
```{r fig.cap="Distribution of Binding Sites", fig.align="center", fig.height=2, fig.width=6}
plotDistToTSS(peakAnno,
              title="Distribution of transcription factor-binding loci\nrelative to TSS")
```

# Functional enrichment analysis

Once we have obtained the annotated nearest genes, we can perform functional enrichment analysis to identify predominant biological themes among these genes by incorporating biological knowledge provided by biological ontologies. For instance, Gene Ontology (GO)[@ashburner_gene_2000] annotates genes to biological processes, molecular functions, and cellular components in a directed acyclic graph structure, Kyoto Encyclopedia of Genes and Genomes (KEGG)[@kanehisa_kegg_2004] annotates genes to pathways, Disease Ontology (DO)[@schriml_disease_2011] annotates genes with human disease association, and Reactome[@croft_reactome_2013] annotates gene to pathways and reactions.

`r Biocpkg("ChIPseeker")` also provides a function, __*seq2gene*__, for linking genomc regions to genes in a many-to-many mapping. It consider host gene (exon/intron), promoter region and flanking gene from intergenic region that may under control via cis-regulation. This function is designed to link both coding and non-coding genomic regions to coding genes and facilitate functional analysis.


Enrichment analysis is a widely used approach to identify biological themes. I have developed several Bioconductor packages for investigating whether the number of selected genes associated with a particular biological term is larger than expected, including `r Biocpkg("DOSE")`[@yu_dose_2015] for Disease Ontology, `r Biocpkg("ReactomePA")` for reactome pathway, `r Biocpkg("clusterProfiler")`[@yu_clusterprofiler_2012] for Gene Ontology and KEGG enrichment analysis.

```{r fig.width=8, fig.height=5}
library(ReactomePA)

pathway1 <- enrichPathway(as.data.frame(peakAnno)$geneId)
head(pathway1, 2)

gene <- seq2gene(peak, tssRegion = c(-1000, 1000), flankDistance = 3000, TxDb=txdb)
pathway2 <- enrichPathway(gene)
head(pathway2, 2)
dotplot(pathway2)
```

More information can be found in the vignettes of Bioconductor packages `r Biocpkg("DOSE")`[@yu_dose_2015], `r Biocpkg("ReactomePA")`, `r Biocpkg("clusterProfiler")`[@yu_clusterprofiler_2012], which also provide several methods to visualize enrichment results. The `r Biocpkg("clusterProfiler")`[@yu_clusterprofiler_2012] is designed for comparing and visualizing functional profiles among gene clusters, and can directly applied to compare biological themes at GO, DO, KEGG, Reactome perspective.


# ChIP peak data set comparison

## Profile of several ChIP peak data binding to TSS region

Function `plotAvgProf`, `tagHeatmap` and `plotPeakProf` can accept a list of `tagMatrix` and visualize profile or heatmap among several ChIP experiments, while `plotAvgProf2` , `peakHeatmap` and `plotPeakProf2` can accept a list of bed files and perform the same task in one step.


### Average profiles

```{r eval=TRUE, fig.cap="Average Profiles of ChIP peaks among different experiments", fig.align="center", fig.height=4, fig.width=6}
## promoter <- getPromoters(TxDb=txdb, upstream=3000, downstream=3000)
## tagMatrixList <- lapply(files, getTagMatrix, windows=promoter)
##
## to speed up the compilation of this vigenette, we load a precaculated tagMatrixList
data("tagMatrixList")
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000))
```

```{r eval=FALSE, fig.cap="Average Profiles of ChIP peaks among different experiments", fig.align="center", fig.height=7, fig.width=6}
plotAvgProf(tagMatrixList, xlim=c(-3000, 3000), conf=0.95,resample=500, facet="row")
```

![](figures/plotAvgProf_boot_list.png)

```{r eval=F}
## normal method
plotPeakProf2(files, upstream = 3000, downstream = 3000, conf = 0.95,
              by = "gene", type = "start_site", TxDb = txdb,
              facet = "row")

## binning method 
plotPeakProf2(files, upstream = 3000, downstream = 3000, conf = 0.95,
              by = "gene", type = "start_site", TxDb = txdb,
              facet = "row", nbin = 800)

```

### Peak heatmaps

```{r eval=TRUE, fig.cap="Heatmap of ChIP peaks among different experiments", fig.align="center", fig.height=8, fig.width=16}
tagHeatmap(tagMatrixList)
```

## Profile of several ChIP peak data binding to body region
Functions `plotPeakProf` and `plotPeakProf2` also support to plot profile of several ChIP peak data binding to body region.
```{r eval=F}
plotPeakProf2(files, upstream = rel(0.2), downstream = rel(0.2),
              conf = 0.95, by = "gene", type = "body",
              TxDb = txdb, facet = "row", nbin = 800)
```
![](figures/plotPeakProf_body_boot_list_.png)

## ChIP peak annotation comparision
The `plotAnnoBar` and `plotDistToTSS` can also accept input of a named list of annotated peaks (output of `annotatePeak`).


```{r}
peakAnnoList <- lapply(files, annotatePeak, TxDb=txdb,
                       tssRegion=c(-3000, 3000), verbose=FALSE)
```

We can use `plotAnnoBar` to comparing their genomic annotation.
```{r fig.cap="Genomic Annotation among different ChIPseq data", fig.align="center", fig.height=4, fig.width=6}
plotAnnoBar(peakAnnoList)
```

R function `plotDistToTSS` can use to comparing distance to TSS profiles among ChIPseq data.
```{r fig.cap="Distribution of Binding Sites among different ChIPseq data", fig.align="center", fig.height=5, fig.width=8}
plotDistToTSS(peakAnnoList)
```

## Functional profiles comparison
As shown in section 4, the annotated genes can analyzed by `r
Biocpkg("clusterProfiler")`[@yu_clusterprofiler_2012], `r
Biocpkg("DOSE")`[@yu_dose_2015], `r Biocpkg("meshes")` and `r
Biocpkg("ReactomePA")` for Gene Ontology, KEGG, Disease Ontology, MeSH and Reactome Pathway enrichment analysis.

The `r Biocpkg("clusterProfiler")`[@yu_clusterprofiler_2012] package provides `compareCluster` function for comparing biological themes among gene clusters, and can be easily adopted to compare different ChIP peak experiments.

```{r fig.width=8.5, fig.height=8.5}
genes = lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
names(genes) = sub("_", "\n", names(genes))
compKEGG <- compareCluster(geneCluster   = genes,
                         fun           = "enrichKEGG",
                         pvalueCutoff  = 0.05,
                         pAdjustMethod = "BH")
dotplot(compKEGG, showCategory = 15, title = "KEGG Pathway Enrichment Analysis")
```


## Overlap of peaks and annotated genes

User may want to compare the overlap peaks of replicate experiments or from different experiments. `r Biocpkg("ChIPseeker")` provides `peak2GRanges` that can read peak file and stored in GRanges object. Several files can be read simultaneously using lapply, and then passed to `vennplot` to calculate their overlap and draw venn plot.

`vennplot` accept a list of object, can be a list of GRanges or a list of vector. Here, I will demonstrate using `vennplot` to visualize the overlap of the nearest genes stored in peakAnnoList.

```{r fig.cap="Overlap of annotated genes", fig.align="center", fig.height=7, fig.width=7}
genes= lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
vennplot(genes)
```

# Statistical testing of ChIP seq overlap

Overlap is very important, if two ChIP experiment by two different proteins overlap in a large fraction of their peaks, they may cooperative in regulation. Calculating the overlap is only touch the surface. `r Biocpkg("ChIPseeker")` implemented statistical methods to measure the significance of the overlap.

## Shuffle genome coordination

```{r}
p <- GRanges(seqnames=c("chr1", "chr3"),
             ranges=IRanges(start=c(1, 100), end=c(50, 130)))
shuffle(p, TxDb=txdb)
```

We implement the `shuffle` function to randomly permute the genomic locations of ChIP peaks defined in a genome which stored in `TxDb` object.

## Peak overlap enrichment analysis

With the ease of this `shuffle` method, we can generate thousands of random ChIP data and calculate the background null distribution of the overlap among ChIP data sets.

```{r}
enrichPeakOverlap(queryPeak     = files[[5]],
                  targetPeak    = unlist(files[1:4]),
                  TxDb          = txdb,
                  pAdjustMethod = "BH",
                  nShuffle      = 50,
                  chainFile     = NULL,
                  verbose       = FALSE)
```

Parameter _queryPeak_ is the query ChIP data, while _targetPeak_ is bed file name or a vector of bed file names from comparison; _nShuffle_ is the number to shuffle the peaks in _targetPeak_. To speed up the compilation of this vignettes, we only set _nShuffle_ to 50 as an example for only demonstration. User should set the number to 1000 or above for more robust result. Parameter _chainFile_ are chain file name for mapping the _targetPeak_ to the genome version consistent with _queryPeak_ when their genome version are different. This creat the possibility of comparison among different genome version and cross species.

In the output, _qSample_ is the name of _queryPeak_ and _qLen_ is the the number of peaks in _queryPeak_. _N\_OL_ is the number of overlap between _queryPeak_ and _targetPeak_.


# Data Mining with ChIP seq data deposited in GEO

There are many ChIP seq data sets that have been published and deposited in GEO database. We can compare our own dataset to those deposited in GEO to search for significant overlap data. Significant overlap of ChIP seq data by different binding proteins may be used to infer cooperative regulation and thus can be used to generate hypotheses.

We collect about **17,000** bed files deposited in GEO, user can use `getGEOspecies` to get a summary based on speices.

## GEO data collection

```{r}
getGEOspecies()
```


The summary can also based on genome version as illustrated below:

```{r}
getGEOgenomeVersion()
```

User can access the detail information by `getGEOInfo`, for each genome version.

```{r}
hg19 <- getGEOInfo(genome="hg19", simplify=TRUE)
head(hg19)
```

If _simplify_ is set to _FALSE_, extra information including _source\_name_, _extract\_protocol_, _description_, _data\_processing_ and _submission\_date_ will be incorporated.

## Download GEO ChIP data sets

`r Biocpkg("ChIPseeker")` provide function `downloadGEObedFiles` to download all the bed files of a particular genome.

```{r eval=FALSE}
downloadGEObedFiles(genome="hg19", destDir="hg19")
```

Or a vector of GSM accession number by `downloadGSMbedFiles`.
```{r eval=FALSE}
gsm <- hg19$gsm[sample(nrow(hg19), 10)]
downloadGSMbedFiles(gsm, destDir="hg19")
```


## Overlap significant testing

After download the bed files from GEO, we can pass them to `enrichPeakOverlap` for testing the significant of overlap. Parameter _targetPeak_ can be the folder, _e.g._ hg19, that containing bed files. `enrichPeakOverlap` will parse the folder and compare all the bed files. It is possible to test the overlap with bed files that are mapping to different genome or different genome versions, `enrichPeakOverlap` provide a parameter _chainFile_ that can pass a chain file and liftOver the _targetPeak_ to the genome version consistent with _queryPeak_. Signifcant overlap can be use to generate hypothesis of cooperative regulation.By mining the data deposited in GEO, we can identify some putative complex or interacted regulators in gene expression regulation or chromsome remodelling for further validation.


# Need helps?


If you have questions/issues, please visit
[ChIPseeker homepage](https://guangchuangyu.github.io/software/ChIPseeker/) first.
Your problems are mostly documented. If you think you found a bug, please follow
[the guide](https://guangchuangyu.github.io/2016/07/how-to-bug-author/) and
provide a reproducible example to be posted
on
[github issue tracker](https://github.com/GuangchuangYu/ChIPseeker/issues).
For questions, please post
to [Bioconductor support site](https://support.bioconductor.org/) and tag your
post with *ChIPseeker*.


For Chinese user, you can follow me on [WeChat (微信)](https://guangchuangyu.github.io/blog_images/biobabble.jpg).


# Session Information

Here is the output of `sessionInfo()` on the system on which this document was compiled:

```{r echo=FALSE}
sessionInfo()
```

# References


================================================
FILE: vignettes/ChIPseeker.bib
================================================

@article{yu_reactomepa_2016,
	title = {{ReactomePA}: an R/Bioconductor package for reactome pathway analysis and visualization},
	volume = {12},
	issn = {1742-2051},
	url = {http://pubs.rsc.org.eproxy2.lib.hku.hk/en/content/articlelanding/2016/mb/c5mb00663e},
	doi = {10.1039/C5MB00663E},
	shorttitle = {{ReactomePA}},
	abstract = {Reactome is a manually curated pathway annotation database for unveiling high-order biological pathways from high-throughput data. {ReactomePA} is an R/Bioconductor package providing enrichment analyses, including hypergeometric test and gene set enrichment analyses. A functional analysis can be applied to the genomic coordination obtained from a sequencing experiment to analyze the functional significance of genomic loci including cis-regulatory elements and non-coding regions. Comparison among different experiments is also supported. Moreover, {ReactomePA} provides several visualization functions to produce highly customizable, publication-quality figures. The source code and documents of {ReactomePA} are freely available through Bioconductor (http://www.bioconductor.org/packages/{ReactomePA}).},
	pages = {477--479},
	number = {2},
	journaltitle = {Molecular {BioSystems}},
	shortjournal = {Mol. {BioSyst}.},
	author = {Yu, Guangchuang and He, Qing-Yu},
	urldate = {2016-02-17},
	date = {2016-01-26},
	langid = {english}
}

@article{yu_chipseeker_2015,
         title  = "ChIPseeker: an R/Bioconductor package for ChIP peak annotation, comparison and visualization",
         author = {Yu, Guangchuang and Wang, Li-Gen and He, Qing-Yu},
         journal = "Bioinformatics",
         year    = "2015",
         volume  = "31",
         number  = "14",
         pages   = "2382-2383",
         PMID    = "25765347",
	 url     = {http://bioinformatics.oxfordjournals.org/content/31/14/2382.abstract},
         doi     = "10.1093/bioinformatics/btv145",
}

@article{yu_dose_2015,
	title = {{DOSE}: an R/Bioconductor package for disease ontology semantic and enrichment analysis},
	volume = {31},
	issn = {1367-4803, 1460-2059},
	url = {http://bioinformatics.oxfordjournals.org/content/31/4/608},
	doi = {10.1093/bioinformatics/btu684},
	shorttitle = {{DOSE}},
	abstract = {Summary: Disease ontology ({DO}) annotates human genes in the context of disease. {DO} is important annotation in translating molecular findings from high-throughput data to clinical relevance. {DOSE} is an R package providing semantic similarity computations among {DO} terms and genes which allows biologists to explore the similarities of diseases and of gene functions in disease perspective. Enrichment analyses including hypergeometric model and gene set enrichment analysis are also implemented to support discovering disease associations of high-throughput biological data. This allows biologists to verify disease relevance in a biological experiment and identify unexpected disease associations. Comparison among gene clusters is also supported.
Availability and implementation: {DOSE} is released under Artistic-2.0 License. The source code and documents are freely available through Bioconductor (http://www.bioconductor.org/packages/release/bioc/html/{DOSE}.html).
Supplementary information: Supplementary data are available at Bioinformatics online.
Contact: gcyu@connect.hku.hk or tqyhe@jnu.edu.cn},
	pages = {608--609},
	number = {4},
	journaltitle = {Bioinformatics},
	shortjournal = {Bioinformatics},
	author = {Yu, Guangchuang and Wang, Li-Gen and Yan, Guang-Rong and He, Qing-Yu},
	urldate = {2015-02-13},
	date = {2015-02-15},
	langid = {english}
}

@article{urbanucci_overexpression_2012,
	title = {Overexpression of androgen receptor enhances the binding of the receptor to the chromatin in prostate cancer},
	volume = {31},
	issn = {1476-5594},
	doi = {10.1038/onc.2011.401},
	abstract = {Androgen receptor ({AR)} is overexpressed in the majority of castration-resistant prostate cancers ({CRPCs).} Our goal was to study the effect of {AR} overexpression on the chromatin binding of the receptor and to identify {AR} target genes that may be important in the emergence of {CRPC.} We have established two sublines of {LNCaP} prostate cancer ({PC)} cell line, one overexpressing {AR} 2-3-fold and the other 4-5-fold compared with the control cells. We used chromatin immunoprecipitation ({ChIP)} and deep-sequencing (seq) to identify {AR-binding} sites ({ARBSs).} We found that the number of {ARBSs} and the {AR-binding} strength were positively associated with the level of {AR} when cells were stimulated with low concentrations of androgens. In cells overexpressing {AR}, the chromatin binding of the receptor took place in 100-fold lower concentration of the ligand than in control cells. We confirmed the association of {AR} level and chromatin binding in two {PC} xenografts, one containing {AR} gene amplification with high {AR} expression, and the other with low expression. By combining the {ChIP-seq} and expression profiling, we identified {AR} target genes that are upregulated in {PC.} Of them, the expression of {ZWINT}, {SKP2} (S-phase kinase-associated protein 2 (p45)) and {FEN1} (flap structure-specific endonuclease 1) was demonstrated to be increased in {CRPC}, while the expression of {SNAI2} was decreased in both {PC} and {CRPC.} {FEN1} protein expression was also associated with poor prognosis in prostatectomy-treated patients. Finally, the knock-down of {FEN1} with small interfering {RNA} inhibited the growth of {LNCaP} cells. Our data demonstrate that the overexpression of {AR} sensitizes the receptor binding to chromatin, thus, explaining how {AR} signaling pathway is reactivated in {CRPC} cells.},
	pages = {2153-2163},
	number = {17},
	journaltitle = {Oncogene},
	shortjournal = {Oncogene},
	author = {Urbanucci, A and Sahu, B and Seppälä, J and Larjo, A and Latonen, L M and Waltering, K K and Tammela, T L J and Vessella, R L and Lähdesmäki, H and Jänne, O A and Visakorpi, T},
	date = {2012-04-26},
	note = {{PMID:} 21909140},
	keywords = {Animals, Binding Sites, Cell Line, Tumor, Chromatin, Flap Endonucleases, Gene Amplification, Gene Expression Profiling, Humans, Intracellular Signaling Peptides and Proteins, Male, Mice, Nuclear Proteins, Nucleic Acid Amplification Techniques, Prostatic Neoplasms, Receptors, Androgen, S-Phase Kinase-Associated Proteins, Transplantation, Heterologous}
}

@article{pemberton_genome-wide_2014,
	title = {Genome-wide co-localization of Polycomb orthologs and their effects on gene expression in human fibroblasts},
	volume = {15},
	issn = {1465-6914},
	doi = {10.1186/gb-2014-15-2-r23},
	abstract = {{BACKGROUND:} Polycomb group proteins form multicomponent complexes that are important for establishing lineage-specific patterns of gene expression. Mammalian cells encode multiple permutations of the prototypic Polycomb repressive complex 1 ({PRC1)} with little evidence for functional specialization. An aim of this study is to determine whether the multiple orthologs that are co-expressed in human fibroblasts act on different target genes and whether their genomic location changes during cellular senescence.
{RESULTS:} Deep sequencing of chromatin immunoprecipitated with antibodies against {CBX6}, {CBX7}, {CBX8}, {RING1} and {RING2} reveals that the orthologs co-localize at multiple sites. {PCR-based} validation at representative loci suggests that a further six {PRC1} proteins have similar binding patterns. Importantly, sequential chromatin immunoprecipitation with antibodies against different orthologs implies that multiple variants of {PRC1} associate with the same {DNA.} At many loci, the binding profiles have a distinctive architecture that is preserved in two different types of fibroblast. Conversely, there are several hundred loci at which {PRC1} binding is cell type-specific and, contrary to expectations, the presence of {PRC1} does not necessarily equate with transcriptional silencing. Interestingly, the {PRC1} binding profiles are preserved in senescent cells despite changes in gene expression.
{CONCLUSIONS:} The multiple permutations of {PRC1} in human fibroblasts congregate at common rather than specific sites in the genome and with overlapping but distinctive binding profiles in different fibroblasts. The data imply that the effects of {PRC1} complexes on gene expression are more subtle than simply repressing the loci at which they bind.},
	pages = {R23},
	number = {2},
	journaltitle = {Genome biology},
	shortjournal = {Genome Biol.},
	author = {Pemberton, Helen and Anderton, Emma and Patel, Harshil and Brookes, Sharon and Chandler, Hollie and Palermo, Richard and Stock, Julie and Rodriguez-Niedenführ, Marc and Racek, Tomas and de Breed, Lucas and Stewart, Aengus and Matthews, Nik and Peters, Gordon},
	date = {2014-02-03},
	note = {{PMID:} 24485159}
}

@article{yu_clusterprofiler_2012,
	title = {{clusterProfiler:} an R Package for Comparing Biological Themes Among Gene Clusters},
	volume = {16},
	issn = {1536-2310, 1557-8100},
	shorttitle = {{clusterProfiler}},
	url = {http://online.liebertpub.com/doi/abs/10.1089/omi.2011.0118},
	doi = {10.1089/omi.2011.0118},
	number = {5},
	urldate = {2012-05-05},
	journal = {{OMICS:} A Journal of Integrative Biology},
	author = {Yu, Guangchuang and Wang, Li-Gen and Han, Yanyan and He, Qing-Yu},
	month = may,
	year = {2012},
	pages = {284--287},
}

@article{ashburner_gene_2000,
	title = {Gene Ontology: tool for the unification of biology},
	volume = {25},
	issn = {1061-4036},
	url = {http://dx.doi.org/10.1038/75556},
	doi = {10.1038/75556},
	shorttitle = {Gene Ontology},
	issue = {1},
	pages = {25-29},
	journaltitle = {Nat Genet},
	shortjournal = {Nat Genet},
	author = {Ashburner, Michael and Ball, Catherine A. and Blake, Judith A. and Botstein, David and Butler, Heather and Cherry, J. Michael and Davis, Allan P. and Dolinski, Kara and Dwight, Selina S. and Eppig, Janan T. and Harris, Midori A. and Hill, David P. and Issel-Tarver, Laurie and Kasarskis, Andrew and Lewis, Suzanna and Matese, John C. and Richardson, Joel E. and Ringwald, Martin and Rubin, Gerald M. and Sherlock, Gavin},
	urldate = {2010-04-13},
	date = {2000-05},
}

@article{kanehisa_kegg_2004,
	title = {The {KEGG} resource for deciphering the genome},
	volume = {32},
	issn = {0305-1048, 1362-4962},
	url = {http://nar.oxfordjournals.org/content/32/suppl_1/D277},
	doi = {10.1093/nar/gkh063},
	language = {en},
	issue = {suppl 1},
	pages = {D277-D280},
	journaltitle = {Nucleic Acids Research},
	shortjournal = {Nucl. Acids Res.},
	author = {Kanehisa, Minoru and Goto, Susumu and Kawashima, Shuichi and Okuno, Yasushi and Hattori, Masahiro},
	urldate = {2013-10-15},
	date = {2004},
	note = {{PMID:} 14681412},
}

@article{schriml_disease_2011,
	title = {Disease Ontology: a backbone for disease semantic integration},
	volume = {40},
	issn = {0305-1048, 1362-4962},
	url = {http://nar.oxfordjournals.org/content/40/D1/D940.long},
	doi = {10.1093/nar/gkr972},
	shorttitle = {Disease Ontology},
	issue = {D1},
	pages = {D940-D946},
	journaltitle = {Nucleic Acids Research},
	author = {Schriml, L. M. and Arze, C. and Nadendla, S. and Chang, Y.-W. W. and Mazaitis, M. and Felix, V. and Feng, G. and Kibbe, W. A.},
	urldate = {2012-03-01},
	date = {2011-11-12},
}


@article{croft_reactome_2013,
	title = {The Reactome pathway knowledgebase},
	volume = {42},
	issn = {0305-1048, 1362-4962},
	url = {http://nar.oxfordjournals.org/content/42/D1/D472.long},
	doi = {10.1093/nar/gkt1102},
	issue = {D1},
	pages = {D472-D477},
	journaltitle = {Nucleic Acids Research},
	author = {Croft, D. and Mundo, A. F. and Haw, R. and Milacic, M. and Weiser, J. and Wu, G. and Caudy, M. and Garapati, P. and Gillespie, M. and Kamdar, M. R. and Jassal, B. and Jupe, S. and Matthews, L. and May, B. and Palatnik, S. and Rothfels, K. and Shamovsky, V. and Song, H. and Williams, M. and Birney, E. and Hermjakob, H. and Stein, L. and {D'Eustachio}, P.},
	urldate = {2014-02-18},
	date = {2013-11-15},
}


@article{ramirez2016deeptools2,
  title={deepTools2: a next generation web server for deep-sequencing data analysis},
  author={Ram{\'\i}rez, Fidel and Ryan, Devon P and Gr{\"u}ning, Bj{\"o}rn and Bhardwaj, Vivek and Kilpert, Fabian and Richter, Andreas S and Heyne, Steffen and D{\"u}ndar, Friederike and Manke, Thomas},
  journal={Nucleic acids research},
  volume={44},
  number={W1},
  pages={W160--W165},
  year={2016},
  publisher={Oxford University Press}
}