Showing preview only (971K chars total). Download the full file or copy to clipboard to get everything.
Repository: SSSYDYSSS/TransProR
Branch: main
Commit: cdf08bed9130
Files: 138
Total size: 928.3 KB
Directory structure:
gitextract_t_svwp8g/
├── .Rbuildignore
├── .github/
│ ├── .gitignore
│ └── workflows/
│ └── R-CMD-check.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── R/
│ ├── AdjustAlphaScale.R
│ ├── CircosFruits.R
│ ├── CombatNormal.R
│ ├── CombatTumor.R
│ ├── CompareMerge.R
│ ├── Contrast_Venn.R
│ ├── DESeq2Analyze.R
│ ├── EdgeRAnalyze.R
│ ├── EnrichCirclize.R
│ ├── EnrichCircoBar.R
│ ├── EnrichPolarBubble.R
│ ├── EnrichmentSpiralize.R
│ ├── FacetDensityFoldchange.R
│ ├── FilterDiffGenes.R
│ ├── FourDegsVenn.R
│ ├── GatherGraphEdge.R
│ ├── GatherGraphNode.R
│ ├── GeneColor.R
│ ├── GeneHighlights.R
│ ├── GeneMapPathway.R
│ ├── GetGtexExp.R
│ ├── GetTcgaExp.R
│ ├── HighlightByNode.R
│ ├── HighlightGenes.R
│ ├── LimmaAnalyze.R
│ ├── LogTransform.R
│ ├── MergeDensityFoldchange.R
│ ├── MergeGtexTcga.R
│ ├── MergeIDPosition.R
│ ├── MergeMethodColor.R
│ ├── NewGgraph.R
│ ├── PathwayCount.R
│ ├── PathwayDescription.R
│ ├── PrepDeseq2.R
│ ├── PrepEdgeR.R
│ ├── PrepLimma.R
│ ├── PrepWilcoxon.R
│ ├── ProcessHeatdata.R
│ ├── SeekGtexOrgan.R
│ ├── WilcoxonAnalyze.R
│ ├── data.R
│ ├── utils-pipe.R
│ └── zzz.R
├── README.md
├── TransProR.Rproj
├── data/
│ ├── all_degs_venn.rda
│ └── gtree.rda
├── data-raw/
│ └── MAKEDATA.R
├── dev/
│ ├── build.R
│ └── dev.R
├── inst/
│ └── extdata/
│ ├── DEG_deseq2.rds
│ ├── DEG_deseq2_test.rds
│ ├── DEG_edgeR_test.rds
│ ├── DEG_limma_voom_test.rds
│ ├── Diff_deseq2.rds
│ ├── GTEX_phenotype_test
│ ├── SKCM_Skin_TCGA_exp_normal_test.rds
│ ├── SKCM_Skin_TCGA_exp_tumor_test.rds
│ ├── Skin_SKCM_Gtex_test.rds
│ ├── TCGA-SKCM.GDC_phenotype_test.tsv
│ ├── TCGA-SKCM.htseq_counts_test.tsv
│ ├── TCGA_gencode.v22.annotation.gene.probeMap_test
│ ├── Wilcoxon_rank_sum_testoutRst_test.rds
│ ├── all_count_exp_test.csv
│ ├── ascii_art.txt
│ ├── combined_df.rds
│ ├── gtex_gene_expected_count_test
│ ├── gtex_probeMap_gencode.v23.annotation.gene.probemap_test
│ ├── p_tree_test.rds
│ ├── removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds
│ ├── removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds
│ ├── selected_genes_deseq2.rds
│ └── tree_plot.rds
├── man/
│ ├── Combat_Normal.Rd
│ ├── Contrast_Venn.Rd
│ ├── DESeq2_analyze.Rd
│ ├── Wilcoxon_analyze.Rd
│ ├── add_boxplot.Rd
│ ├── add_new_tile_layer.Rd
│ ├── adjust_alpha_scale.Rd
│ ├── adjust_color_tone.Rd
│ ├── adjust_export_pathway.Rd
│ ├── all_degs_venn.Rd
│ ├── circos_fruits.Rd
│ ├── combat_tumor.Rd
│ ├── compare_merge.Rd
│ ├── create_base_plot.Rd
│ ├── deg_filter.Rd
│ ├── drawLegends.Rd
│ ├── edgeR_analyze.Rd
│ ├── enrich_circo_bar.Rd
│ ├── enrich_polar_bubble.Rd
│ ├── enrichment_circlize.Rd
│ ├── enrichment_spiral_plots.Rd
│ ├── extract_descriptions_counts.Rd
│ ├── extract_ntop_pathways.Rd
│ ├── extract_positive_pathways.Rd
│ ├── facet_density_foldchange.Rd
│ ├── filter_diff_genes.Rd
│ ├── four_degs_venn.Rd
│ ├── gather_graph_edge.Rd
│ ├── gather_graph_node.Rd
│ ├── gene_color.Rd
│ ├── gene_highlights.Rd
│ ├── gene_map_pathway.Rd
│ ├── get_gtex_exp.Rd
│ ├── get_tcga_exp.Rd
│ ├── gtree.Rd
│ ├── highlight_by_node.Rd
│ ├── highlight_genes.Rd
│ ├── limma_analyze.Rd
│ ├── log_transform.Rd
│ ├── merge_density_foldchange.Rd
│ ├── merge_gtex_tcga.Rd
│ ├── merge_id_position.Rd
│ ├── merge_method_color.Rd
│ ├── new_ggraph.Rd
│ ├── pathway_count.Rd
│ ├── pathway_description.Rd
│ ├── pipe.Rd
│ ├── prep_deseq2.Rd
│ ├── prep_edgeR.Rd
│ ├── prep_limma.Rd
│ ├── prep_wilcoxon.Rd
│ ├── process_heatdata.Rd
│ ├── seek_gtex_organ.Rd
│ ├── selectPathways.Rd
│ └── spiral_newrle.Rd
└── vignettes/
└── TransProR.Rmd
================================================
FILE CONTENTS
================================================
================================================
FILE: .Rbuildignore
================================================
^.*\.Rproj$
^\.Rproj\.user$
^data-raw$
^data-raw$
^dev$
^CODE_OF_CONDUCT\.md$
^LICENSE\.md$
^\.github$
^\.RDataTmp$
================================================
FILE: .github/.gitignore
================================================
*.html
================================================
FILE: .github/workflows/R-CMD-check.yaml
================================================
name: R-CMD-check
on:
push:
branches: [main, master]
pull_request:
branches: [main, master]
jobs:
R-CMD-check:
runs-on: ${{ matrix.config.os }}
name: ${{ matrix.config.os }} (${{ matrix.config.r }})
strategy:
fail-fast: false
matrix:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}
env:
R_KEEP_PKG_SOURCE: yes
steps:
# Checkout repository
- uses: actions/checkout@v3
# Set up pandoc
- uses: r-lib/actions/setup-pandoc@v2
# Set up R environment
- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true
# Automatically install dependencies from DESCRIPTION
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: >-
any::rcmdcheck
# Manually install Bioconductor and other missing dependencies
- name: Install dependencies
run: |
Rscript -e "if (!requireNamespace('BiocManager', quietly = TRUE)) install.packages('BiocManager')"
Rscript -e "BiocManager::install()"
Rscript -e "if (!requireNamespace('remotes', quietly = TRUE)) install.packages('remotes')"
Rscript -e "remotes::install_deps(dependencies = TRUE)"
# Run R CMD check
- uses: r-lib/actions/check-r-package@v2
with:
upload-snapshots: true
================================================
FILE: .gitignore
================================================
.Rproj.user
.Rhistory
.RData
.Ruserdata
.Rdata
.httr-oauth
.DS_Store
.quarto
.RDataTmp
================================================
FILE: CODE_OF_CONDUCT.md
================================================
# Contributor Covenant Code of Conduct
## Our Pledge
We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.
We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.
## Our Standards
Examples of behavior that contributes to a positive environment for our
community include:
* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
* The use of sexualized language or imagery, and sexual attention or advances of
any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.
Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope
This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail address,
posting via an official social media account, or acting as an appointed
representative at an online or offline event.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at yudongyue@mail.nankai.edu.cn.
All complaints will be reviewed and investigated promptly and fairly.
All community leaders are obligated to respect the privacy and security of the
reporter of any incident.
## Enforcement Guidelines
Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:
### 1. Correction
**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.
**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.
### 2. Warning
**Community Impact**: A violation through a single incident or series of
actions.
**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.
### 3. Temporary Ban
**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.
**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.
### 4. Permanent Ban
**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.
**Consequence**: A permanent ban from any sort of public interaction within the
community.
## Attribution
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
<https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][https://github.com/mozilla/inclusion].
For answers to common questions about this code of conduct, see the FAQ at
<https://www.contributor-covenant.org/faq>. Translations are available at <https://www.contributor-covenant.org/translations>.
[homepage]: https://www.contributor-covenant.org
================================================
FILE: DESCRIPTION
================================================
Package: TransProR
Type: Package
Title: Analysis and Visualization of Multi-Omics Data
Version: 1.0.4
Authors@R: person("Dongyue", "Yu", email = "yudongyue@mail.nankai.edu.cn", role = c("aut", "cre", "cph"),
comment = c(ORCID = "0000-0002-7041-2073"))
Maintainer: Dongyue Yu <yudongyue@mail.nankai.edu.cn>
Description: A tool for comprehensive transcriptomic data analysis, with a focus on transcript-level data preprocessing, expression profiling, differential expression analysis, and functional enrichment. It enables researchers to identify key biological processes, disease biomarkers, and gene regulatory mechanisms. 'TransProR' is aimed at researchers and bioinformaticians working with RNA-Seq data, providing an intuitive framework for in-depth analysis and visualization of transcriptomic datasets. The package includes comprehensive documentation and usage examples to guide users through the entire analysis pipeline. The differential expression analysis methods incorporated in the package include 'limma' (Ritchie et al., 2015, <doi:10.1093/nar/gkv007>; Smyth, 2005, <doi:10.1007/0-387-29362-0_23>), 'edgeR' (Robinson et al., 2010, <doi:10.1093/bioinformatics/btp616>), 'DESeq2' (Love et al., 2014, <doi:10.1186/s13059-014-0550-8>), and Wilcoxon tests (Li et al., 2022, <doi:10.1186/s13059-022-02648-4>), providing flexible and robust approaches to RNA-Seq data analysis. For more information, refer to the package vignettes and related publications.
Imports:
magrittr,
stats,
dplyr,
rlang,
tibble,
sva,
DESeq2,
utils,
edgeR,
limma,
ggplot2,
ggVennDiagram,
ggdensity,
ggpubr,
ggtree,
hrbrthemes,
grid,
ggraph,
tidygraph,
tidyr,
stringr,
geomtextpath,
ggalt,
ggnewscale,
Hmisc,
circlize,
graphics,
spiralize,
ComplexHeatmap,
grDevices
Suggests:
prettydoc,
knitr,
ggtreeExtra,
rmarkdown,
systemfonts
VignetteBuilder: knitr
License: MIT + file LICENSE
Encoding: UTF-8
URL: https://github.com/SSSYDYSSS/TransProRBook
BugReports: https://github.com/SSSYDYSSS/TransProR/issues
LazyData: true
RoxygenNote: 7.3.2
Depends:
R (>= 4.3.0)
================================================
FILE: LICENSE
================================================
YEAR: 2023
COPYRIGHT HOLDER: Dongyue Yu
================================================
FILE: LICENSE.md
================================================
# MIT License
Copyright (c) 2023 Dongyue Yu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand
export("%>%")
export(Combat_Normal)
export(Contrast_Venn)
export(DESeq2_analyze)
export(Wilcoxon_analyze)
export(add_boxplot)
export(add_new_tile_layer)
export(adjust_alpha_scale)
export(adjust_color_tone)
export(adjust_export_pathway)
export(circos_fruits)
export(combat_tumor)
export(compare_merge)
export(create_base_plot)
export(deg_filter)
export(drawLegends)
export(edgeR_analyze)
export(enrich_circo_bar)
export(enrich_polar_bubble)
export(enrichment_circlize)
export(enrichment_spiral_plots)
export(extract_descriptions_counts)
export(extract_ntop_pathways)
export(extract_positive_pathways)
export(facet_density_foldchange)
export(filter_diff_genes)
export(four_degs_venn)
export(gather_graph_edge)
export(gather_graph_node)
export(gene_color)
export(gene_highlights)
export(gene_map_pathway)
export(get_gtex_exp)
export(get_tcga_exp)
export(highlight_by_node)
export(highlight_genes)
export(limma_analyze)
export(log_transform)
export(merge_density_foldchange)
export(merge_gtex_tcga)
export(merge_id_position)
export(merge_method_color)
export(new_ggraph)
export(pathway_count)
export(pathway_description)
export(prep_deseq2)
export(prep_edgeR)
export(prep_limma)
export(prep_wilcoxon)
export(process_heatdata)
export(seek_gtex_organ)
export(selectPathways)
export(spiral_newrle)
importFrom(ComplexHeatmap,Legend)
importFrom(ComplexHeatmap,draw)
importFrom(ComplexHeatmap,packLegend)
importFrom(DESeq2,DESeq)
importFrom(DESeq2,DESeqDataSetFromMatrix)
importFrom(DESeq2,results)
importFrom(Hmisc,capitalize)
importFrom(circlize,chordDiagram)
importFrom(dplyr,across)
importFrom(dplyr,all_of)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
importFrom(dplyr,desc)
importFrom(dplyr,distinct)
importFrom(dplyr,filter)
importFrom(dplyr,group_by)
importFrom(dplyr,inner_join)
importFrom(dplyr,mutate)
importFrom(dplyr,n)
importFrom(dplyr,pull)
importFrom(dplyr,row_number)
importFrom(dplyr,select)
importFrom(dplyr,summarise)
importFrom(edgeR,DGEList)
importFrom(edgeR,calcNormFactors)
importFrom(edgeR,cpm)
importFrom(edgeR,estimateGLMCommonDisp)
importFrom(edgeR,estimateGLMTagwiseDisp)
importFrom(edgeR,estimateGLMTrendedDisp)
importFrom(edgeR,filterByExpr)
importFrom(edgeR,glmFit)
importFrom(edgeR,glmLRT)
importFrom(edgeR,topTags)
importFrom(geomtextpath,geom_textpath)
importFrom(ggVennDiagram,ggVennDiagram)
importFrom(ggalt,stat_xspline)
importFrom(ggdensity,geom_hdr)
importFrom(ggdensity,geom_hdr_rug)
importFrom(ggnewscale,new_scale)
importFrom(ggnewscale,new_scale_fill)
importFrom(ggplot2,aes)
importFrom(ggplot2,aes_string)
importFrom(ggplot2,alpha)
importFrom(ggplot2,annotate)
importFrom(ggplot2,coord_cartesian)
importFrom(ggplot2,coord_polar)
importFrom(ggplot2,element_blank)
importFrom(ggplot2,element_rect)
importFrom(ggplot2,expansion)
importFrom(ggplot2,facet_wrap)
importFrom(ggplot2,geom_bar)
importFrom(ggplot2,geom_hline)
importFrom(ggplot2,geom_point)
importFrom(ggplot2,geom_segment)
importFrom(ggplot2,geom_smooth)
importFrom(ggplot2,geom_text)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,guide_legend)
importFrom(ggplot2,labs)
importFrom(ggplot2,margin)
importFrom(ggplot2,scale_alpha_continuous)
importFrom(ggplot2,scale_color_brewer)
importFrom(ggplot2,scale_color_manual)
importFrom(ggplot2,scale_fill_gradient)
importFrom(ggplot2,scale_fill_identity)
importFrom(ggplot2,scale_fill_manual)
importFrom(ggplot2,scale_size)
importFrom(ggplot2,scale_x_continuous)
importFrom(ggplot2,scale_y_continuous)
importFrom(ggplot2,theme)
importFrom(ggplot2,theme_minimal)
importFrom(ggplot2,theme_void)
importFrom(ggplot2,unit)
importFrom(ggpubr,stat_cor)
importFrom(ggraph,geom_edge_diagonal)
importFrom(ggraph,geom_node_point)
importFrom(ggraph,geom_node_text)
importFrom(ggraph,ggraph)
importFrom(ggraph,node_angle)
importFrom(ggraph,scale_edge_colour_brewer)
importFrom(ggtree,aes)
importFrom(ggtree,geom_hilight)
importFrom(ggtree,geom_point2)
importFrom(grDevices,col2rgb)
importFrom(grDevices,convertColor)
importFrom(grDevices,rgb)
importFrom(graphics,strwidth)
importFrom(grid,gpar)
importFrom(grid,grid.roundrect)
importFrom(grid,grid.text)
importFrom(grid,pushViewport)
importFrom(grid,unit)
importFrom(grid,upViewport)
importFrom(grid,viewport)
importFrom(hrbrthemes,theme_ipsum)
importFrom(limma,contrasts.fit)
importFrom(limma,eBayes)
importFrom(limma,lmFit)
importFrom(limma,topTable)
importFrom(magrittr,"%>%")
importFrom(rlang,.data)
importFrom(rlang,sym)
importFrom(spiralize,spiral_initialize)
importFrom(spiralize,spiral_rect)
importFrom(spiralize,spiral_text)
importFrom(spiralize,spiral_track)
importFrom(stats,as.formula)
importFrom(stats,model.matrix)
importFrom(stats,na.omit)
importFrom(stats,p.adjust)
importFrom(stats,quantile)
importFrom(stats,wilcox.test)
importFrom(stringr,str_c)
importFrom(sva,ComBat_seq)
importFrom(tibble,as_tibble)
importFrom(tibble,column_to_rownames)
importFrom(tibble,tibble)
importFrom(tidygraph,tbl_graph)
importFrom(tidyr,unite)
importFrom(utils,head)
importFrom(utils,read.table)
importFrom(utils,tail)
================================================
FILE: R/AdjustAlphaScale.R
================================================
#' Adjust Alpha Scale for Data Visualization
#'
#' This function dynamically adjusts the transparency scale for visualizations,
#' especially useful when the range of data values varies significantly across different sources.
#' It modifies the transparency scale based on the range of values present in the data,
#' ensuring that the visualization accurately reflects variations within the data.
#'
#' @importFrom ggplot2 scale_alpha_continuous guide_legend ggplot geom_point
#' @param data A data frame containing the values for which the alpha scale is to be adjusted.
#' @param name Character string that will be used as the title of the legend in the plot.
#' @param range Numeric vector of length 2 specifying the range of alpha values, defaults to c(0.2, 0.8).
#' @return A ggplot2 alpha scale adjustment layer.
#' @export
#'
#' @examples
#' # Assuming 'data' is a DataFrame with a 'value' column
#' plot_data <- data.frame(value = c(10, 20, 30, 40, 50))
#' ggplot2::ggplot(plot_data, ggplot2::aes(x = 1:nrow(plot_data), y = value)) +
#' ggplot2::geom_point(ggplot2::aes(alpha = value)) +
#' adjust_alpha_scale(plot_data, "Transparency Scale")
#'
adjust_alpha_scale <- function(data, name, range = c(0.2, 0.8)) {
min_val <- min(data$value, na.rm = TRUE) # Calculate minimum value, excluding NA
max_val <- max(data$value, na.rm = TRUE) # Calculate maximum value, excluding NA
# Apply scale_alpha_continuous from 'ggplot2' to adjust transparency
scale_alpha_continuous(
name = name, # Legend title
limits = c(min_val, max_val), # Set the data range for alpha scaling
range = range, # Set the alpha transparency range
guide = guide_legend(keywidth = 0.65, keyheight = 0.35, order = 2) # Customize legend appearance
)
}
================================================
FILE: R/CircosFruits.R
================================================
#' Create a base plot with gene expression data on a phylogenetic tree
#'
#' This function creates a base plot using 'ggtree' and 'ggtreeExtra' libraries, adding gene expression
#' data as colored tiles to the plot. It allows for dynamic coloring of the genes and includes
#' adjustments for alpha transparency based on the expression value.
#'
#' @importFrom ggplot2 aes scale_fill_manual scale_alpha_continuous guide_legend
#' @param p A ggtree plot object to which the data will be added.
#' @param data A data frame containing gene expression data with columns for Samples, Genes, and Values.
#' @param gene_colors A named vector of colors for genes.
#' @param gene_label A character string used as a label in the legend for the genes. Default is "Gene".
#' @return A `ggtree` plot object with the gene expression data added.
#' @export
#'
#' @examples
#' \donttest{
#' # Check and load required packages
#' if (requireNamespace("ggtreeExtra", quietly = TRUE) &&
#' requireNamespace("ggplot2", quietly = TRUE)) {
#' library(ggtreeExtra)
#' library(ggplot2)
#'
#' file_path <- system.file("extdata", "p_tree_test.rds", package = "TransProR")
#' p <- readRDS(file_path)
#'
#' # Create gene expression data frame
#' expression_data <- data.frame(
#' Sample = rep(c("Species_A", "Species_B", "Species_C", "Species_D"), each = 5),
#' Gene = rep(paste0("Gene", 1:5), times = 4),
#' Value = runif(20, min = 0, max = 1) # Randomly generate expression values between 0 and 1
#' )
#'
#' # Define gene colors (named vector)
#' gene_colors <- c(
#' Gene1 = "#491588",
#' Gene2 = "#301b8d",
#' Gene3 = "#1a237a",
#' Gene4 = "#11479c",
#' Gene5 = "#0a5797"
#' )
#'
#' # Call create_base_plot function to add gene expression data
#' p <- create_base_plot(p, expression_data, gene_colors)
#' } else {
#' message("Required packages 'ggtreeExtra' and 'ggplot2' are not installed.")
#' }
#' }
#'
create_base_plot <- function(p, data, gene_colors, gene_label="Gene") {
# Define local variables
Sample <- data$Sample
value <- data$value
Gene <- data$Gene
if (!requireNamespace("ggtreeExtra", quietly = TRUE)) {
stop("ggtreeExtra is required for using create_base_plot. Please install it.", call. = FALSE)
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("ggplot2 is required to use geom_tile. Please install it.", call. = FALSE)
}
p <- p +
ggtreeExtra::geom_fruit(
data=data,
geom="geom_tile",
mapping=ggplot2::aes(y=Sample, alpha=value, x=Gene, fill=Gene),
offset=0.001,
pwidth=2
) +
ggplot2::scale_fill_manual(
name=gene_label,
values=gene_colors,
guide=ggplot2::guide_legend(keywidth=0.65, keyheight=0.35, order=1)
) +
# Assuming the function 'adjust_alpha_scale' is defined elsewhere to adjust alpha scale based on the expression values
adjust_alpha_scale(data, gene_label)
return(p)
}
#' Add a boxplot layer to a `ggtree` plot
#'
#' This function adds a boxplot layer to an existing `ggtree` plot object using ggtreeExtra's geom_fruit for boxplots.
#' It is primarily used to display statistical summaries of the data related to gene expressions or other metrics.
#'
#' @importFrom ggplot2 aes
#' @param p An existing ggtree plot object.
#' @param data A data frame containing the data to be plotted. Expected to have columns for 'Sample' and 'value'.
#' @param fill_color A character string specifying the fill color for the boxplots. Default is "#f28131".
#' @param alpha Numeric value for the transparency of the boxplots. Default is 0.6.
#' @param offset Numeric value, the position of the boxplot on the x-axis relative to its gene name. Default is 0.22.
#' @param pwidth Numeric value, the width of the boxplot. Default is 0.5.
#' @return A `ggtree` plot object with the added boxplot layer.
#' @export
#'
#' @examples
#' \donttest{
#' # Check and load required packages
#' if (requireNamespace("ggtreeExtra", quietly = TRUE) &&
#' requireNamespace("ggplot2", quietly = TRUE)) {
#' library(ggtreeExtra)
#' library(ggplot2)
#'
#' file_path <- system.file("extdata", "p_tree_test.rds", package = "TransProR")
#' p <- readRDS(file_path)
#'
#' # Create boxplot data frame
#' boxplot_data <- data.frame(
#' Sample = rep(c("Species_A", "Species_B", "Species_C", "Species_D"), each = 30),
#' value = c(
#' rnorm(30, mean = 5, sd = 1), # Data for Species_A
#' rnorm(30, mean = 7, sd = 1.5), # Data for Species_B
#' rnorm(30, mean = 6, sd = 1.2), # Data for Species_C
#' rnorm(30, mean = 8, sd = 1.3) # Data for Species_D
#' )
#' )
#'
#' # Call add_boxplot function to add boxplot layer
#' p_with_boxplot <- add_boxplot(p, boxplot_data)
#' } else {
#' message("Required packages 'ggtreeExtra' and 'ggplot2' are not installed.")
#' }
#' }
#'
add_boxplot <- function(p, data, fill_color="#f28131", alpha=0.6, offset=0.22, pwidth=0.5) {
# Define local variables
Sample <- data$Sample
value <- data$value
if (!requireNamespace("ggtreeExtra", quietly = TRUE)) {
stop("ggtreeExtra is required for using create_base_plot. Please install it.", call. = FALSE)
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("ggplot2 is required to use geom_boxplot. Please install it.", call. = FALSE)
}
p + ggtreeExtra::geom_fruit(
data=data,
geom="geom_boxplot",
mapping=ggplot2::aes(y=Sample, x=value),
fill=fill_color,
alpha=alpha,
offset=offset,
pwidth=pwidth,
size=0.05,
outlier.size=0.3,
outlier.stroke=0.06,
outlier.shape=21,
show.legend=FALSE
)
}
#' Add a new tile layer with dynamic scales to a `ggtree` plot
#'
#' This function adds a new tile layer to an existing `ggtree` plot object, allowing for separate scales for fill
#' and alpha transparency. This is useful when you want to add additional data layers without interfering with
#' the existing scales in the plot. It utilizes the 'ggnewscale' package to reset scales for new layers.
#'
#' @importFrom ggplot2 aes scale_fill_manual guide_legend
#' @importFrom ggnewscale new_scale
#' @param p An existing ggtree plot object.
#' @param data A data frame containing the data to be plotted. Expected to have columns for 'Sample', 'Gene', and 'value'.
#' @param gene_colors A named vector of colors for genes.
#' @param gene_label A character string used as a label in the legend for the genes.
#' @param alpha_value A numeric or named vector for setting the alpha scale based on values.
#' @param offset Numeric value, the position of the tile on the x-axis relative to its gene name. Default is 0.02.
#' @param pwidth Numeric value, the width of the tile. Default is 2.
#' @return A `ggtree` plot object with the added tile layer and new scales.
#' @export
#'
#' @examples
#' \donttest{
#' # Check and load required packages
#' if (requireNamespace("ggtreeExtra", quietly = TRUE) &&
#' requireNamespace("ggplot2", quietly = TRUE) &&
#' requireNamespace("ggnewscale", quietly = TRUE)) {
#' library(ggtreeExtra)
#' library(ggplot2)
#' library(ggnewscale)
#'
#' file_path <- system.file("extdata", "p_tree_test.rds", package = "TransProR")
#' p <- readRDS(file_path)
#'
#' # Create new expression data
#' new_expression_data <- data.frame(
#' Sample = rep(c("Species_A", "Species_B", "Species_C", "Species_D"), each = 3),
#' Gene = rep(c("Gene6", "Gene7", "Gene8"), times = 4),
#' Value = runif(12, min = 0, max = 1) # Randomly generate expression values between 0 and 1
#' )
#'
#' # Define new gene colors
#' new_gene_colors <- c(
#' Gene6 = "#0b5f63",
#' Gene7 = "#074d41",
#' Gene8 = "#1f5e27"
#' )
#'
#' # Define gene label and alpha values
#' gene_label <- "New Genes"
#' alpha_value <- c(0.3, 0.9)
#'
#' # Add new tile layer
#' p_with_new_layer <- add_new_tile_layer(
#' p,
#' new_expression_data,
#' new_gene_colors,
#' gene_label,
#' alpha_value,
#' offset = 0.02,
#' pwidth = 2
#' )
#' } else {
#' message("Required packages 'ggtreeExtra', 'ggplot2', and 'ggnewscale' are not installed.")
#' }
#' }
#'
add_new_tile_layer <- function(p, data, gene_colors, gene_label, alpha_value=c(0.3, 0.9), offset=0.02, pwidth=2) {
# Define local variables
Sample <- data$Sample
value <- data$value
Gene <- data$Gene
if (!requireNamespace("ggtreeExtra", quietly = TRUE)) {
stop("ggtreeExtra is required for using create_base_plot. Please install it.", call. = FALSE)
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("ggplot2 is required to use geom_tile. Please install it.", call. = FALSE)
}
p + ggnewscale::new_scale("alpha") + ggnewscale::new_scale("fill") +
ggtreeExtra::geom_fruit(
data=data,
geom="geom_tile",
mapping=ggplot2::aes(y=Sample, alpha=value, x=Gene, fill=Gene),
offset=offset,
pwidth=pwidth
) +
ggplot2::scale_fill_manual(
name=gene_label,
values=gene_colors,
guide=ggplot2::guide_legend(keywidth=0.65, keyheight=0.35, order=1)
) +
adjust_alpha_scale(data, gene_label, alpha_value) # Assuming function signature and usage
}
#' Add multiple layers to a `ggtree` plot for visualizing gene expression and enrichment data
#'
#' This function sequentially adds multiple layers to a `ggtree` plot, including gene expression data, boxplots for statistical
#' summaries, and additional tile layers for pathway enrichment scores from SSGSEA and GSVA analyses. It utilizes separate
#' functions for adding each type of layer and allows for the specification of gene colors as well as adjustments in aesthetics
#' for each layer. The function is designed to work with specific data structures and assumes all functions for adding layers
#' are defined and available.
#'
#' @param p A `ggtree` plot object to which the data and layers will be added.
#' @param long_format_HeatdataDeseq A data frame containing gene expression data with columns for `Samples`, `Genes`, and `Values`.
#' @param ssgsea_kegg_HeatdataDeseq A data frame containing SSGSEA analysis results with columns for `Samples`, `Genes`, and `Values`.
#' @param gsva_kegg_HeatdataDeseq A data frame containing GSVA analysis results with columns for `Samples`, `Genes`, and `Values`.
#' @param gene_colors A named vector of colors for genes, used for coloring tiles in different layers.
#' @return A `ggtree` plot object with multiple layers added for comprehensive visualization.
#' @export
#'
#' @examples
#' \donttest{
#' # Check and load required packages
#' if (requireNamespace("ggtreeExtra", quietly = TRUE) &&
#' requireNamespace("ggplot2", quietly = TRUE)) {
#' library(ggtreeExtra)
#' library(ggplot2)
#'
#' # Example data for gene expression, SSGSEA, and GSVA
#' file_path <- system.file("extdata", "p_tree_test.rds", package = "TransProR")
#' p <- readRDS(file_path)
#'
#' # Create gene expression data frame (long_format_HeatdataDeseq)
#' long_format_HeatdataDeseq <- data.frame(
#' Sample = rep(c("Species_A", "Species_B", "Species_C", "Species_D"), each = 5),
#' Genes = rep(paste0("Gene", 1:5), times = 4),
#' Value = runif(20, min = 0, max = 1) # Randomly generate expression values between 0 and 1
#' )
#'
#' # Create SSGSEA analysis results data frame (ssgsea_kegg_HeatdataDeseq)
#' ssgsea_kegg_HeatdataDeseq <- data.frame(
#' Sample = rep(c("Species_A", "Species_B", "Species_C", "Species_D"), each = 3),
#' Genes = rep(c("Pathway1", "Pathway2", "Pathway3"), times = 4),
#' Value = runif(12, min = 0, max = 1) # Randomly generate enrichment scores between 0 and 1
#' )
#'
#' # Create GSVA analysis results data frame (gsva_kegg_HeatdataDeseq)
#' gsva_kegg_HeatdataDeseq <- data.frame(
#' Sample = rep(c("Species_A", "Species_B", "Species_C", "Species_D"), each = 4),
#' Genes = rep(c("PathwayA", "PathwayB", "PathwayC", "PathwayD"), times = 4),
#' Value = runif(16, min = 0, max = 1) # Randomly generate enrichment scores between 0 and 1
#' )
#'
#' # Define gene and pathway colors (named vector), including all genes and pathways
#' gene_colors <- c(
#' # Genes for gene expression
#' Gene1 = "#491588",
#' Gene2 = "#301b8d",
#' Gene3 = "#1a237a",
#' Gene4 = "#11479c",
#' Gene5 = "#0a5797",
#' # Pathways for SSGSEA
#' Pathway1 = "#0b5f63",
#' Pathway2 = "#074d41",
#' Pathway3 = "#1f5e27",
#' # Pathways for GSVA
#' PathwayA = "#366928",
#' PathwayB = "#827729",
#' PathwayC = "#a1d99b",
#' PathwayD = "#c7e9c0"
#' )
#'
#' # Call circos_fruits function to add multiple layers
#' final_plot <- circos_fruits(
#' p,
#' long_format_HeatdataDeseq,
#' ssgsea_kegg_HeatdataDeseq,
#' gsva_kegg_HeatdataDeseq,
#' gene_colors
#' )
#' } else {
#' message("Required packages 'ggtreeExtra' and 'ggplot2' are not installed.")
#' }
#' }
#'
circos_fruits <- function(p, long_format_HeatdataDeseq, ssgsea_kegg_HeatdataDeseq, gsva_kegg_HeatdataDeseq, gene_colors) {
if (!requireNamespace("ggtreeExtra", quietly = TRUE)) {
stop("ggtreeExtra is required for using create_base_plot. Please install it.", call. = FALSE)
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
stop("ggplot2 is required to use geom_tile. Please install it.", call. = FALSE)
}
# Create the base plot with gene expression data
p1 <- create_base_plot(p, long_format_HeatdataDeseq, gene_colors)
# Add a boxplot layer to the base plot
p2 <- add_boxplot(p1, long_format_HeatdataDeseq)
# Add a new tile layer for SSGSEA data
p3 <- add_new_tile_layer(p2, ssgsea_kegg_HeatdataDeseq, gene_colors, "Ssgsea Term")
# Add another boxplot layer with specific aesthetic adjustments
p4 <- add_boxplot(p3, ssgsea_kegg_HeatdataDeseq, fill_color="#f28131", alpha=0.65, offset=0.32)
# Add a new tile layer for GSVA data with specific alpha and offset adjustments
p5 <- add_new_tile_layer(p4, gsva_kegg_HeatdataDeseq, gene_colors, "Gsva Term", alpha_value=c(0.3, 0.9), offset=0.02)
# Return the final plot
return(p5)
}
================================================
FILE: R/CombatNormal.R
================================================
#' Process and Correct Batch Effects in TCGA's normal tissue and GTEX Data
#'
#' This function takes a TCGA's normal tissue data set and a pre-saved GTEX data set, asks the user
#' for specific TCGA's normal tissue types to retain, then merges the two datasets. The merged dataset
#' is then corrected for batch effects using the ComBat_seq function from the 'sva' package.
#'
#' @description
#' The function first extracts histological types from the provided TCGA's normal tissue data set.
#' After displaying these types, the user is prompted to input specific types to retain.
#' The data is then filtered based on this input.
#' The GTEX and TCGA's normal tissue datasets are then combined and batch corrected.
#'
#' Note: This function assumes that TCGA's normal samples and GTEX samples represent different batches.
#'
#' @param TCGA_normal_data_path The path to the tumor data stored in an RDS file.
#' @param gtex_data_path The path to the GTEX data stored in an RDS file.
#' @param CombatNormal_output_path A character string specifying the path where the output RDS file will be saved.
#' @param auto_mode Logical. If set to TRUE, the function will not prompt the user for input and
#' will instead use the values provided in default_input. Default is FALSE.
#' @param default_input Character string. When auto_mode is TRUE, this parameter specifies the default
#' TGCA's normal tissue types to be retained. It should be provided as a comma-separated string (e.g., "11,12").
#'
#' @return A data.frame with corrected values after the ComBat_seq adjustment. Note that this function also saves the
#' combat_count_df data as an RDS file at the specified output path.
#'
#' @details
#' The ComBat_seq function from the 'sva' package is used to correct batch effects.
#' The function requires the 'sva' package to be installed and loaded externally.
#'
#' The example code uses `tempfile()` to generate temporary paths dynamically during execution.
#' These paths are valid during the `R CMD check` process, even if no actual files exist,
#' because `tempfile()` generates a unique file path that does not depend on the user's file system.
#' Using `tempfile()` ensures that the example code does not rely on specific external files and
#' avoids errors during `R CMD check`. CRAN review checks for documentation correctness
#' and syntax parsing, not the existence of actual files, as long as the example code is syntactically valid.
#'
#' @examples
#' TCGA_normal_file <- system.file("extdata",
#' "SKCM_Skin_TCGA_exp_normal_test.rds",
#' package = "TransProR")
#' gtex_file <- system.file("extdata", "Skin_SKCM_Gtex_test.rds", package = "TransProR")
#' output_file <- file.path(tempdir(), "SKCM_Skin_Combat_Normal_TCGA_GTEX_count.rds")
#'
#' SKCM_Skin_Combat_Normal_TCGA_GTEX_count <- Combat_Normal(
#' TCGA_normal_data_path = TCGA_normal_file,
#' gtex_data_path = gtex_file,
#' CombatNormal_output_path = output_file,
#' auto_mode = TRUE,
#' default_input = "skip"
#' )
#' head(SKCM_Skin_Combat_Normal_TCGA_GTEX_count)[1:5, 1:5]
#'
#' @seealso \code{\link[sva]{ComBat_seq}}
#' @importFrom sva ComBat_seq
#' @importFrom tibble column_to_rownames
#' @export
Combat_Normal <- function(TCGA_normal_data_path, gtex_data_path, CombatNormal_output_path, auto_mode = FALSE, default_input = "11,12") {
# Load the TGCA's normal tissue data and GTEX data
TCGA_normal_data <- readRDS(TCGA_normal_data_path)
gtex_data <- readRDS(gtex_data_path)
# Extract histological types for the TGCA's normal tissue data's samples
NormalHistologicalTypes <- substring(colnames(TCGA_normal_data), 14, 15)
# Filter only the normal samples
normal_data <- TCGA_normal_data[, as.numeric(NormalHistologicalTypes) > 10]
# Display the table to the user
normal_hist_table <- table(NormalHistologicalTypes)
#print(normal_hist_table)
message(" ")
message("NormalHistologicalTypes:")
message(paste(names(normal_hist_table), normal_hist_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Ask the user for input or use default input in auto_mode
if(auto_mode) {
selected_types <- strsplit(default_input, ",")[[1]]
} else {
message("Please input the normal tissue types you wish to retain or 'skip' to only use GTEX data: ")
selected_types <- strsplit(readline(), ",")[[1]]
}
# If the user didn't select 'skip'
if (!is.element("skip", selected_types)) {
# Filter the tumor data based on user's input
normal <- normal_data[, NormalHistologicalTypes %in% selected_types]
# Combine GTEX and selected TCGA data
# Merge the datasets, ensuring both have genes as row names
combined_data <- merge(normal, gtex_data, by = "row.names")
combined_data <- tibble::column_to_rownames(combined_data, var = "Row.names") # Set the row names
# Create group vector (All samples as same group)
combined_group <- rep("all_group", ncol(combined_data))
# Create batch vector for selected normal TCGA samples
tcga_batches <- match(NormalHistologicalTypes[NormalHistologicalTypes %in% selected_types], selected_types)
# Combine the batch vector for TCGA samples with GTEX batch
combined_batch <- c(tcga_batches, rep("GTEX", ncol(gtex_data)))
# Modify the tumor values
combined_data_count <- 2^(combined_data) - 1
combined_data_count <- apply(combined_data_count, 2, as.integer)
rownames(combined_data_count) <- rownames(combined_data)
# Correct for batch effects using ComBat_seq
combat_count <- sva::ComBat_seq(as.matrix(combined_data_count),
batch = combined_batch,
group = combined_group)
# Convert matrix to data frame
combat_count_df <- as.data.frame(combat_count)
saveRDS(combat_count_df, file = CombatNormal_output_path)
return(combat_count_df)
} else {
combat_count_matrix <- 2^(gtex_data) - 1
combat_count_matrix <- apply(combat_count_matrix, 2, as.integer)
rownames(combat_count_matrix) <- rownames(gtex_data)
combat_count_df <- as.data.frame(combat_count_matrix)
saveRDS(combat_count_df, file = CombatNormal_output_path)
return(combat_count_df)
}
}
================================================
FILE: R/CombatTumor.R
================================================
#' Process and Correct Batch Effects in Tumor Data
#'
#' This function takes a tumor data set, asks the user for specific tumor types to retain,
#' and then corrects for batch effects using the ComBat_seq function from the 'sva' package.
#'
#' @description
#' The function first extracts histological types from the provided tumor data set.
#' After displaying these types, the user is prompted to input specific types to retain.
#' The data is then filtered based on this input.
#'
#' Note: This example assumes that different tumor types represent different batches in a general sense.
#' Users need to adjust the batch and group vectors based on real-life scenarios.
#'
#' @param tumor_data_path The path to the tumor data stored in an RDS file.
#' @param CombatTumor_output_path A character string specifying the path where the output RDS file will be saved.
#' @param auto_mode Logical. If set to TRUE, the function will not prompt the user for input and
#' will instead use the values provided in default_input. Default is FALSE.
#' @param default_input Character string. When auto_mode is TRUE, this parameter specifies the default
#' tumor types to be retained. It should be provided as a comma-separated string (e.g., "01,06").
#'
#' @return A data.frame with corrected values after the ComBat_seq adjustment. Note that this function also saves the
#' combat_count_df data as an RDS file at the specified output path.
#'
#' @details
#' The ComBat_seq function from the sva package is used to correct batch effects.
#' The function requires the 'sva' package to be installed and loaded externally.
#'
#' @examples
#' tumor_file <- system.file("extdata",
#' "SKCM_Skin_TCGA_exp_tumor_test.rds",
#' package = "TransProR")
#' output_file <- file.path(tempdir(), "SKCM_combat_count.rds")
#'
#' SKCM_combat_count <- combat_tumor(
#' tumor_data_path = tumor_file,
#' CombatTumor_output_path = output_file,
#' auto_mode = TRUE,
#' default_input = "01,06"
#' )
#'
#' head(SKCM_combat_count)[1:5, 1:5]
#'
#' @seealso \code{\link[sva]{ComBat_seq}}
#' @importFrom sva ComBat_seq
#' @export
combat_tumor <- function(tumor_data_path, CombatTumor_output_path, auto_mode = FALSE, default_input = "01,06") {
# Load the tumor data
tumor_data <- readRDS(tumor_data_path)
# Extract histological types
TumorHistologicalTypes <- substring(colnames(tumor_data), 14, 15)
tumor_hist_table <- table(TumorHistologicalTypes)
# Display the table to the user
message(" ")
message("TumorHistologicalTypes:")
message(paste(names(tumor_hist_table), tumor_hist_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Ask the user for input or use default input in auto_mode
if(auto_mode) {
selected_types <- strsplit(default_input, ",")[[1]]
} else {
message("Please input the tumor types you wish to retain, separated by commas (e.g., 01,06): ")
selected_types <- strsplit(readline(), ",")[[1]]
}
# Filter the tumor data based on user's input
tumor <- tumor_data[, TumorHistologicalTypes %in% selected_types]
# Modify the tumor values
tumor1 <- 2^(tumor) - 1
tumor1 <- apply(tumor1, 2, as.integer)
rownames(tumor1) <- rownames(tumor)
# If only one sample type is chosen, skip batch correction and return modified tumor data
if(length(selected_types) == 1) {
combat_count_df <- as.data.frame(tumor1)
} else {
# Create group vector
selected_group = rep("all_group", length(which(TumorHistologicalTypes %in% selected_types)))
# Create batch vector based on group vector
selected_batch = match(TumorHistologicalTypes[TumorHistologicalTypes %in% selected_types], selected_types)
# Correct for batch effects using ComBat_seq
combat_count <- sva::ComBat_seq(as.matrix(tumor1),
batch = selected_batch,
group = selected_group)
# Convert matrix to data frame
combat_count_df <- as.data.frame(combat_count)
}
saveRDS(combat_count_df, file = CombatTumor_output_path)
return(combat_count_df)
}
================================================
FILE: R/CompareMerge.R
================================================
#' Compare and merge specific columns from two DEG data frames
#'
#' This function takes two DEG data frames, inner joins them by a specified gene column,
#' checks if a specified column is identical across both data frames, and merges them if they are.
#' The resulting data frame will have a merged column named after the compared column.
#'
#' @importFrom dplyr inner_join
#' @param df1 First data frame.
#' @param df2 Second data frame.
#' @param by_gene Column name by which to join the data frames, typically "Gene".
#' @param compare_col Column to compare for identity, which will also be the name of the merged column.
#' @param suffixes Suffixes to use for non-identical column names in the joined data frame.
#' @param df_name Name to assign to the resulting data frame for identification.
#' @return A data frame with processed columns.
#' @examples
#' # Create simulated DESeq2 data
#' DEG_deseq2 <- data.frame(
#' Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#' change = c("up", "down", "no_change", "up", "down"),
#' log2FoldChange = c(2.5, -3.2, 0.1, 1.8, -2.5),
#' pvalue = c(0.01, 0.05, 0.9, 0.02, 0.03)
#' )
#'
#' # Display the first 5 rows of the DESeq2 data
#' head(DEG_deseq2, 5)
#'
#' # Create simulated edgeR data
#' DEG_edgeR <- data.frame(
#' Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#' change = c("up", "down", "no_change", "no_change", "up"),
#' log2FoldChange = c(2.3, -3.1, 0.2, 0.1, 2.7),
#' pvalue = c(0.02, 0.04, 0.8, 0.6, 0.01)
#' )
#'
#' # Display the first 5 rows of the edgeR data
#' head(DEG_edgeR, 5)
#'
#' # Merge the DESeq2 and edgeR data
#' deseq2_edgeR <- compare_merge(
#' df1 = DEG_deseq2,
#' df2 = DEG_edgeR,
#' by_gene = "Gene",
#' compare_col = "change",
#' suffixes = c("_1", "_2"),
#' df_name = "deseq2_edgeR"
#' )
#'
#' @export
compare_merge <- function(df1, df2, by_gene, compare_col, suffixes, df_name) {
# Perform an inner join on the 'Gene' column
merged_df <- dplyr::inner_join(df1, df2, by = by_gene, suffix = suffixes)
# Generate column names for comparison
col1 <- paste0(compare_col, suffixes[1])
col2 <- paste0(compare_col, suffixes[2])
# Check if the specified columns are identical
if (all(merged_df[[col1]] == merged_df[[col2]])) {
# If completely identical, merge these columns into one and rename
merged_df[[compare_col]] <- merged_df[[col1]]
# Remove original compared columns
merged_df[[col1]] <- NULL
merged_df[[col2]] <- NULL
} else {
# Handle non-identical case
message("The columns", col1, "and", col2, "are not identical.\n")
}
# Assign the specified name for identification
merged_df$name <- df_name
# Return the modified data frame
return(merged_df)
}
================================================
FILE: R/Contrast_Venn.R
================================================
#' Function to Create a Venn Diagram of DEGs with Custom Colors
#'
#' This function creates a Venn Diagram using the 'ggVennDiagram' package.
#' It allows customization of various aesthetic elements of the diagram, including colors.
#'
#' @importFrom ggplot2 scale_fill_gradient scale_color_manual scale_x_continuous expansion
#' @importFrom ggVennDiagram ggVennDiagram
#' @param all_degs_venn A list of DEG sets for Venn Diagram creation.
#' @param edge_colors A vector of colors for the edges of the Venn Diagram sets.
#' @param name_color A vector of colors for the names of the sets in the Venn Diagram.
#' @param fill_colors A vector of two colors for the gradient fill of the Venn Diagram.
#' @param label_size The size of the labels showing the number of elements in each set (default is 4).
#' @param edge_size The size of the edges of the Venn Diagram sets (default is 3).
#' @return A `ggplot` object representing the Venn Diagram.
#' @examples
#'
#' data("all_degs_venn", package = "TransProR")
#'
#' edge_colors <- c("#1b62bb","#13822e","#332c3a","#9e2d39")
#' name_color <- c("#1b64bb","#13828e","#337c3a","#9e9d39")
#' fill_colors <- c("#e3f2fa", "#0288d1")
#'
#' Contrast_degs_venn <- Contrast_Venn(all_degs_venn, edge_colors, name_color, fill_colors)
#' @export
#'
Contrast_Venn <- function(all_degs_venn,
edge_colors,
name_color,
fill_colors,
label_size = 4,
edge_size = 3) {
# Draw Venn Diagram
ggVennDiagram::ggVennDiagram(all_degs_venn,
set_size = 5, # Font size for set names
set_color = name_color,
label_alpha = 0, # Transparency of the background box for counts/percentages, default is white
label_size = label_size, # Font size for counts/percentages
edge_size = edge_size # Thickness of the edges
) +
ggplot2::scale_fill_gradient(low = fill_colors[1], high = fill_colors[2]) + # Gradient fill color based on value size
ggplot2::scale_color_manual(values = edge_colors) + # Manually set edge colors
ggplot2::scale_x_continuous(expand = ggplot2::expansion(mult = .3))
}
================================================
FILE: R/DESeq2Analyze.R
================================================
#' Differential Gene Expression Analysis using 'DESeq2'
#'
#' 'DESeq2': Differential gene expression analysis based on the negative binomial distribution.
#' This function utilizes the 'DESeq2' package to conduct differential gene expression analysis.
#' It processes tumor and normal expression data, applies DESeq2 analysis,
#' and outputs the results along with information on gene expression changes.
#'
#' The DESeq2 methodology is based on modeling count data using a negative binomial distribution,
#' which allows for handling the variability observed in gene expression data, especially in
#' small sample sizes. This approach is well-suited for RNA-Seq data analysis.
#'
#' @importFrom DESeq2 DESeqDataSetFromMatrix DESeq results
#' @importFrom dplyr mutate
#' @importFrom tibble column_to_rownames
#' @importFrom stats na.omit
#' @param tumor_file Path to the tumor data file (RDS format).
#' @param normal_file Path to the normal data file (RDS format).
#' @param output_file Path to save the output DEG data (RDS format).
#' @param logFC Threshold for log fold change.
#' @param p_value Threshold for p-value.
#' @return A data frame of differential expression results.
#' @examples
#' # Define file paths for tumor and normal data from the data folder
#' tumor_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds",
#' package = "TransProR")
#' normal_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds",
#' package = "TransProR")
#' output_file <- file.path(tempdir(), "DEG_DESeq2.rds")
#'
#' DEG_DESeq2 <- DESeq2_analyze(
#' tumor_file = tumor_file,
#' normal_file = normal_file,
#' output_file = output_file,
#' 2.5,
#' 0.01
#' )
#'
#' # View the top 5 rows of the result
#' head(DEG_DESeq2, 5)
#'
#' @references
#' DESeq2:Differential gene expression analysis based on the negative binomial distribution.
#' For more information, visit the page:
#' https://docs.gdc.cancer.gov/Data/Bioinformatics_Pipelines/Expression_mRNA_Pipeline/
#' @export
DESeq2_analyze <- function(tumor_file, normal_file, output_file, logFC = 2.5, p_value = 0.01) {
# Read and merge data
tumor <- readRDS(tumor_file)
normal <- readRDS(normal_file)
all_count_exp <- merge(tumor, normal, by = "row.names") # Merge the datasets, ensuring both have genes as row names
all_count_exp <- tibble::column_to_rownames(all_count_exp, var = "Row.names") # Set the row names
# Create group factor
group <- factor(c(rep('tumor', ncol(tumor)), rep('normal', ncol(normal))), levels = c("normal", "tumor"))
group_table <- table(group)
message("Group Table:")
message(paste(names(group_table), group_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Prepare DESeq2 dataset
colData <- data.frame(row.names = colnames(all_count_exp), group = group) # Create a dataframe to store the grouping information of samples, with the row names as sample names and the column names as group information.
dds <- DESeq2::DESeqDataSetFromMatrix(countData = all_count_exp, # Expression matrix, with rows as genes and columns as samples, containing integers derived from the calculation of reads or fragments.
colData = colData, # Sample information matrix (dataframe), showing the correspondence between the column names of the expression matrix and the grouping information, with row names as sample names. The first column indicates the treatment status of the sample (control or treatment, tumor or normal, etc.), referred to as the group.
design = ~ group) # Differential comparison matrix, which informs the differential analysis function about the variables between which differences are to be analyzed. Simply put, it specifies which are the controls and which are the treatments. The group refers to the group in colData, which is the grouping information.
# Perform differential expression analysis.
dds <- DESeq2::DESeq(dds)
# Perform DESeq2 analysis
# Extract the results of differential expression and perform a comparison. Here, the 'contrast' parameter specifies the groups to be compared.
# The 'contrast' parameter must be written in a vector format with three elements, and the order cannot be reversed.
res <- DESeq2::results(dds, contrast = c("group", "tumor", "normal"))
# Sort the differential results according to 'padj' (adjusted p-value). This step is necessary only for DESeq2, as limma and edgeR will automatically sort the results.
resOrdered <- res[order(res$padj), ]
DEG <- as.data.frame(stats::na.omit(resOrdered)) # Remove missing values. If this step is not taken, some genes with very low expression levels will result in NA values after calculation, causing difficulties in subsequent analysis and plotting.
# Add a 'change' column to mark up- or down-regulated genes, with the threshold set according to requirements.
DEG <- dplyr::mutate(DEG, change = ifelse(DEG$pvalue < p_value & DEG$log2FoldChange < -logFC, "down",
ifelse(DEG$pvalue < p_value & DEG$log2FoldChange > logFC, "up", "stable")))
# Output table of gene expression changes
change_table <- table(DEG$change)
message(" ")
message("Change Table:")
message(paste(names(change_table), change_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Save results
#save(DEG, file = output_file)
saveRDS(DEG, file = output_file)
# Return results
return(DEG)
}
================================================
FILE: R/EdgeRAnalyze.R
================================================
#' Differential Gene Expression Analysis using 'edgeR'
#'
#' This function performs differential gene expression analysis using the 'edgeR' package.
#' It reads tumor and normal expression data, merges them, filters low-expressed genes,
#' normalizes the data, performs edgeR analysis, and outputs the results along with information
#' on gene expression changes.
#'
#' @importFrom tibble column_to_rownames
#' @importFrom dplyr mutate
#' @importFrom edgeR DGEList cpm calcNormFactors estimateGLMCommonDisp estimateGLMTrendedDisp estimateGLMTagwiseDisp glmFit glmLRT topTags
#' @importFrom stats model.matrix
#' @param tumor_file Path to the tumor data file (RDS format).
#' @param normal_file Path to the normal data file (RDS format).
#' @param output_file Path to save the output DEG data (RDS format).
#' @param logFC_threshold Threshold for log fold change for marking up/down-regulated genes.
#' @param p_value_threshold Threshold for p-value for filtering significant genes.
#' @return A data frame of differential expression results.
#' @references
#' edgeR: Differential analysis of sequence read count data.
#' For more information, visit the edgeR Bioconductor page:
#' https://www.bioconductor.org/packages/release/bioc/vignettes/edgeR/inst/doc/edgeRUsersGuide.pdf
#' @export
#'
#' @examples
#' # Define file paths for tumor and normal data from the data folder
#' tumor_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds",
#' package = "TransProR")
#' normal_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds",
#' package = "TransProR")
#' output_file <- file.path(tempdir(), "DEG_edgeR.rds")
#'
#' DEG_edgeR <- edgeR_analyze(
#' tumor_file = tumor_file,
#' normal_file = normal_file,
#' output_file = output_file,
#' 2.5,
#' 0.01
#' )
#'
#' # View the top 5 rows of the result
#' head(DEG_edgeR, 5)
#'
edgeR_analyze <- function(tumor_file, normal_file, output_file, logFC_threshold = 2.5, p_value_threshold = 0.01) {
tumor <- readRDS(tumor_file)
normal <- readRDS(normal_file)
# Merge the datasets, ensuring both have genes as row names
all_count_exp <- merge(tumor, normal, by = "row.names")
all_count_exp <- tibble::column_to_rownames(all_count_exp, var = "Row.names")
# Define groups for tumor and normal samples
group <- c(rep('tumor', ncol(tumor)), rep('normal', ncol(normal)))
group <- factor(group, levels = c("normal", "tumor"))
group_table <- table(group)
message("Group Table:")
message(paste(names(group_table), group_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Create DGEList object for gene expression data and group information
d <- edgeR::DGEList(counts = all_count_exp, group = group)
# Filter lowly expressed genes based on CPM values
keep <- rowSums(edgeR::cpm(d) > 1) >= 2
d <- d[keep, , keep.lib.sizes = FALSE]
# Update library size information in the samples
d$samples$lib.size <- colSums(d$counts)
# Normalize the data using the TMM method
d <- edgeR::calcNormFactors(d)
# Create design matrix for differential analysis model
design <- stats::model.matrix(~0 + factor(group))
rownames(design) <- colnames(d)
colnames(design) <- levels(factor(group))
# Estimate dispersions - common dispersion, trended dispersion, tagwise dispersion
d <- edgeR::estimateGLMCommonDisp(d, design)
d <- edgeR::estimateGLMTrendedDisp(d, design)
d <- edgeR::estimateGLMTagwiseDisp(d, design)
# Fit the model using Generalized Linear Model (GLM)
fit <- edgeR::glmFit(d, design)
# Perform differential expression analysis using Likelihood Ratio Test (LRT)
lrt <- edgeR::glmLRT(fit, contrast = c(-1, 1)) # Note that the 'contrast' here is different from 'DESeq2'. Here, we only need to input c(-1, 1): -1 corresponds to normal, 1 corresponds to tumor.
# Retrieve top differentially expressed genes
nrDEG <- edgeR::topTags(lrt, n = nrow(d))
DEG_edgeR <- as.data.frame(nrDEG)
# Add 'change' column to mark up/down-regulated genes
k1 <- (DEG_edgeR$PValue < p_value_threshold) & (DEG_edgeR$logFC < -logFC_threshold)
k2 <- (DEG_edgeR$PValue < p_value_threshold) & (DEG_edgeR$logFC > logFC_threshold)
DEG_edgeR <- dplyr::mutate(DEG_edgeR, change = ifelse(k1, "down", ifelse(k2, "up", "stable")))
change_table <- table(DEG_edgeR$change)
message("Change Table:")
message(paste(names(change_table), change_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Save results to the specified output file
saveRDS(DEG_edgeR, file = output_file)
return(DEG_edgeR)
}
================================================
FILE: R/EnrichCirclize.R
================================================
#' Adjust and Export Pathway Analysis Results
#'
#' This function processes a dataframe containing fgsea results. It adjusts pathway names
#' by removing underscores, converting to lowercase, then capitalizing the first letter,
#' and joining the components with spaces. It selects and merges the top upregulated
#' and downregulated pathways based on enrichment score (ES) and p-value.
#'
#' @param fgseaRes Dataframe containing fgsea results with columns 'pathway', 'ES', and 'pval'.
#' @param nTop Integer, number of top pathways to select based on the p-value.
#' @return A vector containing combined top upregulated and downregulated pathways.
#' @importFrom Hmisc capitalize
#' @export
#' @examples
#' # Create a synthetic fgseaRes dataframe
#'fgseaRes <- data.frame(
#' pathway = c("KEGG_APOPTOSIS",
#' "GO_CELL_CYCLE",
#' "REACTOME_DNA_REPAIR",
#' "KEGG_METABOLISM",
#' "GO_TRANSPORT"),
#' ES = c(0.45, -0.22, 0.56, -0.35, 0.33),
#' pval = c(0.001, 0.02, 0.0003, 0.05, 0.01)
#')
#'
#' # Run the function to get top pathways
#'result <- adjust_export_pathway(fgseaRes = fgseaRes, nTop = 2)
#'
adjust_export_pathway <- function(fgseaRes, nTop = 10) {
# Adjust pathway names
fgseaRes$pathway <- as.character(fgseaRes$pathway)
for(i in 1:nrow(fgseaRes)){
message("Processing row ", i)
term = fgseaRes$pathway[i]
### 1. Split the string
term = unlist(strsplit(term, split="_", fixed=TRUE))[-1]
### 2. Convert to lowercase, then capitalize the first letter
term = Hmisc::capitalize(tolower(term))
### 3. Concatenate with spaces
term = paste(term, collapse=" ")
### 4. Data export
fgseaRes$pathway[i] = term
}
# Select top upregulated pathways
topPathwaysUp <- fgseaRes[fgseaRes$ES > 0,][order(fgseaRes$pval[fgseaRes$ES > 0]), 'pathway'][1:nTop]
# Select top downregulated pathways
topPathwaysDown <- fgseaRes[fgseaRes$ES < 0,][order(fgseaRes$pval[fgseaRes$ES < 0]), 'pathway'][1:nTop]
# Combine top pathways and convert any potential list to a vector
combinedPathways <- unlist(c(topPathwaysUp, rev(topPathwaysDown)), use.names = FALSE)
return(list(combinedPathways = combinedPathways, fgseaRes = fgseaRes))
}
#' Randomly Select Pathways with Limited Word Count
#'
#' This function randomly selects a specified number of pathways from a given list, ensuring that each selected pathway name does not exceed a specified number of words. It filters out pathways with names longer than the specified word limit before making the selection.
#'
#' @param pathways Character vector of pathways.
#' @param max_words Integer, maximum number of words allowed in the pathway name.
#' @param num_select Integer, number of pathways to randomly select.
#' @return A character vector of selected pathways.
#' @export
#' @examples
#' pathway_list <- c("pathway_one response to stimulus",
#' "pathway_two cell growth and death",
#' "pathway_three regulation of cellular process",
#' "pathway_four metabolic process")
#' selected_pathways <- selectPathways(pathway_list, max_words = 5, num_select = 2)
#'
selectPathways <- function(pathways, max_words = 10, num_select = 10) {
# Check input
if (!is.character(pathways)) {
stop("Please provide a character vector of pathways.")
}
# Filter pathways with word count not exceeding max_words
filtered_pathways <- pathways[sapply(pathways, function(x) length(strsplit(x, " ")[[1]]) <= max_words)]
# Randomly select num_select pathways from the filtered list
if (length(filtered_pathways) >= num_select) {
selected_pathways <- sample(filtered_pathways, num_select)
} else {
warning("Not enough pathways with <= ", max_words, " words. Returning as many as possible.")
selected_pathways <- sample(filtered_pathways, length(filtered_pathways))
}
return(selected_pathways)
}
#' Draw Dual-Sided Legends on a Plot
#'
#' This function creates two sets of legends, one on the left and one on the right side of a plot.
#' It displays color-coded legends with labels corresponding to different data categories.
#' Each legend entry consists of a colored rectangle and a text label. The left side legend has
#' text aligned to the right of the color block, while the right side legend has text aligned
#' to the left of the color block.
#'
#' @param labels Vector of labels for the legends.
#' @param colors Vector of colors corresponding to the labels.
#' @param legend_width The width of each legend viewport expressed in grid units.
#' @param x_positions Numeric vector of length 2 specifying the x-positions of the left and right legends.
#' @param y_position The y-position common for both legends, expressed as a fraction of the plot height.
#' @param just_positions List of two vectors, each specifying the horizontal and vertical justification for the legends.
#' @param text_alignments List of two character strings specifying text alignments for the legends ('left' or 'right').
#' @param font_size Numeric value specifying the font size for the legend labels.
#' @return Invisible. This function is called for its side effects of drawing legends on a plot.
#' @importFrom grid pushViewport viewport grid.roundrect grid.text upViewport unit
#' @export
#' @examples
#' labels <- c("Label1", "Label2", "Label3", "Label4", "Label5", "Label6")
#' colors <- c("#ff0000", "#00ff00", "#0000ff", "#ffff00", "#ff00ff", "#00ffff")
#'
#' # Convert to 'unit' objects for grid
#' grid::grid.roundrect(
#' x = grid::unit(0.5, "npc"), # "npc" stands for normalized parent coordinates
#' y = grid::unit(0.5, "npc"),
#' width = grid::unit(0.1, "npc"),
#' height = grid::unit(0.05, "npc"),
#' gp = grid::gpar(fill = "red"),
#' r = grid::unit(0.1, "npc") # rounding radius
#' )
#'
#' # Example of drawing legends with specific labels and colors
#' drawLegends(labels, colors, grid::unit(2, "cm"), c(0.225, 0.75), 0.5,
#' list(c("left", "center"), c("right", "center")),
#' list("right", "left"), 10)
#'
drawLegends <- function(labels, colors, legend_width, x_positions, y_position, just_positions, text_alignments, font_size) {
half_length <- length(labels) / 2
legend_height <- grid::unit(1, "lines") * half_length
# Draw left-side legend
grid::pushViewport(grid::viewport(
width = legend_width,
height = legend_height,
x = x_positions[1],
y = y_position,
just = just_positions[[1]]
))
for (i in seq_len(half_length)) {
grid::grid.roundrect(
x = grid::unit(1, "npc") - grid::unit(0.5, "cm"),
y = grid::unit(1, "npc") - grid::unit(i / half_length, "npc") + grid::unit(0.5 / half_length, "npc"),
width = grid::unit(0.7, "cm"),
height = grid::unit(0.9 / half_length, "npc"),
gp = grid::gpar(fill = colors[i], col = NA),
r = grid::unit(0.3, "snpc")
)
grid::grid.text(
labels[i],
x = grid::unit(1, "npc") - grid::unit(1, "cm"),
y = grid::unit(1, "npc") - grid::unit(i / half_length, "npc") + grid::unit(0.5 / half_length, "npc"),
gp = grid::gpar(col = colors[i], fontsize = font_size),
just = text_alignments[[1]]
)
}
grid::upViewport()
# Draw right-side legend
grid::pushViewport(grid::viewport(
width = legend_width,
height = legend_height,
x = x_positions[2],
y = y_position,
just = just_positions[[2]]
))
for (i in (half_length + 1):length(labels)) {
grid::grid.roundrect(
x = grid::unit(1, "npc") - grid::unit(0.6, "cm"),
y = grid::unit(1, "npc") - grid::unit((i - half_length) / half_length, "npc") + grid::unit(0.5 / half_length, "npc"),
width = grid::unit(0.7, "cm"),
height = grid::unit(0.9 / half_length, "npc"),
gp = grid::gpar(fill = colors[i], col = NA),
r = grid::unit(0.3, "snpc")
)
grid::grid.text(
labels[i],
x = grid::unit(1, "npc") - grid::unit(0.1, "cm"),
y = grid::unit(1, "npc") - grid::unit((i - half_length) / half_length, "npc") + grid::unit(0.5 / half_length, "npc"),
gp = grid::gpar(col = colors[i], fontsize = font_size),
just = text_alignments[[2]]
)
}
grid::upViewport()
}
#' Draw Chord Diagram with Legends
#'
#' This function creates a chord diagram from a specified dataframe and draws two sets of legends for it.
#' It adjusts the track height of the chord diagram to optimize space and uses specified colors for the grid.
#' Legends are drawn at specified positions with configurable text alignments and font sizes.
#'
#' @param all_combined_df A dataframe containing the matrix for the chord diagram.
#' @param original_colors A vector of colors for the grid columns of the chord diagram.
#' @param labels A vector of labels for the first legend.
#' @param colors A vector of colors corresponding to the first legend's labels.
#' @param labels2 A vector of labels for the second legend.
#' @param colors2 A vector of colors corresponding to the second legend's labels.
#' @param font_size The font size used for legend texts, defaults to 10.
#' @return Invisible, primarily used for its side effects of drawing on a graphics device.
#' @importFrom circlize chordDiagram
#' @importFrom grid unit
#' @importFrom graphics strwidth
#' @export
#' @examples
#' # Sample Chord Diagram Matrix
#' all_combined_df <- data.frame(
#' A = c(10, 20, 30),
#' B = c(15, 25, 35),
#' C = c(5, 10, 15)
#' )
#' rownames(all_combined_df) <- c("A", "B", "C")
#'
#' # Colors for the grid of the chord diagram (corresponding to columns of the matrix)
#' original_colors <- c("red", "green", "blue")
#'
#' # Name the colors according to the sectors (A, B, C)
#' names(original_colors) <- colnames(all_combined_df)
#'
#' # Labels and Colors for the First Legend
#' labels <- c("Label 1", "Label 2", "Label 3")
#' colors <- c("yellow", "purple", "cyan")
#'
#' # Labels and Colors for the Second Legend
#' labels2 <- c("Label A", "Label B", "Label C")
#' colors2 <- c("orange", "pink", "brown")
#'
#' # Font size for the legend texts (optional, default is 10)
#' font_size <- 10
#'
#' # Call the enrichment_circlize function with the sample data
#' # This is just an example; the plot will be rendered in an appropriate graphics context
#' # such as RStudio's plot pane or an external plotting window.
#' plot1 <- enrichment_circlize(all_combined_df,
#' original_colors,
#' labels,
#' colors,
#' labels2,
#' colors2,
#' font_size
#' )
#'
enrichment_circlize <- function(all_combined_df, original_colors, labels, colors,
labels2, colors2, font_size = 10) {
# Calculate adjusted height for the chord diagram
max_height <- max(graphics::strwidth(unlist(dimnames(all_combined_df)), "inches")) * 1.2
# Draw the chord diagram
circlize::chordDiagram(all_combined_df, grid.col = original_colors, annotationTrack = "grid",
directional = -1, direction.type = c("diffHeight", "arrows"),
link.arr.type = "big.arrow", preAllocateTracks = list(track.height = max_height))
# Draw the first set of legends
legend_width <- grid::unit(2, "cm")
x_positions <- c(0.225, 0.75)
y_position <- 0.5
just_positions <- list(c("left", "center"), c("right", "center"))
text_alignments <- list("right", "left")
drawLegends(labels, colors, legend_width, x_positions, y_position, just_positions, text_alignments, font_size)
# Draw the second set of legends
x_positions2 <- c(0.3, 0.68)
y_position2 <- 0.7
drawLegends(labels2, colors2, legend_width, x_positions2, y_position2, just_positions, text_alignments, font_size)
}
================================================
FILE: R/EnrichCircoBar.R
================================================
#' Extract and Count Descriptions with Specified Color
#'
#' This function filters a data frame for specified descriptions, selects the 'Description' and 'Count' columns,
#' and adds a new column with a specified color.
#'
#' @param df A data frame containing at least 'Description' and 'Count' columns.
#' @param descriptions A vector of descriptions to filter in the data frame.
#' @param color A character string specifying the color to be added as a new column.
#' @return A data frame filtered by descriptions, containing 'Description', 'Count', and a new 'color' column.
#' @export
#'
#' @examples
#' # Generate Sample Input Data for extract_descriptions_counts Function
#'
#' # Create a sample data frame with 'Description' and 'Count' columns
#' data <- data.frame(
#' Description = c(
#' "immunoglobulin production",
#' "B cell mediated immunity",
#' "T cell activation",
#' "antigen processing and presentation",
#' "cytokine signaling",
#' "natural killer cell activity",
#' "phagocytosis",
#' "complement activation",
#' "antibody-dependent cellular cytotoxicity",
#' "regulatory T cell function"
#' ),
#' Count = c(
#' 150, # immunoglobulin production
#' 200, # B cell mediated immunity
#' 175, # T cell activation
#' 125, # antigen processing and presentation
#' 190, # cytokine signaling
#' 160, # natural killer cell activity
#' 140, # phagocytosis
#' 180, # complement activation
#' 130, # antibody-dependent cellular cytotoxicity
#' 170 # regulatory T cell function
#' ),
#' stringsAsFactors = FALSE # Ensure that strings are not converted to factors
#' )
#'
#'
#'
#' descriptions_to_filter <- c("immunoglobulin production", "B cell mediated immunity")
#' specified_color <- "red" # You can specify any color you desire
#' filtered_data_with_color <- extract_descriptions_counts(
#' data, descriptions_to_filter,
#' specified_color)
#' print(filtered_data_with_color)
#'
extract_descriptions_counts <- function(df, descriptions, color) {
# Filter rows where the Description column values are in the descriptions vector
result_df <- df[df$Description %in% descriptions, ]
# Select Description and Count columns
result_df <- result_df[, c("Description", "Count")]
# Add a new column 'color' with the specified color value
result_df$color <- color
return(result_df)
}
#' Combine and Visualize Data with Circular Bar Chart
#'
#' This function combines multiple data frames, arranges them, and visualizes the combined data
#' in a Circular Bar Chart using the 'ggplot2' and 'ggalluvial' packages.
#'
#' @importFrom dplyr bind_rows arrange desc row_number mutate
#' @importFrom ggplot2 ggplot geom_bar geom_text scale_fill_manual scale_y_continuous scale_x_continuous coord_polar labs theme_minimal theme element_rect element_blank
#' @importFrom rlang .data
#' @param data_list A list of data frames to be combined.
#' @return A `ggplot` object representing the Circular Bar Chart.
#' @export
#'
#' @examples
#' # Create sample data frames for each enrichment category
#'
#' # 1. Biological Process (BP)
#' filtered_data_BP <- data.frame(
#' Description = c(
#' "immune response",
#' "cell proliferation",
#' "signal transduction",
#' "apoptotic process",
#' "metabolic process"
#' ),
#' Count = c(120, 85, 150, 60, 95),
#' color = c(
#' "#1f77b4", # blue
#' "#ff7f0e", # orange
#' "#2ca02c", # green
#' "#d62728", # red
#' "#9467bd" # purple
#' ),
#' stringsAsFactors = FALSE
#' )
#'
#' # 2. Cellular Component (CC)
#' filtered_data_CC <- data.frame(
#' Description = c(
#' "nucleus",
#' "cytoplasm",
#' "membrane",
#' "mitochondrion",
#' "extracellular space"
#' ),
#' Count = c(90, 110, 75, 65, 80),
#' color = c(
#' "#1f77b4",
#' "#ff7f0e",
#' "#2ca02c",
#' "#d62728",
#' "#9467bd"
#' ),
#' stringsAsFactors = FALSE
#' )
#'
#' # 3. Molecular Function (MF)
#' filtered_data_MF <- data.frame(
#' Description = c(
#' "protein binding",
#' "DNA binding",
#' "enzyme activity",
#' "transporter activity",
#' "receptor activity"
#' ),
#' Count = c(140, 130, 100, 70, 90),
#' color = c(
#' "#1f77b4",
#' "#ff7f0e",
#' "#2ca02c",
#' "#d62728",
#' "#9467bd"
#' ),
#' stringsAsFactors = FALSE
#' )
#'
#' # 4. Disease Ontology (DO)
#' filtered_data_DO <- data.frame(
#' Description = c(
#' "cancer",
#' "cardiovascular disease",
#' "neurological disorder",
#' "metabolic disease",
#' "infectious disease"
#' ),
#' Count = c(200, 150, 120, 90, 160),
#' color = c(
#' "#1f77b4",
#' "#ff7f0e",
#' "#2ca02c",
#' "#d62728",
#' "#9467bd"
#' ),
#' stringsAsFactors = FALSE
#' )
#'
#' # 5. Reactome Pathways
#' filtered_data_Reactome <- data.frame(
#' Description = c(
#' "Cell Cycle",
#' "Apoptosis",
#' "DNA Repair",
#' "Signal Transduction",
#' "Metabolism of Proteins"
#' ),
#' Count = c(110, 95, 80, 130, 85),
#' color = c(
#' "#1f77b4",
#' "#ff7f0e",
#' "#2ca02c",
#' "#d62728",
#' "#9467bd"
#' ),
#' stringsAsFactors = FALSE
#' )
#'
#' # 6. KEGG Pathways
#' filtered_data_kegg <- data.frame(
#' Description = c(
#' "PI3K-Akt signaling pathway",
#' "MAPK signaling pathway",
#' "NF-kappa B signaling pathway",
#' "JAK-STAT signaling pathway",
#' "Toll-like receptor signaling pathway"
#' ),
#' Count = c(175, 160, 145, 130, 155),
#' color = c(
#' "#1f77b4",
#' "#ff7f0e",
#' "#2ca02c",
#' "#d62728",
#' "#9467bd"
#' ),
#' stringsAsFactors = FALSE
#' )
#'
#' # Combine all filtered data frames into a list
#' data_list <- list(
#' BP = filtered_data_BP,
#' CC = filtered_data_CC,
#' MF = filtered_data_MF,
#' DO = filtered_data_DO,
#' Reactome = filtered_data_Reactome,
#' KEGG = filtered_data_kegg
#' )
#'
#' # Create the Circular Bar Chart
#' combined_and_visualized_data <- enrich_circo_bar(data_list)
#'
enrich_circo_bar <- function(data_list) {
# Combine data frames
combined_data <- dplyr::bind_rows(data_list)
# Sort by 'Count' column in descending order and add an 'id' column
combined_data <- combined_data %>%
dplyr::arrange(.data$Count) %>%
dplyr::mutate(id = dplyr::row_number())
# Ensure 'Description' is a factor with correct level order
combined_data <- combined_data %>%
dplyr::mutate(Description = factor(.data$Description, levels = unique(.data$Description)))
# Extract fill colors, ensuring colors match 'Description' levels
fill_colors <- combined_data$color[match(levels(combined_data$Description), combined_data$Description)]
# Calculate the expanded max values for Count and id
max_count <- max(combined_data$Count) + (max(combined_data$Count) / 5)
max_id <- max(combined_data$id) + 1.5
# Create the plot
p <- ggplot2::ggplot(combined_data, ggplot2::aes(x = .data$id, y = .data$Count, fill = .data$Description)) +
ggplot2::geom_bar(stat = 'identity', width = 0.7) +
ggplot2::geom_text(ggplot2::aes(x = .data$id, y = 0, label = .data$Description), hjust = 1.03, size = 3.5, color = fill_colors) +
ggplot2::scale_fill_manual(values = fill_colors, guide = "none") +
ggplot2::scale_y_continuous(expand = c(0, 0), limits = c(0, max_count), position = 'right') +
ggplot2::scale_x_continuous(expand = c(0, 0), limits = c(-1, max_id)) +
ggplot2::coord_polar(theta = 'y') +
ggplot2::labs(title = 'Enrichment CircularBar Chart', subtitle = "Including: BP/MF/CC/DO/KEGG/Reactome") +
ggplot2::theme_minimal() +
ggplot2::theme(plot.background = ggplot2::element_rect(fill = 'white', color = 'white'),
axis.title = ggplot2::element_blank(),
axis.text = ggplot2::element_blank())
return(p)
}
================================================
FILE: R/EnrichPolarBubble.R
================================================
#' Enrichment Polar Bubble Plot
#'
#' This function creates a polar bubble plot using 'ggplot2'. It is designed to visually represent data with methods and positional metrics integrated, highlighting specific IDs if necessary.
#'
#' @param final_combined_df_with_id_and_position A data frame containing 'id', 'Count', 'method', 'Description', 'point_position', 'test_color'.
#' @param pal A named vector of colors corresponding to the 'method' values.
#' @param highlight_ids A vector of IDs to highlight.
#' @importFrom ggplot2 ggplot aes geom_point geom_hline geom_segment scale_fill_manual scale_size coord_polar theme_void scale_color_manual scale_x_continuous scale_y_continuous annotate theme scale_fill_identity
#' @importFrom dplyr filter
#' @importFrom stringr str_c
#' @importFrom tibble tibble
#' @importFrom geomtextpath geom_textpath
#' @importFrom ggalt stat_xspline
#' @importFrom ggnewscale new_scale_fill
#' @return A `ggplot` object representing the enriched polar bubble plot.
#' @examples
#' final_df <- data.frame(id = 1:10, Count = c(10, 20, 30, 40, 50, 60, 70, 80, 90, 100),
#' method = rep("Method1", 10),
#' Description = LETTERS[1:10],
#' point_position = seq(10, 100, 10),
#' test_color = sample(c("red", "blue"), 10, replace = TRUE))
#' pal <- c("Method1" = "blue")
#' highlight_ids <- c(1, 5, 9)
#' enrich_polar_bubble(final_df, pal, highlight_ids)
#' @export
enrich_polar_bubble <- function(final_combined_df_with_id_and_position, pal, highlight_ids) {
# Extract unique levels of point_position and prepend 0
levels <- c(0, sort(unique(final_combined_df_with_id_and_position$point_position)))
max_level_plus_five <- max(levels) + 5
max_id_plus_one <- max(final_combined_df_with_id_and_position$id) + 1
gg <- ggplot2::ggplot(data = final_combined_df_with_id_and_position,
ggplot2::aes(x = .data$id, y = .data$Count, group = .data$method, fill = .data$method, color = .data$method)) +
ggplot2::geom_hline(yintercept = levels, color = "grey85") +
ggplot2::geom_hline(yintercept = max_level_plus_five, color = "grey15") +
ggplot2::geom_segment(data = tibble::tibble(x = 1:max_id_plus_one, y = 0, yend = max_level_plus_five),
ggplot2::aes(x = .data$x, xend = .data$x, y = .data$y, yend = .data$yend),
inherit.aes = FALSE, color = "grey85") +
ggplot2::geom_segment(data = tibble::tibble(x = 1:max_id_plus_one, y = max(levels), yend = max_level_plus_five),
ggplot2::aes(x = .data$x, xend = .data$x, y = .data$y, yend = .data$yend),
inherit.aes = FALSE, color = "grey15") +
geomtextpath::geom_textpath(ggplot2::aes(x = final_combined_df_with_id_and_position$id, y = max_level_plus_five + 5, label = final_combined_df_with_id_and_position$Description, angle = 55),
inherit.aes = FALSE, hjust = 0, size = 5.5, color = final_combined_df_with_id_and_position$test_color) +
ggplot2::geom_point(ggplot2::aes(x = .data$id, y = .data$point_position, size = .data$Count),
shape = 21, alpha = 0.6) +
ggalt::stat_xspline(geom = "line", spline_shape = 0.25, linewidth = 0.75, alpha = 0.4) +
ggalt::stat_xspline(geom = "area", alpha = 0.25, spline_shape = 0.25, outline.type = "upper") +
ggplot2::scale_size(range = c(0, 14), breaks = levels,
guide = ggplot2::guide_legend(title = "Count")) +
ggplot2::scale_fill_manual(values = pal) +
ggplot2::scale_color_manual(values = pal) +
ggplot2::scale_x_continuous(expand = c(0, 0)) +
ggplot2::scale_y_continuous(limits = c(-50, max_level_plus_five + 15), expand = c(0, 0)) +
ggplot2::coord_polar() +
ggplot2::annotate("text", x = 1, y = levels, label = "-", hjust = 1, size = 4) +
ggplot2::annotate("text", x = 1.1, y = levels, label = levels, hjust = 0, size = 4) +
ggplot2::annotate("text", x = 1, y = -50, label = stringr::str_c("BP/CC/MF/KEGG/DO/REACTOME"), size = 4) +
ggplot2::theme_void() +
ggplot2::theme(plot.background = ggplot2::element_rect(fill = "white", color = NA)) +
ggnewscale::new_scale_fill() +
ggplot2::geom_rect(data = dplyr::filter(final_combined_df_with_id_and_position, .data$id %in% highlight_ids),
ggplot2::aes(xmin = .data$id - 0.5, xmax = .data$id + 0.5, ymin = 0, ymax = max_level_plus_five, fill = .data$test_color),
alpha = 0.1, inherit.aes = FALSE) +
ggplot2::scale_fill_identity() # Use colors specified in 'test_color'
return(gg)
}
================================================
FILE: R/EnrichmentSpiralize.R
================================================
#' Extract and Store Top Pathways for Each Sample
#'
#' This function processes a dataframe containing SSGSEA KEGG results. It allows specifying the number
#' of top pathways to extract for each sample based on their scores, and stores these in a new dataframe
#' with sample names and pathway scores.
#'
#' @param ssgsea_kegg Dataframe containing SSGSEA KEGG results with samples as columns and pathways as rows.
#' @param nTop Integer, number of top pathways to select for each sample.
#' @return A dataframe with columns 'Pathway', 'Sample', and 'Value' representing the top pathways for each sample.
#' @importFrom utils head
#' @export
#' @examples
#' # Example: Generating input data for the extract_ntop_pathways function
#'
#' # Define example pathways
#' pathways <- c("Pathway_A", "Pathway_B", "Pathway_C", "Pathway_D", "Pathway_E",
#' "Pathway_F", "Pathway_G", "Pathway_H", "Pathway_I", "Pathway_J")
#'
#' # Define example samples
#' samples <- c("Sample_1", "Sample_2", "Sample_3")
#'
#' # Generate random SSGSEA KEGG scores between 0 and 1
#' set.seed(123) # For reproducibility
#' ssgsea_scores <- matrix(runif(length(pathways) * length(samples), min = 0, max = 1),
#' nrow = length(pathways), ncol = length(samples),
#' dimnames = list(pathways, samples))
#'
#' # Convert to a data frame
#' ssgsea_kegg <- as.data.frame(ssgsea_scores)
#'
#' # Extract the top 3 pathways for each sample
#' top_pathways <- extract_ntop_pathways(ssgsea_kegg, nTop = 3)
#'
extract_ntop_pathways <- function(ssgsea_kegg, nTop = 5) {
# Initialize an empty data frame to store the results
results <- data.frame(Pathway = character(), Sample = character(), Value = numeric(), stringsAsFactors = FALSE)
# Iterate through each sample, starting from the first column
for (i in 1:ncol(ssgsea_kegg)) {
sample_name <- colnames(ssgsea_kegg)[i]
# To avoid factor type errors, ensure the data is numeric
column_data <- as.numeric(ssgsea_kegg[[i]])
# Create a new data frame with numeric data for sorting and extracting
pathway_data <- data.frame(Pathway = rownames(ssgsea_kegg), Value = column_data, stringsAsFactors = FALSE)
# Sort by value in descending order and take the top nTop entries
top_paths <- utils::head(pathway_data[order(-pathway_data$Value),], nTop)
# Bind to the results data frame
results <- rbind(results, data.frame(Pathway = top_paths$Pathway, Sample = sample_name, Value = top_paths$Value))
}
return(results)
}
#' Extract Positive Pathways from SSGSEA Results and Select Random Samples
#'
#' This function processes the results of SSGSEA, specifically focusing on KEGG pathways.
#' It extracts pathways with positive values from each sample and randomly selects a subset of them.
#'
#' @param ssgsea_kegg A matrix or data frame with pathways as rows and samples as columns.
#' @param max_paths_per_sample Integer, maximum number of pathways to select per sample.
#' @return A data frame with selected pathways, samples, and their corresponding values.
#' @export
#' @examples
#' # Example: Generating input data for the extract_positive_pathways function
#'
#' # Define example pathways
#' pathways <- c("Pathway_1", "Pathway_2", "Pathway_3", "Pathway_4", "Pathway_5",
#' "Pathway_6", "Pathway_7", "Pathway_8", "Pathway_9", "Pathway_10")
#'
#' # Define example samples
#' samples <- c("Sample_A", "Sample_B", "Sample_C")
#'
#' # Generate random SSGSEA KEGG scores including both positive and negative values
#' set.seed(456) # For reproducibility
#' ssgsea_scores <- matrix(rnorm(length(pathways) * length(samples), mean = 0, sd = 1),
#' nrow = length(pathways), ncol = length(samples),
#' dimnames = list(pathways, samples))
#'
#' # Convert to a data frame
#' ssgsea_kegg <- as.data.frame(ssgsea_scores)
#'
#' # Use the extract_positive_pathways function to extract up to 3 positive pathways per sample
#' selected_positive_pathways <- extract_positive_pathways(ssgsea_kegg, max_paths_per_sample = 3)
#'
extract_positive_pathways <- function(ssgsea_kegg, max_paths_per_sample = 5) {
# Initialize an empty data frame to store the results
results <- data.frame(Pathway = character(), Sample = character(), Value = numeric(), stringsAsFactors = FALSE)
# Iterate over each sample
for (i in 1:ncol(ssgsea_kegg)) {
sample_name <- colnames(ssgsea_kegg)[i]
# Ensure the data is numeric
column_data <- as.numeric(ssgsea_kegg[[i]])
# Create a new data frame with pathway names and values
pathway_data <- data.frame(Pathway = rownames(ssgsea_kegg), Value = column_data, stringsAsFactors = FALSE)
# Filter for positive values
positive_paths <- pathway_data[pathway_data$Value > 0,]
# If there are positive values, randomly select a few pathways
if (nrow(positive_paths) > 0) {
selected_paths <- positive_paths[sample(nrow(positive_paths), min(max_paths_per_sample, nrow(positive_paths))),]
# Bind to the results data frame
results <- rbind(results, data.frame(Pathway = selected_paths$Pathway, Sample = sample_name, Value = selected_paths$Value))
}
}
return(results)
}
#' Adjust Color Tone by Modifying Saturation and Luminance
#'
#' This function adjusts the saturation and luminance of a given color. It works by converting
#' the color from RGB to Luv color space, applying the scaling factors to the saturation and luminance,
#' and then converting it back to RGB.
#'
#' @param color A color in hexadecimal format (e.g., "#FF0000") or a valid R color name.
#' @param saturation_scale Numeric, the scaling factor for saturation (values < 1 decrease saturation, values > 1 increase saturation).
#' @param luminance_scale Numeric, the scaling factor for luminance (values < 1 darken the color, values > 1 lighten the color).
#' @return Returns a color in hexadecimal format adjusted according to the provided scales.
#' @importFrom grDevices convertColor col2rgb rgb
#' @export
#' @examples
#' adjusted_color <- adjust_color_tone("#FF0000", saturation_scale = 0.8, luminance_scale = 1.2)
#' print(adjusted_color)
#'
adjust_color_tone <- function(color, saturation_scale, luminance_scale) {
# Convert the input color to RGB, then to Luv color space
rgb <- t(grDevices::col2rgb(color) / 255)
luv <- grDevices::convertColor(rgb, from = "sRGB", to = "Luv")
# Apply scaling factors to saturation and luminance
luv[, 2:3] <- luv[, 2:3] * saturation_scale # Adjust saturation
luv[, 1] <- luv[, 1] * luminance_scale # Adjust luminance
# Convert back to RGB and correct color values to stay within the valid range
rgb_new <- grDevices::convertColor(luv, from = "Luv", to = "sRGB")
rgb_new <- rgb_new * 255
rgb_new[rgb_new > 255] <- 255 # Prevent color values from exceeding the maximum
# Convert adjusted RGB values back to hexadecimal format
apply(rgb_new, 1, function(x) grDevices::rgb(x[1], x[2], x[3], maxColorValue = 255))
}
#' Render a Spiral Plot Using Run-Length Encoding
#'
#' This function creates a spiral plot for visualizing sequential data in a compact and visually appealing way.
#' It uses run-length encoding to represent the lengths and colors of sequences in the spiral.
#'
#' @param x A vector representing categories or segments.
#' @param samples A vector indicating the sample each segment belongs to.
#' @param values Numeric vector indicating the lengths of each segment.
#' @param colors Character vector specifying the colors for each segment.
#' @param labels Logical, whether to add labels to each segment.
#' @importFrom grid gpar unit
#' @importFrom spiralize spiral_rect spiral_text spiral_initialize spiral_track
#' @export
#' @return No return value, called for side effects. This function generates a spiral plot and optionally adds labels.
#' @examples
#' # Example: Creating a spiral plot using the spiral_newrle function
#'
#' # Define example data
#' x <- c("A", "A", "B", "C")
#' samples <- c("Sample1", "Sample1", "Sample2", "Sample2")
#' values <- c(20, 30, 15, 35)
#' colors <- c("red", "blue", "green", "purple")
#' labels <- TRUE
#'
#' # Initialize the spiral plot, setting the x-axis range and scaling
#' spiralize::spiral_initialize(xlim = c(0, sum(values)), scale_by = "curve_length",
#' vp_param = list(x = grid::unit(0, "npc"), just = "left"))
#'
#' # Create a track for the spiral plot
#' spiralize::spiral_track(height = 0.5)
#'
#' # Add segments to the spiral plot using run-length encoding
#' spiral_newrle(x, samples, values, colors, labels)
#'
spiral_newrle <- function(x, samples, values, colors, labels = FALSE) {
x <- as.vector(x) # Ensure x is a vector
samples <- as.vector(samples) # Ensure samples is a vector
values <- as.numeric(values) # Ensure values are numeric
position_start <- 0 # Initialize starting position
current_sample <- samples[1] # Start with the first sample
cumulative_start <- position_start # Initialize cumulative start for labels
# Loop through each value
for (i in seq_along(values)) {
position_end <- position_start + values[i] # Calculate end position
# Use the specified color, defaulting to red if missing
color <- if (!is.na(colors[i])) colors[i] else "red"
# Draw the segment in the spiral
spiralize::spiral_rect(position_start, 0, position_end, 1, gp = grid::gpar(fill = color, col = NA))
# Check for sample change or last element
if (i == length(values) || samples[i + 1] != current_sample) {
if (labels) {
label_position <- (cumulative_start + position_end) / 2
spiralize::spiral_text(label_position, 0.5, current_sample, facing = "curved_inside", nice_facing = TRUE)
}
cumulative_start <- position_end # Reset for next sample
if (i < length(values)) {
current_sample <- samples[i + 1]
}
}
position_start <- position_end # Move to next start position
}
}
#' Create Spiral Plots with Legends Using 'spiralize' and 'ComplexHeatmap'
#'
#' This function initializes a spiral plot, adds tracks for pathways and samples,
#' and generates legends based on the sample and pathway information in the provided data frame.
#' It uses 'spiralize' for the spiral plot and 'ComplexHeatmap' for handling legends.
#'
#' @param results A data frame containing 'Pathway', 'Sample', 'Value', 'PathwayColor', and 'SampleColor' columns.
#' @importFrom grid gpar
#' @importFrom spiralize spiral_initialize spiral_track
#' @importFrom ComplexHeatmap packLegend Legend draw
#' @importFrom ggplot2 unit
#' @export
#' @return No return value, called for side effects. This function generates spiral plots and adds legends based on sample and pathway information.
#' @examples
#' # Example: Creating enrichment spiral plots with legends
#'
#' # Define the results data frame
#' results <- data.frame(
#' Pathway = c("Pathway1", "Pathway1", "Pathway2", "Pathway2", "Pathway3"),
#' Sample = c("Sample1", "Sample1", "Sample2", "Sample2", "Sample3"),
#' Value = c(20, 30, 15, 35, 25),
#' PathwayColor = c("red", "red", "blue", "blue", "orange"),
#' SampleColor = c("green", "green", "purple", "purple", "cyan"),
#' stringsAsFactors = FALSE
#' )
#'
#' # Create the enrichment spiral plots with legends
#' enrichment_spiral_plots(results)
#'
enrichment_spiral_plots <- function(results) {
if (!requireNamespace("systemfonts", quietly = TRUE)) {
stop("ggplot2 is required to use the function. Please install it.", call. = FALSE)
}
# Calculate the total value for setting the x-axis range
n <- sum(results$Value)
# Initialize the spiral plot
spiralize::spiral_initialize(xlim = c(0, n), scale_by = "curve_length",
vp_param = list(x = ggplot2::unit(0, "npc"), just = "left"))
# Add a track for pathways
spiralize::spiral_track(height = 0.4)
spiral_newrle(results$Pathway, results$Sample, results$Value, results$PathwayColor, labels = FALSE)
# Add a track for samples
spiralize::spiral_track(height = 0.4)
spiral_newrle(results$Sample, results$Sample, results$Value, results$SampleColor, labels = TRUE)
# Generate legends based on sample, using unique pathway and color information
lgd_list <- tapply(1:nrow(results), results$Sample, function(ind) {
ComplexHeatmap::Legend(title = results$Sample[ind][1], at = unique(results$Pathway[ind]),
legend_gp = grid::gpar(fill = unique(results$PathwayColor[ind])))
})
# Set the maximum height for the legends and draw them
lgd <- ComplexHeatmap::packLegend(list = lgd_list, max_height = ggplot2::unit(7, "inch"))
ComplexHeatmap::draw(lgd, x = ggplot2::unit(1, "npc") + ggplot2::unit(1, "mm"), just = "left")
}
================================================
FILE: R/FacetDensityFoldchange.R
================================================
#' Create faceted high-density region plots with optional points and density contours
#'
#' This function creates faceted high-density region plots using ggdensity for
#' adding optional density rug and density contours, and scatter points. It also adds a regression line
#' and Pearson correlation label. The plot is faceted by a grouping variable.
#'
#' @param data Data frame containing variables for plotting.
#' @param x_var Name of the x-axis variable as a string.
#' @param y_var Name of the y-axis variable as a string.
#' @param group_var Name of the grouping variable for color mapping as a string.
#' @param facet_var Name of the faceting variable.
#' @param palette Color palette for the plot as a character vector.
#' @param show_points Logical, if TRUE adds scatter points to the plot.
#' @param show_density Logical, if TRUE adds filled density contours to the plot.
#' @param point_size Size of the points, relevant if show_points is TRUE.
#' @param point_alpha Transparency level of the points, relevant if show_points is TRUE.
#' @param line_size Size of the regression line.
#' @param cor_method Method to calculate correlation ("pearson" or "spearman").
#' @param cor_label_pos Vector of length 2 indicating the position of the correlation label (x and y).
#' @param cor_vjust Vertical justification for correlation label, default is NULL.
#' @return A `ggplot` object representing the high-density region plot.
#' @importFrom ggplot2 ggplot aes_string geom_point geom_smooth scale_fill_manual scale_color_manual facet_wrap theme margin
#' @importFrom ggdensity geom_hdr geom_hdr_rug
#' @importFrom ggpubr stat_cor
#' @importFrom hrbrthemes theme_ipsum
#' @importFrom grid unit
#' @importFrom stats as.formula
#' @examples
#' combined_df_file <- system.file("extdata", "combined_df.rds", package = "TransProR")
#' combined_df <- readRDS(combined_df_file)
#' pal2 = c("#2787e0","#1a9ae0","#1dabbf","#00897b","#43a047","#7cb342")
#' all_facet_density_foldchange_name1 <- facet_density_foldchange(
#' data = combined_df,
#' x_var = "log2FoldChange_1",
#' y_var = "log2FoldChange_2",
#' group_var = "name",
#' facet_var = "name",
#' palette = pal2,
#' show_points = TRUE,
#' show_density = FALSE,
#' point_size = 2,
#' point_alpha = 0.1,
#' line_size = 1.6,
#' cor_method = "pearson",
#' cor_label_pos = c("left", "top"),
#' cor_vjust = 1
#' )
#' @export
facet_density_foldchange <- function(data,
x_var,
y_var,
group_var,
facet_var,
palette,
show_points = FALSE,
show_density = TRUE,
point_size = 2.5,
point_alpha = 0.1,
line_size = 1.6,
cor_method = "pearson",
cor_label_pos = c("left", 0.97),
cor_vjust = NULL) {
# Begin constructing the ggplot
plot <- ggplot2::ggplot(data, ggplot2::aes_string(x = x_var, y = y_var, fill = group_var))
# Optionally add density rug
plot <- plot + ggdensity::geom_hdr_rug()
# Optionally add density contours
if (show_density) {
plot <- plot + ggdensity::geom_hdr()
}
# Optionally add points
if (show_points) {
plot <- plot + ggplot2::geom_point(ggplot2::aes_string(color = group_var), shape = 21,
size = point_size, alpha = point_alpha)
}
# Add regression line and correlation label
plot <- plot +
ggplot2::geom_smooth(ggplot2::aes_string(x = x_var, y = y_var, color = group_var),
method = 'lm', level = 0.95, size = line_size)
# Add regression line and correlation label
if (is.null(cor_vjust)) {
plot <- plot + ggpubr::stat_cor(ggplot2::aes_string(color = group_var), method = cor_method, label.x.npc = cor_label_pos[1], label.y.npc = cor_label_pos[2])
} else {
plot <- plot + ggpubr::stat_cor(ggplot2::aes_string(color = group_var), method = cor_method, label.x.npc = cor_label_pos[1], label.y.npc = cor_label_pos[2], vjust = cor_vjust)
}
# Customize scales and facet wrapping
plot <- plot +
ggplot2::scale_fill_manual(values = palette) +
ggplot2::scale_color_manual(values = palette) +
ggplot2::facet_wrap(stats::as.formula(paste0("~ ", facet_var)), scales = "free_x") +
hrbrthemes::theme_ipsum() +
ggplot2::theme(plot.margin = ggplot2::margin(10, 10, 10, 10),
plot.background = ggplot2::element_rect(fill = "white", color = "white"),
panel.spacing = grid::unit(2, "mm"))
# Return the `ggplot` object
return(plot)
}
================================================
FILE: R/FilterDiffGenes.R
================================================
#' Filter Differentially Expressed Genes
#'
#' This function filters a data frame to identify genes with significant differential expression
#' based on specified thresholds for p-values and log fold change. It allows for flexible
#' input of column names for p-values and log fold change.
#'
#' @importFrom dplyr filter
#' @importFrom rlang sym
#' @param data A data frame containing gene expression data.
#' @param p_val_col Character string indicating the column name for p-values. Default is "adj.P.Val".
#' @param log_fc_col Character string indicating the column name for log fold change. Default is "logFC".
#' @param p_val_threshold Numeric threshold for filtering p-values. Default is 0.05.
#' @param log_fc_threshold Numeric threshold for filtering log fold change. Default is 1.0.
#' @return A data frame with genes filtered by the specified criteria.
#' @export
#'
#' @examples
#' # Create a sample data frame with p-values and log fold changes
#' sample_data <- data.frame(
#' adj.P.Val = c(0.03, 0.06, 0.02, 0.07),
#' logFC = c(1.5, 0.8, -1.2, 1.1),
#' gene = c("Gene1", "Gene2", "Gene3", "Gene4")
#' )
#'
#' # Use the filter_diff_genes function to filter significant genes
#' filtered_genes <- filter_diff_genes(sample_data)
#' print(filtered_genes)
filter_diff_genes <- function(data, p_val_col = "adj.P.Val", log_fc_col = "logFC",
p_val_threshold = 0.05, log_fc_threshold = 1.0) {
# Check for the specified columns in the data frame
if (!(p_val_col %in% names(data))) {
stop(paste("Column", p_val_col, "not found in the data frame."))
}
if (!(log_fc_col %in% names(data))) {
stop(paste("Column", log_fc_col, "not found in the data frame."))
}
# Filter the data based on the specified thresholds for p-values and log fold change
filtered_data <- data %>%
dplyr::filter(!!rlang::sym(p_val_col) < p_val_threshold) %>%
dplyr::filter(abs(!!rlang::sym(log_fc_col)) > log_fc_threshold)
# Return the filtered data frame
return(filtered_data)
}
================================================
FILE: R/FourDegsVenn.R
================================================
#' Function to Filter Differentially Expressed Genes (DEGs)
#'
#' This function filters out genes based on their expression change status.
#' It returns the names of genes which are not "stable".
#'
#' @param df A data frame containing gene expression data.
#' @return A vector of gene names that are differentially expressed.
#' @examples
#' DEG_deseq2_file <- system.file("extdata", "DEG_deseq2.rds", package = "TransProR")
#' DEG_deseq2 <- readRDS(DEG_deseq2_file)
#' DEG_deseq2_test <- deg_filter(DEG_deseq2)
#' @export
deg_filter <- function(df){
# Selecting gene names where change is not "stable"
rownames(df)[df$change != "stable"]
}
#' Function to Create a Venn Diagram of DEGs
#'
#' This function creates a Venn Diagram using the ggVennDiagram package.
#' It allows customization of various aesthetic elements of the diagram.
#' @importFrom ggplot2 alpha scale_fill_gradient scale_color_manual scale_x_continuous expansion
#' @importFrom ggVennDiagram ggVennDiagram
#' @param degs_list A list of DEG sets for Venn Diagram creation.
#' @return A ggplot object representing the Venn Diagram.
#' @examples
#' data("all_degs_venn", package = "TransProR")
#' four_degs_venn <- four_degs_venn(all_degs_venn)
#' @export
four_degs_venn <- function(degs_list){
# Defining edge colors and alpha transparency for the Venn Diagram
edge_colors <- c(ggplot2::alpha("#1b64bb", 0.5), ggplot2::alpha("#13828e", 0.5),
ggplot2::alpha("#337c3a", 0.5), ggplot2::alpha("#9e9d39", 0.5))
edge_color <- ggplot2::alpha(c("#1b64bb","#13828e","#337c3a","#9e9d39"), 0.8)
# Creating a Venn Diagram using 'ggVennDiagram'
ggVennDiagram::ggVennDiagram(
degs_list,
set_size = 5, # Font size for group names
set_color = edge_color, # Color for group names
label_alpha= 0, # Transparency of background box for labels
label_size = 4, # Font size for labels
edge_size = 3 # Thickness of edges
) +
ggplot2::scale_fill_gradient(low="#e1f2f1", high = "#11786b") + # Gradient fill based on values
ggplot2::scale_color_manual(values = edge_colors) + # Manually set edge colors
ggplot2::scale_x_continuous(expand = ggplot2::expansion(mult = .3)) # Adjusting x-axis scaling
}
================================================
FILE: R/GatherGraphEdge.R
================================================
#' Gather graph edge from data frame
#' Please note that this function is from the 'ggraph' package and has not been altered in functionality,
#' but it has been optimized and iterated.
#' It is not original content of 'TransProR'.
#' However, since 'ggraph' caused frequent GitHub Action errors during the creation of 'TransProR',
#' the author directly referenced the involved functions in 'TransProR'.
#' This is not the author's original creation. All users please be aware!
#' @param df A data frame
#' @param index A vector of column names to group by
#' @param root Root name for the root node connections, optional
#' @return A tibble of graph edges
#' @export
#' @importFrom dplyr mutate select group_by summarise bind_rows all_of across
#' @importFrom tidyr unite
#' @importFrom tibble as_tibble
#' @name gather_graph_edge
gather_graph_edge <- function(df, index = NULL, root = NULL) {
if (length(index) < 2) {
stop("Please specify at least two index columns.")
}
prepare_edge <- function(data, from, to, sep = "/") {
data %>%
tidyr::unite("from", dplyr::all_of(from), sep = sep, remove = FALSE) %>%
tidyr::unite("to", dplyr::all_of(to), sep = sep) %>%
dplyr::select(.data$from, .data$to) %>%
dplyr::mutate(dplyr::across(c("from", "to"), as.character))
}
edges <- lapply(seq_along(index)[-1], function(i) {
prepare_edge(df, index[1:(i - 1)], index[1:i])
})
edges <- dplyr::bind_rows(edges)
edges <- tibble::as_tibble(edges)
if (!is.null(root)) {
root_edges <- df %>%
dplyr::group_by(.data[[index[1]]]) %>%
dplyr::summarise(count = dplyr::n(), .groups = 'drop') %>%
dplyr::mutate(from = root, to = as.character(.data[[index[1]]])) %>%
dplyr::select(.data$from, .data$to)
edges <- dplyr::bind_rows(root_edges, edges)
}
return(edges)
}
================================================
FILE: R/GatherGraphNode.R
================================================
#' Gather graph nodes from a data frame
#' Please note that this function is from the 'ggraph' package and has not been altered in functionality,
#' but it has been optimized and iterated.
#' It is not original content of 'TransProR'.
#' However, since 'ggraph' caused frequent GitHub Action errors during the creation of 'TransProR',
#' the author directly referenced the involved functions in 'TransProR'.
#' This is not the author's original creation. All users please be aware!
#' @inheritParams gather_graph_edge
#' @param value Column name used for summarizing node size, defaults to the last column
#' @return a tibble of graph nodes
#' @export
#' @importFrom dplyr group_by summarise mutate bind_rows n all_of across
#' @importFrom tidyr unite
#' @importFrom tibble as_tibble
#' @importFrom utils tail
gather_graph_node <- function(df, index = NULL, value = utils::tail(colnames(df), 1), root = NULL) {
if (length(index) < 2) {
stop("Please specify at least two index columns.")
} else {
nodes_list <- lapply(seq_along(index), function(i) {
dots <- index[1:i]
df %>%
dplyr::group_by(dplyr::across(dplyr::all_of(dots))) %>%
dplyr::summarise(node.size = sum(.data[[value]], na.rm = TRUE),
node.level = index[i],
node.count = dplyr::n(), .groups = 'drop') %>%
dplyr::mutate(node.short_name = as.character(.data[[dots[length(dots)]]]),
node.branch = as.character(.data[[dots[1]]])) %>%
tidyr::unite(col = "node.name", all_of(dots), sep = "/")
})
nodes <- dplyr::bind_rows(nodes_list) %>%
tibble::as_tibble()
nodes$node.level <- factor(nodes$node.level, levels = index)
if (!is.null(root)) {
root_data <- data.frame(
node.name = root,
node.size = sum(df[[value]], na.rm = TRUE),
node.level = root,
node.count = 1,
node.short_name = root,
node.branch = root,
stringsAsFactors = FALSE
)
nodes <- dplyr::bind_rows(root_data, nodes)
nodes$node.level <- factor(nodes$node.level, levels = c(root, index))
}
return(nodes)
}
}
================================================
FILE: R/GeneColor.R
================================================
#' Merge Genes with Color Information Based on Up/Down Regulation
#'
#' This function merges selected genes with differential expression data and adds a color column based on up/down regulation.
#'
#' @param selected_genes A data frame containing selected genes with a column named "Symble".
#' @param DEG_deseq2 A data frame containing differential expression data with a column named "Symble" and a column named "change" indicating up/down regulation.
#' @param up_color The color to assign to genes with up-regulated expression.
#' @param down_color The color to assign to genes with down-regulated expression.
#' @return A data frame containing merged genes with an additional color column.
#' @export
#'
#' @examples
#' selected_genes_deseq2_file <- system.file("extdata",
#' "selected_genes_deseq2.rds",
#' package = "TransProR")
#' selected_genes_deseq2 <- readRDS(selected_genes_deseq2_file)
#' Diff_deseq2_file <- system.file("extdata", "Diff_deseq2.rds", package = "TransProR")
#' Diff_deseq2 <- readRDS(Diff_deseq2_file)
#'
#' result_deseq2 <- gene_color(selected_genes_deseq2, Diff_deseq2, "#0000EE", "#fc4746")
gene_color <- function(selected_genes, DEG_deseq2, up_color, down_color) {
# Ensure that selected_genes has a column named "Symble"
if (!"Symble" %in% colnames(selected_genes)) {
stop("selected_genes data frame must have a 'Symble' column.")
}
# Ensure that the row names column of DEG_deseq2 is correctly set to "Symble"
DEG_deseq2$Symble <- rownames(DEG_deseq2)
# Merge data frames: add the 'change' column from DEG_deseq2 to the selected_genes data frame
merged_genes <- merge(selected_genes, DEG_deseq2[, c("Symble", "change")], by = "Symble", all.x = TRUE)
# Add color column based on the 'change' column values
merged_genes$color <- ifelse(merged_genes$change == "up", up_color,
ifelse(merged_genes$change == "down", down_color, NA))
# Ensure that the length of the color column matches the number of rows in merged_genes
if (length(merged_genes$color) != nrow(merged_genes)) {
stop("Color assignment failed due to unexpected values in 'change' column.")
}
return(merged_genes)
}
================================================
FILE: R/GeneHighlights.R
================================================
#' Add gene highlights to a ggtree object
#'
#' This function enhances a `ggtree` plot by adding highlights for specific genes. It adds both a semi-transparent fan-shaped
#' highlight and a point at the node corresponding to each gene. Colors for each gene can be customized.
#'
#' @param ggtree_obj A ggtree object to which the highlights will be added.
#' @param genes_to_highlight A data frame containing genes and their corresponding colors.
#' @param hilight_extend Integer, the extension of the highlight fan in degrees.
#' @return A ggtree object with added gene highlights.
#' @importFrom ggtree geom_hilight geom_point2
#' @importFrom dplyr filter select pull
#' @examples
#' data("gtree", package = "TransProR")
#'
#' # Define genes and their colors
#' genes_df <- data.frame(Symble = c("t5", "t9"),
#' color = c("#FF0000", "#0000FF"))
#'
#' # Add highlights
#' gtree <- gene_highlights(gtree, genes_to_highlight = genes_df)
#'
#' @export
gene_highlights <- function(ggtree_obj, genes_to_highlight, hilight_extend = 18) {
# Ensure the input is a `ggtree` object
if (!inherits(ggtree_obj, "ggtree")) {
stop("The first argument must be a ggtree object.")
}
# Ensure the second argument is a data frame
if (!("data.frame" %in% class(genes_to_highlight))) {
stop("The second argument must be a data frame.")
}
if (!requireNamespace("systemfonts", quietly = TRUE)) {
stop("ggplot2 is required to use the function. Please install it.", call. = FALSE)
}
# Extract tree data and ensure it is in a data frame format
tree_data <- as.data.frame(ggtree_obj$data)
# Map gene names and colors to the tree nodes and create geom_hilight and geom_point2 layers for each node
highlight_commands <- lapply(1:nrow(genes_to_highlight), function(i) {
gene <- genes_to_highlight$Symble[i]
color <- genes_to_highlight$color[i]
node <- dplyr::filter(tree_data, .data$label == gene) %>%
dplyr::select(node) %>%
dplyr::pull()
if (!is.na(node)) {
list(
ggtree::geom_hilight(node=node, fill=color, alpha = .3, extend = hilight_extend),
ggtree::geom_point2(ggtree::aes(subset = (.data$label == gene)), color=color, size=2, alpha=0.6)
)
} else {
warning(paste("Gene", gene, "not found in the ggtree object."))
NULL
}
})
# Remove NULL elements from the list of commands
highlight_commands <- Filter(Negate(is.null), highlight_commands)
# Flatten the list of commands and apply them to the `ggtree` object
ggtree_obj <- ggtree_obj + do.call(c, highlight_commands)
return(ggtree_obj)
}
================================================
FILE: R/GeneMapPathway.R
================================================
#' Create Pathway-Gene Mapping Data Frame
#'
#' This function takes multiple data frames and pathway IDs, merging them into a new data frame.
#' Each data frame represents a type of analysis (e.g., BP, KEGG, MF, etc.).
#' @param BP_dataframe Data frame for Biological Process analysis
#' @param BP_ids Selected pathway IDs for Biological Process analysis
#' @param KEGG_dataframe Data frame for KEGG analysis
#' @param KEGG_ids Selected pathway IDs for KEGG analysis
#' @param MF_dataframe Data frame for Molecular Function analysis
#' @param MF_ids Selected pathway IDs for Molecular Function analysis
#' @param REACTOME_dataframe Data frame for REACTOME analysis
#' @param REACTOME_ids Selected pathway IDs for REACTOME analysis
#' @param CC_dataframe Data frame for Cellular Component analysis
#' @param CC_ids Selected pathway IDs for Cellular Component analysis
#' @param DO_dataframe Data frame for Disease Ontology analysis
#' @param DO_ids Selected pathway IDs for Disease Ontology analysis
#' @return A new data frame that includes pathways, gene, type, and value columns
#' @export
#' @examples
#' # Simulating data for different analysis types
#'
#' # Simulate Biological Process (BP) data frame
#' BP_df <- data.frame(
#' ID = c("GO:0002376", "GO:0019724"),
#' geneID = c("GENE1/GENE2", "GENE3/GENE4"),
#' Description = c("Immune response", "Glycosylation process")
#' )
#'
#' # Simulate KEGG data frame
#' KEGG_df <- data.frame(
#' ID = c("12345", "67890"),
#' geneID = c("GENE5/GENE6", "GENE7/GENE8"),
#' Description = c("Pathway 1", "Pathway 2")
#' )
#'
#' # Simulate Molecular Function (MF) data frame
#' MF_df <- data.frame(
#' ID = c("ABC123", "DEF456"),
#' geneID = c("GENE9/GENE10", "GENE11/GENE12"),
#' Description = c("Molecular function A", "Molecular function B")
#' )
#'
#' # Simulate REACTOME data frame
#' REACTOME_df <- data.frame(
#' ID = c("R-HSA-12345", "R-HSA-67890"),
#' geneID = c("GENE13/GENE14", "GENE15/GENE16"),
#' Description = c("Pathway in Reactome 1", "Pathway in Reactome 2")
#' )
#'
#' # Simulate Cellular Component (CC) data frame
#' CC_df <- data.frame(
#' ID = c("GO:0005575", "GO:0005634"),
#' geneID = c("GENE17/GENE18", "GENE19/GENE20"),
#' Description = c("Cellular component A", "Cellular component B")
#' )
#'
#' # Simulate Disease Ontology (DO) data frame
#' DO_df <- data.frame(
#' ID = c("DOID:123", "DOID:456"),
#' geneID = c("GENE21/GENE22", "GENE23/GENE24"),
#' Description = c("Disease A", "Disease B")
#' )
#'
#' # Example pathway IDs for each analysis
#' BP_ids <- c("GO:0002376", "GO:0019724")
#' KEGG_ids <- c("12345", "67890")
#' MF_ids <- c("ABC123", "DEF456")
#' REACTOME_ids <- c("R-HSA-12345", "R-HSA-67890")
#' CC_ids <- c("GO:0005575", "GO:0005634")
#' DO_ids <- c("DOID:123", "DOID:456")
#'
#' # Generate the pathway-gene map using the gene_map_pathway function
#' pathway_gene_map <- gene_map_pathway(
#' BP_dataframe = BP_df, BP_ids = BP_ids,
#' KEGG_dataframe = KEGG_df, KEGG_ids = KEGG_ids,
#' MF_dataframe = MF_df, MF_ids = MF_ids,
#' REACTOME_dataframe = REACTOME_df, REACTOME_ids = REACTOME_ids,
#' CC_dataframe = CC_df, CC_ids = CC_ids,
#' DO_dataframe = DO_df, DO_ids = DO_ids
#' )
#'
#' # Display the resulting pathway-gene mapping data frame
#' print(pathway_gene_map)
#'
gene_map_pathway <- function(BP_dataframe, BP_ids, KEGG_dataframe, KEGG_ids, MF_dataframe, MF_ids, REACTOME_dataframe, REACTOME_ids, CC_dataframe, CC_ids, DO_dataframe, DO_ids) {
# Create an empty data frame
pathway_gene_map <- data.frame(
pathway_description = character(),
gene4 = character(),
type = character(),
value = integer()
)
# Helper function to extract information from a data frame and add it to the new data frame
add_to_map <- function(df, ids, type) {
if (is.null(df) || is.null(ids)) return()
selected_rows <- df[df$ID %in% ids, ]
for (row in seq(nrow(selected_rows))) {
genes <- strsplit(as.character(selected_rows$geneID[row]), "/")[[1]]
# Here, instead of the ID, we use the Description column
description <- as.character(selected_rows$Description[row])
for (gene in genes) {
pathway_gene_map <<- rbind(pathway_gene_map, data.frame(
pathway = description,
gene = gene,
type = type,
value = 1
))
}
}
}
# Apply the helper function to add data for each type of analysis
add_to_map(BP_dataframe, BP_ids, "BP")
add_to_map(KEGG_dataframe, KEGG_ids, "KEGG")
add_to_map(MF_dataframe, MF_ids, "MF")
add_to_map(REACTOME_dataframe, REACTOME_ids, "REACTOME")
add_to_map(CC_dataframe, CC_ids, "CC")
add_to_map(DO_dataframe, DO_ids, "DO")
return(pathway_gene_map)
}
================================================
FILE: R/GetGtexExp.R
================================================
#' Get GTEx Expression Data for Specific Organ
#'
#' This function retrieves gene expression data from the GTEx project that is specific to a certain organ.
#' It performs various checks and processing steps to ensure that the data is consistent and relevant to the
#' specified organ. The filtered and cleaned data is saved as an RDS file for further analysis.
#'
#' @param organ_specific A character string specifying the organ to filter the gene expression data by.
#' @param file_path A character string specifying the path to the GTEx gene expression data file.
#' @param probe_map_path A character string specifying the path to the gtex_probeMap_gencode data file.
#' @param pheno_path A character string specifying the path to the GTEx phenotype data file.
#' @param output_path A character string specifying the path where the output RDS file will be saved.
#'
#' @details The function begins by checking if the gene expression and phenotype data files exist at
#' the specified paths. It then loads these data files and processes them by setting appropriate row names,
#' modifying column names for clarity, and filtering samples based on the specified organ. The function ensures
#' that only samples present in both datasets are retained for consistency. It also removes any duplicate gene
#' entries to prevent redundancy. Finally, the processed data is saved as an RDS file.
#'
#' @return A data frame containing gene expression data for the specified organ.
#' Rows represent genes, and columns represent samples. Note that this function also saves the
#' organ-specific GTEx data as an RDS file at the specified output path.
#'
#' @note The function will stop and throw an error if the input files do not exist, or if no samples are found
#' for the specified organ.
#'
#' @note CRITICAL: The 'output_path' parameter must end with '.rds' to be properly recognized by the function. It is also highly recommended
#' that the path includes specific identifiers related to the target samples. Please structure the 'output_path' following this pattern: './your_directory/your_sample_type.gtex.rds'.
#'
#' @importFrom utils read.table
#' @importFrom dplyr distinct filter
#' @importFrom rlang .data
#' @export
get_gtex_exp <- function(organ_specific,
file_path,
probe_map_path,
pheno_path,
output_path) {
# Check for the existence of the file paths
if (!file.exists(file_path) | !file.exists(pheno_path) | !file.exists(probe_map_path)) {
stop("One or more of the input files do not exist.")
}
# Load the gene expression, probe map, and phenotype data files from the provided paths
# gtex.exp <- data.table::fread(file_path, header = TRUE, sep = '\t', data.table = FALSE)
# gtex.pro <- data.table::fread(probe_map_path, header = TRUE, sep = '\t', data.table = FALSE)
# gtex.phe <- data.table::fread(pheno_path, header = TRUE, sep = '\t', data.table = FALSE)
# Load the gene expression, probe map, and phenotype data files
gtex.exp <- utils::read.table(file_path,
header = TRUE,
sep = '\t',
stringsAsFactors = FALSE,
check.names = FALSE)
gtex.pro <- utils::read.table(probe_map_path,
header = TRUE,
sep = '\t',
stringsAsFactors = FALSE,
check.names = FALSE)
gtex.phe <- utils::read.table(pheno_path,
header = TRUE,
sep = '\t',
stringsAsFactors = FALSE,
check.names = FALSE)
# Merge the probe map with the expression data
gtex.pro <- gtex.pro[, c(1,2)] # Assuming the columns of interest are the first two
gtex.count.pro <- merge(gtex.pro, gtex.exp, by.x = "id", by.y = "sample")
# Set the row names for the samples, facilitating subsequent operations
rownames(gtex.phe) <- gtex.phe$Sample
# Modify column names to be more intuitive
colnames(gtex.phe) <- c("Sample", "body_site_detail (SMTSD)", "primary_site", "gender", "patient", "cohort")
# Filter samples based on the specified organ
specific_samples <- dplyr::filter(gtex.phe, .data$primary_site == organ_specific)
# If no corresponding samples are found, halt the function with an error message
if (nrow(specific_samples) == 0) {
stop("No samples found for the specified organ.")
}
# Print the number of samples found for the specified organ
message("Number of samples for", organ_specific, ":", nrow(specific_samples), "\n")
# Ensure processing only for samples present in both expression and phenotype data through intersection
valid_sample_names <- intersect(rownames(specific_samples), colnames(gtex.count.pro)) # merge_phe_count_gtex
gtex_data <- gtex.count.pro[, c("gene", valid_sample_names)] # Extract data for relevant samples
# Remove duplicate gene entries and set row names as gene names
gtex_data <- dplyr::distinct(gtex_data, .data$gene, .keep_all = TRUE)
rownames(gtex_data) <- gtex_data$gene
gtex_data <- gtex_data[, -1] # Remove the 'gene' column, keeping only expression data
# Save the results as an RDS file for future data analysis tasks
saveRDS(gtex_data, output_path)
return(gtex_data)
}
================================================
FILE: R/GetTcgaExp.R
================================================
#' TCGA Expression Data Processing
#'
#' This function processes expression data and phenotype information, separates tumor and normal samples,
#' and saves the results into different files. It's specifically designed for data obtained from TCGA.
#'
#' @param counts_file_path File path to the counts data (usually in the form of a large matrix with gene expression data).
#' @param gene_probes_file_path File path containing the gene probes data.
#' @param phenotype_file_path File path to the phenotype data, which includes various sample attributes.
#' @param output_file_path Path where the output files, distinguished between tumor and normal, will be saved.
#'
#' @return A list containing matrices for tumor and normal expression data.
#'
#' @note IMPORTANT: This function assumes that the input files follow a specific format and structure, typically found in TCGA data releases.
#' Users should verify their data's compatibility. Additionally, the function does not perform error checking on the data's content,
#' which users should handle through proper preprocessing.
#'
#' @note CRITICAL: The 'output_file_path' parameter must end with '.rds' to be properly recognized by the function. It is also highly recommended
#' that the path includes specific identifiers related to the target samples, as the function will create further subdivisions in the specified
#' path for tumor or normal tissues. Please structure the 'output_file_path' following this pattern: './your_directory/your_sample_type.exp.rds'.
#'
#' @importFrom dplyr distinct filter
#' @importFrom utils read.table
#' @importFrom rlang .data
#' @export
#' @author Dongyue Yu
#'
#' @examples
#' counts_file <- system.file("extdata", "TCGA-SKCM.htseq_counts_test.tsv", package = "TransProR")
#' gene_probes_file <- system.file("extdata",
#' "TCGA_gencode.v22.annotation.gene.probeMap_test",
#' package = "TransProR")
#' phenotype_file <- system.file("extdata", "TCGA-SKCM.GDC_phenotype_test.tsv", package = "TransProR")
#' ouput_file <- file.path(tempdir(), "SKCM_Skin_TCGA_exp_test.rds")
#'
#' SKCM_exp <- get_tcga_exp(
#' counts_file_path = counts_file,
#' gene_probes_file_path = gene_probes_file,
#' phenotype_file_path = phenotype_file,
#' output_file_path = ouput_file
#' )
#' head(SKCM_exp[["tumor_tcga_data"]])[1:5, 1:5]
#' head(SKCM_exp[["normal_tcga_data"]], n = 10) # Because there is only one column.
get_tcga_exp <- function(counts_file_path,
gene_probes_file_path,
phenotype_file_path,
output_file_path) {
# Load expression matrix
# count_data <- data.table::fread(counts_file_path, header = TRUE, sep = '\t', data.table = FALSE)
count_data <- utils::read.table(counts_file_path,
header = TRUE,
sep = '\t',
stringsAsFactors = FALSE,
check.names = FALSE)
# Load gene ID conversion information
# gene_probes <- data.table::fread(gene_probes_file_path, header = TRUE, sep = '\t', data.table = FALSE)
gene_probes <- utils::read.table(gene_probes_file_path,
header = TRUE,
sep = '\t',
stringsAsFactors = FALSE,
check.names = FALSE)
# Keep only necessary columns
gene_probes <- gene_probes[, c(1, 2)]
# Merge gene ID information with expression matrix
count_probes_merged <- merge(gene_probes, count_data, by.x = "id", by.y = "Ensembl_ID")
# Remove duplicates
count_data_unique <- dplyr::distinct(count_probes_merged, .data$gene, .keep_all = TRUE)
# Set gene names as row names
rownames(count_data_unique) <- count_data_unique$gene
count_data_final <- count_data_unique[, -c(1,2)] # Remove extra columns
# Load clinical information
# phenotype_data <- data.table::fread(phenotype_file_path, header = TRUE, sep = '\t', data.table = FALSE)
# Load clinical information with proper column types
phenotype_data <- utils::read.table(phenotype_file_path,
header = TRUE,
sep = '\t',
stringsAsFactors = FALSE,
check.names = FALSE,
colClasses = list(
withdrawn = "logical",
releasable.project = "logical",
is_ffpe.samples = "logical",
oct_embedded.samples = "logical"
))
rownames(phenotype_data) <- phenotype_data$submitter_id.samples # Set sample names as row names
# Check first if there are any "Metastatic", "Primary Tumor", or "normal" samples in your data
table(phenotype_data$sample_type.samples)
# Create a data frame for tumor samples
tumor_samples <- phenotype_data %>%
dplyr::filter(grepl("Metastatic|Primary Tumor", .data$sample_type.samples, ignore.case = TRUE))
# Check for 'Primary Tumor' or 'Metastatic' samples
if (nrow(tumor_samples) == 0) {
message("No 'Primary Tumor' or 'Metastatic' samples found.\n")
} else {
message("Number of 'Primary Tumor' or 'Metastatic' samples: ", nrow(tumor_samples), "\n")
}
# Create a data frame for normal samples
normal_samples <- phenotype_data %>%
dplyr::filter(grepl("normal", .data$sample_type.samples, ignore.case = TRUE))
if (nrow(normal_samples) == 0) {
message("No 'normal' samples found.\n")
} else {
message("Number of 'normal' samples:", nrow(normal_samples), "\n")
}
# Get the intersection of clinical information and expression matrix
tumor_common_samples <- intersect(rownames(tumor_samples), colnames(count_data_final))
# Extract corresponding expression matrix
tumor_expression_data <- count_data_final[, tumor_common_samples]
# Get the intersection of clinical information and expression matrix
normal_common_samples <- intersect(rownames(normal_samples), colnames(count_data_final))
# Extract corresponding expression matrix
normal_expression_data <- count_data_final[, normal_common_samples, drop = FALSE]
# Save results
tumor_output_path <- gsub("\\.rds$", "_tumor.rds", output_file_path)
normal_output_path <- gsub("\\.rds$", "_normal.rds", output_file_path)
saveRDS(tumor_expression_data, file = tumor_output_path)
saveRDS(normal_expression_data, file = normal_output_path)
result <- list(
tumor_tcga_data = tumor_expression_data,
normal_tcga_data = normal_expression_data
)
return(result)
}
================================================
FILE: R/HighlightByNode.R
================================================
#' Highlight Nodes in a Phylogenetic Tree with Custom Fill Colors and Transparency
#'
#' This function adds highlights to specific nodes in a phylogenetic tree represented by a `ggtree` object.
#' Users can specify the nodes to highlight along with custom fill colors, transparency, and extension options.
#'
#' @importFrom ggtree geom_hilight
#' @param ggtree_object A `ggtree` object representing the phylogenetic tree.
#' @param nodes A character vector specifying the nodes to highlight.
#' @param fill_colors A character vector specifying the fill colors for the highlighted nodes.
#' @param alpha_values A numeric vector specifying the transparency values for the highlighted nodes (between 0 and 1).
#' @param extend_values A logical vector specifying whether to extend the highlight to the whole clade below each node.
#' @return A modified `ggtree` object with the specified nodes highlighted.
#' @export
#'
#' @examples
#' plot_file <- system.file("extdata", "tree_plot.rds", package = "TransProR")
#' p2_plot <- readRDS(plot_file)
#'
#' # Please replace the following vectors with your specific values
#' nodes <- c(117, 129, 125, 127, 119,
#' 123, 139, 166, 124, 131, 217) # x-values of the nodes you want to highlight
#' fill_colors <- c("#CD6600", "#CD6600", "#CD6600",
#' "#CD6600", "#009933", "#009933",
#' "#009933", "#009933", "#9B30FF",
#' "#9B30FF", "#9B30FF") # Fill colors
#' alpha_values <- c(0.3, 0.3, 0.3, 0.3, 0.2, 0.3,
#' 0.3, 0.3, 0.3, 0.3, 0.3) # Transparency values
#' extend_values <- c(25, 24, 24, 25, 25, 25,
#' 24, 24, 25, 24, 24) # Values for the 'extend' parameter
#'
#' p2 <- highlight_by_node(
#' p2_plot,
#' nodes,
#' fill_colors,
#' alpha_values,
#' extend_values
#' )
highlight_by_node <- function(ggtree_object,
nodes,
fill_colors,
alpha_values,
extend_values) {
# Ensure that the lengths of `nodes`, `fill_colors`, `alpha_values`, and `extend_values` are consistent
if (!(length(nodes) == length(fill_colors) && length(nodes) == length(alpha_values) && length(nodes) == length(extend_values))) {
stop("Length of nodes, fill_colors, alpha_values, and extend_values must be the same.")
}
if (!requireNamespace("systemfonts", quietly = TRUE)) {
stop("ggplot2 is required to use the function. Please install it.", call. = FALSE)
}
# For each node, add the corresponding geom_hilight layer
layers <- lapply(seq_along(nodes), function(i) {
node <- nodes[i]
fill_color <- fill_colors[i]
alpha_val <- alpha_values[i]
extend_val <- extend_values[i]
# Create a geom_hilight layer
ggtree::geom_hilight(node = node, fill = fill_color, alpha = alpha_val, extend = extend_val)
})
# Add all layers to the `ggtree` object
for (layer in layers) {
ggtree_object <- ggtree_object + layer
}
return(ggtree_object)
}
================================================
FILE: R/HighlightGenes.R
================================================
#' Add Highlights for Genes on a Phylogenetic Tree
#'
#' This function adds highlights for specified genes on a phylogenetic tree object.
#'
#' @importFrom ggtree geom_point2 geom_hilight aes
#' @importFrom dplyr filter select pull
#' @importFrom rlang .data
#' @param ggtree_obj A ggtree object representing the phylogenetic tree.
#' @param genes_to_highlight A data frame containing gene names and corresponding colors to highlight.
#' @param hilight_extend Numeric value indicating the extension length for highlights.
#' @return A `ggtree` object with added highlights for specified genes.
#' @export
#'
#' @examples
#' plot_file <- system.file("extdata", "tree_plot.rds", package = "TransProR")
#' p2_plot <- readRDS(plot_file)
#'
#' selected_genes_deseq2_file <- system.file("extdata",
#' "selected_genes_deseq2.rds",
#' package = "TransProR")
#' selected_genes_deseq2 <- readRDS(selected_genes_deseq2_file)
#'
#' Diff_deseq2_file <- system.file("extdata", "Diff_deseq2.rds", package = "TransProR")
#' Diff_deseq2 <- readRDS(Diff_deseq2_file)
#'
#' result_deseq2 <- gene_color(selected_genes_deseq2, Diff_deseq2, "#0000EE", "#fc4746")
#'
#' add_gene_highlights_p3 <- highlight_genes(p2_plot, result_deseq2, hilight_extend = 26)
highlight_genes <- function(ggtree_obj, genes_to_highlight, hilight_extend = 18) {
# Ensure that the first argument is a `ggtree` object
if (!inherits(ggtree_obj, "ggtree")) {
stop("The first argument must be a ggtree object.")
}
# Ensure that the second argument is a data frame
if (!("data.frame" %in% class(genes_to_highlight))) {
stop("The second argument must be a data frame.")
}
if (!requireNamespace("systemfonts", quietly = TRUE)) {
stop("ggplot2 is required to use the function. Please install it.", call. = FALSE)
}
# Extract the data from the tree object and ensure it is a data frame
tree_data <- as.data.frame(ggtree_obj$data)
# Map gene names and colors to nodes in the tree and create geom_hilight and geom_point2 layers for each node
highlight_commands <- lapply(1:nrow(genes_to_highlight), function(i) {
gene <- genes_to_highlight$Symble[i]
color <- genes_to_highlight$color[i]
node <- dplyr::filter(tree_data, .data$label == gene) %>%
dplyr::select(node) %>%
dplyr::pull()
if (!is.na(node)) {
list(
ggtree::geom_hilight(node = node, fill = color, alpha = 0.3, extend = hilight_extend),
ggtree::geom_point2(ggtree::aes(subset = (.data$label == gene)), color = color, size = 2, alpha = 0.6)
)
} else {
warning(paste("Gene", gene, "not found in the ggtree object."))
NULL
}
})
# Remove NULL elements from `highlight_commands` since they may exist
highlight_commands <- Filter(Negate(is.null), highlight_commands)
# Apply the commands to the `ggtree object`
ggtree_obj <- ggtree_obj + do.call(c, highlight_commands)
return(ggtree_obj)
}
================================================
FILE: R/LimmaAnalyze.R
================================================
#' Differential Gene Expression Analysis using limma and voom
#'
#' This function performs differential gene expression analysis using the 'limma' package with voom normalization.
#' It reads tumor and normal expression data, merges them, filters low-expressed genes,
#' normalizes the data, performs limma analysis, and outputs the results along with information
#' on gene expression changes.
#'
#' @importFrom limma lmFit contrasts.fit eBayes topTable
#' @importFrom edgeR DGEList filterByExpr calcNormFactors
#' @importFrom dplyr mutate
#' @importFrom stats na.omit
#' @param tumor_file Path to the tumor data file (RDS format).
#' @param normal_file Path to the normal data file (RDS format).
#' @param output_file Path to save the output DEG data (RDS format).
#' @param logFC_threshold Threshold for log fold change for marking up/down-regulated genes.
#' @param p_value_threshold Threshold for p-value for filtering significant genes.
#' @return A data frame of differential expression results.
#' @references
#' limma:Linear Models for Microarray and RNA-Seq Data User’s Guide.
#' For more information, visit the page:
#' https://www.bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf
#' @export
#'
#' @examples
#' # Define file paths for tumor and normal data from the data folder
#' tumor_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds",
#' package = "TransProR")
#' normal_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds",
#' package = "TransProR")
#' output_file <- file.path(tempdir(), "DEG_limma_voom.rds")
#'
#' DEG_limma_voom <- limma_analyze(
#' tumor_file = tumor_file,
#' normal_file = normal_file,
#' output_file = output_file,
#' logFC_threshold = 2.5,
#' p_value_threshold = 0.01
#' )
#'
#' # View the top 5 rows of the result
#' head(DEG_limma_voom, 5)
limma_analyze <- function(tumor_file, normal_file, output_file, logFC_threshold = 2.5, p_value_threshold = 0.01) {
tumor <- readRDS(tumor_file)
normal <- readRDS(normal_file)
# Merge the datasets, ensuring both have genes as row names
all_count_exp <- merge(tumor, normal, by = "row.names")
all_count_exp <- tibble::column_to_rownames(all_count_exp, var = "Row.names")
# Define groups for tumor and normal samples
group <- c(rep('tumor', ncol(tumor)), rep('normal', ncol(normal)))
group <- factor(group, levels = c("normal", "tumor"))
group_table <- table(group)
message("Group Table:")
message(paste(names(group_table), group_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Create matrix
design <- model.matrix(~0 + factor(group))
colnames(design) <- levels(factor(group))
rownames(design) <- colnames(all_count_exp)
# Create DGEList object for gene expression data and group information
dge <- edgeR::DGEList(counts = all_count_exp, group = group)
# Filter lowly expressed genes
keep <- edgeR::filterByExpr(dge)
dge <- dge[keep, , keep.lib.sizes = FALSE]
# The first step (TMM) scales the raw counts to adjust for library size differences, while the second step (quantile normalization in voom) ensures that the overall distribution of gene expression values is consistent across samples.
# Normalize the data using the TMM method
dge <- edgeR::calcNormFactors(dge)
# Use voom method for normalization:Quantile Normalization
v <- limma::voom(dge, design, plot = FALSE, normalize = "quantile")
# Fit the linear model
fit <- limma::lmFit(v, design)
# Specify contrast
con <- paste(rev(levels(group)), collapse = "-")
# Create contrast matrix
cont.matrix <- limma::makeContrasts(contrasts = c(con), levels = design)
fit2 <- limma::contrasts.fit(fit, cont.matrix)
fit2 <- limma::eBayes(fit2)
# Get differential expression results
tempOutput <- limma::topTable(fit2, coef = con, n = Inf)
DEG_limma_voom <- stats::na.omit(tempOutput)
# Add 'change' column to mark up/down-regulated genes
k1 <- (DEG_limma_voom$P.Value < p_value_threshold) & (DEG_limma_voom$logFC < -logFC_threshold)
k2 <- (DEG_limma_voom$P.Value < p_value_threshold) & (DEG_limma_voom$logFC > logFC_threshold)
DEG_limma_voom <- dplyr::mutate(DEG_limma_voom, change = ifelse(k1, "down", ifelse(k2, "up", "stable")))
change_table <- table(DEG_limma_voom$change)
message("Change Table:")
message(paste(names(change_table), change_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Save results to the specified output file
#save(DEG_limma_voom, file = output_file)
saveRDS(DEG_limma_voom, file = output_file)
return(DEG_limma_voom)
}
================================================
FILE: R/LogTransform.R
================================================
#' Log transformation decision and application on data
#'
#' This function evaluates the need for a log transformation based on a set of criteria
#' and applies a log2 transformation if necessary.
#'
#' @author Dongyue Yu
#' @param data A numeric matrix or data frame.
#' @return The original data or the data transformed with log2.
#' @importFrom stats quantile
#' @export
#' @examples
#' file_path <- system.file("extdata",
#' "all_count_exp_test.csv",
#' package = "TransProR")
#' your_data <- read.csv(file_path,
#' row.names = 1) # Assuming first column is row names (e.g., gene names)
#'
#' TransformedData <- log_transform(data = your_data)
#'
log_transform <- function(data) {
# Calculate quantiles
qx <- as.numeric(quantile(data, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm=TRUE))
# Define conditions for log transformation
LogC <- (qx[5] > 100) ||
(qx[6]-qx[1] > 50 && qx[2] > 0) ||
(qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
# Apply log transformation based on conditions
if (LogC) {
# Apply +1 to all values before log2 transformation
result <- log2(data + 1)
message("log2 transform finished")
} else {
result <- data
message("log2 transform not needed")
}
return(result)
}
================================================
FILE: R/MergeDensityFoldchange.R
================================================
#' Create high-density region plot with optional points, density rugs, and contours
#'
#' This function creates a high-density region plot using hdr methods to
#' add density rug and filled contours. It also adds a regression line
#' and Pearson correlation label. Points can be added to the plot optionally.
#'
#' @param data Data frame containing variables for plotting.
#' @param x_var Name of the x-axis variable as a string.
#' @param y_var Name of the y-axis variable as a string.
#' @param group_var Name of the grouping variable for color mapping as a string.
#' @param palette Color palette for the plot as a character vector.
#' @param show_points Logical, if TRUE adds points to the plot.
#' @param point_size Size of the points, relevant if show_points is TRUE.
#' @param point_alpha Transparency level of the points, relevant if show_points is TRUE.
#' @param x_lim Numeric vector of length 2, giving the x-axis limits.
#' @param y_lim Numeric vector of length 2, giving the y-axis limits.
#' @param cor_method Method to calculate correlation ("pearson" or "spearman").
#' @param line_size Size of the smoothing line.
#' @param cor_label_pos Vector of length 2 indicating the position of the correlation label (x and y).
#' @return A ggplot object representing the high-density region plot.
#' @importFrom ggplot2 ggplot aes_string geom_point geom_smooth scale_fill_manual scale_color_manual scale_x_continuous scale_y_continuous theme element_rect margin
#' @importFrom hrbrthemes theme_ipsum
#' @importFrom grid unit
#' @importFrom ggdensity geom_hdr geom_hdr_rug
#' @importFrom ggpubr stat_cor
#' @examples
#' combined_df_file <- system.file("extdata", "combined_df.rds", package = "TransProR")
#' combined_df <- readRDS(combined_df_file)
#' pal1 = c("#3949ab","#1e88e5","#039be5","#00897b","#43a047","#7cb342")
#'
#' all_density_foldchange_name1 <- merge_density_foldchange(
#' data = combined_df,
#' x_var = "log2FoldChange_1",
#' y_var = "log2FoldChange_2",
#' group_var = "name",
#' palette = pal1,
#' show_points = FALSE,
#' point_size = 2.5,
#' point_alpha = 0.1,
#' x_lim = c(0, 20),
#' y_lim = c(0, 20),
#' cor_method = "pearson",
#' line_size = 1.6,
#' cor_label_pos = c("left", "top")
#' )
#'
#' @export
merge_density_foldchange <- function(data, x_var, y_var, group_var,
palette = c("#3949ab","#1e88e5","#039be5","#00897b","#43a047","#7cb342"),
show_points = FALSE, point_size = 2.5, point_alpha = 0.2,
x_lim = c(0, 20), y_lim = c(0, 20),
cor_method = "pearson", line_size = 1.6,
cor_label_pos = c("left", 0.97)) {
# Begin constructing the ggplot
plot <- ggplot2::ggplot(data, ggplot2::aes_string(x = x_var, y = y_var, fill = group_var))
# Optionally add points
if (show_points) {
plot <- plot + ggplot2::geom_point(ggplot2::aes_string(color = group_var), shape = 21,
size = point_size, alpha = point_alpha)
}
# Add density rug and contours
plot <- plot + ggdensity::geom_hdr_rug() + ggdensity::geom_hdr()
# Add regression line and correlation label
plot <- plot +
ggplot2::geom_smooth(ggplot2::aes_string(x = x_var, y = y_var, color = group_var),
method = 'lm', level = 0.95, size = line_size) +
ggpubr::stat_cor(ggplot2::aes_string(color = group_var), method = cor_method,
label.x.npc = cor_label_pos[1], label.y.npc = cor_label_pos[2])
# Customize scales and theme
plot <- plot +
ggplot2::scale_fill_manual(values = palette) +
ggplot2::scale_color_manual(values = palette) +
ggplot2::scale_x_continuous(limits = x_lim, expand = c(0, 0)) +
ggplot2::scale_y_continuous(limits = y_lim, expand = c(0, 0)) +
hrbrthemes::theme_ipsum() +
ggplot2::theme(plot.margin = ggplot2::margin(10, 10, 10, 10),
plot.background = ggplot2::element_rect(fill = "white", color = "white"),
panel.spacing = grid::unit(2, "mm"))
# Return the ggplot object
return(plot)
}
================================================
FILE: R/MergeGtexTcga.R
================================================
#' Merge gene expression data from GTEx and TCGA datasets
#'
#' This function merges gene expression data obtained from the GTEx (Genotype-Tissue Expression) and TCGA (The Cancer Genome Atlas) datasets.
#' It is assumed that both datasets are in '.rds' format and have genes as row names. The merged dataset is saved as an RDS file at the specified output path.
#'
#' @param gtex_data_path A string that specifies the file path to the GTEx data saved in RDS format.
#' @param tcga_exp_path A string that specifies the file path to the TCGA expression data saved in RDS format.
#' This should be a data.frame with rows as genes and columns as samples.
#' @param output_path A string that specifies the path where the merged dataset should be saved.
#' The file is saved in '.rds' format. The default path is "./merged_gtex_tcga_data.rds".
#'
#' @details It is assumed that both datasets are in '.rds' format and have genes as row names.
#'
#' @return A data frame where rows represent genes and columns represent samples.
#' The data frame contains expression values from both GTEx and TCGA datasets.
#' It saves the merged dataset to the path specified by 'output_path'.
#'
#' @examples
#' tumor_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds",
#' package = "TransProR")
#' Normal_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds",
#' package = "TransProR")
#' ouput_file <- file.path(tempdir(), "all_data.rds")
#'
#' all_data <- merge_gtex_tcga(gtex_data_path = tumor_file,
#' tcga_exp_path = Normal_file,
#' output_path = ouput_file)
#'
#' @note CRITICAL: The 'output_path' parameter must end with '.rds' to be properly recognized by the function. It is also highly recommended
#' that the path includes specific identifiers related to the target samples. Please structure the 'output_path' following this pattern: './your_directory/merged.your_sample_type.gtex.tcga.data.rds'.
#'
#' @importFrom tibble column_to_rownames
#' @export
merge_gtex_tcga <- function(gtex_data_path,
tcga_exp_path,
output_path = "./merged_gtex_tcga_data.rds") {
# Load the GTEx data
gtex_data <- readRDS(gtex_data_path)
message("Number of GTEx samples:", ncol(gtex_data), "\n")
# Load the TCGA data
tcga.exp <- readRDS(tcga_exp_path)
message("Number of TCGA samples:", ncol(tcga.exp), "\n")
# Merge the datasets, ensuring both have genes as row names
all_data <- merge(gtex_data, tcga.exp, by = "row.names")
all_data <- tibble::column_to_rownames(all_data, var = "Row.names") # Set the row names
message("Number of samples after merging:", ncol(all_data), "\n")
# Save the merged dataset
saveRDS(all_data, file = output_path)
return(all_data)
}
================================================
FILE: R/MergeIDPosition.R
================================================
#' Merge Data Frames by Common Row Names with Additional Columns
#'
#' This function merges a list of data frames based on common row names. It adds an 'id' column to track the row order and a 'point_position' column calculated based on the maximum 'Count' value across all data frames. It filters data frames to include only common rows, sorts rows by the length of the 'Description' in descending order, and then merges them by rows.
#'
#' @param df_list A list of data frames, each with a 'Description' and 'Count' column and set row names.
#' @return A single data frame merged from the list, with additional 'id' and 'point_position' columns.
#' @importFrom dplyr arrange
#' @examples
#' df1 <- data.frame(Description = c("DataA", "DataB"), Count = c(10, 20), row.names = c("R1", "R2"))
#' df2 <- data.frame(Description = c("DataC", "DataD"), Count = c(30, 40), row.names = c("R1", "R3"))
#' df_list <- list(df1, df2)
#' combined_df_test <- merge_id_position(df_list)
#'
#' @export
merge_id_position <- function(df_list) {
# Find common row names across all data frames
common_row_names <- Reduce(intersect, lapply(df_list, row.names))
# Get the maximum value of the 'Count' column across all data frames
max_count <- max(sapply(df_list, function(df) max(as.numeric(df$Count), na.rm = TRUE)))
# Calculate the smallest multiple of 10 greater than the maximum count
ceiling_max_count <- ceiling(max_count / 10) * 10
# Initialize a list to store processed data frames
processed_dfs <- list()
number_of_dfs <- length(df_list)
# Process each data frame
for (i in seq_along(df_list)) {
# Filter for common rows
filtered_df <- df_list[[i]][common_row_names, , drop = FALSE]
# Sort 'Description' column by character length in descending order
filtered_df <- filtered_df %>% dplyr::arrange(desc(nchar(.data$Description)))
# Add 'id' column
filtered_df$id <- seq_len(nrow(filtered_df))
# Calculate 'point_position' value
point_position_value <- ceiling_max_count * i / number_of_dfs
# Add 'point_position' column
filtered_df$point_position <- point_position_value
# Append processed data frame to the list
processed_dfs[[i]] <- filtered_df
}
# Check if any data frames are empty after filtering
if (any(sapply(processed_dfs, nrow) == 0)) {
stop("One or more dataframes have no rows after filtering for common row names.")
}
# Merge data frames by rows
combined_df <- do.call(rbind, processed_dfs)
# Remove original row names and set new row names as NULL
rownames(combined_df) <- NULL
return(combined_df)
}
================================================
FILE: R/MergeMethodColor.R
================================================
#' Merge Data Frames with Specific Method and Color Columns
#'
#' This function takes a list of data frames, a method name, and a list of colors.
#' It adds a 'method' column and a 'test_color' column to each data frame, then merges all data frames by rows.
#' It ensures that the color list length matches the list of data frames.
#'
#' @param df_list A list of data frames, each containing at least 'Description' and 'Count' columns.
#' @param method_name A string representing the method name to be added to each data frame.
#' @param color_list A list of colors corresponding to each data frame for the 'test_color' column.
#' @return A single data frame merged from the list, with each originally provided data frame now having a 'method' and a 'test_color' column.
#' @importFrom dplyr bind_rows
#' @examples
#' df1 <- data.frame(Description = c("A", "B"), Count = c(10, 20))
#' df2 <- data.frame(Description = c("C", "D"), Count = c(30, 40))
#' df_list <- list(df1, df2)
#' method_name <- "Method1"
#' color_list <- c("Red", "Blue")
#' combined_df_test <- merge_method_color(df_list, method_name, color_list)
#'
#' @export
merge_method_color <- function(df_list, method_name, color_list) {
# Validate the length of color list matches the length of data frame list
if (length(color_list) != length(df_list)) {
stop("The length of the color list must match the length of the data frame list")
}
# Initialize a list to store processed data frames
processed_dfs <- list()
# Iterate over all data frames
for (i in seq_along(df_list)) {
# Extract 'Description' and 'Count' columns
temp_df <- df_list[[i]][, c("Description", "Count"), drop = FALSE]
# Add 'method' column
temp_df$method <- method_name
# Add 'test_color' column using corresponding color
temp_df$test_color <- color_list[i]
# Append the processed data frame to the list
processed_dfs[[i]] <- temp_df
}
# Combine all data frames by rows using bind_rows from dplyr
combined_df <- dplyr::bind_rows(processed_dfs)
return(combined_df)
}
================================================
FILE: R/NewGgraph.R
================================================
#' Generate a graphical representation of pathway gene maps
#'
#' This function merges multiple gene-pathway related dataframes, processes them
#' for graph creation, and visualizes the relationships in a dendrogram layout using
#' the provided node and edge gathering functions from the 'ggraph' package.
#'
#' @param BP_dataframe Dataframe for Biological Process.
#' @param BP_ids IDs for Biological Process.
#' @param KEGG_dataframe Dataframe for KEGG pathways.
#' @param KEGG_ids IDs for KEGG pathways.
#' @param MF_dataframe Dataframe for Molecular Function.
#' @param MF_ids IDs for Molecular Function.
#' @param REACTOME_dataframe Dataframe for REACTOME pathways.
#' @param REACTOME_ids IDs for REACTOME pathways.
#' @param CC_dataframe Dataframe for Cellular Component.
#' @param CC_ids IDs for Cellular Component.
#' @param DO_dataframe Dataframe for Disease Ontology.
#' @param DO_ids IDs for Disease Ontology.
#' @importFrom tidygraph tbl_graph
#' @importFrom ggraph ggraph geom_edge_diagonal geom_node_point geom_node_text scale_edge_colour_brewer node_angle
#' @importFrom ggplot2 theme element_rect scale_size scale_color_brewer coord_cartesian
#' @return A 'ggraph' object representing the pathway gene map visualization.
#' @export
#'
new_ggraph <- function(BP_dataframe, BP_ids, KEGG_dataframe, KEGG_ids,
MF_dataframe, MF_ids, REACTOME_dataframe, REACTOME_ids,
CC_dataframe, CC_ids, DO_dataframe, DO_ids) {
new_dataframe <- gene_map_pathway(BP_dataframe, BP_ids, KEGG_dataframe, KEGG_ids,
MF_dataframe, MF_ids, REACTOME_dataframe, REACTOME_ids,
CC_dataframe, CC_ids, DO_dataframe, DO_ids)
# Prepare the data for graph creation using 'ggraph'
index_ggraph <- c("type", "pathway", "gene") # columns other than the lowest level
nodes_ggraph <- gather_graph_node(new_dataframe, index = index_ggraph, root = "combination")
edges_ggraph <- gather_graph_edge(new_dataframe, index = index_ggraph, root = "combination")
# Create and plot the graph using 'tidygraph' and 'ggraph'
graph_ggraph <- tidygraph::tbl_graph(nodes = nodes_ggraph, edges = edges_ggraph)
plot <- ggraph::ggraph(graph_ggraph, layout = 'dendrogram', circular = TRUE) +
ggraph::geom_edge_diagonal(aes(color = .data$node1.node.branch, filter = .data$node1.node.level != "combination", alpha = .data$node1.node.level), edge_width = 1) +
ggraph::geom_node_point(aes(size = .data$node.size, color = .data$node.branch, filter = .data$node.level != "combination"), alpha = 0.45) +
ggplot2::scale_size(range = c(15, 90)) +
ggplot2::theme(legend.position = "none") +
ggraph::scale_edge_colour_brewer(palette= "Dark2") +
ggplot2::scale_color_brewer(palette = "Dark2") +
ggraph::geom_node_text(aes(x = 1.058 * .data$x, y = 1.058 * .data$y, label = .data$node.short_name, angle = -((-ggraph::node_angle(.data$x, .data$y) + 90) %% 180) + 60, filter = .data$leaf, color = .data$node.branch), size = 4, hjust = 'outward') +
ggraph::geom_node_text(aes(label = .data$node.short_name, filter = !.data$leaf & (.data$node.level == "type"), color = .data$node.branch), fontface = "bold", size = 8, family = "sans") +
ggraph::geom_node_text(aes(label = .data$node.short_name, filter = !.data$leaf & (.data$node.level == "pathway"), color = .data$node.branch, angle = -((-ggraph::node_angle(.data$x, .data$y) + 90) %% 180) + 36), fontface = "bold", size = 4.5, family = "sans") +
ggplot2::theme(panel.background = ggplot2::element_rect(fill = NA)) +
ggplot2::coord_cartesian(xlim = c(-1.3, 1.3), ylim = c(-1.3, 1.3))
return(plot)
}
================================================
FILE: R/PathwayCount.R
================================================
#' Count Genes Present in Pathways Above a Threshold
#'
#' This function filters pathways that meet a count threshold and then counts the presence of specified genes in those pathways.
#'
#' @importFrom dplyr filter
#' @importFrom rlang .data
#' @param GO A character vector of gene symbols.
#' @param count_threshold An integer specifying the count threshold for selecting pathways.
#' @param enrich_data A data frame containing pathway enrichment analysis results.
#' @return A data frame with columns "Symble" (gene symbol), "Description" (pathway description), and "Exists" (1 if gene is present, 0 otherwise).
#' @export
#'
#' @examples
#' # Simulated gene list
#' GO <- c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5")
#' # Simulated enrichment analysis data
#' enrich_data <- data.frame(
#' ID = c("GO:0001", "GO:0002", "GO:0003"),
#' Description = c("Pathway A", "Pathway B", "Pathway C"),
#' Count = c(10, 4, 6),
#' geneID = c("Gene1/Gene2/Gene3", "Gene4/Gene5", "Gene2/Gene6/Gene7")
#' )
#'
#' # Example usage
#' count_threshold <- 5
#' result_df <- pathway_count(GO, count_threshold, enrich_data)
#'
pathway_count <- function(GO, count_threshold, enrich_data) {
# Filter pathways meeting the count threshold
selected_pathways <- enrich_data %>%
dplyr::filter(.data$Count > count_threshold)
final_df <- data.frame(Symble = character(), Description = character(), Exists = integer())
# Iterate through each selected pathway
for (i in 1:nrow(selected_pathways)) {
pathway_info <- selected_pathways[i, ]
genes_in_pathway <- unlist(strsplit(as.character(pathway_info$geneID), "/"))
# Create a record for each gene in the current pathway
for (gene in GO) {
final_df <- rbind(final_df, data.frame(
Symble = gene,
Description = pathway_info$Description,
Exists = as.integer(gene %in% genes_in_pathway)
))
}
}
return(final_df)
}
================================================
FILE: R/PathwayDescription.R
================================================
#' Describe Genes Present in Selected Pathways
#'
#' This function identifies genes present in selected pathways based on gene enrichment analysis results.
#'
#' @importFrom dplyr filter
#' @importFrom rlang .data
#' @param GO A character vector of gene symbols.
#' @param selected_pathways_names A character vector specifying the names of selected pathways.
#' @param enrich_data A data frame containing pathway enrichment analysis results.
#' @return A data frame with columns "Symble" (gene symbol), "Description" (pathway description), and "Exists" (1 if gene is present, 0 otherwise).
#' @export
#'
#' @examples
#' GO <- c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5")
#' # Simulated enrichment analysis data
#' enrich_data <- data.frame(
#' ID = c("Pathway1", "Pathway2", "Pathway3", "Pathway4"),
#' Description = c("Apoptosis", "Cell Cycle", "Signal Transduction", "Metabolism"),
#' geneID = c("Gene1/Gene3", "Gene2/Gene4", "Gene1/Gene2/Gene3", "Gene5"),
#' Count = c(2, 2, 3, 1),
#' stringsAsFactors = FALSE
#' )
#'
#' # Example usage
#' result <- pathway_description(GO,
#' selected_pathways_names="Apoptosis",
#' enrich_data)
#'
pathway_description <- function(GO, selected_pathways_names, enrich_data) {
# Filter selected pathways
selected_pathways <- dplyr::filter(enrich_data, .data$Description %in% selected_pathways_names)
final_df <- data.frame(Symble = character(), Description = character(), Exists = integer())
# Iterate through each selected pathway
for (i in 1:nrow(selected_pathways)) {
pathway_info <- selected_pathways[i, ]
genes_in_pathway <- unlist(strsplit(as.character(pathway_info$geneID), "/"))
# Create a record for each gene in the current pathway
for (gene in GO) {
final_df <- rbind(final_df, data.frame(
Symble = gene,
Description = pathway_info$Description,
Exists = as.integer(gene %in% genes_in_pathway)
))
}
}
return(final_df)
}
================================================
FILE: R/PrepDeseq2.R
================================================
#' Prepare DESeq2 data for plotting
#'
#' This function reads a DESeq2 DEG data frame from an RDS file, filters it,
#' adjusts the log2FoldChange to absolute values, adds a pseudo-count to pvalues,
#' and transforms pvalues for plotting. The final data frame is returned and
#' optionally saved to a new RDS file.
#'
#' @param input_path Path to the RDS file containing the DESeq2 DEG data frame.
#' @param output_name Name for the processed data frame, also used as the RDS file name.
#' @return A data frame with processed DESeq2 DEG data.
#' @export
#' @examples
#' deseq2_file <- system.file("extdata",
#' "DEG_deseq2_test.rds",
#' package = "TransProR")
#' deseq2 <- prep_deseq2(deseq2_file)
#'
prep_deseq2 <- function(input_path, output_name = NULL) {
# Read the DESeq2 DEG data frame from an RDS file
DEG_deseq2 <- readRDS(input_path)
# Filter DEG data using the deg_filter function from the same package
DESeq2 <- deg_filter(DEG_deseq2)
DEG_deseq2 <- DEG_deseq2[rownames(DEG_deseq2) %in% DESeq2, ]
# Extract the Gene column as a regular column
DEG_deseq2$Gene <- rownames(DEG_deseq2)
# Select columns of interest
DEG_deseq2 <- DEG_deseq2[, c('log2FoldChange', 'pvalue', "change", 'Gene')]
# Adjust log2FoldChange values to absolute values
DEG_deseq2$log2FoldChange <- abs(DEG_deseq2$log2FoldChange)
# Add a small pseudo-count to pvalue to avoid log of zero
DEG_deseq2$pvalue <- DEG_deseq2$pvalue + .Machine$double.eps
# Transform p-value for plotting
DEG_deseq2$pvalue <- -log10(DEG_deseq2$pvalue)
# Rename columns
names(DEG_deseq2) <- c('logFC', 'Pvalue', "change", 'Gene')
# Optionally save the processed data frame as an RDS file
if (!is.null(output_name)) {
saveRDS(DEG_deseq2, paste0(output_name, ".Rdata"))
}
# Return the processed data frame
return(DEG_deseq2)
}
================================================
FILE: R/PrepEdgeR.R
================================================
#' Prepare edgeR DEG data for plotting
#'
#' This function reads an edgeR DEG data frame from an RDS file, filters it using
#' \code{\link{deg_filter}} function, adjusts the logFC to absolute values, adds a pseudo-count to PValue,
#' and transforms PValue for plotting. The final data frame is returned and
#' optionally saved to a new RDS file.
#'
#' @param input_path Path to the RDS file containing the edgeR DEG data frame.
#' @param output_name Name for the processed data frame, also used as the RDS file name.
#' @return A data frame with processed edgeR DEG data.
#' @export
#' @examples
#' edgeR_file <- system.file("extdata",
#' "DEG_edgeR_test.rds",
#' package = "TransProR")
#' edgeR <- prep_edgeR(edgeR_file)
#'
prep_edgeR <- function(input_path, output_name = NULL) {
# Read the edgeR DEG data frame from an RDS file
DEG_edgeR <- readRDS(input_path)
# Filter DEG data using the deg_filter function from the same package
edgeR <- deg_filter(DEG_edgeR)
DEG_edgeR <- DEG_edgeR[rownames(DEG_edgeR) %in% edgeR, ]
# Select columns of interest and adjust logFC values to absolute values
DEG_edgeR <- DEG_edgeR[, c('logFC', "PValue", "change")]
DEG_edgeR$logFC <- abs(DEG_edgeR$logFC)
# Add a small pseudo-count to PValue to avoid log of zero and transform PValue for plotting
DEG_edgeR$PValue <- DEG_edgeR$PValue + .Machine$double.eps
DEG_edgeR$PValue <- -log10(DEG_edgeR$PValue)
# Extract the Gene column from row names
DEG_edgeR$Gene <- rownames(DEG_edgeR)
# Rename columns
names(DEG_edgeR) <- c('logFC', 'Pvalue', "change", 'Gene')
# Optionally save the processed data frame as an RDS file
if (!is.null(output_name)) {
saveRDS(DEG_edgeR, paste0(output_name, ".Rdata"))
}
# Return the processed data frame
return(DEG_edgeR)
}
================================================
FILE: R/PrepLimma.R
================================================
#' Prepare limma-voom DEG data for plotting
#'
#' This function reads a limma-voom DEG data frame from an RDS file, filters it using
#' \code{\link{deg_filter}} function, adjusts the logFC to absolute values, adds a pseudo-count to P.Value,
#' and transforms P.Value for plotting. The final data frame is returned and
#' optionally saved to a new RDS file.
#'
#' @param input_path Path to the RDS file containing the limma-voom DEG data frame.
#' @param output_name Name for the processed data frame, also used as the RDS file name.
#' @return A data frame with processed limma-voom DEG data.
#' @export
#' @examples
#' limma_file <- system.file("extdata",
#' "DEG_limma_voom_test.rds",
#' package = "TransProR")
#' limma <- prep_limma(limma_file)
#'
prep_limma <- function(input_path, output_name = NULL) {
# Read the limma-voom DEG data frame from an RDS file
limma <- readRDS(input_path)
# Filter DEG data using the deg_filter function from the same package
limma_filter <- deg_filter(limma)
limma <- limma[rownames(limma) %in% limma_filter, ]
# Select columns of interest and adjust logFC values to absolute values
limma <- limma[, c('logFC', "P.Value", "change")]
limma$logFC <- abs(limma$logFC)
# Add a small pseudo-count to P.Value to avoid log of zero and transform P.Value for plotting
limma$P.Value <- limma$P.Value + .Machine$double.eps
limma$P.Value <- -log10(limma$P.Value)
# Extract the Gene column from row names
limma$Gene <- rownames(limma)
# Rename columns
names(limma) <- c('logFC', 'Pvalue', "change", 'Gene')
# Optionally save the processed data frame as an RDS file
if (!is.null(output_name)) {
saveRDS(limma, paste0(output_name, ".Rdata"))
}
# Return the processed data frame
return(limma)
}
================================================
FILE: R/PrepWilcoxon.R
================================================
#' Prepare Wilcoxon DEG data for plotting
#'
#' This function reads a Wilcoxon DEG data frame from an RDS file, filters it using
#' \code{\link{deg_filter}} function, adjusts the log2foldChange to absolute values, adds a pseudo-count to pValues,
#' and transforms pValues for plotting. The final data frame is returned and
#' optionally saved to a new RDS file.
#'
#' @param input_path Path to the RDS file containing the Wilcoxon DEG data frame.
#' @param output_name Optional; name for the processed data frame, also used as the RDS file name.
#' If not provided, the data frame will not be saved to file.
#' @return A data frame with processed Wilcoxon DEG data.
#' @export
#' @examples
#' wilcoxon_file <- system.file("extdata",
#' "Wilcoxon_rank_sum_testoutRst_test.rds",
#' package = "TransProR")
#' Wilcoxon <- prep_wilcoxon(wilcoxon_file)
#'
prep_wilcoxon <- function(input_path, output_name = NULL) {
# Read the Wilcoxon DEG data frame from an RDS file
Wilcoxon <- readRDS(input_path)
# Filter DEG data using the deg_filter function from the same package
Wilcoxon_filter <- deg_filter(Wilcoxon)
Wilcoxon <- Wilcoxon[rownames(Wilcoxon) %in% Wilcoxon_filter, ]
# Select columns of interest and adjust log2foldChange values to absolute values
Wilcoxon <- Wilcoxon[, c('log2foldChange', "pValues", "change")]
Wilcoxon$log2foldChange <- abs(Wilcoxon$log2foldChange)
# Add a small pseudo-count to pValues to avoid log of zero and transform pValues for plotting
Wilcoxon$pValues <- Wilcoxon$pValues + .Machine$double.eps
Wilcoxon$pValues <- -log10(Wilcoxon$pValues)
# Extract the Gene column from row names
Wilcoxon$Gene <- rownames(Wilcoxon)
# Rename columns
names(Wilcoxon) <- c('logFC', 'Pvalue', "change", 'Gene')
# Optionally save the processed data frame as an RDS file
if (!is.null(output_name) && nzchar(output_name)) {
saveRDS(Wilcoxon, paste0(output_name, ".Rdata"))
}
# Return the processed data frame
return(Wilcoxon)
}
================================================
FILE: R/ProcessHeatdata.R
================================================
#' Process Heatmap Data with Various Selection Options
#'
#' This function processes heatmap data (`heatdata`) based on a given selection option.
#' It allows customization of column names, selection of specific columns per group,
#' or averaging columns based on a common prefix.
#'
#' @param heatdata A data frame containing the heatmap data.
#' @param selection An integer specifying the processing method:
#' - 1: Use custom names for columns.
#' - 2: Select a given number of columns per group based on a prefix.
#' - 3: Calculate the average of columns per group based on a prefix.
#' @param custom_names A character vector of custom names for columns (used when `selection = 1`).
#' The length of this vector must match the number of columns in `heatdata`.
#' @param num_names_per_group An integer specifying the number of columns to select per group (used when `selection = 2`).
#' @param prefix_length An integer specifying the length of the prefix for grouping columns (used when `selection = 2` or `selection = 3`).
#' Default is 4.
#' @return A processed data frame based on the specified selection option.
#' @export
#'
#' @examples
#' # Example heatmap data frame
#' heatdata <- data.frame(
#' groupA_1 = c(1, 2, 3),
#' groupA_2 = c(4, 5, 6),
#' groupB_1 = c(7, 8, 9),
#' groupB_2 = c(10, 11, 12)
#' )
#'
#' # Selection 1: Use custom names for columns
#' custom_names <- c("Sample1", "Sample2", "Sample3", "Sample4")
#' processed_data1 <- process_heatdata(heatdata, selection = 1, custom_names = custom_names)
#'
#' # Selection 2: Select a given number of columns per group based on a prefix
#' processed_data2 <- process_heatdata(heatdata, selection = 2, num_names_per_group = 1)
#'
#' # Selection 3: Calculate the average of columns per group based on a prefix
#' processed_data3 <- process_heatdata(heatdata, selection = 3, prefix_length = 6)
process_heatdata <- function(heatdata,
selection = 1,
custom_names = NULL,
num_names_per_group = NULL,
prefix_length = 4) {
if (selection == 1) {
# Option 1: Use custom names for columns
if (length(custom_names) != ncol(heatdata)) {
stop("Length of custom_names must match number of columns in heatdata")
}
names(heatdata) <- custom_names
} else if (selection == 2) {
# Option 2: Select a given number of columns per group based on a prefix
group_names <- unique(substr(names(heatdata), 1, prefix_length)) # Get unique group names based on prefix
selected_columns <- integer(0)
new_names <- character(0)
for (group in group_names) {
group_cols <- grep(group, names(heatdata)) # Find columns for each group
num_selected <- min(length(group_cols), num_names_per_group)
selected_columns <- c(selected_columns, sample(group_cols, num_selected))
# Generate new names for selected columns
new_group_names <- paste(group, seq_len(num_selected), sep = "_")
new_names <- c(new_names, new_group_names)
}
heatdata <- heatdata[, selected_columns, drop = FALSE] # Keep only selected columns
names(heatdata) <- new_names
} else if (selection == 3) {
# Option 3: Calculate the average of columns per group based on a prefix
group_names <- unique(substr(names(heatdata), 1, prefix_length))
# Create a list to collect mean values for each group
mean_list <- list()
# Calculate mean values for each group and store them in the list
for (group in group_names) {
group_cols <- grep(group, names(heatdata), value = TRUE)
mean_list[[paste(group, "mean", sep = "_")]] <- rowMeans(heatdata[, group_cols], na.rm = TRUE)
}
# Convert list to data frame and set row names
heatdata <- as.data.frame(do.call(cbind, mean_list))
rownames(heatdata) <- rownames(heatdata)
} else {
stop("Invalid selection parameter")
}
return(heatdata)
}
================================================
FILE: R/SeekGtexOrgan.R
================================================
#' Load and Process GTEX Phenotype Data to Retrieve Primary Site Counts
#'
#' This function reads the GTEX phenotype data from a specified path, renames its columns for better readability,
#' and then returns a table of primary site counts.
#'
#' @param path The path to the GTEX phenotype data file. Default is "./download_data/GTEX_phenotype".
#'
#' @return A table representing the count of samples per primary site.
#' @importFrom utils read.table
#' @examples
#' # Get the file path to the example data in the package
#' path <- system.file("extdata", "GTEX_phenotype_test", package = "TransProR")
#' # Call the `seek_gtex_organ` function with the path and print the result
#' SeekGtexOrgan <- seek_gtex_organ(path = path)
#'
#' @export
seek_gtex_organ <- function(path = "./download_data/GTEX_phenotype") {
# Read GTEX phenotype data
# gtex.phe <- data.table::fread(path, header = TRUE, sep = '\t', data.table = FALSE)
gtex.phe <- utils::read.table(path,
header = TRUE,
sep = '\t',
stringsAsFactors = FALSE,
check.names = FALSE)
rownames(gtex.phe) <- gtex.phe$Sample
# Rename columns
colnames(gtex.phe) <- c("Sample", "body_site_detail (SMTSD)", "primary_site", "gender", "patient", "cohort")
# Create table of primary sites
primary_site_counts <- table(gtex.phe$primary_site)
return(primary_site_counts)
}
================================================
FILE: R/WilcoxonAnalyze.R
================================================
#' Differential Gene Expression Analysis Using Wilcoxon Rank-Sum Test
#'
#' This function performs differential gene expression analysis using Wilcoxon rank-sum tests.
#' It reads tumor and normal expression data, performs TMM normalization using 'edgeR', and uses Wilcoxon rank-sum tests to identify differentially expressed genes.
#'
#' @importFrom tibble column_to_rownames
#' @importFrom edgeR DGEList filterByExpr calcNormFactors cpm
#' @importFrom dplyr mutate
#' @importFrom stats wilcox.test p.adjust
#' @param tumor_file Path to the tumor data file (RDS format).
#' @param normal_file Path to the normal data file (RDS format).
#' @param output_file Path to save the output DEG data (RDS format).
#' @param logFC_threshold Threshold for log fold change for marking up/down-regulated genes.
#' @param fdr_threshold Threshold for FDR for filtering significant genes.
#' @return A data frame of differential expression results.
#' @references
#' Li, Y., Ge, X., Peng, F., Li, W., & Li, J. J. (2022). Exaggerated False Positives by Popular
#' Differential Expression Methods When Analyzing Human Population Samples. Genome Biology, 23(1), 79.
#' DOI: https://doi.org/10.1186/s13059-022-02648-4.
#' @export
#'
#' @examples
#' # Define file paths for tumor and normal data from the data folder
#' tumor_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds",
#' package = "TransProR")
#' normal_file <- system.file("extdata",
#' "removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds",
#' package = "TransProR")
#' output_file <- file.path(tempdir(), "Wilcoxon_rank_sum_testoutRst.rds")
#'
#' # Run the Wilcoxon rank sum test
#' outRst <- Wilcoxon_analyze(
#' tumor_file = tumor_file,
#' normal_file = normal_file,
#' output_file = output_file,
#' logFC_threshold = 2.5,
#' fdr_threshold = 0.01
#' )
#'
#' # View the top 5 rows of the result
#' head(outRst, 5)
Wilcoxon_analyze <- function(tumor_file,
normal_file,
output_file,
logFC_threshold = 2.5,
fdr_threshold = 0.05) {
# Read data
tumor <- readRDS(tumor_file)
normal <- readRDS(normal_file)
# Merge the datasets and set row names
all_count_exp <- merge(tumor, normal, by = "row.names")
all_count_exp <- tibble::column_to_rownames(all_count_exp, var = "Row.names")
# Define groups
group <- c(rep('tumor', ncol(tumor)), rep('normal', ncol(normal)))
group <- factor(group, levels = c("normal", "tumor"))
group_table <- table(group)
message("Group Table:")
message(paste(names(group_table), group_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# EdgeR TMM normalization
y <- edgeR::DGEList(counts = all_count_exp, group = group)
keep <- edgeR::filterByExpr(y)
y <- y[keep, keep.lib.sizes = FALSE]
# Perform TMM normalization and transfer to CPM (Counts Per Million)
y <- edgeR::calcNormFactors(y, method = "TMM")
count_norm <- edgeR::cpm(y)
count_norm <- as.data.frame(count_norm)
# Wilcoxon rank-sum test for each gene
pvalues <- sapply(1:nrow(count_norm), function(i) {
data <- cbind.data.frame(gene = as.numeric(t(count_norm[i, ])), group)
stats::wilcox.test(gene ~ group, data)$p.value
})
fdr <- stats::p.adjust(pvalues, method = "fdr")
# Calculate fold-change for each gene
conditionsLevel <- levels(group)
dataCon1 <- count_norm[, which(group == conditionsLevel[1])]
dataCon2 <- count_norm[, which(group == conditionsLevel[2])]
# The addition of a pseudo-count allows for robust statistical analysis of genes with low expression levels, while mitigating computational issues caused by zero expression values.
# It prevents the occurrence of negative infinity (-Inf) when the numerator is zero, and positive infinity (Inf) when the denominator is zero.
foldChanges <- log2((rowMeans(dataCon2) + 0.005) / (rowMeans(dataCon1) + 0.005))
#foldChanges <- log2(rowMeans(dataCon2) / rowMeans(dataCon1))
# Output results based on FDR threshold
outRst <- data.frame(log2foldChange = foldChanges, pValues = pvalues, FDR = fdr)
rownames(outRst) <- rownames(count_norm)
outRst <- na.omit(outRst)
# Mark up/down-regulated genes
k1 <- (outRst$FDR < fdr_threshold) & (outRst$log2foldChange < -logFC_threshold)
k2 <- (outRst$FDR < fdr_threshold) & (outRst$log2foldChange > logFC_threshold)
outRst <- dplyr::mutate(outRst, change = ifelse(k1, "down", ifelse(k2, "up", "stable")))
change_table <- table(outRst$change)
message("Change Table:")
message(paste(names(change_table), change_table, sep = ": ", collapse = "\n"))
# Add a space after the output for separation
message(" ")
# Save results
saveRDS(outRst, file = output_file)
return(outRst)
}
================================================
FILE: R/data.R
================================================
# The annotation file for the DATA in the data directory can all be commented in this file. Although LazyData: true is marked in the DESCRIPTION,
# it ensures that the data is not read initially but is automatically read when it is actually used. This automatic process requires this part of the file to ensure it.
#' All DEGs Venn Diagram Data
#'
#' A dataset containing the differentially expressed genes (DEGs) from four different statistical analysis methods: DESeq2, edgeR, limma, and Wilcoxon test.
#' This dataset is used for generating Venn diagrams to compare the overlap of DEGs identified by different methods.
#'
#' @format A list with the following components:
#' \describe{
#' \item{DESeq2}{A vector of gene IDs or gene symbols identified as DEGs by the DESeq2 method.}
#' \item{edgeR}{A vector of gene IDs or gene symbols identified as DEGs by the edgeR method.}
#' \item{limma}{A vector of gene IDs or gene symbols identified as DEGs by the limma method. }
#' \item{Wilcoxon_test}{A vector of gene IDs or gene symbols identified as DEGs by the Wilcoxon test method. }
#' }
#'
#' @source The data was derived from differential expression analyses performed on a gene expression dataset using four commonly used statistical methods (DESeq2, edgeR, limma, and Wilcoxon test).
#'
#' @usage data(all_degs_venn)
#'
#' @examples
#' data(all_degs_venn)
#' # Example of plotting a Venn diagram using the dataset
#'
#' edge_colors <- c("#1b62bb","#13822e","#332c3a","#9e2d39")
#' name_color <- c("#1b64bb","#13828e","#337c3a","#9e9d39")
#' fill_colors <- c("#e3f2fa", "#0288d1")
#'
#' Contrast_degs_venn <- Contrast_Venn(all_degs_venn, edge_colors, name_color, fill_colors)
#'
#' @keywords datasets
"all_degs_venn"
#' Phylogenetic Tree Object
#'
#' A dataset containing a phylogenetic tree object created using the `ggtree` package.
#' This tree represents the evolutionary relationships among a set of species or genes.
#'
#' @format A `ggtree` object.
#'
#' @source The phylogenetic tree was constructed based on sequence alignment data obtained from [Data Source, e.g., NCBI database, specific study, etc.].
#'
#' @usage data(gtree)
#'
#' @keywords datasets phylogenetics
"gtree"
================================================
FILE: R/utils-pipe.R
================================================
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
#' @param lhs A value or the magrittr placeholder.
#' @param rhs A function call using the magrittr semantics.
#' @return The result of calling `rhs(lhs)`.
NULL
================================================
FILE: R/zzz.R
================================================
.onAttach <- function(libname, pkgname) {
# Display ASCII art
ascii_art_path <- system.file("extdata", "ascii_art.txt", package = "TransProR")
if (file.exists(ascii_art_path)) {
ascii_art_lines <- readLines(ascii_art_path)
ascii_art <- paste(ascii_art_lines, collapse = "\n")
packageStartupMessage(ascii_art)
} else {
packageStartupMessage("Welcome to TransProR!")
}
}
================================================
FILE: README.md
================================================
# TransProR
[](https://cran.r-project.org/package=TransProR)
<!-- badges: start -->
<!-- badges: end -->
<img src="vignettes/image/TransProR_rlogo.png" alt="TransProR Logo" width="210" height="250" align="right">
Analysis and visualization of transcriptomic data are currently in progress. Future directions include multi-modal fusion, sparse learning, and the investigation of spatio-temporal effects.
## Installation
You can install the development version of TransProR like so:
``` r
install.packages("devtools")
devtools::install_github("SSSYDYSSS/TransProR", build_vignettes = TRUE)
install.packages("remotes")
remotes::install_github("SSSYDYSSS/TransProR", build_vignettes = TRUE)
```
## More examples see
TransProR Manual:https://sssydysss.github.io/TransProRBook/
## System Requirements
- R (>= 4.3.0)
## Example
This is a basic example which shows you how to solve a common problem:
``` r
library(TransProR)
## basic example code
```
## Citation
If you use TransPro in your research, please cite:
Dongyue Yu; Chen Li; Shuo Yan; Lujiale Guo; Jingyu Liang; Shengquan Chen*; Wenjun Bu* (2026). Comparative Evaluation of Differential Gene Selection Methods in Transcriptomics: Bias Correction and Visualization with TransPro. Manuscript in preparation.
**Correspondence:**
Shengquan Chen — School of Mathematical Sciences and LPMC, Nankai University, Tianjin 300071, China.
Wenjun Bu — Institute of Entomology, College of Life Sciences, Nankai University, Tianjin 300071, China.
## Code of Conduct
Please note that the TransProR project is released with a [Contributor Code of Conduct](https://contributor-covenant.org/version/2/1/CODE_OF_CONDUCT.html). By contributing to this project, you agree to abide by its terms. "\# TransProR"
================================================
FILE: TransProR.Rproj
================================================
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
AutoAppendNewline: Yes
StripTrailingWhitespace: Yes
BuildType: Package
PackageUseDevtools: Yes
PackageInstallArgs: --no-multiarch --with-keep.source
================================================
FILE: data-raw/MAKEDATA.R
================================================
# Data that the package will use, and is explorable by users:
usethis::use_data(x) # This will create a file at data/x.Rda
# Data that the package will use, and is not explorable by users, internal data:
usethis::use_data(y, internal = TRUE) # This will create a file at R/sysdata.rda. Note, this data does not require documentation, nor does it need to be exported, it is not visible to users.
================================================
FILE: dev/build.R
================================================
# pipe
usethis::use_pipe(export = TRUE)
## code to prepare MAKEDATA.R
usethis::use_data_raw("MAKEDATA")
devtools::load_all()
devtools::document()
devtools::check()
#BiocManager::install("BiocCheck")
#BiocCheck::BiocCheck()
# Ensure that all dependencies are correctly installed, and generate the pak lock file.
pak::pak()
pak::pak("jokergoo/ComplexHeatmap")# 单独安装
pak::lockfile_create()# for github action
file.rename("pkg.lock", ".github/pkg.lock")
# NEWS.md
usethis::use_news_md() #生成一个标准模板。
================================================
FILE: dev/dev.R
================================================
# dependency package
usethis::use_package("stats")
usethis::use_package("utils")
usethis::use_package("DESeq2")
usethis::use_package("BiocGenerics")
usethis::use_package("ggplot2")
usethis::use_package("ggpubr")
usethis::use_package("tibble")
# license
usethis::use_mit_license("Dongyue Yu")
# readme
usethis::use_readme_md()
# adds a Code of Conduct
usethis::use_code_of_conduct(contact = "yudongyue@mail.nankai.edu.cn")
# vignette
usethis::use_vignette("a_example_workflow") # optional
devtools::install(build_vignettes = TRUE) # optional
## a website
# Run once to configure your package to use pkgdown
usethis::use_pkgdown() # optional
pkgdown::build_site() # optional
## add git
usethis::use_git()
usethis::use_github()
# CI
usethis::use_github_action_check_standard()
# CRAN
# Spell check
devtools::spell_check()
# Regular local tests
devtools::check()
# rhub cross-platform tests
rhub::check_for_cran()
# Windows platform-only test
devtools::check_win_devel() # optional
# rhub Windows platform-only test
rhub::check_for_cran(
platform="windows-x86_64-devel",
env_vars=c(R_COMPILE_AND_INSTALL_PACKAGES = "always")
) # optional
# Check before release
devtools::release()
================================================
FILE: inst/extdata/GTEX_phenotype_test
================================================
Sample body_site_detail (SMTSD) _primary_site _gender _patient _cohort
GTEX-1117F-0226-SM-5GZZ7 Adipose - Subcutaneous Adipose Tissue female GTEX-1117F GTEX
GTEX-1117F-0426-SM-5EGHI Muscle - Skeletal Muscle female GTEX-1117F GTEX
GTEX-1117F-0526-SM-5EGHJ Artery - Tibial Blood Vessel female GTEX-1117F GTEX
GTEX-1117F-0626-SM-5N9CS Artery - Coronary Blood Vessel female GTEX-1117F GTEX
GTEX-1117F-0726-SM-5GIEN Heart - Atrial Appendage Heart female GTEX-1117F GTEX
GTEX-1117F-1326-SM-5EGHH Adipose - Visceral (Omentum) Adipose Tissue female GTEX-1117F GTEX
GTEX-1117F-2226-SM-5N9CH Ovary Ovary female GTEX-1117F GTEX
GTEX-1117F-2426-SM-5EGGH Uterus Uterus female GTEX-1117F GTEX
GTEX-1117F-2526-SM-5GZY6 Vagina Vagina female GTEX-1117F GTEX
GTEX-1117F-2826-SM-5GZXL Breast - Mammary Tissue Breast female GTEX-1117F GTEX
GTEX-1117F-2926-SM-5GZYI Skin - Not Sun Exposed (Suprapubic) Skin female GTEX-1117F GTEX
GTEX-1117F-3026-SM-5GZYU Minor Salivary Gland Salivary Gland female GTEX-1117F GTEX
GTEX-1117F-3226-SM-5N9CT Brain - Cortex Brain female GTEX-1117F GTEX
GTEX-111CU-0126-SM-5GZWZ Adrenal Gland Adrenal Gland male GTEX-111CU GTEX
GTEX-111CU-0226-SM-5GZXC Thyroid Thyroid male GTEX-111CU GTEX
GTEX-111CU-0326-SM-5GZXO Lung Lung male GTEX-111CU GTEX
GTEX-111CU-0426-SM-5GZY1 Spleen Spleen male GTEX-111CU GTEX
GTEX-111CU-0526-SM-5EGHK Pancreas Pancreas male GTEX-111CU GTEX
GTEX-111CU-0626-SM-5EGHL Esophagus - Muscularis Esophagus male GTEX-111CU GTEX
GTEX-111CU-0726-SM-5GZYD Esophagus - Mucosa Esophagus male GTEX-111CU GTEX
GTEX-111CU-0826-SM-5EGIJ Esophagus - Gastroesophageal Junction Esophagus male GTEX-111CU GTEX
GTEX-111CU-0926-SM-5EGIK Stomach Stomach male GTEX-111CU GTEX
GTEX-111CU-1026-SM-5EGIL Adipose - Visceral (Omentum) Adipose Tissue male GTEX-111CU GTEX
GTEX-111CU-1126-SM-5EGIM Skin - Not Sun Exposed (Suprapubic) Skin male GTEX-111CU GTEX
GTEX-111CU-1226-SM-5EGIN Colon - Sigmoid Colon male GTEX-111CU GTEX
GTEX-111CU-1326-SM-5NQ8L Small Intestine - Terminal Ileum Small Intestine male GTEX-111CU GTEX
GTEX-111CU-1426-SM-5GZYP Colon - Transverse Colon male GTEX-111CU GTEX
GTEX-111CU-1526-SM-5N9FS Prostate Prostate male GTEX-111CU GTEX
GTEX-111CU-1726-SM-5EGHM Testis Testis male GTEX-111CU GTEX
GTEX-111CU-1826-SM-5GZYN Adipose - Subcutaneous Adipose Tissue male GTEX-111CU GTEX
GTEX-111CU-1926-SM-5GZYZ Skin - Sun Exposed (Lower leg) Skin male GTEX-111CU GTEX
GTEX-111CU-2026-SM-5GZZC Muscle - Skeletal Muscle male GTEX-111CU GTEX
GTEX-111CU-2226-SM-5N9G5 Nerve - Tibial Nerve male GTEX-111CU GTEX
GTEX-111FC-0126-SM-5N9DL Skin - Sun Exposed (Lower leg) Skin male GTEX-111FC GTEX
GTEX-111FC-0226-SM-5N9B8 Adipose - Subcutaneous Adipose Tissue male GTEX-111FC GTEX
GTEX-111FC-0326-SM-5GZZ1 Muscle - Skeletal Muscle male GTEX-111FC GTEX
GTEX-111FC-0426-SM-5N9CV Artery - Tibial Blood Vessel male GTEX-111FC GTEX
GTEX-111FC-0526-SM-5GZZ8 Nerve - Tibial Nerve male GTEX-111FC GTEX
GTEX-111FC-0626-SM-5N9CU Heart - Atrial Appendage Heart male GTEX-111FC GTEX
GTEX-111FC-0826-SM-5GZWO Heart - Left Ventricle Heart male GTEX-111FC GTEX
GTEX-111FC-1026-SM-5GZX1 Thyroid Thyroid male GTEX-111FC GTEX
GTEX-111FC-1126-SM-5GZWU Lung Lung male GTEX-111FC GTEX
GTEX-111FC-1326-SM-5N9D9 Spleen Spleen male GTEX-111FC GTEX
GTEX-111FC-1426-SM-5N9C7 Adipose - Subcutaneous Adipose Tissue male GTEX-111FC GTEX
================================================
FILE: inst/extdata/TCGA-SKCM.GDC_phenotype_test.tsv
================================================
submitter_id.samples age_at_initial_pathologic_diagnosis batch_number bcr bcr_followup_barcode bcr_followup_uuid submitter_id breslow_depth_value day_of_dcc_upload day_of_form_completion days_to_initial_pathologic_diagnosis days_to_submitted_specimen_dx distant_metastasis_anatomic_site file_uuid followup_case_report_form_submission_reason history_of_neoadjuvant_treatment informed_consent_verified interferon_90_day_prior_excision_admin_indicator lactate_dehydrogenase_result lost_follow_up malignant_neoplasm_mitotic_count_rate melanoma_clark_level_value melanoma_origin_skin_anatomic_site melanoma_ulceration_indicator month_of_dcc_upload month_of_form_completion new_tumor_dx_prior_submitted_specimen_dx other_dx pathologic_M pathologic_N pathologic_T patient_id person_neoplasm_cancer_status postoperative_rx_tx primary_melanoma_at_diagnosis_count primary_neoplasm_melanoma_dx primary_tumor_multiple_present_ind prior_radiation_therapy prior_systemic_therapy prior_systemic_therapy_type radiation_therapy radiation_therapy_to_primary submitted_tumor_location subsequent_primary_melanoma_during_followup system_version tissue_prospective_collection_indicator tissue_retrospective_collection_indicator tissue_source_site weight withdrawn year_of_dcc_upload year_of_form_completion year_of_initial_pathologic_diagnosis days_to_index dbgap_registration_code disease_code pathology_report_file_name program project_code vial_number age_at_index.demographic days_to_birth.demographic days_to_death.demographic ethnicity.demographic gender.demographic race.demographic vital_status.demographic year_of_birth.demographic year_of_death.demographic age_at_diagnosis.diagnoses classification_of_tumor.diagnoses days_to_diagnosis.diagnoses days_to_last_follow_up.diagnoses icd_10_code.diagnoses last_known_disease_status.diagnoses morphology.diagnoses primary_diagnosis.diagnoses prior_malignancy.diagnoses prior_treatment.diagnoses progression_or_recurrence.diagnoses site_of_resection_or_biopsy.diagnoses synchronous_malignancy.diagnoses tissue_or_organ_of_origin.diagnoses tumor_grade.diagnoses tumor_stage.diagnoses year_of_diagnosis.diagnoses disease_type alcohol_history.exposures bmi.exposures height.exposures weight.exposures primary_site name.project project_id.project releasable.project bcr_id.tissue_source_site code.tissue_source_site name.tissue_source_site project.tissue_source_site days_to_collection.samples days_to_sample_procurement.samples initial_weight.samples is_ffpe.samples oct_embedded.samples preservation_method.samples sample_type.samples sample_type_id.samples state.samples tissue_type.samples
TCGA-D9-A4Z2-01A 50 262.74.0 Nationwide Children's Hospital TCGA-D9-A4Z2-F57868 AB5F6B14-DAAE-4D19-B227-EC6E2D5ED0D0 TCGA-D9-A4Z2 25 14 28 0 7 8FC5ACEA-E493-4C8D-A3AC-5A68B1B8FC26 No YES NO 10 IV Non-glabrous skin YES 3 3 NO No M0 N3 T4b A4Z2 WITH TUMOR NO 2 YES YES NO NO NO NO Primary Tumor NO 7th YES NO D9 54 false 2017 2014 2012 0 SKCM TCGA-D9-A4Z2.FEA752ED-7670-4D44-8402-009CF8356C44.pdf TCGA A 50.0 -18462.0 190.0 not hispanic or latino male white Dead 1962.0 18462.0 not reported 0.0 93.0 C44.5 not reported 8721/3 Nodular melanoma no No not reported Skin of trunk No Skin, NOS not reported stage iiic 2012.0 Nevi and Melanomas Not Reported 20.07733491969066 164.0 54.0 Skin Skin Cutaneous Melanoma TCG
gitextract_t_svwp8g/
├── .Rbuildignore
├── .github/
│ ├── .gitignore
│ └── workflows/
│ └── R-CMD-check.yaml
├── .gitignore
├── CODE_OF_CONDUCT.md
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── R/
│ ├── AdjustAlphaScale.R
│ ├── CircosFruits.R
│ ├── CombatNormal.R
│ ├── CombatTumor.R
│ ├── CompareMerge.R
│ ├── Contrast_Venn.R
│ ├── DESeq2Analyze.R
│ ├── EdgeRAnalyze.R
│ ├── EnrichCirclize.R
│ ├── EnrichCircoBar.R
│ ├── EnrichPolarBubble.R
│ ├── EnrichmentSpiralize.R
│ ├── FacetDensityFoldchange.R
│ ├── FilterDiffGenes.R
│ ├── FourDegsVenn.R
│ ├── GatherGraphEdge.R
│ ├── GatherGraphNode.R
│ ├── GeneColor.R
│ ├── GeneHighlights.R
│ ├── GeneMapPathway.R
│ ├── GetGtexExp.R
│ ├── GetTcgaExp.R
│ ├── HighlightByNode.R
│ ├── HighlightGenes.R
│ ├── LimmaAnalyze.R
│ ├── LogTransform.R
│ ├── MergeDensityFoldchange.R
│ ├── MergeGtexTcga.R
│ ├── MergeIDPosition.R
│ ├── MergeMethodColor.R
│ ├── NewGgraph.R
│ ├── PathwayCount.R
│ ├── PathwayDescription.R
│ ├── PrepDeseq2.R
│ ├── PrepEdgeR.R
│ ├── PrepLimma.R
│ ├── PrepWilcoxon.R
│ ├── ProcessHeatdata.R
│ ├── SeekGtexOrgan.R
│ ├── WilcoxonAnalyze.R
│ ├── data.R
│ ├── utils-pipe.R
│ └── zzz.R
├── README.md
├── TransProR.Rproj
├── data/
│ ├── all_degs_venn.rda
│ └── gtree.rda
├── data-raw/
│ └── MAKEDATA.R
├── dev/
│ ├── build.R
│ └── dev.R
├── inst/
│ └── extdata/
│ ├── DEG_deseq2.rds
│ ├── DEG_deseq2_test.rds
│ ├── DEG_edgeR_test.rds
│ ├── DEG_limma_voom_test.rds
│ ├── Diff_deseq2.rds
│ ├── GTEX_phenotype_test
│ ├── SKCM_Skin_TCGA_exp_normal_test.rds
│ ├── SKCM_Skin_TCGA_exp_tumor_test.rds
│ ├── Skin_SKCM_Gtex_test.rds
│ ├── TCGA-SKCM.GDC_phenotype_test.tsv
│ ├── TCGA-SKCM.htseq_counts_test.tsv
│ ├── TCGA_gencode.v22.annotation.gene.probeMap_test
│ ├── Wilcoxon_rank_sum_testoutRst_test.rds
│ ├── all_count_exp_test.csv
│ ├── ascii_art.txt
│ ├── combined_df.rds
│ ├── gtex_gene_expected_count_test
│ ├── gtex_probeMap_gencode.v23.annotation.gene.probemap_test
│ ├── p_tree_test.rds
│ ├── removebatch_SKCM_Skin_Normal_TCGA_GTEX_count_test.rds
│ ├── removebatch_SKCM_Skin_TCGA_exp_tumor_test.rds
│ ├── selected_genes_deseq2.rds
│ └── tree_plot.rds
├── man/
│ ├── Combat_Normal.Rd
│ ├── Contrast_Venn.Rd
│ ├── DESeq2_analyze.Rd
│ ├── Wilcoxon_analyze.Rd
│ ├── add_boxplot.Rd
│ ├── add_new_tile_layer.Rd
│ ├── adjust_alpha_scale.Rd
│ ├── adjust_color_tone.Rd
│ ├── adjust_export_pathway.Rd
│ ├── all_degs_venn.Rd
│ ├── circos_fruits.Rd
│ ├── combat_tumor.Rd
│ ├── compare_merge.Rd
│ ├── create_base_plot.Rd
│ ├── deg_filter.Rd
│ ├── drawLegends.Rd
│ ├── edgeR_analyze.Rd
│ ├── enrich_circo_bar.Rd
│ ├── enrich_polar_bubble.Rd
│ ├── enrichment_circlize.Rd
│ ├── enrichment_spiral_plots.Rd
│ ├── extract_descriptions_counts.Rd
│ ├── extract_ntop_pathways.Rd
│ ├── extract_positive_pathways.Rd
│ ├── facet_density_foldchange.Rd
│ ├── filter_diff_genes.Rd
│ ├── four_degs_venn.Rd
│ ├── gather_graph_edge.Rd
│ ├── gather_graph_node.Rd
│ ├── gene_color.Rd
│ ├── gene_highlights.Rd
│ ├── gene_map_pathway.Rd
│ ├── get_gtex_exp.Rd
│ ├── get_tcga_exp.Rd
│ ├── gtree.Rd
│ ├── highlight_by_node.Rd
│ ├── highlight_genes.Rd
│ ├── limma_analyze.Rd
│ ├── log_transform.Rd
│ ├── merge_density_foldchange.Rd
│ ├── merge_gtex_tcga.Rd
│ ├── merge_id_position.Rd
│ ├── merge_method_color.Rd
│ ├── new_ggraph.Rd
│ ├── pathway_count.Rd
│ ├── pathway_description.Rd
│ ├── pipe.Rd
│ ├── prep_deseq2.Rd
│ ├── prep_edgeR.Rd
│ ├── prep_limma.Rd
│ ├── prep_wilcoxon.Rd
│ ├── process_heatdata.Rd
│ ├── seek_gtex_organ.Rd
│ ├── selectPathways.Rd
│ └── spiral_newrle.Rd
└── vignettes/
└── TransProR.Rmd
Condensed preview — 138 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,023K chars).
[
{
"path": ".Rbuildignore",
"chars": 116,
"preview": "^.*\\.Rproj$\n^\\.Rproj\\.user$\n^data-raw$\n^data-raw$\n^dev$\n^CODE_OF_CONDUCT\\.md$\n^LICENSE\\.md$\n^\\.github$\n^\\.RDataTmp$\n"
},
{
"path": ".github/.gitignore",
"chars": 7,
"preview": "*.html\n"
},
{
"path": ".github/workflows/R-CMD-check.yaml",
"chars": 1705,
"preview": "name: R-CMD-check\n\non:\n push:\n branches: [main, master]\n pull_request:\n branches: [main, master]\n\njobs:\n R-CMD-"
},
{
"path": ".gitignore",
"chars": 87,
"preview": ".Rproj.user\n.Rhistory\n.RData\n.Ruserdata\n.Rdata\n.httr-oauth\n.DS_Store\n.quarto\n.RDataTmp\n"
},
{
"path": "CODE_OF_CONDUCT.md",
"chars": 5250,
"preview": "# Contributor Covenant Code of Conduct\n\n## Our Pledge\n\nWe as members, contributors, and leaders pledge to make participa"
},
{
"path": "DESCRIPTION",
"chars": 2203,
"preview": "Package: TransProR\nType: Package\nTitle: Analysis and Visualization of Multi-Omics Data\nVersion: 1.0.4\nAuthors@R: person("
},
{
"path": "LICENSE",
"chars": 40,
"preview": "YEAR: 2023\nCOPYRIGHT HOLDER: Dongyue Yu\n"
},
{
"path": "LICENSE.md",
"chars": 1069,
"preview": "# MIT License\n\nCopyright (c) 2023 Dongyue Yu\n\nPermission is hereby granted, free of charge, to any person obtaining a co"
},
{
"path": "NAMESPACE",
"chars": 5044,
"preview": "# Generated by roxygen2: do not edit by hand\n\nexport(\"%>%\")\nexport(Combat_Normal)\nexport(Contrast_Venn)\nexport(DESeq2_an"
},
{
"path": "R/AdjustAlphaScale.R",
"chars": 1760,
"preview": "#' Adjust Alpha Scale for Data Visualization\n#'\n#' This function dynamically adjusts the transparency scale for visualiz"
},
{
"path": "R/CircosFruits.R",
"chars": 14103,
"preview": "#' Create a base plot with gene expression data on a phylogenetic tree\n#'\n#' This function creates a base plot using 'gg"
},
{
"path": "R/CombatNormal.R",
"chars": 6330,
"preview": "#' Process and Correct Batch Effects in TCGA's normal tissue and GTEX Data\n#'\n#' This function takes a TCGA's normal tis"
},
{
"path": "R/CombatTumor.R",
"chars": 4193,
"preview": "#' Process and Correct Batch Effects in Tumor Data\n#'\n#' This function takes a tumor data set, asks the user for specifi"
},
{
"path": "R/CompareMerge.R",
"chars": 2736,
"preview": "#' Compare and merge specific columns from two DEG data frames\n#'\n#' This function takes two DEG data frames, inner join"
},
{
"path": "R/Contrast_Venn.R",
"chars": 2202,
"preview": "#' Function to Create a Venn Diagram of DEGs with Custom Colors\n#'\n#' This function creates a Venn Diagram using the 'gg"
},
{
"path": "R/DESeq2Analyze.R",
"chars": 5691,
"preview": "#' Differential Gene Expression Analysis using 'DESeq2'\n#'\n#' 'DESeq2': Differential gene expression analysis based on t"
},
{
"path": "R/EdgeRAnalyze.R",
"chars": 4759,
"preview": "#' Differential Gene Expression Analysis using 'edgeR'\n#'\n#' This function performs differential gene expression analysi"
},
{
"path": "R/EnrichCirclize.R",
"chars": 11833,
"preview": "#' Adjust and Export Pathway Analysis Results\n#'\n#' This function processes a dataframe containing fgsea results. It adj"
},
{
"path": "R/EnrichCircoBar.R",
"chars": 7885,
"preview": "#' Extract and Count Descriptions with Specified Color\n#'\n#' This function filters a data frame for specified descriptio"
},
{
"path": "R/EnrichPolarBubble.R",
"chars": 4684,
"preview": "#' Enrichment Polar Bubble Plot\n#'\n#' This function creates a polar bubble plot using 'ggplot2'. It is designed to visua"
},
{
"path": "R/EnrichmentSpiralize.R",
"chars": 12778,
"preview": "#' Extract and Store Top Pathways for Each Sample\n#'\n#' This function processes a dataframe containing SSGSEA KEGG resul"
},
{
"path": "R/FacetDensityFoldchange.R",
"chars": 4870,
"preview": "#' Create faceted high-density region plots with optional points and density contours\n#'\n#' This function creates facete"
},
{
"path": "R/FilterDiffGenes.R",
"chars": 2028,
"preview": "#' Filter Differentially Expressed Genes\n#'\n#' This function filters a data frame to identify genes with significant dif"
},
{
"path": "R/FourDegsVenn.R",
"chars": 2226,
"preview": "#' Function to Filter Differentially Expressed Genes (DEGs)\n#'\n#' This function filters out genes based on their express"
},
{
"path": "R/GatherGraphEdge.R",
"chars": 1839,
"preview": "#' Gather graph edge from data frame\n#' Please note that this function is from the 'ggraph' package and has not been alt"
},
{
"path": "R/GatherGraphNode.R",
"chars": 2167,
"preview": "#' Gather graph nodes from a data frame\n#' Please note that this function is from the 'ggraph' package and has not been "
},
{
"path": "R/GeneColor.R",
"chars": 2264,
"preview": "#' Merge Genes with Color Information Based on Up/Down Regulation\n#'\n#' This function merges selected genes with differe"
},
{
"path": "R/GeneHighlights.R",
"chars": 2616,
"preview": "#' Add gene highlights to a ggtree object\n#'\n#' This function enhances a `ggtree` plot by adding highlights for specific"
},
{
"path": "R/GeneMapPathway.R",
"chars": 4725,
"preview": "#' Create Pathway-Gene Mapping Data Frame\n#'\n#' This function takes multiple data frames and pathway IDs, merging them i"
},
{
"path": "R/GetGtexExp.R",
"chars": 5425,
"preview": "#' Get GTEx Expression Data for Specific Organ\n#'\n#' This function retrieves gene expression data from the GTEx project "
},
{
"path": "R/GetTcgaExp.R",
"chars": 6664,
"preview": "#' TCGA Expression Data Processing\n#'\n#' This function processes expression data and phenotype information, separates tu"
},
{
"path": "R/HighlightByNode.R",
"chars": 3032,
"preview": "#' Highlight Nodes in a Phylogenetic Tree with Custom Fill Colors and Transparency\n#'\n#' This function adds highlights t"
},
{
"path": "R/HighlightGenes.R",
"chars": 3001,
"preview": "#' Add Highlights for Genes on a Phylogenetic Tree\n#'\n#' This function adds highlights for specified genes on a phylogen"
},
{
"path": "R/LimmaAnalyze.R",
"chars": 4808,
"preview": "#' Differential Gene Expression Analysis using limma and voom\n#'\n#' This function performs differential gene expression "
},
{
"path": "R/LogTransform.R",
"chars": 1314,
"preview": "#' Log transformation decision and application on data\n#'\n#' This function evaluates the need for a log transformation b"
},
{
"path": "R/MergeDensityFoldchange.R",
"chars": 4167,
"preview": "#' Create high-density region plot with optional points, density rugs, and contours\n#'\n#' This function creates a high-d"
},
{
"path": "R/MergeGtexTcga.R",
"chars": 2985,
"preview": "#' Merge gene expression data from GTEx and TCGA datasets\n#'\n#' This function merges gene expression data obtained from "
},
{
"path": "R/MergeIDPosition.R",
"chars": 2607,
"preview": "#' Merge Data Frames by Common Row Names with Additional Columns\n#'\n#' This function merges a list of data frames based "
},
{
"path": "R/MergeMethodColor.R",
"chars": 2065,
"preview": "#' Merge Data Frames with Specific Method and Color Columns\n#'\n#' This function takes a list of data frames, a method na"
},
{
"path": "R/NewGgraph.R",
"chars": 3674,
"preview": "#' Generate a graphical representation of pathway gene maps\n#'\n#' This function merges multiple gene-pathway related dat"
},
{
"path": "R/PathwayCount.R",
"chars": 1915,
"preview": "#' Count Genes Present in Pathways Above a Threshold\n#'\n#' This function filters pathways that meet a count threshold an"
},
{
"path": "R/PathwayDescription.R",
"chars": 2011,
"preview": "#' Describe Genes Present in Selected Pathways\n#'\n#' This function identifies genes present in selected pathways based o"
},
{
"path": "R/PrepDeseq2.R",
"chars": 1892,
"preview": "#' Prepare DESeq2 data for plotting\n#'\n#' This function reads a DESeq2 DEG data frame from an RDS file, filters it,\n#' a"
},
{
"path": "R/PrepEdgeR.R",
"chars": 1838,
"preview": "#' Prepare edgeR DEG data for plotting\n#'\n#' This function reads an edgeR DEG data frame from an RDS file, filters it us"
},
{
"path": "R/PrepLimma.R",
"chars": 1818,
"preview": "#' Prepare limma-voom DEG data for plotting\n#'\n#' This function reads a limma-voom DEG data frame from an RDS file, filt"
},
{
"path": "R/PrepWilcoxon.R",
"chars": 2050,
"preview": "#' Prepare Wilcoxon DEG data for plotting\n#'\n#' This function reads a Wilcoxon DEG data frame from an RDS file, filters "
},
{
"path": "R/ProcessHeatdata.R",
"chars": 3958,
"preview": "#' Process Heatmap Data with Various Selection Options\n#'\n#' This function processes heatmap data (`heatdata`) based on "
},
{
"path": "R/SeekGtexOrgan.R",
"chars": 1435,
"preview": "#' Load and Process GTEX Phenotype Data to Retrieve Primary Site Counts\n#'\n#' This function reads the GTEX phenotype dat"
},
{
"path": "R/WilcoxonAnalyze.R",
"chars": 4920,
"preview": "#' Differential Gene Expression Analysis Using Wilcoxon Rank-Sum Test\n#'\n#' This function performs differential gene exp"
},
{
"path": "R/data.R",
"chars": 2200,
"preview": "# The annotation file for the DATA in the data directory can all be commented in this file. Although LazyData: true is m"
},
{
"path": "R/utils-pipe.R",
"chars": 363,
"preview": "#' Pipe operator\n#'\n#' See \\code{magrittr::\\link[magrittr:pipe]{\\%>\\%}} for details.\n#'\n#' @name %>%\n#' @rdname pipe\n#' "
},
{
"path": "R/zzz.R",
"chars": 397,
"preview": ".onAttach <- function(libname, pkgname) {\n # Display ASCII art\n ascii_art_path <- system.file(\"extdata\", \"ascii_art.tx"
},
{
"path": "README.md",
"chars": 1830,
"preview": "# TransProR\n[](https://cran.r-project.org/package=TransPro"
},
{
"path": "TransProR.Rproj",
"chars": 356,
"preview": "Version: 1.0\n\nRestoreWorkspace: Default\nSaveWorkspace: Default\nAlwaysSaveHistory: Default\n\nEnableCodeIndexing: Yes\nUseSp"
},
{
"path": "data-raw/MAKEDATA.R",
"chars": 396,
"preview": "# Data that the package will use, and is explorable by users:\nusethis::use_data(x) # This will create a file at data/x.R"
},
{
"path": "dev/build.R",
"chars": 506,
"preview": "# pipe\nusethis::use_pipe(export = TRUE)\n\n## code to prepare MAKEDATA.R\nusethis::use_data_raw(\"MAKEDATA\")\n\ndevtools::load"
},
{
"path": "dev/dev.R",
"chars": 1192,
"preview": "# dependency package\nusethis::use_package(\"stats\")\nusethis::use_package(\"utils\")\nusethis::use_package(\"DESeq2\")\nusethis:"
},
{
"path": "inst/extdata/GTEX_phenotype_test",
"chars": 3319,
"preview": "Sample\tbody_site_detail (SMTSD)\t_primary_site\t_gender\t_patient\t_cohort\nGTEX-1117F-0226-SM-5GZZ7\tAdipose - Subcutaneous\tA"
},
{
"path": "inst/extdata/TCGA-SKCM.GDC_phenotype_test.tsv",
"chars": 45277,
"preview": "submitter_id.samples\tage_at_initial_pathologic_diagnosis\tbatch_number\tbcr\tbcr_followup_barcode\tbcr_followup_uuid\tsubmitt"
},
{
"path": "inst/extdata/TCGA-SKCM.htseq_counts_test.tsv",
"chars": 29128,
"preview": "Ensembl_ID\tTCGA-EE-A2GJ-06A\tTCGA-EE-A2GI-06A\tTCGA-WE-A8ZM-06A\tTCGA-DA-A1IA-06A\tTCGA-D3-A51H-06A\tTCGA-XV-A9VZ-01A\tTCGA-FS"
},
{
"path": "inst/extdata/TCGA_gencode.v22.annotation.gene.probeMap_test",
"chars": 198,
"preview": "id\tgene\tchrom\tchromStart\tchromEnd\tstrand\nENSG00000000005.5\tTNMD\tchrX\t100584802\t100599885\t+\nENSG00000000457.12\tSCYL3\tchr1"
},
{
"path": "inst/extdata/all_count_exp_test.csv",
"chars": 117993,
"preview": ",TCGA-D9-A4Z2-01A,TCGA-ER-A2NH-06A,TCGA-BF-A5EO-01A,TCGA-D9-A6EA-06A,TCGA-D9-A4Z3-01A,TCGA-GN-A26A-06A,TCGA-D3-A3BZ-06A,"
},
{
"path": "inst/extdata/ascii_art.txt",
"chars": 1444,
"preview": " _______ _____ _ _ _____ _____ _____ ____ _____ \n |__ __| __ \\ /\\ | \\ |"
},
{
"path": "inst/extdata/gtex_gene_expected_count_test",
"chars": 471887,
"preview": "sample\tGTEX-S4Q7-0003-SM-3NM8M\tGTEX-QV31-1626-SM-2S1QC\tGTEX-13QIC-0011-R1a-SM-5O9CJ\tGTEX-ZPCL-0126-SM-4WWC8\tGTEX-S33H-12"
},
{
"path": "inst/extdata/gtex_probeMap_gencode.v23.annotation.gene.probemap_test",
"chars": 319,
"preview": "id\tgene\tchrom\tchromStart\tchromEnd\tstrand\nENSG00000242268.2\tRP11-368I23.2\tchr3\t168903366\t168921996\t+\nENSG00000259041.1\tRP"
},
{
"path": "man/Combat_Normal.Rd",
"chars": 3339,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/CombatNormal.R\n\\name{Combat_Normal}\n\\alias"
},
{
"path": "man/Contrast_Venn.Rd",
"chars": 1386,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/Contrast_Venn.R\n\\name{Contrast_Venn}\n\\alia"
},
{
"path": "man/DESeq2_analyze.Rd",
"chars": 2163,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/DESeq2Analyze.R\n\\name{DESeq2_analyze}\n\\ali"
},
{
"path": "man/Wilcoxon_analyze.Rd",
"chars": 2036,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/WilcoxonAnalyze.R\n\\name{Wilcoxon_analyze}\n"
},
{
"path": "man/add_boxplot.Rd",
"chars": 2050,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/CircosFruits.R\n\\name{add_boxplot}\n\\alias{a"
},
{
"path": "man/add_new_tile_layer.Rd",
"chars": 2563,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/CircosFruits.R\n\\name{add_new_tile_layer}\n\\"
},
{
"path": "man/adjust_alpha_scale.Rd",
"chars": 1274,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/AdjustAlphaScale.R\n\\name{adjust_alpha_scal"
},
{
"path": "man/adjust_color_tone.Rd",
"chars": 1119,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichmentSpiralize.R\n\\name{adjust_color_t"
},
{
"path": "man/adjust_export_pathway.Rd",
"chars": 1295,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichCirclize.R\n\\name{adjust_export_pathw"
},
{
"path": "man/all_degs_venn.Rd",
"chars": 1496,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{all_degs_venn}"
},
{
"path": "man/circos_fruits.Rd",
"chars": 3814,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/CircosFruits.R\n\\name{circos_fruits}\n\\alias"
},
{
"path": "man/combat_tumor.Rd",
"chars": 2288,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/CombatTumor.R\n\\name{combat_tumor}\n\\alias{c"
},
{
"path": "man/compare_merge.Rd",
"chars": 1919,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/CompareMerge.R\n\\name{compare_merge}\n\\alias"
},
{
"path": "man/create_base_plot.Rd",
"chars": 1978,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/CircosFruits.R\n\\name{create_base_plot}\n\\al"
},
{
"path": "man/deg_filter.Rd",
"chars": 679,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/FourDegsVenn.R\n\\name{deg_filter}\n\\alias{de"
},
{
"path": "man/drawLegends.Rd",
"chars": 2282,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichCirclize.R\n\\name{drawLegends}\n\\alias"
},
{
"path": "man/edgeR_analyze.Rd",
"chars": 1946,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EdgeRAnalyze.R\n\\name{edgeR_analyze}\n\\alias"
},
{
"path": "man/enrich_circo_bar.Rd",
"chars": 3192,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichCircoBar.R\n\\name{enrich_circo_bar}\n\\"
},
{
"path": "man/enrich_polar_bubble.Rd",
"chars": 1310,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichPolarBubble.R\n\\name{enrich_polar_bub"
},
{
"path": "man/enrichment_circlize.Rd",
"chars": 2533,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichCirclize.R\n\\name{enrichment_circlize"
},
{
"path": "man/enrichment_spiral_plots.Rd",
"chars": 1374,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichmentSpiralize.R\n\\name{enrichment_spi"
},
{
"path": "man/extract_descriptions_counts.Rd",
"chars": 2114,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichCircoBar.R\n\\name{extract_description"
},
{
"path": "man/extract_ntop_pathways.Rd",
"chars": 1667,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichmentSpiralize.R\n\\name{extract_ntop_p"
},
{
"path": "man/extract_positive_pathways.Rd",
"chars": 1750,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichmentSpiralize.R\n\\name{extract_positi"
},
{
"path": "man/facet_density_foldchange.Rd",
"chars": 2492,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/FacetDensityFoldchange.R\n\\name{facet_densi"
},
{
"path": "man/filter_diff_genes.Rd",
"chars": 1474,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/FilterDiffGenes.R\n\\name{filter_diff_genes}"
},
{
"path": "man/four_degs_venn.Rd",
"chars": 621,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/FourDegsVenn.R\n\\name{four_degs_venn}\n\\alia"
},
{
"path": "man/gather_graph_edge.Rd",
"chars": 1334,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GatherGraphEdge.R\n\\name{gather_graph_edge}"
},
{
"path": "man/gather_graph_node.Rd",
"chars": 1475,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GatherGraphNode.R\n\\name{gather_graph_node}"
},
{
"path": "man/gene_color.Rd",
"chars": 1389,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GeneColor.R\n\\name{gene_color}\n\\alias{gene_"
},
{
"path": "man/gene_highlights.Rd",
"chars": 1089,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GeneHighlights.R\n\\name{gene_highlights}\n\\a"
},
{
"path": "man/gene_map_pathway.Rd",
"chars": 3464,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GeneMapPathway.R\n\\name{gene_map_pathway}\n\\"
},
{
"path": "man/get_gtex_exp.Rd",
"chars": 2377,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GetGtexExp.R\n\\name{get_gtex_exp}\n\\alias{ge"
},
{
"path": "man/get_tcga_exp.Rd",
"chars": 2535,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/GetTcgaExp.R\n\\name{get_tcga_exp}\n\\alias{ge"
},
{
"path": "man/gtree.Rd",
"chars": 594,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{gtree}\n\\alias{"
},
{
"path": "man/highlight_by_node.Rd",
"chars": 1972,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/HighlightByNode.R\n\\name{highlight_by_node}"
},
{
"path": "man/highlight_genes.Rd",
"chars": 1390,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/HighlightGenes.R\n\\name{highlight_genes}\n\\a"
},
{
"path": "man/limma_analyze.Rd",
"chars": 2015,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/LimmaAnalyze.R\n\\name{limma_analyze}\n\\alias"
},
{
"path": "man/log_transform.Rd",
"chars": 847,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/LogTransform.R\n\\name{log_transform}\n\\alias"
},
{
"path": "man/merge_density_foldchange.Rd",
"chars": 2395,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/MergeDensityFoldchange.R\n\\name{merge_densi"
},
{
"path": "man/merge_gtex_tcga.Rd",
"chars": 2349,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/MergeGtexTcga.R\n\\name{merge_gtex_tcga}\n\\al"
},
{
"path": "man/merge_id_position.Rd",
"chars": 1140,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/MergeIDPosition.R\n\\name{merge_id_position}"
},
{
"path": "man/merge_method_color.Rd",
"chars": 1296,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/MergeMethodColor.R\n\\name{merge_method_colo"
},
{
"path": "man/new_ggraph.Rd",
"chars": 1327,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/NewGgraph.R\n\\name{new_ggraph}\n\\alias{new_g"
},
{
"path": "man/pathway_count.Rd",
"chars": 1223,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/PathwayCount.R\n\\name{pathway_count}\n\\alias"
},
{
"path": "man/pathway_description.Rd",
"chars": 1343,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/PathwayDescription.R\n\\name{pathway_descrip"
},
{
"path": "man/pipe.Rd",
"chars": 427,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/utils-pipe.R\n\\name{\\%>\\%}\n\\alias{\\%>\\%}\n\\t"
},
{
"path": "man/prep_deseq2.Rd",
"chars": 926,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/PrepDeseq2.R\n\\name{prep_deseq2}\n\\alias{pre"
},
{
"path": "man/prep_edgeR.Rd",
"chars": 945,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/PrepEdgeR.R\n\\name{prep_edgeR}\n\\alias{prep_"
},
{
"path": "man/prep_limma.Rd",
"chars": 971,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/PrepLimma.R\n\\name{prep_limma}\n\\alias{prep_"
},
{
"path": "man/prep_wilcoxon.Rd",
"chars": 1085,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/PrepWilcoxon.R\n\\name{prep_wilcoxon}\n\\alias"
},
{
"path": "man/process_heatdata.Rd",
"chars": 2081,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/ProcessHeatdata.R\n\\name{process_heatdata}\n"
},
{
"path": "man/seek_gtex_organ.Rd",
"chars": 910,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/SeekGtexOrgan.R\n\\name{seek_gtex_organ}\n\\al"
},
{
"path": "man/selectPathways.Rd",
"chars": 1132,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichCirclize.R\n\\name{selectPathways}\n\\al"
},
{
"path": "man/spiral_newrle.Rd",
"chars": 1641,
"preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/EnrichmentSpiralize.R\n\\name{spiral_newrle}"
},
{
"path": "vignettes/TransProR.Rmd",
"chars": 1113,
"preview": "---\ntitle: \"TransProR: Analysis and visualization of transcriptomic data are currently in progress. Future directions in"
}
]
// ... and 17 more files (download for full content)
About this extraction
This page contains the full source code of the SSSYDYSSS/TransProR GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 138 files (928.3 KB), approximately 511.0k tokens. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.