Repository: stemangiola/tidyseurat Branch: master Commit: 5a96f9573251 Files: 94 Total size: 278.9 KB Directory structure: gitextract_po_cxl1f/ ├── .Rbuildignore ├── .coveralls.yml ├── .github/ │ ├── .gitignore │ ├── ISSUE_TEMPLATE/ │ │ └── bug_report.md │ └── workflows/ │ └── rworkflows.yml ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── NAMESPACE ├── R/ │ ├── attach.R │ ├── data.R │ ├── dplyr_methods.R │ ├── ggplot2_methods.R │ ├── methods.R │ ├── methods_DEPRECATED.R │ ├── pillar_utilities.R │ ├── plotly_methods.R │ ├── print_method.R │ ├── tibble_methods.R │ ├── tidyr_methods.R │ ├── utilities.R │ ├── utils-pipe.R │ └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── data/ │ ├── cell_type_df.rda │ └── pbmc_small_nested_interactions.rda ├── dev/ │ ├── code_comparison.Rmd │ ├── plot_seurat_structure.R │ ├── test_scrna_for_tidyseurat.rdata │ ├── use_cases_BioCAsia2021.R │ ├── workflow_article.R │ ├── workflow_create_integrated_pbmc.R │ └── workflow_figures.R ├── inst/ │ ├── CITATION │ └── NEWS.rd ├── man/ │ ├── add_class.Rd │ ├── aggregate_cells.Rd │ ├── arrange.Rd │ ├── as_tibble.Rd │ ├── bind_rows.Rd │ ├── cell_type_df.Rd │ ├── count.Rd │ ├── distinct.Rd │ ├── drop_class.Rd │ ├── extract.Rd │ ├── filter.Rd │ ├── formatting.Rd │ ├── fragments/ │ │ └── intro.Rmd │ ├── full_join.Rd │ ├── get_abundance_sc_long.Rd │ ├── get_abundance_sc_wide.Rd │ ├── ggplot.Rd │ ├── glimpse.Rd │ ├── group_by.Rd │ ├── group_split.Rd │ ├── inner_join.Rd │ ├── join_features.Rd │ ├── join_transcripts.Rd │ ├── left_join.Rd │ ├── mutate.Rd │ ├── nest.Rd │ ├── pbmc_small_nested_interactions.Rd │ ├── pipe.Rd │ ├── pivot_longer.Rd │ ├── plotly.Rd │ ├── pull.Rd │ ├── quo_names.Rd │ ├── rename.Rd │ ├── return_arguments_of.Rd │ ├── right_join.Rd │ ├── rowwise.Rd │ ├── sample_n.Rd │ ├── select.Rd │ ├── separate.Rd │ ├── slice.Rd │ ├── summarise.Rd │ ├── tbl_format_header.Rd │ ├── tidy.Rd │ ├── unite.Rd │ └── unnest.Rd ├── tests/ │ ├── testthat/ │ │ ├── test-dplyr.R │ │ 
├── test-ggplotly_methods.R │ │ ├── test-methods.R │ │ ├── test-pillar.R │ │ ├── test-print.R │ │ ├── test-tidyr.R │ │ └── test-utilities.R │ └── testthat.R └── vignettes/ ├── figures_article.Rmd ├── introduction.Rmd └── tidyseurat.bib ================================================ FILE CONTENTS ================================================ ================================================ FILE: .Rbuildignore ================================================ ^.*\.Rproj$ ^\.Rproj\.user$ ^vignettes/introduction_cache$ ^doc$ ^Meta$ ^codecov\.yml$ ^dev$ ^README_cache$ ^README_files$ README.Rmd ^.git$ .coveralls.yml .travis.yml ^.github$ ^\.github$ _pkgdown.yml ^tidyseurat\.Rproj$ ================================================ FILE: .coveralls.yml ================================================ service_name: travis-pro repo_token: O4NscPehU4qrWznFtQRiyJJBIOyRgPzsB ================================================ FILE: .github/.gitignore ================================================ *.html ================================================ FILE: .github/ISSUE_TEMPLATE/bug_report.md ================================================ --- name: Bug report about: Create a report to help us improve title: '' labels: '' assignees: '' --- Thanks for submitting an issue. Please add the following information to the issue 1. Describe the issue/bug 2. Print out the input dataset immediately before the bug occurs 3. Paste the code immediately leading to the bug 4. Print out of the output, if any 5. Print out of the complete error/warning message, if any 6. sessionInfo() Thanks! 
================================================ FILE: .github/workflows/rworkflows.yml ================================================ name: rworkflows 'on': push: branches: - master - main - devel - RELEASE_** pull_request: branches: - master - main - devel - RELEASE_** jobs: rworkflows: permissions: write-all runs-on: ${{ matrix.config.os }} name: ${{ matrix.config.os }} (${{ matrix.config.r }}) container: ${{ matrix.config.cont }} strategy: fail-fast: ${{ false }} matrix: config: - os: ubuntu-latest bioc: devel r: auto cont: ghcr.io/bioconductor/bioconductor_docker:devel rspm: ~ - os: macOS-latest bioc: release r: auto cont: ~ rspm: ~ - os: windows-latest bioc: release r: auto cont: ~ rspm: ~ steps: - uses: neurogenomics/rworkflows@master with: run_bioccheck: ${{ false }} run_rcmdcheck: ${{ true }} as_cran: ${{ true }} run_vignettes: ${{ true }} has_testthat: ${{ true }} run_covr: ${{ true }} run_pkgdown: ${{ true }} has_runit: ${{ false }} has_latex: ${{ false }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run_docker: ${{ false }} DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }} runner_os: ${{ runner.os }} cache_version: cache-v1 docker_registry: ghcr.io ================================================ FILE: .gitignore ================================================ .Rproj.user .Rhistory .RData .Ruserdata tidyseurat.Rproj README_cache/* vignettes/introduction_cache* tidyseurat.Rproj Meta doc dev/*csv dev/*rds dev/*rda dev/*pdf dev/dplyr-master/* tidyseurat.Rproj /doc/ /Meta/ ..Rcheck/* ================================================ FILE: .travis.yml ================================================ # Adapted from https://github.com/hadley/testthat/blob/master/.travis.yml # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r language: r cache: packages r: - bioc-release - bioc-devel env: - R_QPDF=true r_github_packages: - r-lib/covr after_success: - tar -C .. 
-xf $PKG_TARBALL - xvfb-run Rscript -e 'covr::codecov(type=c("tests", "vignettes", "examples"))' ================================================ FILE: DESCRIPTION ================================================ Type: Package Package: tidyseurat Title: Brings Seurat to the Tidyverse Version: 0.8.9 Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com", role = c("aut", "cre")), person("Maria", "Doyle", email = "Maria.Doyle@petermac.org", role = c("ctb")) ) Description: It creates an invisible layer that allow to see the 'Seurat' object as tibble and interact seamlessly with the tidyverse. License: GPL-3 Depends: R (>= 4.1.0), ttservice (>= 0.3.8), SeuratObject Imports: Seurat (>= 4.3.0), tibble, dplyr (>= 1.1.4), magrittr, tidyr (>= 1.2.0), ggplot2, rlang (>= 1.0.0), purrr, lifecycle, methods, plotly, tidyselect, utils, vctrs, pillar, stringr, cli, fansi, Matrix, generics Suggests: testthat, knitr, GGally, markdown, rbibutils VignetteBuilder: knitr RdMacros: lifecycle Biarch: true biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics Encoding: UTF-8 LazyData: true RoxygenNote: 7.3.3 URL: https://github.com/stemangiola/tidyseurat, https://stemangiola.github.io/tidyseurat/ BugReports: https://github.com/stemangiola/tidyseurat/issues ================================================ FILE: NAMESPACE ================================================ # Generated by roxygen2: do not edit by hand S3method(add_count,Seurat) S3method(add_count,default) S3method(arrange,Seurat) S3method(as_tibble,Seurat) S3method(bind_cols,Seurat) S3method(bind_rows,Seurat) S3method(count,Seurat) S3method(distinct,Seurat) S3method(extract,Seurat) S3method(filter,Seurat) S3method(full_join,Seurat) S3method(ggplot,Seurat) S3method(glimpse,tidyseurat) S3method(group_by,Seurat) S3method(group_split,Seurat) S3method(inner_join,Seurat) 
S3method(join_transcripts,Seurat) S3method(join_transcripts,default) S3method(left_join,Seurat) S3method(mutate,Seurat) S3method(nest,Seurat) S3method(pivot_longer,Seurat) S3method(plot_ly,Seurat) S3method(plot_ly,tbl_df) S3method(print,Seurat) S3method(pull,Seurat) S3method(rename,Seurat) S3method(right_join,Seurat) S3method(rowwise,Seurat) S3method(sample_frac,Seurat) S3method(sample_n,Seurat) S3method(select,Seurat) S3method(separate,Seurat) S3method(slice,Seurat) S3method(slice_head,Seurat) S3method(slice_max,Seurat) S3method(slice_min,Seurat) S3method(slice_sample,Seurat) S3method(slice_tail,Seurat) S3method(summarise,Seurat) S3method(summarize,Seurat) S3method(tbl_format_header,tidySeurat) S3method(tidy,Seurat) S3method(unite,Seurat) S3method(unnest,tidyseurat_nested) export("%>%") export(add_count) export(get_abundance_sc_long) export(get_abundance_sc_wide) export(join_transcripts) export(plot_ly) export(unnest_seurat) exportMethods(join_features) importFrom(Matrix,rowSums) importFrom(Seurat,Assays) importFrom(Seurat,DietSeurat) importFrom(Seurat,GetAssayData) importFrom(Seurat,SplitObject) importFrom(Seurat,VariableFeatures) importFrom(SeuratObject,"DefaultAssay<-") importFrom(SeuratObject,DefaultAssay) importFrom(dplyr,arrange) importFrom(dplyr,contains) importFrom(dplyr,count) importFrom(dplyr,distinct) importFrom(dplyr,distinct_at) importFrom(dplyr,everything) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) importFrom(dplyr,group_by_drop_default) importFrom(dplyr,group_rows) importFrom(dplyr,group_split) importFrom(dplyr,inner_join) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,pull) importFrom(dplyr,rename) importFrom(dplyr,right_join) importFrom(dplyr,rowwise) importFrom(dplyr,sample_frac) importFrom(dplyr,sample_n) importFrom(dplyr,select) importFrom(dplyr,select_if) importFrom(dplyr,slice) importFrom(dplyr,slice_head) importFrom(dplyr,slice_max) importFrom(dplyr,slice_min) 
importFrom(dplyr,slice_sample) importFrom(dplyr,slice_tail) importFrom(dplyr,summarise) importFrom(dplyr,summarize) importFrom(dplyr,vars) importFrom(fansi,strwrap_ctl) importFrom(generics,tidy) importFrom(ggplot2,aes) importFrom(ggplot2,ggplot) importFrom(lifecycle,deprecate_warn) importFrom(magrittr,"%$%") importFrom(magrittr,"%>%") importFrom(magrittr,equals) importFrom(methods,.hasSlot) importFrom(methods,getMethod) importFrom(methods,is) importFrom(pillar,align) importFrom(pillar,get_extent) importFrom(pillar,style_subtle) importFrom(pillar,tbl_format_header) importFrom(plotly,plot_ly) importFrom(purrr,imap) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_chr) importFrom(purrr,map_int) importFrom(purrr,reduce) importFrom(purrr,when) importFrom(rlang,":=") importFrom(rlang,check_dots_used) importFrom(rlang,dots_values) importFrom(rlang,enexpr) importFrom(rlang,enquo) importFrom(rlang,enquos) importFrom(rlang,expr) importFrom(rlang,flatten_if) importFrom(rlang,is_spliced) importFrom(rlang,names2) importFrom(rlang,quo_name) importFrom(rlang,quo_squash) importFrom(rlang,sym) importFrom(stats,setNames) importFrom(stringr,regex) importFrom(stringr,str_detect) importFrom(tibble,as_tibble) importFrom(tibble,column_to_rownames) importFrom(tibble,enframe) importFrom(tibble,glimpse) importFrom(tibble,rowid_to_column) importFrom(tidyr,extract) importFrom(tidyr,nest) importFrom(tidyr,pivot_longer) importFrom(tidyr,separate) importFrom(tidyr,spread) importFrom(tidyr,unite) importFrom(tidyr,unnest) importFrom(tidyselect,eval_select) importFrom(ttservice,aggregate_cells) importFrom(ttservice,bind_cols) importFrom(ttservice,bind_rows) importFrom(ttservice,join_features) importFrom(utils,packageDescription) importFrom(utils,tail) importFrom(vctrs,new_data_frame) ================================================ FILE: R/attach.R ================================================ core <- c("dplyr", "tidyr", "ttservice", "ggplot2") core_unloaded <- function() { 
search <- paste0("package:", core) core[!search %in% search()] } # Attach the package from the same library it was loaded from before. # [source: https://github.com/tidy-biology/tidyverse/issues/171] same_library <- function(pkg) { loc <- if (pkg %in% loadedNamespaces()) dirname(getNamespaceInfo(pkg, "path")) library(pkg, lib.loc=loc, character.only=TRUE, warn.conflicts=FALSE) } tidyverse_attach <- function() { to_load <- core_unloaded() suppressPackageStartupMessages( lapply(to_load, same_library)) invisible(to_load) } ================================================ FILE: R/data.R ================================================ #' Cell types of 80 PBMC single cells #' #' A dataset containing the barcodes and cell types of 80 PBMC single cells. #' #' @format A tibble containing 80 rows and 2 columns. #' Cells are a subsample of the Peripheral Blood Mononuclear Cells (PBMC) #' dataset of 2,700 single cell. Cell types were identified with SingleR. #' \describe{ #' \item{cell}{cell identifier, barcode} #' \item{first.labels}{cell type} #' } #' @source \url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html} #' @usage data(cell_type_df) #' @return `tibble` "cell_type_df" #' Intercellular ligand-receptor interactions for #' 38 ligands from a single cell RNA-seq cluster. #' #' A dataset containing ligand-receptor interactions within a sample. #' There are 38 ligands from a single cell cluster versus 35 receptors #' in 6 other clusters. #' #' @format A `tibble` containing 100 rows and 9 columns. #' Cells are a subsample of the PBMC dataset of 2,700 single cells. #' Cell interactions were identified with `SingleCellSignalR`. 
#' \describe{ #' \item{sample}{sample identifier} #' \item{ligand}{cluster and ligand identifier} #' \item{receptor}{cluster and receptor identifier} #' \item{ligand.name}{ligand name} #' \item{receptor.name}{receptor name} #' \item{origin}{cluster containing ligand} #' \item{destination}{cluster containing receptor} #' \item{interaction.type}{type of interation, paracrine or autocrine} #' \item{LRscore}{interaction score} #' } #' @source \url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html} #' @usage data(pbmc_small_nested_interactions) #' @return `tibble` "pbmc_small_nested_interactions" ================================================ FILE: R/dplyr_methods.R ================================================ #' @name arrange #' @rdname arrange #' @inherit dplyr::arrange #' @family single table verbs #' #' @examples #' data(pbmc_small) #' pbmc_small |> #' arrange(nFeature_RNA) #' #' @importFrom tibble as_tibble #' @importFrom dplyr arrange #' @export arrange.Seurat <- function(.data, ..., .by_group=FALSE) { # DEPRECATE deprecate_warn( when="0.7.5", what="arrange()", details="tidyseurat says: arrange() is temporarly deprected as it is not clear that Seurat allows reordering of cells." 
) # .cell_ordered <- # .data %>% # as_tibble() %>% # dplyr::arrange( ..., .by_group=.by_group ) %>% # pull(!!c_(.data)$symbol) # # .data[,.cell_ordered] .data } #' @name bind_rows #' @rdname bind_rows #' @inherit ttservice::bind_rows #' #' @examples #' data(pbmc_small) #' tt <- pbmc_small #' ttservice::bind_rows(tt, tt) #' #' tt_bind <- tt |> select(nCount_RNA ,nFeature_RNA) #' tt |> ttservice::bind_cols(tt_bind) #' #' @importFrom rlang dots_values #' @importFrom rlang flatten_if #' @importFrom rlang is_spliced #' @importFrom ttservice bind_rows #' @export bind_rows.Seurat <- function(..., .id=NULL, add.cell.ids=NULL) { tts <- flatten_if(dots_values(...), is_spliced) # Strange error for Seurat merge # GetResidualSCTModel # close to a line as such # slot(object=object[[assay]], name="SCTModel.list") # So I have to delete any sample of size 1 if I have calculated SCT # if() # GetAssayData(object, layer='SCTModel.list', assay="SCT") %>% # map(~ .x@cell.attributes %>% nrow) # Check if cell with same name merge(tts[[1]], y=tts[[2]], add.cell.ids=add.cell.ids) } #' @importFrom rlang flatten_if #' @importFrom rlang is_spliced #' @importFrom rlang dots_values #' @importFrom ttservice bind_cols bind_cols_ <- function(..., .id=NULL){ tts <- flatten_if(dots_values(...), is_spliced) tts[[1]]@meta.data <- bind_cols(tts[[1]][[]], tts[[2]], .id=.id) tts[[1]] } #' @rdname bind_rows #' @aliases bind_cols #' @export bind_cols.Seurat <- bind_cols_ #' @name distinct #' @rdname distinct #' @inherit dplyr::distinct #' #' @examples #' data("pbmc_small") #' pbmc_small |> distinct(groups) #' #' @importFrom dplyr distinct #' @export distinct.Seurat <- function (.data, ..., .keep_all=FALSE) { message(data_frame_returned_message) distinct_columns <- (enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist) # Deprecation of special column names if(is_sample_feature_deprecated_used(.data, distinct_columns)){ .data= ping_old_special_column_into_metadata(.data) } .data %>% as_tibble() 
%>% dplyr::distinct(..., .keep_all=.keep_all) } #' @name filter #' @rdname filter #' @inherit dplyr::filter #' #' @examples #' data("pbmc_small") #' pbmc_small |> filter(groups == "g1") #' #' # Learn more in ?dplyr_eval #' #' @importFrom purrr map #' @importFrom dplyr filter #' @export filter.Seurat <- function (.data, ..., .preserve=FALSE) { # Deprecation of special column names if(is_sample_feature_deprecated_used( .data, (enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist) )){ .data= ping_old_special_column_into_metadata(.data) } new_meta <- .data %>% as_tibble() %>% dplyr::filter( ..., .preserve=.preserve) %>% as_meta_data(.data) # Error if size == 0 if(nrow(new_meta) == 0) stop("tidyseurat says: the resulting data", " container is empty. Seurat does not allow for empty containers.") new_obj <- subset(.data, cells=rownames(new_meta)) %>% # Clean empty slots clean_seurat_object() new_obj } #' @name group_by #' @rdname group_by #' @inherit dplyr::group_by #' #' @examples #' data("pbmc_small") #' pbmc_small |> group_by(groups) #' #' @importFrom dplyr group_by_drop_default #' @importFrom dplyr group_by #' @export group_by.Seurat <- function (.data, ..., .add=FALSE, .drop=group_by_drop_default(.data)) { message(data_frame_returned_message) # Deprecation of special column names if(is_sample_feature_deprecated_used( .data, (enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist) )){ .data <- ping_old_special_column_into_metadata(.data) } .data %>% as_tibble() %>% dplyr::group_by( ..., .add=.add, .drop=.drop) } #' @name summarise #' @aliases summarize #' @inherit dplyr::summarise #' @family single table verbs #' #' @examples #' data(pbmc_small) #' pbmc_small |> summarise(mean(nCount_RNA)) #' #' @importFrom dplyr summarise #' @importFrom purrr map #' @export summarise.Seurat <- function (.data, ...) 
{ message(data_frame_returned_message) # Deprecation of special column names if(is_sample_feature_deprecated_used( .data, (enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist) )){ .data= ping_old_special_column_into_metadata(.data) } .data %>% as_tibble() %>% dplyr::summarise( ...) } #' @name summarise #' @rdname summarise #' @importFrom dplyr summarize #' @export summarize.Seurat <- summarise.Seurat #' @name mutate #' @rdname mutate #' @inherit dplyr::mutate #' @family single table verbs #' #' @examples #' data(pbmc_small) #' pbmc_small |> mutate(nFeature_RNA=1) #' #' @importFrom rlang enquos #' @importFrom dplyr mutate #' @importFrom purrr map #' @export mutate.Seurat <- function(.data, ...) { # Check that we are not modifying a key column cols <- enquos(...) %>% names # Deprecation of special column names .cols <- enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist() if (is_sample_feature_deprecated_used(.data, .cols)) { .data <- ping_old_special_column_into_metadata(.data) } .view_only_cols <- c( get_special_columns(.data), get_needed_columns(.data)) .test <- cols |> intersect(.view_only_cols) |> length() if (.test) { stop("tidyseurat says:", " you are trying to mutate a column that is view only", " ", paste(.view_only_cols, collapse=", "), " (it is not present in the colData).", " If you want to mutate a view-only column, make a copy", " (e.g. mutate(new_column=", cols[1], ")) and mutate that one.") } .data@meta.data <- .data %>% as_tibble %>% dplyr::mutate( ...) %>% as_meta_data(.data) .data } #' @name rename #' @rdname rename #' @inherit dplyr::rename #' @family single table verbs #' #' @examples #' data(pbmc_small) #' pbmc_small |> rename(s_score=nFeature_RNA) #' #' @importFrom Seurat DietSeurat #' @importFrom tidyselect eval_select #' @importFrom dplyr rename #' @export rename.Seurat <- function(.data, ...) 
{ # Check that we are not modifying a key column read_only_columns <- c( get_needed_columns(.data), get_special_columns(.data)) # Small df to be more efficient df <- DietSeurat(.data, features = rownames(.data)[1])[,1] |> suppressWarnings() |> as_tibble() # What columns we are going to create cols_from <- tidyselect::eval_select(expr(c(...)), df) |> names() # What are the columns before renaming original_columns <- df |> colnames() # What the column after renaming would be new_colums <- df |> rename(...) |> colnames() # What column you are impacting changed_columns <- original_columns |> setdiff(new_colums) # Check that you are not impacting any read-only columns if (any(changed_columns %in% read_only_columns)) { stop("tidyseurat says:", " you are trying to rename a column that is view only", " ", paste(changed_columns, collapse=", "), " (it is not present in the colData).", " If you want to rename a view-only column, make a copy", " (e.g., mutate(", cols_from[1], "=", changed_columns[1], ")).") } .data@meta.data <- dplyr::rename( .data[[]], ...) .data } #' @name rowwise #' @rdname rowwise #' @inherit dplyr::rowwise #' #' @examples #' # TODO #' #' @importFrom dplyr rowwise #' @export rowwise.Seurat <- function(data, ...) { message(data_frame_returned_message) data %>% as_tibble() %>% dplyr::rowwise(...) } #' @name left_join #' @rdname left_join #' @inherit dplyr::left_join #' #' @examples #' data(pbmc_small) #' tt <- pbmc_small #' tt |> left_join(tt |> #' distinct(groups) |> #' mutate(new_column=1:2)) #' #' @importFrom dplyr left_join #' @importFrom dplyr count #' @export left_join.Seurat <- function (x, y, by=NULL, copy=FALSE, suffix=c(".x", ".y"), ...) { # Deprecation of special column names .cols <- if (!is.null(by)) by else colnames(y) if (is_sample_feature_deprecated_used(x, .cols)) { x <- ping_old_special_column_into_metadata(x) } x %>% as_tibble() %>% dplyr::left_join( y, by=by, copy=copy, suffix=suffix, ...) 
%>% when( # If duplicated cells returns tibble count(., !!c_(x)$symbol) %>% filter(n>1) %>% nrow %>% gt(0) ~ { message(duplicated_cell_names) (.) }, # Otherwise return updated tidyseurat ~ { x@meta.data <- (.) %>% as_meta_data(x) x } ) } #' @name inner_join #' @rdname inner_join #' @inherit dplyr::inner_join #' #' @examples #' data(pbmc_small) #' tt <- pbmc_small #' tt |> inner_join(tt |> #' distinct(groups) |> #' mutate(new_column=1:2) |> #' slice(1)) #' #' @importFrom dplyr inner_join #' @importFrom dplyr pull #' @export inner_join.Seurat <- function (x, y, by=NULL, copy=FALSE, suffix=c(".x", ".y"), ...) { # Deprecation of special column names .cols <- if (!is.null(by)) by else colnames(y) if (is_sample_feature_deprecated_used(x, .cols)) { x <- ping_old_special_column_into_metadata(x) } x %>% as_tibble() %>% dplyr::inner_join( y, by=by, copy=copy, suffix=suffix, ...) %>% when( # If duplicated cells returns tibble count(., !!c_(x)$symbol) %>% filter(n>1) %>% nrow %>% gt(0) ~ { message(duplicated_cell_names) (.) }, # Otherwise return updated tidyseurat ~ { new_obj <- subset(x, cells= pull(., c_(x)$name)) new_obj@meta.data <- (.) %>% as_meta_data(new_obj) new_obj } ) } #' @name right_join #' @rdname right_join #' @inherit dplyr::right_join #' #' @examples #' data(pbmc_small) #' tt <- pbmc_small #' tt |> right_join(tt |> #' distinct(groups) |> #' mutate(new_column=1:2) |> #' slice(1)) #' #' @importFrom dplyr right_join #' @importFrom dplyr pull #' @export right_join.Seurat <- function (x, y, by=NULL, copy=FALSE, suffix=c(".x", ".y"), ...) { # Deprecation of special column names .cols <- if (!is.null(by)) by else colnames(y) if (is_sample_feature_deprecated_used(x, .cols)) { x <- ping_old_special_column_into_metadata(x) } x %>% as_tibble() %>% dplyr::right_join( y, by=by, copy=copy, suffix=suffix, ...) %>% when( # If duplicated cells returns tibble count(., !!c_(x)$symbol) %>% filter(n>1) %>% nrow %>% gt(0) ~ { message(duplicated_cell_names) (.) 
}, # Otherwise return updated tidyseurat ~ { new_obj <- subset(x, cells=(.) %>% pull(c_(x)$name)) new_obj@meta.data <- (.) %>% as_meta_data(new_obj) new_obj } ) } #' @name full_join #' @rdname full_join #' @inherit dplyr::full_join #' #' @examples #' data(pbmc_small) #' tt <- pbmc_small #' tt |> full_join(tibble::tibble(groups="g1", other=1:4)) #' #' @importFrom dplyr full_join #' @export full_join.Seurat <- function (x, y, by=NULL, copy=FALSE, suffix=c(".x", ".y"), ...) { # Deprecation of special column names .cols <- if (!is.null(by)) by else colnames(y) if (is_sample_feature_deprecated_used(x, .cols)) { x <- ping_old_special_column_into_metadata(x) } x %>% as_tibble() %>% dplyr::full_join( y, by=by, copy=copy, suffix=suffix, ...) %>% when( # If duplicated cells returns tibble count(., !!c_(x)$symbol) %>% filter(n>1) %>% nrow %>% gt(0) ~ { message(duplicated_cell_names) (.) }, # Otherwise return updated tidyseurat ~ { x@meta.data <- (.) %>% as_meta_data(x) x } ) } #' @name slice #' @rdname slice #' @aliases slice_head slice_tail #' slice_sample slice_min slice_max #' @inherit dplyr::slice #' @family single table verbs #' #' @examples #' data(pbmc_small) #' pbmc_small |> slice(1) #' #' # Slice group-wise using .by #' pbmc_small |> slice(1:2, .by=groups) #' #' @importFrom dplyr slice #' @importFrom tibble rowid_to_column #' @export slice.Seurat <- function (.data, ..., .by=NULL, .preserve=FALSE) { row_number___ <- NULL idx <- .data[[]] |> select(-everything(), {{ .by }}) |> rowid_to_column(var='row_number___') |> slice(..., .by={{ .by }}, .preserve=.preserve) |> pull(row_number___) if (length(idx) == 0) { stop("tidyseurat says: the resulting data container is empty.", " Seurat does not allow for empty containers.") } new_obj <- subset(.data, cells=colnames(.data)[idx]) new_obj } #' @name slice_sample #' @rdname slice #' @inherit dplyr::slice_sample #' @examples #' #' # slice_sample() allows you to random select with or without replacement #' pbmc_small |> 
slice_sample(n=5) #' #' # if using replacement, and duplicate cells are returned, a tibble will be #' # returned because duplicate cells cannot exist in Seurat objects #' pbmc_small |> slice_sample(n=1, replace=TRUE) # returns Seurat #' pbmc_small |> slice_sample(n=100, replace=TRUE) # returns tibble #' #' # weight by a variable #' pbmc_small |> slice_sample(n=5, weight_by=nCount_RNA) #' #' # sample by group #' pbmc_small |> slice_sample(n=5, by=groups) #' #' # sample using proportions #' pbmc_small |> slice_sample(prop=0.10) #' #' @importFrom dplyr slice_sample #' @export slice_sample.Seurat <- function(.data, ..., n=NULL, prop=NULL, by=NULL, weight_by=NULL, replace=FALSE) { # Solve CRAN NOTES cell <- NULL . <- NULL lifecycle::signal_superseded("1.0.0", "sample_n()", "slice_sample()") if (!is.null(n)) new_meta <- .data[[]] |> as_tibble(rownames=c_(.data)$name) |> select(-everything(), c_(.data)$name, {{ by }}, {{ weight_by }}) |> slice_sample(..., n=n, by={{ by }}, weight_by={{ weight_by }}, replace=replace) else if (!is.null(prop)) new_meta <- .data[[]] |> as_tibble(rownames=c_(.data)$name) |> select(-everything(), c_(.data)$name, {{ by }}, {{ weight_by }}) |> slice_sample(..., prop=prop, by={{ by }}, weight_by={{ weight_by }}, replace=replace) else stop("tidyseurat says: you should provide `n` or `prop` arguments") count_cells <- new_meta %>% select(!!c_(.data)$symbol) %>% count(!!c_(.data)$symbol) .max_cell_count <- ifelse(nrow(count_cells)==0, 0, max(count_cells$n)) # If repeated cells due to replacement if (.max_cell_count |> gt(1)){ message("tidyseurat says: When sampling with replacement", " a data frame is returned for independent data analysis.") .data |> as_tibble() |> right_join(new_meta %>% select(!!c_(.data)$symbol), by=c_(.data)$name) } else { new_obj <- subset(.data, cells=new_meta %>% pull(!!c_(.data)$symbol)) new_obj } } #' @name slice_head #' @rdname slice #' @inherit dplyr::slice_head #' @examples #' #' # First rows based on existing order #' 
pbmc_small |> slice_head(n=5) #' #' @importFrom dplyr slice_head #' @importFrom tibble rowid_to_column #' @export slice_head.Seurat <- function(.data, ..., n, prop, by=NULL) { row_number___ <- NULL idx <- .data[[]] |> select(-everything(), {{ by }}) |> rowid_to_column(var='row_number___') |> slice_head(..., n=n, prop=prop, by={{ by }}) |> pull(row_number___) if (length(idx) == 0) { stop("tidyseurat says: the resulting data container is empty.", " Seurat does not allow for empty containers.") } new_obj <- subset(.data, cells=colnames(.data)[idx]) new_obj } #' @name slice_tail #' @rdname slice #' @inherit dplyr::slice_tail #' @examples #' #' # Last rows based on existing order #' pbmc_small |> slice_tail(n=5) #' #' @importFrom dplyr slice_tail #' @importFrom tibble rowid_to_column #' @export slice_tail.Seurat <- function(.data, ..., n, prop, by=NULL) { row_number___ <- NULL idx <- .data[[]] |> select(-everything(), {{ by }}) |> rowid_to_column(var='row_number___') |> slice_tail(..., n=n, prop=prop, by={{ by }}) |> pull(row_number___) if (length(idx) == 0) { stop("tidyseurat says: the resulting data container is empty.", " Seurat does not allow for empty containers.") } new_obj <- subset(.data, cells=colnames(.data)[idx]) new_obj } #' @name slice_min #' @rdname slice #' @inherit dplyr::slice_min #' @examples #' #' # Rows with minimum and maximum values of a metadata variable #' pbmc_small |> slice_min(nFeature_RNA, n=5) #' #' # slice_min() and slice_max() may return more rows than requested #' # in the presence of ties. 
#' pbmc_small |> slice_min(nFeature_RNA, n=2)
#'
#' # Use with_ties=FALSE to return exactly n matches
#' pbmc_small |> slice_min(nFeature_RNA, n=2, with_ties=FALSE)
#'
#' # Or use additional variables to break the tie:
#' pbmc_small |> slice_min(tibble::tibble(nFeature_RNA, nCount_RNA), n=2)
#'
#' # Use by for group-wise operations
#' pbmc_small |> slice_min(nFeature_RNA, n=5, by=groups)
#'
#' @importFrom dplyr slice_min
#' @importFrom tibble rowid_to_column
#' @export
slice_min.Seurat <- function(.data, order_by, ..., n, prop, by=NULL,
    with_ties=TRUE, na_rm=FALSE) {

    # Silence R CMD check NOTE about the temporary index column
    row_number___ <- NULL

    # Extract the bare column symbols referenced by `order_by`
    # (handles compound expressions such as tibble(a, b)).
    order_by_variables <- return_arguments_of(!!enexpr(order_by))

    # Run dplyr::slice_min on the cell metadata only, tracking the original
    # row positions so the Seurat object can be subset by cell afterwards.
    idx <- .data[[]] |>
        select(-everything(), !!!order_by_variables, {{ by }}) |>
        rowid_to_column(var='row_number___') |>
        slice_min(
            order_by={{ order_by }}, ..., n=n, prop=prop,
            by={{ by }}, with_ties=with_ties, na_rm=na_rm
        ) |>
        pull(row_number___)

    # Seurat cannot represent a container with zero cells
    if (length(idx) == 0) {
        stop("tidyseurat says: the resulting data container is empty.",
            " Seurat does not allow for empty containers.")
    }

    # Subset the Seurat object to the selected cells
    new_obj <- subset(.data, cells=colnames(.data)[idx])
    new_obj
}

#' @name slice_max
#' @rdname slice
#' @inherit dplyr::slice_max
#' @examples
#'
#' # Rows with minimum and maximum values of a metadata variable
#' pbmc_small |> slice_max(nFeature_RNA, n=5)
#'
#' @importFrom dplyr slice_max
#' @importFrom tibble rowid_to_column
#' @export
slice_max.Seurat <- function(.data, order_by, ..., n, prop, by=NULL,
    with_ties=TRUE, na_rm=FALSE) {

    # Silence R CMD check NOTE about the temporary index column
    row_number___ <- NULL

    # Extract the bare column symbols referenced by `order_by`
    order_by_variables <- return_arguments_of(!!enexpr(order_by))

    # Mirror of slice_min.Seurat: slice the metadata, keep row positions,
    # then subset the Seurat object by the surviving cells.
    idx <- .data[[]] |>
        select(-everything(), !!!order_by_variables, {{ by }}) |>
        rowid_to_column(var='row_number___') |>
        slice_max(
            order_by={{ order_by }}, ..., n=n, prop=prop,
            by={{ by }}, with_ties=with_ties, na_rm=na_rm
        ) |>
        pull(row_number___)

    # Seurat cannot represent a container with zero cells
    if (length(idx) == 0) {
        stop("tidyseurat says: the resulting data container is empty.",
            " Seurat does not allow for empty containers.")
    }

    new_obj <- subset(.data, cells=colnames(.data)[idx])
    new_obj
}

#' @name select
#' @rdname select
#' @inherit dplyr::select
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> select(cell, orig.ident)
#'
#' @importFrom dplyr select
#' @export
select.Seurat <- function (.data, ...) {

    # Deprecation of special column names: if the user still refers to the
    # old "cell" column, switch the object to the old-style vocabulary.
    .cols <- enquos(..., .ignore_empty="all") %>%
        map(~ quo_name(.x)) %>%
        unlist()
    if (is_sample_feature_deprecated_used(.data, .cols)) {
        .data <- ping_old_special_column_into_metadata(.data)
    }

    .data %>%
        as_tibble() %>%
        select_helper(...) %>%
        when(

            # If key columns are missing, a plain tibble is returned because
            # the selection can no longer be stored back as cell metadata
            (get_needed_columns(.data) %in% colnames(.)) %>%
                all() %>%
                `!` ~ {
                message("tidyseurat says: Key columns are missing.",
                    " A data frame is returned for independent data analysis.")
                (.)
            },

            # If valid seurat meta data, write the selection back
            ~ {
                .data@meta.data <- (.) %>% as_meta_data(.data)
                .data
            }
        )
}

#' @name sample_n
#' @rdname sample_n
#' @aliases sample_frac
#' @inherit dplyr::sample_n
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> sample_n(50)
#' pbmc_small |> sample_frac(0.1)
#'
#' @importFrom dplyr sample_n
#' @export
sample_n.Seurat <- function(tbl, size, replace=FALSE,
    weight=NULL, .env=NULL, ...) {

    # Solve CRAN NOTES
    cell <- NULL
    . <- NULL

    # sample_n() is superseded upstream in dplyr by slice_sample()
    lifecycle::signal_superseded("1.0.0", "sample_n()", "slice_sample()")

    # Sample rows of the cell metadata
    new_meta <- tbl[[]] %>%
        as_tibble(rownames=c_(tbl)$name) %>%
        dplyr::sample_n(size, replace=replace, weight=weight, .env=.env, ...)

    # Count how many times each cell was drawn
    count_cells <- new_meta %>%
        select(!!c_(tbl)$symbol) %>%
        count(!!c_(tbl)$symbol)

    # If repeated cells: a Seurat object cannot hold duplicated cell names,
    # so fall back to returning a tibble.
    # NOTE(review): this body is duplicated in sample_frac.Seurat below —
    # consider a shared helper.
    if (count_cells$n %>% max() %>% gt(1)) {
        message("tidyseurat says: When sampling with replacement",
            " a data frame is returned for independent data analysis.")
        tbl %>%
            as_tibble() %>%
            right_join(new_meta %>% select(!!c_(tbl)$symbol),
                by=c_(tbl)$name)
    } else {
        # Otherwise subset the Seurat object and install the sampled metadata
        new_obj <- subset(tbl, cells=new_meta %>% pull(!!c_(tbl)$symbol))
        new_obj@meta.data <- new_meta %>%
            data.frame(row.names=pull(., !!c_(tbl)$symbol),
                check.names=FALSE) %>%
            select(- !!c_(tbl)$symbol)
        new_obj
    }
}

#' @rdname sample_n
#' @importFrom dplyr sample_frac
#' @export
sample_frac.Seurat <- function(tbl, size=1, replace=FALSE,
    weight=NULL, .env=NULL, ...) {

    # Solve CRAN NOTES
    cell <- NULL
    . <- NULL

    # sample_frac() is superseded upstream in dplyr by slice_sample()
    lifecycle::signal_superseded("1.0.0", "sample_frac()", "slice_sample()")

    # Sample a fraction of the cell metadata rows
    new_meta <- tbl[[]] %>%
        as_tibble(rownames=c_(tbl)$name) %>%
        dplyr::sample_frac(size, replace=replace, weight=weight, .env=.env, ...)

    # Count how many times each cell was drawn
    count_cells <- new_meta %>%
        select(!!c_(tbl)$symbol) %>%
        count(!!c_(tbl)$symbol)

    # If repeated cells, return a tibble (duplicated cell names are not
    # representable in a Seurat object)
    if (count_cells$n %>% max() %>% gt(1)) {
        message("tidyseurat says: When sampling with replacement",
            " a data frame is returned for independent data analysis.")
        tbl %>%
            as_tibble() %>%
            right_join(new_meta %>% select(!!c_(tbl)$symbol),
                by=c_(tbl)$name)
    } else {
        new_obj <- subset(tbl, cells=new_meta %>% pull(!!c_(tbl)$symbol))
        new_obj@meta.data <- new_meta %>%
            data.frame(row.names=pull(., !!c_(tbl)$symbol),
                check.names=FALSE) %>%
            select(- !!c_(tbl)$symbol)
        new_obj
    }
}

#' Count observations by group
#'
#' @description
#' `count()` lets you quickly count the unique values of one or more variables:
#' `df %>% count(a, b)` is roughly equivalent to
#' `df %>% group_by(a, b) %>% summarise(n = n())`.
#' `count()` is paired with `tally()`, a lower-level helper that is equivalent
#' to `df %>% summarise(n = n())`. Supply `wt` to perform weighted counts,
#' switching the summary from `n = n()` to `n = sum(wt)`.
#'
#' `add_count()` and `add_tally()` are equivalents to `count()` and `tally()`
#' but use `mutate()` instead of `summarise()` so that they add a new column
#' with group-wise counts.
#'
#' @param x A data frame, data frame extension (e.g. a tibble), or a
#' lazy data frame (e.g. from dbplyr or dtplyr).
#' @param ... <[`data-masking`][dplyr_data_masking]> Variables to group by.
#' @param wt <[`data-masking`][dplyr_data_masking]> Frequency weights.
#' Can be `NULL` or a variable:
#'
#' * If `NULL` (the default), counts the number of rows in each group.
#' * If a variable, computes `sum(wt)` for each group.
#' @param sort If `TRUE`, will show the largest groups at the top.
#' @param name The name of the new column in the output.
#'
#' If omitted, it will default to `n`. If there's already a column called `n`,
#' it will error, and require you to specify the name.
#' @param .drop For `count()`: if `FALSE` will include counts for empty groups
#' (i.e. for levels of factors that don't exist in the data).
#' @return
#' For `count()` on a `Seurat` object, a `tibble` is returned for independent
#' data analysis (the counts cannot be stored back as per-cell metadata).
#' `add_count()` returns an object of the same type as `x`, with the count
#' column added to the cell metadata. Both group transiently, so the output
#' has the same groups as the input.
#' @name count
#' @rdname count
#' @examples
#' data(pbmc_small)
#' pbmc_small |> count(groups)
#'
#' @importFrom dplyr count
#' @export
count.Seurat <- function(x, ..., wt=NULL, sort=FALSE, name=NULL,
    .drop=group_by_drop_default(x)) {

    # Counting collapses cells, so a Seurat object cannot be returned
    message("tidyseurat says: A data frame is",
        " returned for independent data analysis.")

    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>%
        map(~ quo_name(.x)) %>%
        unlist()
    if (is_sample_feature_deprecated_used(x, .cols)) {
        x <- ping_old_special_column_into_metadata(x)
    }

    x %>%
        as_tibble() %>%
        dplyr::count(..., wt=!!enquo(wt), sort=sort, name=name, .drop=.drop)
}

#' @export
#' @rdname count
add_count <- function(x, ..., wt=NULL, sort=FALSE, name=NULL) {
    # Generic re-declared here so a Seurat method can be dispatched
    UseMethod("add_count")
}

#' @export
#' @rdname count
add_count.default <- function(x, ..., wt=NULL, sort=FALSE, name=NULL) {
    if (is.null(name)) name <- "n"
    # Manual re-implementation of dplyr::add_count: transient group,
    # mutate the count column, ungroup.
    # NOTE(review): `is.null(wt)` / `sum(!!enquo(wt))` are evaluated inside
    # the mutate data mask — confirm behaviour when `wt` names a column.
    .out <- x %>%
        dplyr::group_by(..., .add = TRUE) %>%
        dplyr::mutate(!!rlang::sym(name) := if (is.null(wt)) dplyr::n()
            else sum(!!enquo(wt), na.rm = TRUE)) %>%
        dplyr::ungroup()
    if (sort) .out <- dplyr::arrange(.out, dplyr::desc(!!rlang::sym(name)))
    .out
}

#' @rdname count
#' @aliases add_count
#' @importFrom rlang sym
#' @export
add_count.Seurat <- function(x, ..., wt=NULL, sort=FALSE, name=NULL) {

    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>%
        map(~ quo_name(.x)) %>%
        unlist()
    if (is_sample_feature_deprecated_used(x, .cols)) {
        x <- ping_old_special_column_into_metadata(x)
    }

    if (is.null(name)) name <- "n"

    # Compute the per-group count on the tibble view ...
    .out <- x %>%
        as_tibble %>%
        dplyr::group_by(..., .add = TRUE) %>%
        dplyr::mutate(!!sym(name) := if (is.null(wt)) dplyr::n()
            else sum(!!enquo(wt), na.rm = TRUE)) %>%
        dplyr::ungroup()
    if (sort) .out <- dplyr::arrange(.out, dplyr::desc(!!sym(name)))

    # ... then store it back as cell metadata, returning the Seurat object
    x@meta.data <- .out %>% as_meta_data(x)
    x
}

#' @name pull
#' @rdname pull
#' @inherit dplyr::pull
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> pull(groups)
#'
#' @importFrom dplyr pull
#' @export
pull.Seurat <-
    function(.data, var=-1, name=NULL, ...) {
        var <- enquo(var)
        name <- enquo(name)

        # pull() extracts a vector, so no Seurat object can be returned
        message("tidyseurat says: A data frame is",
            " returned for independent data analysis.")

        # Deprecation of special column names
        if (is_sample_feature_deprecated_used( .data, quo_name(var) )) {
            .data <- ping_old_special_column_into_metadata(.data)
        }

        .data %>%
            as_tibble() %>%
            dplyr::pull( var=!!var, name=!!name, ...)
    }

#' @name group_split
#' @rdname group_split
#' @inherit dplyr::group_split
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> group_split(groups)
#'
#' @importFrom rlang check_dots_used
#' @importFrom dplyr group_by
#' @importFrom dplyr group_rows
#' @importFrom dplyr group_split
#' @export
group_split.Seurat <- function(.tbl, ..., .keep = TRUE) {
    var_list <- enquos(...)

    # Determine the row indices of each group on the tibble view
    group_list <- .tbl |>
        as_tibble() |>
        dplyr::group_by(!!!var_list)
    groups <- group_list |>
        dplyr::group_rows()

    # Subset the Seurat object column-wise (cells) once per group
    v <- vector(mode = "list", length = length(groups))
    for (i in seq_along(v)) {
        v[[i]] <- .tbl[, groups[[i]]]
        if (.keep == FALSE) {
            # Drop the grouping columns from each piece when .keep=FALSE
            v[[i]] <- select(v[[i]], !(!!!var_list))
        }
    }
    v
}

================================================ FILE: R/ggplot2_methods.R ================================================

#' @name ggplot
#' @rdname ggplot
#' @inherit ggplot2::ggplot
#' @title Create a new \code{ggplot} from a \code{tidyseurat}
#' @return `ggplot`
#'
#' @examples
#' library(ggplot2)
#' data(pbmc_small)
#' pbmc_small |>
#'     ggplot(aes(groups, nCount_RNA)) +
#'     geom_boxplot()
#'
#' @importFrom purrr map
#' @importFrom rlang quo_name
#' @importFrom ggplot2 aes ggplot
#' @export
ggplot.Seurat <- function(data=NULL, mapping=aes(), ...,
    environment=parent.frame()) {

    # Deprecation of special column names, based on the aesthetic mapping
    .cols <- mapping %>%
        unlist() %>%
        map(~ quo_name(.x)) %>%
        unlist() %>%
        as.character()
    if (is_sample_feature_deprecated_used(data, .cols)) {
        data <- ping_old_special_column_into_metadata(data)
    }

    # Plot from the tibble abstraction of the Seurat object
    data %>%
        as_tibble() %>%
        ggplot2::ggplot(mapping=mapping)
}

================================================ FILE:
R/methods.R ================================================

# Override SeuratObject's "show" so that a Seurat object prints as a
# tidy tibble abstraction; set options(restore_Seurat_show=TRUE) to get
# the original SeuratObject display back.
#' @importFrom methods getMethod
setMethod(
    f="show",
    signature="Seurat",
    definition=function(object) {
        if (isTRUE(x=getOption(x="restore_Seurat_show", default=FALSE))) {
            # Delegate to the original method defined in SeuratObject
            f <- getMethod(
                f="show",
                signature="Seurat",
                where=asNamespace(ns="SeuratObject"))
            f(object=object)
        } else {
            print(object)
        }
    }
)

setClass("tidyseurat", contains="Seurat")

#' @importFrom generics tidy
#' @title tidy for Seurat objects
#' @name tidy
#' @description tidy for Seurat objects
#' @param x A Seurat object
#' @param ... Additional arguments (not used)
#' @return A tidyseurat object
#' @importFrom lifecycle deprecate_warn
#' @export
tidy.Seurat <- function(x, ...) {
    # DEPRECATE: kept only for backward compatibility, returns x unchanged
    deprecate_warn(
        when="0.2.0",
        what="tidy()",
        details="tidyseurat says: tidy() is not needed anymore."
    )
    return(x)
}

#' @name join_features
#' @rdname join_features
#' @inherit ttservice::join_features
#' @aliases join_features,Seurat-method
#'
#' @param .data A tidyseurat object
#' @param assay assay name to extract feature abundance
#' @param slot slot name to extract feature abundance
#'
#' @return A `tidyseurat` object
#' containing information for the specified features.
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small %>% join_features(
#'     features=c("HLA-DRA", "LYZ"))
#'
#' @importFrom magrittr "%>%"
#' @importFrom dplyr contains
#' @importFrom dplyr everything
#' @importFrom ttservice join_features
#' @export
setMethod("join_features", "Seurat", function(.data,
    features=NULL, all=FALSE, exclude_zeros=FALSE, shape="wide",
    assay=NULL, slot="data", ...) {

    # Silence R CMD check NOTE
    .feature = NULL

    # Join abundances either in long format (one row per cell/feature pair)
    # or wide format (one column per feature), keyed by the cell column.
    if (shape == "long")
        .data |>
            left_join(
                get_abundance_sc_long(
                    .data=.data,
                    features=features,
                    all=all,
                    exclude_zeros=exclude_zeros,
                    assay=assay,
                    slot=slot,
                    ...
                ),
                by=c_(.data)$name
            ) %>%
            # Put cell, feature and abundance columns first
            select(!!c_(.data)$symbol, .feature,
                contains(".abundance"), everything())
    else
        .data |>
            left_join(
                get_abundance_sc_wide(
                    .data=.data,
                    features=features,
                    all=all,
                    assay=assay,
                    slot=slot,
                    ...
                ),
                by=c_(.data)$name
            )
})

#' @name aggregate_cells
#' @rdname aggregate_cells
#' @inherit ttservice::aggregate_cells
#' @aliases aggregate_cells,Seurat-method
#'
#' @param .data A tidyseurat object
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small_pseudo_bulk <- pbmc_small |>
#'     aggregate_cells(c(groups, letter.idents), assays="RNA")
#'
#' @importFrom rlang enquo
#' @importFrom magrittr "%>%"
#' @importFrom tibble enframe
#' @importFrom Matrix rowSums
#' @importFrom ttservice aggregate_cells
#' @importFrom SeuratObject DefaultAssay
#' @importFrom Seurat DietSeurat
#' @importFrom Seurat GetAssayData
#' @importFrom purrr map_int
setMethod("aggregate_cells", "Seurat", function(.data,
    .sample=NULL, slot="data", assays=NULL,
    aggregation_function=Matrix::rowSums, ...) {

    # Solve NOTE
    data <- NULL
    .feature <- NULL

    .sample <- enquo(.sample)

    # Subset only wanted assays
    if (!is.null(assays)) {
        DefaultAssay(.data) <- assays[1]
        .data = .data |> DietSeurat(assays = assays)
    }

    # Pseudo-bulk: nest cells by the sample definition, aggregate each
    # assay's matrix per group with `aggregation_function`, then re-attach
    # the per-group metadata.
    .data %>%
        nest(data=-!!.sample) %>%
        # Record the number of cells aggregated per group
        mutate(.aggregated_cells=map_int(data, ~ ncol(.x))) %>%
        mutate(
            data=map(data, ~
                # Loop over assays
                map2(.x@assays, names(.x@assays),
                    # Get counts
                    ~ GetAssayData_robust(.x, layer=slot) %>%
                        aggregation_function(na.rm=T) %>%
                        tibble::enframe(
                            name=".feature",
                            value=sprintf("%s", .y)
                        ) %>%
                        mutate(.feature=as.character(.feature))
                ) %>%
                    Reduce(function(...) full_join(..., by=c(".feature")), .),
                .progress = TRUE
            )) %>%
        left_join( .data %>% as_tibble() %>% subset_tidyseurat(!!.sample)) %>%
        unnest(data) %>%
        # Build a single ___-separated sample identifier column
        tidyr::unite(".sample", !!.sample, sep="___", remove=FALSE) |>
        select(.feature, .sample, names(.data@assays), everything()) |>
        drop_class("tidyseurat_nested")
})

================================================ FILE: R/methods_DEPRECATED.R ================================================

#' (DEPRECATED) Extract and join information for transcripts.
#'
#'
#' @description join_transcripts() extracts and joins information for specified transcripts
#'
#' @importFrom rlang enquo
#' @importFrom magrittr "%>%"
#'
#' @name join_transcripts
#' @rdname join_transcripts
#'
#' @param .data A tidyseurat object
#' @param transcripts A vector of transcript identifiers to join
#' @param all If TRUE return all
#' @param exclude_zeros If TRUE exclude zero values
#' @param shape Format of the returned table "long" or "wide"
#' @param ... Parameters to pass to join wide, i.e. assay name to extract transcript abundance from
#'
#' @details DEPRECATED, please use join_features()
#'
#' @return A `tbl` containing the information.for the specified transcripts
#'
#' @examples
#'
#' print("DEPRECATED")
#'
#'
#' @export
#'
join_transcripts <- function(.data,
    transcripts = NULL,
    all = FALSE,
    exclude_zeros = FALSE,
    shape = "wide", ...) {
    UseMethod("join_transcripts", .data)
}

#' @export
join_transcripts.default <- function(.data,
    transcripts = NULL,
    all = FALSE,
    exclude_zeros = FALSE,
    shape = "wide", ...) {
    print("tidyseurat says: This function cannot be applied to this object")
}

#' @export
join_transcripts.Seurat <- function(.data,
    transcripts = NULL,
    all = FALSE,
    exclude_zeros = FALSE,
    shape = "wide", ...) {
    # Deprecated shim: forward to the current join_features() API
    deprecate_warn("0.2.1", "join_transcripts()",
        "tidyseurat::join_features()")
    .data %>%
        join_features(features = transcripts,
            all = all,
            exclude_zeros = exclude_zeros,
            shape = shape, ...)
}

================================================ FILE: R/pillar_utilities.R ================================================

# Non-breaking space used by pillar for alignment padding; converted back
# to a plain space after wrapping.
NBSP <- "\U00A0"

# Wrap each header line as a "# "-prefixed comment, capped at the console
# width (re-implementation of an unexported pillar helper).
pillar___format_comment <- function (x, width) {
    if (length(x) == 0L) {
        return(character())
    }
    map_chr(x, pillar___wrap, prefix="# ",
        width=min(width, cli::console_width()))
}

# ANSI-aware strwrap (fansi preserves escape sequences while wrapping)
#' @importFrom fansi strwrap_ctl
pillar___strwrap2 <- function (x, width, indent) {
    fansi::strwrap_ctl(x, width=max(width, 0), indent=indent,
        exdent=indent + 2)
}

# Concatenate, wrap to width (minus the prefix), prefix each line, and
# restore NBSP to regular spaces.
pillar___wrap <- function (..., indent=0, prefix="", width) {
    x <- paste0(..., collapse="")
    wrapped <- pillar___strwrap2(x, width - get_extent(prefix), indent)
    wrapped <- paste0(prefix, wrapped)
    wrapped <- gsub(NBSP, " ", wrapped)
    paste0(wrapped, collapse="\n")
}

================================================ FILE: R/plotly_methods.R ================================================

#' @name plotly
#' @rdname plotly
#' @inherit plotly::plot_ly
#' @return `plotly`
#'
#' @examples
#' data(pbmc_small)
#' plot_ly(pbmc_small)
#'
#' @importFrom plotly plot_ly
#' @export
plot_ly <- function(data=data.frame(), ..., type=NULL, name=NULL,
    color=NULL, colors=NULL, alpha=NULL,
    stroke=NULL, strokes=NULL, alpha_stroke=1,
    size=NULL, sizes=c(10, 100),
    span=NULL, spans=c(1, 20),
    symbol=NULL, symbols=NULL,
    linetype=NULL, linetypes=NULL,
    split=NULL, frame=NULL,
    width=NULL, height=NULL, source="A") {
    # Generic re-declared so Seurat/tbl_df methods can be dispatched
    UseMethod("plot_ly")
}

#' @rdname plotly
#' @export
plot_ly.tbl_df <- function(data=data.frame(), ..., type=NULL, name=NULL,
    color=NULL, colors=NULL, alpha=NULL,
    stroke=NULL, strokes=NULL, alpha_stroke=1,
    size=NULL, sizes=c(10, 100),
    span=NULL, spans=c(1, 20),
    symbol=NULL, symbols=NULL,
    linetype=NULL, linetypes=NULL,
    split=NULL, frame=NULL,
    width=NULL, height=NULL, source="A") {
    data %>%
        # This is a trick to not loop the call
        drop_class("tbl_df") %>%
        plotly::plot_ly(...,
            type=type, name=name,
            color=color, colors=colors, alpha=alpha,
            stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke,
            size=size, sizes=sizes,
            span=span, spans=spans,
            symbol=symbol, symbols=symbols,
            linetype=linetype, linetypes=linetypes,
            split=split, frame=frame,
            width=width, height=height, source=source)
}

#' @rdname plotly
#' @export
plot_ly.Seurat <- function(data=data.frame(), ..., type=NULL, name=NULL,
    color=NULL, colors=NULL, alpha=NULL,
    stroke=NULL, strokes=NULL, alpha_stroke=1,
    size=NULL, sizes=c(10, 100),
    span=NULL, spans=c(1, 20),
    symbol=NULL, symbols=NULL,
    linetype=NULL, linetypes=NULL,
    split=NULL, frame=NULL,
    width=NULL, height=NULL, source="A") {
    data %>%
        # This is a trick to not loop the call
        as_tibble() %>%
        plot_ly(...,
            type=type, name=name,
            color=color, colors=colors, alpha=alpha,
            stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke,
            size=size, sizes=sizes,
            span=span, spans=spans,
            symbol=symbol, symbols=symbols,
            linetype=linetype, linetypes=linetypes,
            split=split, frame=frame,
            width=width, height=height, source=source)
}

================================================ FILE: R/print_method.R ================================================

# This file is a replacement of the unexported functions in the tibble
# package, in order to specify "tibble abstraction in the header"

#' @name tbl_format_header
#' @rdname tbl_format_header
#' @inherit pillar::tbl_format_header
#'
#' @examples
#' # TODO
#'
#' @importFrom rlang names2
#' @importFrom pillar align
#' @importFrom pillar get_extent
#' @importFrom pillar style_subtle
#' @importFrom pillar tbl_format_header
#' @export
tbl_format_header.tidySeurat <- function(x, setup, ...) {

    # Single-cell summary attributes attached by print.Seurat below
    number_of_features <- x |> attr("number_of_features")
    assay_names <- x |> attr("assay_names")
    active_assay <- x |> attr("active_assay")

    named_header <- setup$tbl_sum

    # Change name
    names(named_header) <- "A Seurat-tibble abstraction"

    if (all(names2(named_header) == "")) {
        header <- named_header
    } else {
        header <- paste0(
            align(paste0(names2(named_header), ":"), space=NBSP),
            " ",
            named_header) %>%
            # Add further info single-cell (dim-grey ANSI styling)
            append(sprintf(
                "\033[90m Features=%s | Cells=%s | Active assay=%s | Assays=%s\033[39m",
                number_of_features, nrow(x),
                active_assay,
                assay_names %>% paste(collapse=", ")
            ), after=1)
    }

    style_subtle(pillar___format_comment(header, width=setup$width))
}

#' @name formatting
#' @rdname formatting
#' @aliases print
#' @inherit tibble::formatting
#' @return Prints a message to the console describing
#' the contents of the `tidyseurat`.
#'
#' @param ... Passed on to \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.
#' @param n_extra Number of extra columns to print abbreviated information for,
#' if the width is too small for the entire tibble. If `NULL`, the default,
#' will print information about at most `tibble.max_extra_cols` extra columns.
#' @examples
#' data(pbmc_small)
#' print(pbmc_small)
#'
#' @importFrom vctrs new_data_frame
#' @importFrom Seurat GetAssayData
#' @importFrom Seurat Assays
#' @export
print.Seurat <- function(x, ..., n=NULL, width=NULL, n_extra=NULL) {
    # Build the tibble abstraction, tag it with the single-cell summary
    # attributes consumed by tbl_format_header.tidySeurat, and print it.
    x |>
        as_tibble(n_dimensions_to_return=5) |>
        new_data_frame(class=c("tidySeurat", "tbl")) %>%
        add_attr(GetAssayData(x) %>% nrow, "number_of_features") %>%
        add_attr(Assays(x), "assay_names") %>%
        add_attr(x@active.assay, "active_assay") %>%
        print()
    # Return the original Seurat object, invisibly, per print() convention
    invisible(x)
}

================================================ FILE: R/tibble_methods.R ================================================

#' @name as_tibble
#' @rdname as_tibble
#' @inherit tibble::as_tibble
#' @return `tibble`
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> as_tibble()
#'
#' @importFrom tibble as_tibble
#' @importFrom purrr reduce
#' @importFrom purrr map
#' @importFrom tidyr spread
#' @importFrom tibble enframe
#' @export
as_tibble.Seurat <- function(x, ...,
    .name_repair=c("check_unique", "unique", "universal", "minimal"),
    rownames=NULL) {
    # Cell metadata as a tibble, keyed by the cell-id column
    x[[]] %>%
        tibble::as_tibble(rownames=c_(x)$name) %>%
        # Attach reduced dimensions
        when(
            # Only if I have reduced dimensions and special datasets
            length(x@reductions) > 0 ~ (.) %>%
                left_join(
                    get_special_datasets(x, ...)
%>% map(~ .x %>% when(
                        # If row == 1 do a trick: a dimensionless result is
                        # widened into a one-row tibble keyed by cell id
                        dim(.) %>% is.null ~ {
                            (.) %>%
                                tibble::enframe() %>%
                                spread(name, value) %>%
                                mutate(!!c_(x)$symbol := rownames(x[[]]))
                        },
                        # Otherwise continue normally
                        ~ as_tibble(., rownames=c_(x)$name)
                    )) %>%
                        reduce(left_join, by=c_(x)$name),
                    by=c_(x)$name
                ),
            # Otherwise skip
            ~ (.)
        )
}

#' @name glimpse
#' @rdname glimpse
#' @inherit pillar::glimpse
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> glimpse()
#'
#' @importFrom tibble glimpse
#' @export
glimpse.tidyseurat <- function(x, width=NULL, ...) {
    # Glimpse the tibble abstraction rather than the raw Seurat object
    x %>%
        as_tibble() %>%
        tibble::glimpse(width=width, ...)
}

================================================ FILE: R/tidyr_methods.R ================================================

#' @name unnest
#' @rdname unnest
#' @inherit tidyr::unnest
#' @aliases unnest_seurat
#' @return `tidyseurat`
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |>
#'     nest(data=-groups) |>
#'     unnest(data)
#'
#' @importFrom rlang quo_name
#' @importFrom purrr imap
#' @importFrom tidyr unnest
#' @export
unnest.tidyseurat_nested <- function(data, cols, ...,
    keep_empty=FALSE, ptype=NULL,
    names_sep=NULL, names_repair="check_unique",
    .drop, .id, .sep, .preserve) {
    # Thin wrapper that defuses `cols` and forwards to unnest_seurat()
    cols <- enquo(cols)
    unnest_seurat(data, !!cols, ...,
        keep_empty=keep_empty, ptype=ptype,
        names_sep=names_sep, names_repair=names_repair)
}

#' @rdname unnest
#' @importFrom tidyr unnest
#' @importFrom purrr when
#' @importFrom rlang quo_name
#' @importFrom purrr imap
#' @export
unnest_seurat <- function(data, cols, ...,
    keep_empty=FALSE, ptype=NULL,
    names_sep=NULL, names_repair="check_unique",
    .drop, .id, .sep, .preserve) {

    # Need this otherwise crashes map
    .data_ <- data

    cols <- enquo(cols)

    .data_ %>%
        when(

            # If my only column to unnest is tidyseurat
            pull(., !!cols) %>% .[[1]] %>% is("Seurat") %>% any ~ {
                # Do my trick to unnest: replicate the outer columns onto
                # each nested Seurat object, then row-bind them all
                list_seurat <- mutate(., !!cols := imap(
                    !!cols, ~ .x %>%
                        bind_cols_(
                            .data_ %>%
                                select(-!!cols) %>%
                                slice(rep(.y, nrow(as_tibble(.x))))
                        )
                )) %>%
                    pull(!!cols)

                list_seurat[[1]] %>%
                    # Bind only if length list > 1
                    when(
                        length(list_seurat)>1 ~
                            bind_rows(., list_seurat[2:length(list_seurat)]),
                        ~ (.)
                    )
            },

            # Else do normal stuff
            ~ (.) %>%
                drop_class("tidyseurat_nested") %>%
                tidyr::unnest(!!cols, ..., keep_empty=keep_empty, ptype=ptype,
                    names_sep=names_sep, names_repair=names_repair) %>%
                add_class("tidyseurat_nested"))
}

#' @name nest
#' @rdname nest
#' @inherit tidyr::nest
#' @return `tidyseurat_nested`
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |>
#'     nest(data=-groups) |>
#'     unnest(data)
#'
#' @importFrom tidyr nest
#' @importFrom magrittr equals
#' @importFrom rlang enquos
#' @importFrom Seurat SplitObject
#' @importFrom Seurat DietSeurat
#' @importFrom rlang :=
#' @export
nest.Seurat <- function (.data, ..., .names_sep=NULL) {
    cols <- enquos(...)
    col_name_data <- names(cols)

    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>%
        map(~ quo_name(.x)) %>%
        unlist()
    if (is_sample_feature_deprecated_used(.data, .cols)) {
        .data <- ping_old_special_column_into_metadata(.data)
    }

    my_data__ <- .data

    # This is for getting the column names: run the nest on a tiny
    # two-feature copy of the object to learn the grouping columns cheaply
    dummy_nested <- my_data__ |>
        DietSeurat(features = rownames(my_data__)[1:2],
            assays = DefaultAssay(my_data__)) |>
        suppressWarnings() |>
        to_tib() %>%
        tidyr::nest(...)
    split_by_column <- dummy_nested |>
        select(-col_name_data) |>
        colnames()

    # If nesting on one group use the fast split
    if (split_by_column |> length() |> identical(1L))
        my_data__ |>
            SplitObject(split.by=split_by_column) |>
            map(~ .x |> select(-split_by_column)) |>
            enframe(name=split_by_column, value=col_name_data) |>
            # Coerce to tidyseurat_nested for unnesting
            add_class("tidyseurat_nested")
    # If arbitrary nest is needed use the slow one
    else
        my_data__ %>%
            # This is needed otherwise nest goes into loop and fails
            to_tib %>%
            tidyr::nest(...) %>%
            mutate(
                !!as.symbol(col_name_data) := map(
                    !!as.symbol(col_name_data),
                    ~ my_data__ %>%
                        # Subset cells
                        filter(!!c_(.data)$symbol %in%
                            pull(.x, !!c_(.data)$symbol)) %>%
                        # Subset columns
                        select(colnames(.x))
                )) |>
            # Coerce to tidyseurat_nested for unnesting
            add_class("tidyseurat_nested")
}

#' @name extract
#' @rdname extract
#' @inherit tidyr::extract
#' @return `tidyseurat`
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |>
#'     extract(groups,
#'         into="g",
#'         regex="g([0-9])",
#'         convert=TRUE)
#'
#' @importFrom tidyr extract
#' @export
extract.Seurat <- function (data, col, into,
    regex="([[:alnum:]]+)", remove=TRUE, convert=FALSE, ...) {

    col <- enquo(col)

    # Deprecation of special column names
    if (is_sample_feature_deprecated_used( data, c(quo_name(col), into) )) {
        data= ping_old_special_column_into_metadata(data)
    }

    # Apply tidyr::extract on the tibble view and write the result back
    # as cell metadata
    data@meta.data <- data %>%
        as_tibble() %>%
        tidyr::extract(col=!!col, into=into, regex=regex,
            remove=remove, convert=convert, ...) %>%
        as_meta_data(data)

    data
}

#' @name pivot_longer
#' @rdname pivot_longer
#' @inherit tidyr::pivot_longer
#' @return `tidyseurat`
#'
#' @export
#' @examples
#' data(pbmc_small)
#' pbmc_small |> pivot_longer(
#'     cols=c(orig.ident, groups),
#'     names_to="name", values_to="value")
#'
#' @importFrom rlang check_dots_used
#' @importFrom tidyr pivot_longer
#' @export
pivot_longer.Seurat <- function(data,
    cols, names_to="name", names_prefix=NULL, names_sep=NULL,
    names_pattern=NULL, names_ptypes=NULL, names_transform=NULL,
    names_repair="check_unique", values_to="value", values_drop_na=FALSE,
    values_ptypes=NULL, values_transform=NULL, ...)
{
    cols <- enquo(cols)

    # Pivoting duplicates cells, so a plain tibble is returned
    message(data_frame_returned_message)

    # Deprecation of special column names
    if (is_sample_feature_deprecated_used( data, c(quo_names(cols)) )) {
        data= ping_old_special_column_into_metadata(data)
    }

    data %>%
        as_tibble() %>%
        tidyr::pivot_longer(!!cols,
            names_to=names_to, names_prefix=names_prefix,
            names_sep=names_sep, names_pattern=names_pattern,
            names_ptypes=names_ptypes, names_transform=names_transform,
            names_repair=names_repair, values_to=values_to,
            values_drop_na=values_drop_na, values_ptypes=values_ptypes,
            values_transform=values_transform, ...)
}

#' @name unite
#' @rdname unite
#' @inherit tidyr::unite
#' @return `tidyseurat`
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> unite(
#'     col="new_col",
#'     c("orig.ident", "groups"))
#'
#' @importFrom rlang enquo enquos quo_name
#' @importFrom tidyr unite
#' @export
unite.Seurat <- function(data, col, ..., sep="_", remove=TRUE, na.rm=FALSE) {

    # Check that we are not modifying a key column
    cols <- enquo(col)

    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>%
        map(~ quo_name(.x)) %>%
        unlist()
    if (is_sample_feature_deprecated_used(data, .cols)) {
        data <- ping_old_special_column_into_metadata(data)
    }

    # View-only columns (cell id, reduced-dimension columns) cannot be
    # removed, because they are not stored in the metadata
    .view_only_cols <- c(
        get_special_columns(data),
        get_needed_columns(data))
    .test <- intersect(
        quo_names(cols),
        .view_only_cols)
    if (remove && length(.test)) {
        stop("tidyseurat says:",
            " you are trying to rename a column",
            " that is view only ",
            paste(.view_only_cols, collapse=", "),
            " (it is not present in the colData).",
            " If you want to mutate a view-only column,",
            " make a copy and mutate that one.")
    }

    # Apply tidyr::unite on the tibble view and store it back as metadata
    data@meta.data <- data %>%
        as_tibble() %>%
        tidyr::unite(!!cols, ..., sep=sep, remove=remove, na.rm=na.rm) %>%
        as_meta_data(data)

    data
}

#' @name separate
#' @rdname separate
#' @inherit tidyr::separate
#' @return `tidyseurat`
#'
#' @examples
#' data(pbmc_small)
#' un <- pbmc_small |> unite("new_col", c(orig.ident, groups))
#' un |> separate(new_col, c("orig.ident", "groups"))
#'
#' @importFrom tidyr separate
#' @export
separate.Seurat <- function(data, col, into,
    sep="[^[:alnum:]]+", remove=TRUE, convert=FALSE,
    extra="warn", fill="warn", ...) {

    # Check that we are not modifying a key column
    cols <- enquo(col)

    # Deprecation of special column names
    if (is_sample_feature_deprecated_used( data, c(quo_names(cols)) )) {
        data= ping_old_special_column_into_metadata(data)
    }

    # View-only columns cannot be removed (same rule as unite.Seurat)
    .view_only_cols <- c(
        get_special_columns(data),
        get_needed_columns(data))
    .test <- intersect(
        quo_names(cols),
        .view_only_cols)
    if (remove && length(.test)) {
        stop("tidyseurat says:",
            " you are trying to rename a column",
            " that is view only ",
            paste(.view_only_cols, collapse=", "),
            "(it is not present in the colData).",
            " If you want to mutate a view-only column,",
            " make a copy and mutate that one.")
    }

    # Apply tidyr::separate on the tibble view and store it back as metadata
    data@meta.data = data %>%
        as_tibble() %>%
        tidyr::separate(!!cols, into=into, sep=sep, remove=remove,
            convert=convert, extra=extra, fill=fill, ...) %>%
        as_meta_data(data)

    data
}

================================================ FILE: R/utilities.R ================================================

#' @importFrom tibble as_tibble
#'
#' @keywords internal
#'
#' @param .data A tidyseurat
#'
#' @noRd
to_tib <- function(.data) {
    # Cell metadata as a tibble, keyed by the cell-id column
    .data[[]] %>%
        as_tibble(rownames=c_(.data)$name)
}

# Greater than
gt <- function(a, b) {
    a > b
}

# Smaller than
st <- function(a, b) {
    a < b
}

# Negation
not <- function(is) {
    !is
}

# Raise to the power
pow <- function(a, b) {
    a^b
}

# Equals
eq <- function(a, b) {
    a == b
}

# Insert `values` into `x` before position `before` (1-based);
# `before` must be within 1..length(x)
prepend <- function(x, values, before=1) {
    n <- length(x)
    stopifnot(before > 0 && before <= n)
    if (before == 1) {
        c(values, x)
    } else {
        c(x[seq_len(before-1)], values, x[seq(before, n)])
    }
}

#' Add class to object
#'
#' @keywords internal
#'
#' @param var A tibble
#' @param name A character name of the attribute
#'
#' @return A tibble with an additional attribute
add_class <- function(var, name) {
    # Prepend the class only if it is not already present
    if (!name %in% class(var)) class(var) <- prepend(class(var), name)

    return(var)
}

#' Remove class from object
#'
#' @keywords internal
#'
#' @param var A tibble
#' @param name A character name of the class
#'
#' @return A tibble with the class removed
#' @keywords internal
drop_class <- function(var, name) {
    class(var) <- class(var)[!class(var) %in% name]
    return(var)
}

#' get abundance wide
#'
#' @keywords internal
#'
#' @importFrom magrittr "%$%"
#' @importFrom utils tail
#' @importFrom Seurat GetAssayData
#' @importFrom Seurat DietSeurat
#' @importFrom SeuratObject DefaultAssay<-
#' @importFrom stats setNames
#'
#' @param .data A tidyseurat
#' @param features A character
#' @param all A boolean
#' @param assay assay name to extract feature abundance
#' @param slot slot in the assay, e.g. `data` and `scale.data`
#' @param prefix prefix for the feature names
#'
#' @return A Seurat object
#' @examples
#' data(pbmc_small)
#' pbmc_small %>%
#'     get_abundance_sc_wide(features=c("HLA-DRA", "LYZ"))
#'
#' @export
get_abundance_sc_wide <- function(.data,
    features=NULL, all=FALSE,
    assay=.data@active.assay, slot="data", prefix="") {

    # Solve CRAN warnings
    . <- NULL
    assays <- NULL
    counts <- NULL

    if (is.null(assay)) {
        assay <- .data@active.assay
    }

    # Check if output would be too big without forcing
    if( length(VariableFeatures(.data)) == 0 &
        is.null(features) &
        all == FALSE ) {
        stop("Your object do not contain variable trancript labels,\n",
            " feature argument is empty and all argument is set to FALSE.\n",
            " Either:\n",
            " 1. use detect_variable_features() to select variable feature\n",
            " 2. pass an array of features names\n",
            " 3. set all=TRUE (this will output a very large object;",
            " does your computer have enough RAM?)\n")
    }

    # Get variable features if existing
    if( length(VariableFeatures(.data)) > 0 &
        is.null(features) &
        all == FALSE )
        variable_genes <- VariableFeatures(.data)
    # Else
    else variable_genes <- NULL

    # Eliminate unneeded assays.
    # This because if a gene is not in an assay I am not interested about
    # this could cause an unneeded error
    DefaultAssay(.data) <- assay
    .data = .data |> DietSeurat(assays = assay)

    # Just grub last assay
    .data |>
        GetAssayData(assay = assay, layer=slot) %>%
        when(
            # Subset rows to variable features or to the requested features
            # (case-insensitive match on feature names)
            variable_genes %>% is.null %>% `!` ~ (.)[
                toupper(rownames(.)) %in% toupper(variable_genes),,drop=FALSE],
            features %>% is.null %>% `!` ~ (.)[
                toupper(rownames(.)) %in% toupper(features),,drop=FALSE],
            ~ stop("tidyseurat says: It is not convenient to",
                " extract all genes, you should have either variable",
                " features or feature list to extract.")
        ) |>
        as.matrix() |>
        # Transpose so rows are cells, columns are features
        t() |>
        as_tibble(rownames=c_(.data)$name) %>%
        # Add prefix
        setNames(c(c_(.data)$name,
            sprintf("%s%s", prefix, colnames(.)[-1])))
}

#' get abundance long
#'
#' @keywords internal
#'
#' @importFrom magrittr "%$%"
#' @importFrom Seurat VariableFeatures
#' @importFrom tidyr pivot_longer
#' @importFrom tibble as_tibble
#' @importFrom purrr when
#' @importFrom purrr map2
#'
#' @param .data A tidyseurat
#' @param features A character
#' @param all A boolean
#' @param exclude_zeros A boolean
#' @param assay assay name to extract feature abundance
#' @param slot slot in the assay, e.g. `data` and `scale.data`
#'
#' @return A Seurat object
#' @examples
#' data(pbmc_small)
#' pbmc_small %>%
#'     get_abundance_sc_long(features=c("HLA-DRA", "LYZ"))
#'
#' @export
get_abundance_sc_long <- function(.data,
    features=NULL, all=FALSE, exclude_zeros=FALSE,
    assay=Assays(.data), slot="data") {

    # Solve CRAN warnings
    . <- NULL

    if (is.null(assay)) {
        assay <- Assays(.data)
    }

    # Check if output would be too big without forcing
    if ( length(VariableFeatures(.data)) == 0 &
        is.null(features) &
        all == FALSE ) {
        stop("Your object do not contain variable trancript labels,\n",
            " feature argument is empty and all argument is set to FALSE.\n",
            " Either:\n",
            " 1. use detect_variable_features() to select variable feature\n",
            " 2. pass an array of features names\n",
            " 3. set all=TRUE (this will output a very large object;",
            " does your computer have enough RAM?)\n")
    }

    # Get variable features if existing
    if( length(VariableFeatures(.data)) > 0 &
        is.null(features) &
        all == FALSE )
        variable_genes <- VariableFeatures(.data)
    # Else
    else variable_genes <- NULL

    # For each requested assay, pivot its matrix to long format with one
    # ".abundance_<assay>" column, then full-join the assays by cell/feature
    .data@assays %>%
        .[assay] %>%
        # Take active assay
        map2(assay, ~ .x %>%
            GetAssayData(layer = slot) %>%
            when(
                variable_genes %>% is.null %>% `!` ~
                    (.)[variable_genes,, drop=FALSE],
                features %>% is.null %>% `!` ~ (.)[
                    toupper(rownames((.))) %in% toupper(features), , drop=FALSE],
                all ~ (.),
                ~ stop("tidyseurat says: It is not convenient to",
                    " extract all genes, you should have either variable",
                    " features or feature list to extract.")
            ) %>%
            # Replace 0 with NA (so values_drop_na removes them below)
            when(exclude_zeros ~ (.) %>%
                { x=(.); x[x == 0] <- NA; x },
                ~ (.)) %>%
            data.frame(check.names=FALSE) %>%
            as_tibble(rownames=".feature") %>%
            tidyr::pivot_longer(
                cols= - .feature,
                names_to=c_(.data)$name,
                values_to=".abundance" %>% paste(.y, sep="_"),
                values_drop_na=TRUE
            )
            #%>%
            #mutate_if(is.character, as.factor) %>%
        ) %>%
        Reduce(function(...)
            full_join(..., by=c(".feature", c_(.data)$name)), .)
}

#' @importFrom dplyr select_if
#' @importFrom tibble column_to_rownames
#'
#' @keywords internal
#'
#' @param .data A tibble
#' @param seurat_object A tidyseurat
#'
#' @noRd
as_meta_data <- function(.data, seurat_object) {

    # Solve CRAN warnings
    . <- NULL

    # Drop view-only columns (e.g. reduced-dimension columns) and move the
    # cell id back to the rownames, as Seurat metadata expects
    col_to_exclude <- get_special_columns(seurat_object)

    .data %>%
        select_if(!colnames(.) %in% col_to_exclude) %>%
        #select(-one_of(col_to_exclude)) %>%
        column_to_rownames(c_(seurat_object)$name)
}

#' @importFrom purrr map_chr
#'
#' @keywords internal
#'
#' @param seurat_object A tidyseurat
#'
#' @noRd
get_special_columns <- function(seurat_object) {
    # Column names contributed by reduced dimensions (view-only)
    get_special_datasets(seurat_object) %>%
        map(~ .x %>% colnames ) %>%
        unlist %>%
        as.character
}

# Cell embeddings of each reduction, truncated to at most
# `n_dimensions_to_return` dimensions
get_special_datasets <- function(seurat_object, n_dimensions_to_return=Inf) {
    seurat_object@reductions %>%
        map(~ .x@cell.embeddings[,
            1:min(n_dimensions_to_return, ncol(.x@cell.embeddings)),
            drop=FALSE])
}

# Columns that must be present for a tibble to be written back as metadata
get_needed_columns <- function(.data) {
    c(c_(.data)$name)
}

#' Convert array of quosure (e.g. c(col_a, col_b)) into character vector
#'
#' @keywords internal
#'
#' @importFrom rlang quo_name
#' @importFrom rlang quo_squash
#'
#' @param v A array of quosures (e.g. c(col_a, col_b))
#'
#' @return A character vector
quo_names <- function(v) {
    v <- quo_name(quo_squash(v))
    # Strip the c( ... ) wrapper and backticks, then split on ", "
    gsub('^c\\(|`|\\)$', '', v) %>%
        strsplit(', ') %>%
        unlist
}

#' returns variables from an expression
#' @param expression an expression
#' @importFrom rlang enexpr
#' @return list of symbols
return_arguments_of <- function(expression) {
    variables <- enexpr(expression) |> as.list()
    if (length(variables) > 1) {
        variables <- variables[-1] # removes first element which is function
    }
    variables
}

#' @importFrom purrr when
#' @importFrom dplyr select
#' @importFrom rlang expr
select_helper <- function(.data, ...) {
    # Resolve tidyselect expressions to column locations, then select
    loc <- tidyselect::eval_select(expr(c(...)), .data)
    dplyr::select( .data, loc)
}

data_frame_returned_message <- paste(
    "tidyseurat says:",
    "A data frame is returned for independent data analysis.")
duplicated_cell_names <- paste(
    "tidyseurat says:",
    "This operation lead to duplicated cell names.",
    "A data frame is returned for independent data analysis.")

# Drop empty images and empty SCT models from a Seurat object, which can
# otherwise break downstream operations
#' @importFrom methods .hasSlot
clean_seurat_object <- function(.data) {

    . <- NULL

    # Keep only images with at least one coordinate row
    if (.hasSlot(.data, "images"))
        .data@images <- map(.data@images, ~ .x %>%
            when((.)@coordinates %>% nrow() %>% gt(0) ~ (.))) %>%
            # Drop NULL
            Filter(Negate(is.null), .)

    # Keep only SCT models with at least one cell attribute row
    .data@assays <- .data@assays %>% map(~ {
        my_assay=.x
        if (.hasSlot(., "SCTModel.list"))
            my_assay@SCTModel.list = map(my_assay@SCTModel.list, ~ .x %>%
                when((.)@cell.attributes %>% nrow() %>% gt(0) ~ (.))) %>%
                # Drop NULL
                Filter(Negate(is.null), .)
        my_assay
    })

    .data
}

# This function is used for the change of special sample column to .sample
# Check if "sample" is included in the query and
# is not part of any other existing annotation
#' @importFrom stringr str_detect
#' @importFrom stringr regex
is_sample_feature_deprecated_used <- function(.data,
    user_columns, use_old_special_names=FALSE) {

    # Did the user refer to "cell" (old name) rather than ".cell" (new name)?
    cell <- any(str_detect(user_columns, regex("\\bcell\\b")))
    .cell <- any(str_detect(user_columns, regex("\\W*(\\.cell)\\W*")))

    # Old vocabulary is in use only if "cell" is not a genuine metadata column
    old_standard_is_used <-
        !"cell" %in% colnames(.data@meta.data) &&
        ("cell" %in% user_columns || (cell && !.cell))

    if (old_standard_is_used) {
        warning("tidyseurat says:",
            " from version 1.3.1, the special columns including",
            " cell id (colnames(se)) has changed to \".cell\".",
            " This dataset is returned with the old-style vocabulary (cell),",
            " however, we suggest to update your workflow",
            " to reflect the new vocabulary (.cell).")
        use_old_special_names <- TRUE
    }

    use_old_special_names
}

# Build the (name, symbol) pair used to refer to the cell-id column
get_special_column_name_symbol <- function(name) {
    list(name=name, symbol=as.symbol(name))
}

# Key column names: mark the object as using the old "cell" vocabulary
ping_old_special_column_into_metadata <- function(.data) {
    .data@misc$cell__ <- get_special_column_name_symbol("cell")
    .data
}

# NOTE(review): duplicates get_special_column_name_symbol — kept as-is in
# case it is referenced elsewhere in the package
get_special_column_name_cell <- function(name) {
    list(name=name, symbol=as.symbol(name))
}

# Default cell-id column descriptor (new-style ".cell")
cell__ <- get_special_column_name_symbol(".cell")

# Return the cell-id column descriptor for an object, honouring the
# old-style "cell" vocabulary if it was pinged into @misc
c_ <- function(x) {
    # Check if old deprecated columns are used
    if ("cell__" %in% names(x@misc)) cell__ <- x@misc$cell__
    return(cell__)
}

#' Add attribute to object
#'
#' @keywords internal
#' @noRd
#'
#' @importFrom dplyr vars
#'
#'
@param var A tibble #' @param attribute An object #' @param name A character name of the attribute #' #' @return A tibble with an additional attribute add_attr <- function(var, attribute, name) { attr(var, name) <- attribute var } #' Get specific annotation columns #' #' @keywords internal #' @noRd #' #' @importFrom rlang enquo #' @importFrom purrr map #' @importFrom dplyr distinct_at #' @importFrom magrittr equals #' @importFrom dplyr vars #' #' @param .data A tibble #' @param .col A vector of column names #' #' @return A character get_specific_annotation_columns <- function(.data, .col) { # Comply with CRAN NOTES . <- NULL # Make col names .col <- enquo(.col) # x-annotation df n_x <- .data |> distinct_at(vars(!!.col)) |> nrow() # element wise columns .data |> select(-!!.col) |> colnames() |> map(~ { n_.x <- .data |> distinct_at(vars(!!.col, .x)) |> nrow() if (n_.x == n_x) .x else NULL }) %>% # Drop NULL { (.)[lengths((.)) != 0] } |> unlist() } subset_tidyseurat <- function(.data, .column) { # Make col names .column <- enquo(.column) # Check if column present if (.data |> select(!!.column) |> colnames() %in% colnames(.data) %>% all %>% `!`) stop("tidyseurat says: some of the .column specified", " do not exist in the input data frame.") .data %>% # Selecting the right columns select(!!.column, get_specific_annotation_columns(.data, !!.column)) %>% distinct() } #' @importFrom Seurat GetAssayData #' @importFrom methods is GetAssayData_robust = function(seurat_assay, layer = NULL){ if( seurat_assay |> is("Assay5") & seurat_assay |> ncol() == 1 ){ m = seurat_assay@layers[[layer]] |> as.matrix() rownames(m) = rownames(seurat_assay) colnames(m) = colnames(seurat_assay) m } else GetAssayData(seurat_assay, layer=layer) } ================================================ FILE: R/utils-pipe.R ================================================ #' Pipe operator #' #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 
#' #' @name %>% #' @rdname pipe #' @keywords internal #' @export #' @importFrom magrittr %>% #' @usage lhs \%>\% rhs #' @examples #' data(pbmc_small) #' pbmc_small %>% print() #' @return void NULL ================================================ FILE: R/zzz.R ================================================ #' @importFrom utils packageDescription .onAttach = function(libname, pkgname) { version = packageDescription(pkgname, fields = "Version") msg = paste0("======================================== ", pkgname, " version ", version, " If you use TIDYSEURAT in published research, please cite: Mangiola et al. Interfacing Seurat with the R tidy universe. Bioinformatics 2021. This message can be suppressed by: suppressPackageStartupMessages(library(tidyseurat)) To restore the Seurat default display use options(\"restore_Seurat_show\" = TRUE) ======================================== ") packageStartupMessage(msg) # Attach tidyverse attached <- tidyverse_attach() } # rv = R.Version() # if(getRversion() >= "4.0.0" && as.numeric(rv$`svn rev`) >= 77889) { # unitType = get("unitType", envir = asNamespace("grid")) # } else { # unitType = function(x, recurse = TRUE) attr(x, "unit") # } ================================================ FILE: README.Rmd ================================================ --- title: "tidyseurat - part of tidytranscriptomics" output: github_document always_allow_html: true --- [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![R build status](https://github.com/stemangiola/tidyseurat/workflows/R-CMD-check/badge.svg)](https://github.com/stemangiola/tidyseurat/actions/) Watch the video ```{r echo=FALSE} knitr::opts_chunk$set( fig.path = "man/figures/") ``` ```{r include=FALSE} # Set path to plotly screenshot. 
We don't run the plotly code chunk as most servers do not have javascript libraries needed for interactive plotting screenshot <- "man/figures/plotly.png" # The chunk below uses Rmd in man/fragments to avoid duplication, as the content is shared with the vignette and README. As suggested here: https://www.garrickadenbuie.com/blog/dry-vignette-and-readme/ visual_cue <- "man/figures/logo_interaction-01.png" ``` ```{r child="man/fragments/intro.Rmd"} ``` ================================================ FILE: README.md ================================================ tidyseurat - part of tidytranscriptomics ================ [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![R build status](https://github.com/stemangiola/tidyseurat/workflows/R-CMD-check/badge.svg)](https://github.com/stemangiola/tidyseurat/actions/) Watch the video **Brings Seurat to the tidyverse!** website: [stemangiola.github.io/tidyseurat/](https://stemangiola.github.io/tidyseurat/) Please also have a look at - [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy single-cell RNA sequencing analysis - [tidySummarizedExperiment](https://tidyomics.github.io/tidySummarizedExperiment/) for tidy bulk RNA sequencing analysis - [tidybulk](https://tidyomics.github.io/tidybulk/) for tidy bulk RNA-seq analysis - [tidygate](https://github.com/stemangiola/tidygate/) for adding custom gate information to your tibble - [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles
![visual cue](man/figures/logo_interaction-01.png)
# Introduction tidyseurat provides a bridge between the Seurat single-cell package \[@butler2018integrating; @stuart2019comprehensive\] and the tidyverse \[@wickham2019welcome\]. It creates an invisible layer that enables viewing the Seurat object as a tidyverse tibble, and provides Seurat-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions. ## Functions/utilities available | Seurat-compatible Functions | Description | |-----------------------------|-------------| | `all` | | | tidyverse Packages | Description | |--------------------|--------------------------------------| | `dplyr` | All `dplyr` APIs like for any tibble | | `tidyr` | All `tidyr` APIs like for any tibble | | `ggplot2` | `ggplot` like for any tibble | | `plotly` | `plot_ly` like for any tibble | | Utilities | Description | |----|----| | `tidy` | Add `tidyseurat` invisible layer over a Seurat object | | `as_tibble` | Convert cell-wise information to a `tbl_df` | | `join_features` | Add feature-wise information, returns a `tbl_df` | | `aggregate_cells` | Aggregate cell gene-transcription abundance as pseudobulk tissue | ## Installation From CRAN ``` r install.packages("tidyseurat") ``` From Github (development) ``` r devtools::install_github("stemangiola/tidyseurat") ``` ``` r library(dplyr) library(tidyr) library(purrr) library(magrittr) library(ggplot2) library(Seurat) library(tidyseurat) ``` ## Create `tidyseurat`, the best of both worlds! This is a seurat object but it is evaluated as tibble. So it is fully compatible both with Seurat and tidyverse APIs. 
``` r pbmc_small = SeuratObject::pbmc_small ``` **It looks like a tibble** ``` r pbmc_small ``` ## # A Seurat-tibble abstraction: 80 × 15 ## # [90mFeatures=230 | Cells=80 | Active assay=RNA | Assays=RNA[0m ## .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups ## ## 1 ATGC… SeuratPro… 70 47 0 A g2 ## 2 CATG… SeuratPro… 85 52 0 A g1 ## 3 GAAC… SeuratPro… 87 50 1 B g2 ## 4 TGAC… SeuratPro… 127 56 0 A g2 ## 5 AGTC… SeuratPro… 173 53 0 A g2 ## 6 TCTG… SeuratPro… 70 48 0 A g1 ## 7 TGGT… SeuratPro… 64 36 0 A g1 ## 8 GCAG… SeuratPro… 72 45 0 A g1 ## 9 GATA… SeuratPro… 52 36 0 A g1 ## 10 AATG… SeuratPro… 100 41 0 A g1 ## # ℹ 70 more rows ## # ℹ 8 more variables: RNA_snn_res.1 , PC_1 , PC_2 , PC_3 , ## # PC_4 , PC_5 , tSNE_1 , tSNE_2 **But it is a Seurat object after all** ``` r pbmc_small@assays ``` ## $RNA ## Assay data with 230 features for 80 cells ## Top 10 variable features: ## PPBP, IGLL5, VDAC3, CD1C, AKR1C3, PF4, MYL9, GNLY, TREML1, CA2 # Preliminary plots Set colours and theme for plots. ``` r # Use colourblind-friendly colours friendly_cols <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933", "#882255", "#661100", "#6699CC") # Set theme my_theme <- list( scale_fill_manual(values = friendly_cols), scale_color_manual(values = friendly_cols), theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size = 12), legend.position = "bottom", aspect.ratio = 1, strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)) ) ) ``` We can treat `pbmc_small` effectively as a normal tibble for plotting. Here we plot number of features per cell. 
``` r pbmc_small %>% ggplot(aes(nFeature_RNA, fill = groups)) + geom_histogram() + my_theme ``` ![](man/figures/plot1-1.png) Here we plot total features per cell. ``` r pbmc_small %>% ggplot(aes(groups, nCount_RNA, fill = groups)) + geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.1) + my_theme ``` ![](man/figures/plot2-1.png) Here we plot abundance of two features for each group. ``` r pbmc_small %>% join_features(features = c("HLA-DRA", "LYZ"), shape = "long") %>% ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) + geom_boxplot(outlier.shape = NA) + geom_jitter(aes(size = nCount_RNA), alpha = 0.5, width = 0.2) + scale_y_log10() + my_theme ``` ![](man/figures/unnamed-chunk-15-1.png) # Preprocess the dataset Also you can treat the object as Seurat object and proceed with data processing. ``` r pbmc_small_pca <- pbmc_small %>% SCTransform(verbose = FALSE) %>% FindVariableFeatures(verbose = FALSE) %>% RunPCA(verbose = FALSE) pbmc_small_pca ``` ## # A Seurat-tibble abstraction: 80 × 17 ## # [90mFeatures=220 | Cells=80 | Active assay=SCT | Assays=RNA, SCT[0m ## .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups ## ## 1 ATGC… SeuratPro… 70 47 0 A g2 ## 2 CATG… SeuratPro… 85 52 0 A g1 ## 3 GAAC… SeuratPro… 87 50 1 B g2 ## 4 TGAC… SeuratPro… 127 56 0 A g2 ## 5 AGTC… SeuratPro… 173 53 0 A g2 ## 6 TCTG… SeuratPro… 70 48 0 A g1 ## 7 TGGT… SeuratPro… 64 36 0 A g1 ## 8 GCAG… SeuratPro… 72 45 0 A g1 ## 9 GATA… SeuratPro… 52 36 0 A g1 ## 10 AATG… SeuratPro… 100 41 0 A g1 ## # ℹ 70 more rows ## # ℹ 10 more variables: RNA_snn_res.1 , nCount_SCT , ## # nFeature_SCT , PC_1 , PC_2 , PC_3 , PC_4 , ## # PC_5 , tSNE_1 , tSNE_2 If a tool is not included in the tidyseurat collection, we can use `as_tibble` to permanently convert `tidyseurat` into tibble. 
``` r pbmc_small_pca %>% as_tibble() %>% select(contains("PC"), everything()) %>% GGally::ggpairs(columns = 1:5, ggplot2::aes(colour = groups)) + my_theme ``` ![](man/figures/pc_plot-1.png) # Identify clusters We proceed with cluster identification with Seurat. ``` r pbmc_small_cluster <- pbmc_small_pca %>% FindNeighbors(verbose = FALSE) %>% FindClusters(method = "igraph", verbose = FALSE) pbmc_small_cluster ``` ## # A Seurat-tibble abstraction: 80 × 19 ## # [90mFeatures=220 | Cells=80 | Active assay=SCT | Assays=RNA, SCT[0m ## .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups ## ## 1 ATGC… SeuratPro… 70 47 0 A g2 ## 2 CATG… SeuratPro… 85 52 0 A g1 ## 3 GAAC… SeuratPro… 87 50 1 B g2 ## 4 TGAC… SeuratPro… 127 56 0 A g2 ## 5 AGTC… SeuratPro… 173 53 0 A g2 ## 6 TCTG… SeuratPro… 70 48 0 A g1 ## 7 TGGT… SeuratPro… 64 36 0 A g1 ## 8 GCAG… SeuratPro… 72 45 0 A g1 ## 9 GATA… SeuratPro… 52 36 0 A g1 ## 10 AATG… SeuratPro… 100 41 0 A g1 ## # ℹ 70 more rows ## # ℹ 12 more variables: RNA_snn_res.1 , nCount_SCT , ## # nFeature_SCT , SCT_snn_res.0.8 , seurat_clusters , ## # PC_1 , PC_2 , PC_3 , PC_4 , PC_5 , tSNE_1 , ## # tSNE_2 Now we can interrogate the object as if it was a regular tibble data frame. ``` r pbmc_small_cluster %>% count(groups, seurat_clusters) ``` ## # A tibble: 6 × 3 ## groups seurat_clusters n ## ## 1 g1 0 23 ## 2 g1 1 17 ## 3 g1 2 4 ## 4 g2 0 17 ## 5 g2 1 13 ## 6 g2 2 6 We can identify cluster markers using Seurat. ``` r # Identify top 10 markers per cluster markers <- pbmc_small_cluster %>% FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>% group_by(cluster) %>% top_n(10, avg_log2FC) # Plot heatmap pbmc_small_cluster %>% DoHeatmap( features = markers$gene, group.colors = friendly_cols ) ``` # Reduce dimensions We can calculate the first 3 UMAP dimensions using the Seurat framework. 
``` r pbmc_small_UMAP <- pbmc_small_cluster %>% RunUMAP(reduction = "pca", dims = 1:15, n.components = 3L) ``` And we can plot them using 3D plot using plotly. ``` r pbmc_small_UMAP %>% plot_ly( x = ~`UMAP_1`, y = ~`UMAP_2`, z = ~`UMAP_3`, color = ~seurat_clusters, colors = friendly_cols[1:4] ) ```
![screenshot plotly](man/figures/plotly.png)
## Cell type prediction We can infer cell type identities using *SingleR* \[@aran2019reference\] and manipulate the output using tidyverse. ``` r # Get cell type reference data blueprint <- celldex::BlueprintEncodeData() # Infer cell identities cell_type_df <- GetAssayData(pbmc_small_UMAP, slot = 'counts', assay = "SCT") %>% log1p() %>% Matrix::Matrix(sparse = TRUE) %>% SingleR::SingleR( ref = blueprint, labels = blueprint$label.main, method = "single" ) %>% as.data.frame() %>% as_tibble(rownames = "cell") %>% select(cell, first.labels) ``` ``` r # Join UMAP and cell type info pbmc_small_cell_type <- pbmc_small_UMAP %>% left_join(cell_type_df, by = "cell") # Reorder columns pbmc_small_cell_type %>% select(cell, first.labels, everything()) ``` We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification. ``` r pbmc_small_cell_type %>% count(seurat_clusters, first.labels) ``` We can easily reshape the data for building information-rich faceted plots. ``` r pbmc_small_cell_type %>% # Reshape and add classifier column pivot_longer( cols = c(seurat_clusters, first.labels), names_to = "classifier", values_to = "label" ) %>% # UMAP plots for cell type and cluster ggplot(aes(UMAP_1, UMAP_2, color = label)) + geom_point() + facet_wrap(~classifier) + my_theme ``` We can easily plot gene correlation per cell category, adding multi-layer annotations. ``` r pbmc_small_cell_type %>% # Add some mitochondrial abundance values mutate(mitochondrial = rnorm(n())) %>% # Plot correlation join_features(features = c("CST3", "LYZ"), shape = "wide") %>% ggplot(aes(CST3 + 1, LYZ + 1, color = groups, size = mitochondrial)) + geom_point() + facet_wrap(~first.labels, scales = "free") + scale_x_log10() + scale_y_log10() + my_theme ``` # Nested analyses A powerful tool we can use with tidyseurat is `nest`. We can easily perform independent analyses on subsets of the dataset. 
First we classify cell types in lymphoid and myeloid; then, nest based on the new classification ``` r pbmc_small_nested <- pbmc_small_cell_type %>% filter(first.labels != "Erythrocytes") %>% mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>% nest(data = -cell_class) pbmc_small_nested ``` Now we can independently for the lymphoid and myeloid subsets (i) find variable features, (ii) reduce dimensions, and (iii) cluster using both tidyverse and Seurat seamlessly. ``` r pbmc_small_nested_reanalysed <- pbmc_small_nested %>% mutate(data = map( data, ~ .x %>% FindVariableFeatures(verbose = FALSE) %>% RunPCA(npcs = 10, verbose = FALSE) %>% FindNeighbors(verbose = FALSE) %>% FindClusters(method = "igraph", verbose = FALSE) %>% RunUMAP(reduction = "pca", dims = 1:10, n.components = 3L, verbose = FALSE) )) pbmc_small_nested_reanalysed ``` Now we can unnest and plot the new classification. ``` r pbmc_small_nested_reanalysed %>% # Convert to tibble otherwise Seurat drops reduced dimensions when unifying data sets. mutate(data = map(data, ~ .x %>% as_tibble())) %>% unnest(data) %>% # Define unique clusters unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>% # Plotting ggplot(aes(UMAP_1, UMAP_2, color = cluster)) + geom_point() + facet_wrap(~cell_class) + my_theme ``` # Aggregating cells Sometimes, it is necessary to aggregate the gene-transcript abundance from a group of cells into a single value. For example, when comparing groups of cells across different samples with fixed-effect models. In tidyseurat, cell aggregation can be achieved using the `aggregate_cells` function. 
``` r pbmc_small %>% aggregate_cells(groups, assays = "RNA") ``` ================================================ FILE: _pkgdown.yml ================================================ template: bootstrap: 5 ================================================ FILE: codecov.yml ================================================ comment: false coverage: status: project: default: target: auto threshold: 1% patch: default: target: auto threshold: 1% ================================================ FILE: dev/code_comparison.Rmd ================================================ --- title: "Code comparison with Seurat" author: "Stefano Mangiola" date: "`r Sys.Date()`" package: tidyseurat output: html_vignette: toc_float: true vignette: > %\VignetteEngine{knitr::knitr} %\VignetteIndexEntry{Code comparison with Seurat} %\usepackage[UTF-8]{inputenc} --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` # Case study code comparison ## Calculate gamma-delta signature and plot tidyseurat ```{r} seurat_obj <- readRDS("dev/PBMC_tidy_clean_scaled_UMAP_cluster_cell_type.rds") seurat_obj = seurat_obj %>% filter(first.labels == "T_cells") %>% RunPCA() %>% RunUMAP(dims=1:30) %>% mutate(type=case_when(sample %in% c("GSE115189", "SRR11038995", "SRR7244582") ~ "A", TRUE ~ "B")) ``` ```{r} library(tidygate) library(ggplot2) library(purrr) library(patchwork) # Calculate gamma delta signature seurat_obj_sig = seurat_obj %>% join_features( features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide", assay = "SCT" ) %>% mutate(signature_score = scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to=c(0,1)) - scales::rescale(CD8A + CD8B, to=c(0,1)) ) p1 = seurat_obj_sig %>% # Subsample add_count(sample, name = "tot_cells") %>% mutate(min_cells = min(tot_cells)) %>% group_by(sample) %>% sample_n(min_cells) %>% # Plot pivot_longer(cols=c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B", "signature_score")) %>% mutate(value = case_when(value>0 ~ value)) %>% group_by(name) %>% 
mutate(value = scale(value)) %>% ggplot(aes(UMAP_1, UMAP_2, color=value)) + geom_point(shape=".") + facet_grid(type~name) + scale_color_viridis_c() + custom_theme # Test differential abundance p2 = seurat_obj_sig %>% # Gating mutate(gamma_delta = gate_chr( UMAP_1, UMAP_2, .color = signature_score, .size=0.1 )) %>% # Calculate proportions add_count(sample, name = "tot_cells") %>% count(sample, type, tot_cells, gamma_delta) %>% mutate(frac = n/tot_cells) %>% filter(gamma_delta == 1) %>% # Plot ggplot(aes(type, frac)) + geom_boxplot() + geom_point() + custom_theme p = p1 / (p2 | plot_spacer()) + plot_layout(guides = "collect") ggsave("dev/summary_statistics.pdf", p, device = "pdf", width = 183, height = 150, units = "mm", useDingbats=FALSE) ``` Seurat ```{r} library(Seurat) library(gatepoints) library(dplyr) # Calculate gamma delta signature signature_score_1 = seurat_obj[c("CD3D", "TRDC", "TRGC1", "TRGC2"),] %>% GetAssayData(assay="SCT", slot="data") %>% colSums() %>% scales::rescale(to=c(0,1)) signature_score_2 = seurat_obj[c("CD8A", "CD8B"),] %>% GetAssayData(assay="SCT", slot="data") %>% colSums() %>% scales::rescale(to=c(0,1)) seurat_obj$signature_score = signature_score_1 - signature_score_2 # Subsample splits = colnames(seurat_obj) %>% split(seurat_obj$sample) min_size = splits %>% sapply(length) %>% min() cell_subset = splits %>% lapply(function(x) sample(x, min_size)) %>% unlist() seurat_obj = seurat_obj[,cell_subset] # Plot DefaultAssay(seurat_obj) = "SCT" seurat_obj %>% FeaturePlot( features = c("signature_score", "CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), split.by = "type", min.cutoff = 0.1 ) # Gating p = FeaturePlot(seurat_obj, features = "signature_score") seurat_obj$within_gate = colnames(seurat_obj) %in% CellSelector(plot = p) # Calculate proportions seurat_obj[[]] %>% add_count(sample, name = "tot_cells") %>% count(sample, type, tot_cells, within_gate) %>% mutate(frac = n/tot_cells) %>% filter(within_gate == T) %>% # Plot ggplot(aes(type, 
frac)) + geom_boxplot() + geom_point() ``` ================================================ FILE: dev/plot_seurat_structure.R ================================================ library( DataExplorer ) plot_str(pbmc_small, type = "r" ) plot_str(pbmc_small , type="d") plot_str(pbmc_small %>% join_features("CD3G"), type = "r" ) plot_str(pbmc_small %>% join_features("CD3G"), type = "d" ) ================================================ FILE: dev/use_cases_BioCAsia2021.R ================================================ library(tidyverse) library(glue) tibble( observation = glue("observation {1:100}"), variable_1 = rep("...", 100), variable_2 = rep("...", 100), variable_3 = map(1:100, ~ tibble(a = 1:10, b = 1:10)), variable_4 = map(1:100, ~ ggplot()), variable_5 = map(1:100, ~ lm(y ~ x, data = data.frame(x=1:10, y=1:10))), variable_6 = map(1:100, ~ pbmc_small), variable_7 = map(1:100, ~ tidySingleCellExperiment::pbmc_small) ) my_vector = seq(1, 20); # Imperative my_vector_modified = c() for(i in 1:length(my_vector)) { my_vector_modified[i] = my_vector[i] * 2L } # Functional my_vector_modified = my_vector |> map_int(~ .x * 2L) df = data.frame(a= rep("a", ncol(SeuratObject::pbmc_small)), b= rep("b", ncol(SeuratObject::pbmc_small))) rownames(df) = colnames(SeuratObject::pbmc_small) info = rep(1, ncol(SeuratObject::pbmc_small)) SeuratObject::pbmc_small |> AddMetaData(info, "info") colData(tidySingleCellExperiment::pbmc_small) |> cbind() # Subsampling single_cell_data |> add_count(sample, name = "tot_cells") |> mutate(median_cells = min(tot_cells)) |> nest(data = -c(sample, median_cells)) |> mutate(data = map2(data, median_cells, ~ sample_n(.x, .y, replace = TRUE))) |> unnest(data) # Define cell categories for analysis plotting single_cell_data |> mutate(cell_differentiation = case_when( curated_cell_type_pretty %in% c("B immature", "B mem") ~ "B", curated_cell_type_pretty %in% c("pDC") ~ "pDC", cell_differentiation == "lymphoid" ~ "T+NK", cell_differentiation == "myeloid" ~ 
"Myeloid" ) ) |> mutate( curated_cell_type_pretty = if_else( curated_cell_type_pretty %in% c("T gd1", "T gd2"), "gamma_delta" , curated_cell_type_pretty ) ) # Quality control # Gating gamma delta seurat_obj_sig = seurat_obj |> join_features( features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), shape = "wide", assay = "SCT" ) |> mutate(signature_score = scales::rescale(CD3D + TRDC + TRGC1+ TRGC2, to=c(0,1)) - scales::rescale(CD8A + CD8B, to=c(0,1)) ) |> Seurat::FeaturePlot(signature_score) |> mutate( gate = tidygate::gate_int(UMAP_1, UMAP_2) ) |> filter(gate == 1) %>% NormalizeData() |> FindVariableFeatures( nfeatures = 100) split_group(sample) %>% RunFastMNN() |> RunUMAP(reduction = "mnn", dims = 1:20) |> FindNeighbors( dims = 1:20, reduction = "mnn") |> FindClusters( resolution = 0.3) |> # gamma_delta_df = # readRDS("cancer_only_analyses/integrated_counts_curated.rds") |> # # {.x = (.); DefaultAssay(.x) = "RNA"; .x} |> # filter(curated_cell_type_pretty %in% c("T gd1", "T gd2")) |> # # { # .x= (.) 
# DefaultAssay(.x) = "RNA" # .x[["SCT"]] = NULL # .x[["integrated"]] = NULL # .x # } |> # NormalizeData() |> # FindVariableFeatures( nfeatures = 100) |> # mutate(batch_to_eliminate = sample) |> # nest(data = -batch_to_eliminate) |> # pull(data) |> # RunFastMNN() |> # RunUMAP(reduction = "mnn", dims = 1:20) |> # FindNeighbors( dims = 1:20, reduction = "mnn") |> # FindClusters( resolution = 0.3) |> # mutate(gate = tidygate::gate_int(UMAP_1, UMAP_2, how_many_gates = 2, gate_list = readRDS("file66175abbca44.rds"))) |> # tidysc::adjust_abundance(~ 1) |> # mutate(gamma_delta = case_when( # gate == 0 ~ "T gd vd2", # gate == 1 ~ "T gd vd1 LGALS1", # gate == 2 ~ "T gd vd1", # )) ================================================ FILE: dev/workflow_article.R ================================================ # Article workflow library(tidyverse) library(Seurat) library(SingleR) library(plotly) library(tidyHeatmap) library(tidyseurat) options(future.globals.maxSize = 50068 * 1024^2) PBMC <- readRDS("dev/PBMC_integrated.rds") # Polishing PBMC_clean <- PBMC_ # Clean groups mutate(Phase = Phase %>% str_remove("^phase_")) %>% # Extract sample extract(sample, "sample", "./data/seurat/outs/([a-zA-Z0-9]+)") # PBMC_clean = PBMC_clean %>% nest(data = -sample) %>% mutate(data = map(data, ~ .x %>% sample_n(200))) %>% unnest(data) # Scaling # PBMC_clean_scaled <- # PBMC_clean %>% # SCTransform(verbose = FALSE) %>% # FindVariableFeatures(verbose = FALSE) # Dimensionality reduction PBMC_clean_scaled_UMAP <- PBMC_clean %>% RunPCA(verbose = FALSE) %>% RunUMAP(reduction = "pca", dims = 1:15, n.components = 3L) # Clustering PBMC_clean_scaled_UMAP_cluster <- PBMC_clean_scaled_UMAP %>% FindNeighbors(verbose = FALSE) %>% FindClusters(method = "igraph", verbose = FALSE) # Cell_type classification Manual markers <- PBMC_clean_scaled_UMAP_cluster %>% FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>% group_by(cluster) %>% top_n(10, avg_logFC) # Cell_type classification Automatic # 
Get cell type reference data hpca <- HumanPrimaryCellAtlasData() # Infer cell identities cell_type_df <- # extracting counts from Seurat object GetAssayData(PBMC_clean_scaled_UMAP_cluster, layer = 'counts', assay = "SCT") %>% log1p() %>% # SingleR SingleR( ref = hpca, labels = hpca$label.main, method = "cluster", clusters = PBMC_clean_scaled_UMAP_cluster %>% pull(seurat_clusters) ) %>% # Formatting results as.data.frame() %>% as_tibble(rownames = "seurat_clusters") %>% select(seurat_clusters, first.labels) # Infer cell identities - cell wise cell_type_df_single <- # extracting counts from Seurat object GetAssayData(PBMC_clean_scaled_UMAP_cluster, layer = 'counts', assay = "SCT") %>% log1p() %>% # SingleR SingleR( ref = hpca, labels = hpca$label.main, method = "single" ) %>% # Formatting results as.data.frame() %>% as_tibble(rownames = "cell") %>% select(cell, first.labels_single = first.labels) # Join UMAP and cell type info PBMC_clean_scaled_UMAP_cluster_cell_type <- PBMC_clean_scaled_UMAP_cluster %>% left_join( cell_type_df, by = "seurat_clusters" ) %>% left_join( cell_type_df_single, by = "cell" ) # Markers PBMC_clean_scaled_UMAP_cluster_cell_type %>% FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>% group_by(cluster) %>% top_n(10, avg_logFC) %>% saveRDS("dev/PBMC_marker_df.rds") # Nesting PBMC_clean_scaled_UMAP_cluster_cell_type %>% sample_n(1000) %>% # Label lymphoid and myeloid tidyseurat::filter(first.labels != "Platelets") %>% tidyseurat::mutate(cell_class = if_else( `first.labels` %in% c("Macrophage", "Monocyte"), "myeloid", "lymphoid" ) ) %>% # Nesting nest(data = -cell_class) %>% # Identification of variable gene features mutate(variable_genes = map_chr( data, ~ .x %>% FindVariableFeatures() %>% RunPCA(verbose = FALSE) %>% FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>% pull(gene) %>% head() %>% paste(collapse=", ") )) # # Reorder columns # PBMC_clean_scaled_UMAP_cluster_cell_type %>% # count(seurat_clusters, 
first.labels_cluster = first.labels) saveRDS(PBMC_clean_scaled_UMAP_cluster_cell_type, "dev/PBMC_clean_scaled_UMAP_cluster_cell_type.rds") ================================================ FILE: dev/workflow_create_integrated_pbmc.R ================================================ # Article workflow library(tidyverse) library(Seurat) library(SingleR) library(plotly) # library(future) # plan(multisession, workers=10) options(future.globals.maxSize = 50068 * 1024^2) library(tidyseurat) friendly_cols <- dittoSeq::dittoColors() # PBMC = PBMC %>% # select(1:11, -old.ident) %>% # mutate(sample = sprintf("./data/seurat/outs/%s", sample)) %>% # mutate(Phase = sprintf("phase_%s", Phase)) PBMC <- readRDS("dev/PBMC.rds") PBMC_clean_scaled <- PBMC_ mutate(grouping = sample) %>% nest(sample_df = -grouping) %>% mutate(sample_df = map( sample_df,~ SCTransform(.x))) my_features = PBMC_clean_scaled$sample_df %>% SelectIntegrationFeatures(nfeatures = 2000) PBMC_integrated = PBMC_clean_scaled$sample_df %>% PrepSCTIntegration(anchor.features = my_features) %>% FindIntegrationAnchors( normalization.method = "SCT", anchor.features = my_features ) %>% IntegrateData(normalization.method = "SCT") PBMC_integrated %>% saveRDS("dev/PBMC_integrated.rds") ================================================ FILE: dev/workflow_figures.R ================================================ # Article workflow library(tidyverse) library(Seurat) library(SingleR) library(plotly) library(tidyHeatmap) library(ggalluvial) library(ggplot2) library(tidyseurat) options(future.globals.maxSize = 50068 * 1024^2) # Use colourblind-friendly colours friendly_cols <- dittoSeq::dittoColors() # Set theme custom_theme <- list( scale_fill_manual(values = friendly_cols), scale_color_manual(values = friendly_cols), theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size = 9), legend.position 
= "bottom",
strip.background = element_blank(),
axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)
)
)

# Object produced by the upstream clustering / cell-type-annotation workflow
PBMC_clean_scaled_UMAP_cluster_cell_type <- readRDS("dev/PBMC_tidy_clean_scaled_UMAP_cluster_cell_type.rds")

# p1: per-sample QC boxplots (mitochondrial fraction and cell-cycle scores)
p1 = PBMC_clean_scaled_UMAP_cluster_cell_type %>%
    pivot_longer(
        c(mito.fraction, S.Score, G2M.Score),
        names_to="property", values_to="Value"
    ) %>%
    mutate(property = factor(property, levels = c("mito.fraction", "G2M.Score", "S.Score"))) %>%
    ggplot(aes(sample, Value)) +
    geom_boxplot(outlier.size = 0.5) +
    facet_wrap(~property, scales = "free_y") +
    custom_theme +
    theme(aspect.ratio=1)

# p2: 2D UMAP coloured by cluster (subsampled to 20k cells)
p2 = PBMC_clean_scaled_UMAP_cluster_cell_type %>%
    sample_n(20000) %>%
    ggplot(aes(UMAP_1, UMAP_2, color=seurat_clusters)) +
    geom_point(size=0.05, alpha=0.2) +
    custom_theme +
    theme(aspect.ratio=1)

# Interactive 3D UMAP (plotly); displayed only, never saved below
PBMC_clean_scaled_UMAP_cluster_cell_type %>%
    sample_n(20000) %>%
    plot_ly(
        x = ~`UMAP_1`,
        y = ~`UMAP_2`,
        z = ~`UMAP_3`,
        color = ~seurat_clusters,
        colors = friendly_cols[1:24],sizes = 50, size = 1
    )

markers = readRDS("dev/PBMC_marker_df.rds")

# p3: density ridges of CD3D / HLA-DRB1 abundance per cluster
# NOTE(review): geom_density_ridges() comes from ggridges -- confirm it is
# attached before running this script.
p3 = PBMC_clean_scaled_UMAP_cluster_cell_type %>%
    arrange(first.labels) %>%
    mutate(seurat_clusters = fct_inorder(seurat_clusters)) %>%
    join_features(features=c("CD3D", "HLA-DRB1")) %>%
    ggplot(aes(y=seurat_clusters , x=.abundance_SCT, fill=first.labels)) +
    geom_density_ridges(bandwidth = 0.2) +
    facet_wrap(~ .feature, nrow = 2) +
    coord_flip() +
    custom_theme

# Plot heatmap
# p4: Seurat heatmap of marker genes (subsampled cells)
p4 = PBMC_clean_scaled_UMAP_cluster_cell_type %>%
    sample_n(2000) %>%
    DoHeatmap(
        features = markers$gene,
        group.colors = friendly_cols
    )

# p5: tidyHeatmap of marker-gene abundance, cells grouped by cluster
p5 = PBMC_clean_scaled_UMAP_cluster_cell_type %>%
    sample_n(1000) %>%
    join_features(features=markers$gene) %>%
    mutate(seurat_clusters = as.integer(seurat_clusters)) %>%
    filter(seurat_clusters<10) %>%
    group_by(seurat_clusters) %>%
    # Plot heatmap
    heatmap(
        .row = .feature,
        .column = .cell,
        .value = .abundance_SCT,
        palette_grouping = list(rep("black",9)),
        palette_value = circlize::colorRamp2(c(-1.5, 0, 1.5), c("purple", "black", "yellow")),
        # ComplexHeatmap parameters
        row_gap = unit(0.1, "mm"),
        column_gap = unit(0.1, "mm")
    ) %>%
    # Add annotation
    add_tile(sample, palette = friendly_cols[1:7]) %>%
    add_point(PC_1)

# p6: alluvial plot comparing unsupervised clusters with SingleR labels
# NOTE(review): geom_text_repel() comes from ggrepel -- confirm it is attached.
p6 = PBMC_clean_scaled_UMAP_cluster_cell_type %>%
    tidyseurat::unite("cluster_cell_type", c(first.labels, seurat_clusters), remove=FALSE) %>%
    pivot_longer(
        c(seurat_clusters, first.labels_single),
        names_to = "classification", values_to = "value"
    ) %>%
    ggplot(aes(x = classification, stratum = value, alluvium = cell,
        fill = first.labels, label = value)) +
    scale_x_discrete(expand = c(1, 1)) +
    geom_flow() +
    geom_stratum(alpha = .5) +
    # geom_text(stat = "stratum", size = 3) +
    geom_text_repel(stat = "stratum", size = 3, nudge_x = 0.05, direction = "y",
        angle = 0, vjust = 0, segment.size = 0.2) +
    scale_fill_manual(values = friendly_cols) +
    #guides(fill = FALSE) +
    coord_flip() +
    theme_bw() +
    theme(
        panel.border = element_blank(),
        axis.line = element_line(),
        panel.grid.major = element_line(size = 0.2),
        panel.grid.minor = element_line(size = 0.1),
        text = element_text(size = 9),
        legend.position = "bottom",
        strip.background = element_blank(),
        axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)
    )

# Save figures for the article (widths in mm match journal column sizes)
ggsave("dev/summary_statistics.pdf", p1, device = "pdf", width = 183/3, height = 50, units = "mm", useDingbats=FALSE)
ggsave("dev/UMAP_2D.pdf", p2, device = "pdf", width = 89, height = 100, units = "mm", useDingbats=FALSE)
ggsave("dev/violin.pdf", p3, device = "pdf", width = 89, height = 100, units = "mm", useDingbats=FALSE)
save_pdf(p5, filename = "dev/UMAPheatmap.pdf", width = 183+50, height = 150, units = "mm")
ggsave("dev/alluvial.pdf", p6, device = "pdf", width = 89, height = 100,
units = "mm",
useDingbats=FALSE)
================================================
FILE: inst/CITATION
================================================
citHeader("To cite tidyseurat in publications use:")

bibentry(
    bibtype = "Article",
    title = "Interfacing Seurat with the R tidy universe",
    author = as.person(" Stefano Mangiola [aut], Maria A Doyle [aut], Anthony T Papenfuss [aut]"),
    journal = "Bioinformatics",
    year = "2021",
    volume = "btab404",
    publisher = "Oxford Press",
    url = "https://doi.org/10.1093/bioinformatics/btab404"
)
================================================
FILE: inst/NEWS.rd
================================================
\name{NEWS}
\title{News for Package \pkg{tidyseurat}}

\section{Changes in version 0.8.9}{ \itemize{
\item CRAN fix: \code{add_count()} now uses \code{count(..., .add = TRUE)} instead of \code{dplyr::add_count()}, avoiding the defunct \code{.drop} argument (dplyr 1.0.0+).
}}

\section{Changes in version 0.8.8}{ \itemize{
\item Removed deprecated \code{.drop} argument from \code{add_count.Seurat()} to align with dplyr's API changes
\item Added generic methods for \code{add_count()} including a default method
}}

\section{Changes in version 0.5.1, Development}{ \itemize{
\item Change default shape parameter in join_features() and join_transcripts() from "long" to "wide", resulting in a return type of Seurat by default
\item Update documentation and tests accordingly
}}

\section{Changes in version 0.5.0, CRAN Release}{ \itemize{
\item Rely on the ttservice package for shared functions with tidySingleCellExperiment to avoid a clash
\item Use .cell for cell column name to avoid errors when cell column is defined by the user
}}
================================================
FILE: man/add_class.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{add_class}
\alias{add_class}
\title{Add class to object}
\usage{
add_class(var, name)
}
\arguments{
\item{var}{A tibble}

\item{name}{A character name of the attribute}
}
\value{
A tibble with an additional attribute
}
\description{
Add class to object
}
\keyword{internal}
================================================
FILE: man/aggregate_cells.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{aggregate_cells}
\alias{aggregate_cells}
\alias{aggregate_cells,Seurat-method}
\title{Aggregate cells}
\usage{
\S4method{aggregate_cells}{Seurat}(
  .data,
  .sample = NULL,
  slot = "data",
  assays = NULL,
  aggregation_function = Matrix::rowSums,
  ...
)
}
\arguments{
\item{.data}{A tidyseurat object}

\item{.sample}{A vector of variables by which cells are aggregated}

\item{slot}{The slot to which the function is applied}

\item{assays}{The assay to which the function is applied}

\item{aggregation_function}{The method of cell-feature value aggregation}

\item{...}{Used for future extensibility}
}
\value{
A tibble object
}
\description{
Combine cells into groups based on shared variables and aggregate feature counts.
}
\examples{
data(pbmc_small)
pbmc_small_pseudo_bulk <- pbmc_small |>
    aggregate_cells(c(groups, letter.idents), assays="RNA")
}
================================================
FILE: man/arrange.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{arrange}
\alias{arrange}
\alias{arrange.Seurat}
\title{Order rows using column values}
\usage{
\method{arrange}{Seurat}(.data, ..., .by_group = FALSE)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.}

\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables, or functions of variables.
Use \code{\link[dplyr:desc]{desc()}} to sort a variable in descending order.} \item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to grouped data frames only.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item All rows appear in the output, but (usually) in a different place. \item Columns are not modified. \item Groups are not modified. \item Data frame attributes are preserved. } } \description{ \code{arrange()} orders the rows of a data frame by the values of selected columns. Unlike other dplyr verbs, \code{arrange()} largely ignores grouping; you need to explicitly mention grouping variables (or use \code{.by_group = TRUE}) in order to group by them, and functions of variables are evaluated once per data frame, not once per group. } \details{ \subsection{Missing values}{ Unlike base sorting with \code{sort()}, \code{NA} are: \itemize{ \item always sorted to the end for local data, even when wrapped with \code{desc()}. \item treated differently for remote data, depending on the backend. } } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("arrange")}. 
} \examples{ data(pbmc_small) pbmc_small |> arrange(nFeature_RNA) } \seealso{ Other single table verbs: \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} ================================================ FILE: man/as_tibble.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tibble_methods.R \name{as_tibble} \alias{as_tibble} \alias{as_tibble.Seurat} \title{Coerce lists, matrices, and more to data frames} \usage{ \method{as_tibble}{Seurat}( x, ..., .name_repair = c("check_unique", "unique", "universal", "minimal"), rownames = NULL ) } \arguments{ \item{x}{A data frame, list, matrix, or other object that could reasonably be coerced to a tibble.} \item{...}{Unused, for extensibility.} \item{.name_repair}{Treatment of problematic column names: \itemize{ \item \code{"minimal"}: No name repair or checks, beyond basic existence, \item \code{"unique"}: Make sure names are unique and not empty, \item \code{"check_unique"}: (default value), no name repair, but check they are \code{unique}, \item \code{"universal"}: Make the names \code{unique} and syntactic \item \code{"unique_quiet"}: Same as \code{"unique"}, but "quiet" \item \code{"universal_quiet"}: Same as \code{"universal"}, but "quiet" \item a function: apply custom name repair (e.g., \code{.name_repair = make.names} for names in the style of base R). \item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}} } This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. See there for more details on these terms and the strategies used to enforce them.} \item{rownames}{How to treat existing row names of a data frame or matrix: \itemize{ \item \code{NULL}: remove row names. This is the default. \item \code{NA}: keep row names. \item A string: the name of a new column. 
Existing rownames are transferred into this column and the \code{row.names} attribute is deleted. No name repair is applied to the new column name, even if \code{x} already contains a column of that name. Use \code{as_tibble(rownames_to_column(...))} to safeguard against this case. } Read more in \link[tibble]{rownames}.} } \value{ `tibble` } \description{ \code{as_tibble()} turns an existing object, such as a data frame or matrix, into a so-called tibble, a data frame with class \code{\link[tibble]{tbl_df}}. This is in contrast with \code{\link[tibble:tibble]{tibble()}}, which builds a tibble from individual columns. \code{as_tibble()} is to \code{\link[tibble:tibble]{tibble()}} as \code{\link[base:as.data.frame]{base::as.data.frame()}} is to \code{\link[base:data.frame]{base::data.frame()}}. \code{as_tibble()} is an S3 generic, with methods for: \itemize{ \item \code{\link[base:data.frame]{data.frame}}: Thin wrapper around the \code{list} method that implements tibble's treatment of \link[tibble]{rownames}. \item \code{\link[base:matrix]{matrix}}, \code{\link[stats:poly]{poly}}, \code{\link[stats:ts]{ts}}, \code{\link[base:table]{table}} \item Default: Other inputs are first coerced with \code{\link[base:as.data.frame]{base::as.data.frame()}}. } \code{as_tibble_row()} converts a vector to a tibble with one row. If the input is a list, all elements must have size one. \code{as_tibble_col()} converts a vector to a tibble with one column. } \section{Row names}{ The default behavior is to silently remove row names. New code should explicitly convert row names to a new column using the \code{rownames} argument. For existing code that relies on the retention of row names, call \code{pkgconfig::set_config("tibble::rownames" = NA)} in your script or in your package's \code{\link[=.onLoad]{.onLoad()}} function. 
} \section{Life cycle}{ Using \code{as_tibble()} for vectors is superseded as of version 3.0.0, prefer the more expressive \code{as_tibble_row()} and \code{as_tibble_col()} variants for new code. } \examples{ data(pbmc_small) pbmc_small |> as_tibble() } \seealso{ \code{\link[tibble:tibble]{tibble()}} constructs a tibble from individual columns. \code{\link[tibble:enframe]{enframe()}} converts a named vector to a tibble with a column of names and column of values. Name repair is implemented using \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}. } ================================================ FILE: man/bind_rows.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{bind_rows} \alias{bind_rows} \alias{bind_rows.Seurat} \alias{bind_cols.Seurat} \alias{bind_cols} \title{Efficiently bind multiple data frames by row and column} \usage{ \method{bind_rows}{Seurat}(..., .id = NULL, add.cell.ids = NULL) \method{bind_cols}{Seurat}(..., .id = NULL) } \arguments{ \item{...}{Data frames to combine. Each argument can either be a data frame, a list that could be a data frame, or a list of data frames. When row-binding, columns are matched by name, and any missing columns will be filled with NA. When column-binding, rows are matched by position, so all data frames must have the same number of rows. To match by value, not position, see mutate-joins.} \item{.id}{Data frame identifier. When `.id` is supplied, a new column of identifiers is created to link each row to its original data frame. The labels are taken from the named arguments to `bind_rows()`. When a list of data frames is supplied, the labels are taken from the names of the list. If no names are found a numeric sequence is used instead.} \item{add.cell.ids}{from Seurat 3.0 A character vector of length(x = c(x, y)). 
Appends the corresponding values to the start of each objects' cell names.} } \value{ `bind_rows()` and `bind_cols()` return the same type as the first input, either a data frame, `tbl_df`, or `grouped_df`. `bind_rows()` and `bind_cols()` return the same type as the first input, either a data frame, `tbl_df`, or `grouped_df`. } \description{ This is an efficient implementation of the common pattern of `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many data frames into one. This is an efficient implementation of the common pattern of `do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many data frames into one. } \details{ The output of `bind_rows()` will contain a column if that column appears in any of the inputs. The output of `bind_rows()` will contain a column if that column appears in any of the inputs. } \examples{ data(pbmc_small) tt <- pbmc_small ttservice::bind_rows(tt, tt) tt_bind <- tt |> select(nCount_RNA ,nFeature_RNA) tt |> ttservice::bind_cols(tt_bind) } ================================================ FILE: man/cell_type_df.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.R \docType{data} \name{cell_type_df} \alias{cell_type_df} \title{Cell types of 80 PBMC single cells} \format{ A tibble containing 80 rows and 2 columns. Cells are a subsample of the Peripheral Blood Mononuclear Cells (PBMC) dataset of 2,700 single cell. Cell types were identified with SingleR. \describe{ \item{cell}{cell identifier, barcode} \item{first.labels}{cell type} } } \source{ \url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html} } \usage{ data(cell_type_df) } \value{ `tibble` } \description{ A dataset containing the barcodes and cell types of 80 PBMC single cells. 
} \keyword{datasets} ================================================ FILE: man/count.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{count} \alias{count} \alias{count.Seurat} \alias{add_count} \alias{add_count.default} \alias{add_count.Seurat} \title{Count observations by group} \usage{ \method{count}{Seurat}( x, ..., wt = NULL, sort = FALSE, name = NULL, .drop = group_by_drop_default(x) ) add_count(x, ..., wt = NULL, sort = FALSE, name = NULL) \method{add_count}{default}(x, ..., wt = NULL, sort = FALSE, name = NULL) \method{add_count}{Seurat}(x, ..., wt = NULL, sort = FALSE, name = NULL) } \arguments{ \item{x}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr).} \item{...}{<[`data-masking`][dplyr_data_masking]> Variables to group by.} \item{wt}{<[`data-masking`][dplyr_data_masking]> Frequency weights. Can be `NULL` or a variable: * If `NULL` (the default), counts the number of rows in each group. * If a variable, computes `sum(wt)` for each group.} \item{sort}{If `TRUE`, will show the largest groups at the top.} \item{name}{The name of the new column in the output. If omitted, it will default to `n`. If there's already a column called `n`, it will error, and require you to specify the name.} \item{.drop}{For `count()`: if `FALSE` will include counts for empty groups (i.e. for levels of factors that don't exist in the data).} } \value{ An object of the same type as `.data`. `count()` and `add_count()` group transiently, so the output has the same groups as the input. } \description{ `count()` lets you quickly count the unique values of one or more variables: `df %>% count(a, b)` is roughly equivalent to `df %>% group_by(a, b) %>% summarise(n = n())`. `count()` is paired with `tally()`, a lower-level helper that is equivalent to `df %>% summarise(n = n())`. 
Supply `wt` to perform weighted counts, switching the summary from `n = n()` to `n = sum(wt)`. `add_count()` and `add_tally()` are equivalents to `count()` and `tally()` but use `mutate()` instead of `summarise()` so that they add a new column with group-wise counts. } \examples{ data(pbmc_small) pbmc_small |> count(groups) } ================================================ FILE: man/distinct.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{distinct} \alias{distinct} \alias{distinct.Seurat} \title{Keep distinct/unique rows} \usage{ \method{distinct}{Seurat}(.data, ..., .keep_all = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to use when determining uniqueness. If there are multiple rows for a given combination of inputs, only the first row will be preserved. If omitted, will use all variables in the data frame.} \item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}. If a combination of \code{...} is not distinct, this keeps the first row of values.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are a subset of the input but appear in the same order. \item Columns are not modified if \code{...} is empty or \code{.keep_all} is \code{TRUE}. Otherwise, \code{distinct()} first calls \code{mutate()} to create new columns. \item Groups are not modified. \item Data frame attributes are preserved. } } \description{ Keep only unique/distinct rows from a data frame. This is similar to \code{\link[=unique.data.frame]{unique.data.frame()}} but considerably faster. 
}
\section{Methods}{
This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour.

The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}.
}
\examples{
data("pbmc_small")
pbmc_small |> distinct(groups)
}
================================================
FILE: man/drop_class.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{drop_class}
\alias{drop_class}
\title{Remove class from object}
\usage{
drop_class(var, name)
}
\arguments{
\item{var}{A tibble}

\item{name}{A character name of the class}
}
\value{
A tibble with the class removed
}
\description{
Remove class from object
}
\keyword{internal}
================================================
FILE: man/extract.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidyr_methods.R
\name{extract}
\alias{extract}
\alias{extract.Seurat}
\title{Extract a character column into multiple columns using regular expression groups}
\usage{
\method{extract}{Seurat}(
  data,
  col,
  into,
  regex = "([[:alnum:]]+)",
  remove = TRUE,
  convert = FALSE,
  ...
)
}
\arguments{
\item{data}{A data frame.}

\item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.}

\item{into}{Names of new variables to create as character vector.
Use \code{NA} to omit the variable in the output.}

\item{regex}{A string representing a regular expression used to extract the desired values.
There should be one group (defined by \verb{()}) for each element of \code{into}.} \item{remove}{If \code{TRUE}, remove input column from output data frame.} \item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with \code{as.is = TRUE} on new columns. This is useful if the component columns are integer, numeric or logical. NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.} \item{...}{Additional arguments passed on to methods.} } \value{ `tidyseurat` } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{extract()} has been superseded in favour of \code{\link[tidyr:separate_wider_regex]{separate_wider_regex()}} because it has a more polished API and better handling of problems. Superseded functions will not go away, but will only receive critical bug fixes. Given a regular expression with capturing groups, \code{extract()} turns each group into a new column. If the groups don't match, or the input is NA, the output will be NA. } \examples{ data(pbmc_small) pbmc_small |> extract(groups, into="g", regex="g([0-9])", convert=TRUE) } \seealso{ \code{\link[tidyr:separate]{separate()}} to split up by a separator. } ================================================ FILE: man/filter.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{filter} \alias{filter} \alias{filter.Seurat} \title{Keep or drop rows that match a condition} \usage{ \method{filter}{Seurat}(.data, ..., .preserve = FALSE) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). 
See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that return a logical vector, defined in terms of the variables in \code{.data}. If multiple expressions are included, they are combined with the \code{&} operator. To combine expressions using \code{|} instead, wrap them in \code{\link[dplyr:when_any]{when_any()}}. Only rows for which all expressions evaluate to \code{TRUE} are kept (for \code{filter()}) or dropped (for \code{filter_out()}).} \item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise the grouping is kept as is.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are a subset of the input, but appear in the same order. \item Columns are not modified. \item The number of groups may be reduced (if \code{.preserve} is not \code{TRUE}). \item Data frame attributes are preserved. } } \description{ These functions are used to subset a data frame, applying the expressions in \code{...} to determine which rows should be kept (for \code{filter()}) or dropped ( for \code{filter_out()}). Multiple conditions can be supplied separated by a comma. These will be combined with the \code{&} operator. To combine comma separated conditions using \code{|} instead, wrap them in \code{\link[dplyr:when_any]{when_any()}}. Both \code{filter()} and \code{filter_out()} treat \code{NA} like \code{FALSE}. This subtle behavior can impact how you write your conditions when missing values are involved. See the section on \verb{Missing values} for important details and examples. } \section{Missing values}{ Both \code{filter()} and \code{filter_out()} treat \code{NA} like \code{FALSE}. This results in the following behavior: \itemize{ \item \code{filter()} \emph{drops} both \code{NA} and \code{FALSE}. 
\item \code{filter_out()} \emph{keeps} both \code{NA} and \code{FALSE}. } This means that \verb{filter(data, ) + filter_out(data, )} captures every row within \code{data} exactly once. The \code{NA} handling of these functions has been designed to match your \emph{intent}. When your intent is to keep rows, use \code{filter()}. When your intent is to drop rows, use \code{filter_out()}. For example, if your goal with this \code{cars} data is to "drop rows where the \code{class} is suv", then you might write this in one of two ways: \if{html}{\out{
}}\preformatted{cars <- tibble(class = c("suv", NA, "coupe")) cars #> # A tibble: 3 x 1 #> class #> #> 1 suv #> 2 #> 3 coupe }\if{html}{\out{
}} \if{html}{\out{
}}\preformatted{cars |> filter(class != "suv") #> # A tibble: 1 x 1 #> class #> #> 1 coupe }\if{html}{\out{
}} \if{html}{\out{
}}\preformatted{cars |> filter_out(class == "suv") #> # A tibble: 2 x 1 #> class #> #> 1 #> 2 coupe }\if{html}{\out{
}} Note how \code{filter()} drops the \code{NA} rows even though our goal was only to drop \code{"suv"} rows, but \code{filter_out()} matches our intuition. To generate the correct result with \code{filter()}, you'd need to use: \if{html}{\out{
}}\preformatted{cars |> filter(class != "suv" | is.na(class)) #> # A tibble: 2 x 1 #> class #> #> 1 #> 2 coupe }\if{html}{\out{
}} This quickly gets unwieldy when multiple conditions are involved. In general, if you find yourself: \itemize{ \item Using "negative" operators like \code{!=} or \code{!} \item Adding in \code{NA} handling like \verb{| is.na(col)} or \verb{& !is.na(col)} } then you should consider if swapping to the other filtering variant would make your conditions simpler. \subsection{Comparison to base subsetting}{ Base subsetting with \code{[} doesn't treat \code{NA} like \code{TRUE} or \code{FALSE}. Instead, it generates a fully missing row, which is different from how both \code{filter()} and \code{filter_out()} work. \if{html}{\out{
}}\preformatted{cars <- tibble(class = c("suv", NA, "coupe"), mpg = c(10, 12, 14)) cars #> # A tibble: 3 x 2 #> class mpg #> #> 1 suv 10 #> 2 12 #> 3 coupe 14 }\if{html}{\out{
}} \if{html}{\out{
}}\preformatted{cars[cars$class == "suv",] #> # A tibble: 2 x 2 #> class mpg #> #> 1 suv 10 #> 2 NA cars |> filter(class == "suv") #> # A tibble: 1 x 2 #> class mpg #> #> 1 suv 10 }\if{html}{\out{
}} } } \section{Useful filter functions}{ There are many functions and operators that are useful when constructing the expressions used to filter the data: \itemize{ \item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}} etc \item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}} \item \code{\link[=is.na]{is.na()}} \item \code{\link[dplyr:between]{between()}}, \code{\link[dplyr:near]{near()}} \item \code{\link[dplyr:when_any]{when_any()}}, \code{\link[dplyr:when_all]{when_all()}} } } \section{Grouped tibbles}{ Because filtering expressions are computed within groups, they may yield different results on grouped tibbles. This will be the case as soon as an aggregating, lagging, or ranking function is involved. Compare this ungrouped filtering: \if{html}{\out{
}}\preformatted{starwars |> filter(mass > mean(mass, na.rm = TRUE)) }\if{html}{\out{
}} With the grouped equivalent: \if{html}{\out{
}}\preformatted{starwars |> filter(mass > mean(mass, na.rm = TRUE), .by = gender) }\if{html}{\out{
}} In the ungrouped version, \code{filter()} compares the value of \code{mass} in each row to the global average (taken over the whole data set), keeping only the rows with \code{mass} greater than this global average. In contrast, the grouped version calculates the average mass separately for each \code{gender} group, and keeps rows with \code{mass} greater than the relevant within-gender average. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}. } \examples{ data("pbmc_small") pbmc_small |> filter(groups == "g1") # Learn more in ?dplyr_eval } \seealso{ Other single table verbs: \code{\link[dplyr]{arrange}()}, \code{\link[dplyr]{mutate}()}, \code{\link[dplyr]{reframe}()}, \code{\link[dplyr]{rename}()}, \code{\link[dplyr]{select}()}, \code{\link[dplyr]{slice}()}, \code{\link[dplyr]{summarise}()} } ================================================ FILE: man/formatting.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/print_method.R \name{formatting} \alias{formatting} \alias{print.Seurat} \alias{print} \title{Printing tibbles} \usage{ \method{print}{Seurat}(x, ..., n = NULL, width = NULL, n_extra = NULL) } \arguments{ \item{x}{Object to format or print.} \item{...}{Passed on to \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.} \item{n}{Number of rows to show. If \code{NULL}, the default, will print all rows if less than the \code{print_max} \link[pillar:pillar_options]{option}. Otherwise, will print as many rows as specified by the \code{print_min} \link[pillar:pillar_options]{option}.} \item{width}{Width of text output to generate. 
This defaults to \code{NULL}, which means use the \code{width} \link[pillar:pillar_options]{option}.} \item{n_extra}{Number of extra columns to print abbreviated information for, if the width is too small for the entire tibble. If `NULL`, the default, will print information about at most `tibble.max_extra_cols` extra columns.} } \value{ Prints a message to the console describing the contents of the `tidyseurat`. } \description{ One of the main features of the \code{tbl_df} class is the printing: \itemize{ \item Tibbles only print as many rows and columns as fit on one screen, supplemented by a summary of the remaining rows and columns. \item Tibble reveals the type of each column, which keeps the user informed about whether a variable is, e.g., \verb{} or \verb{} (character versus factor). See \code{vignette("types")} for an overview of common type abbreviations. } Printing can be tweaked for a one-off call by calling \code{print()} explicitly and setting arguments like \code{n} and \code{width}. More persistent control is available by setting the options described in \link[pillar:pillar_options]{pillar::pillar_options}. See also \code{vignette("digits")} for a comparison to base options, and \code{vignette("numbers")} that showcases \code{\link[tibble:num]{num()}} and \code{\link[tibble:char]{char()}} for creating columns with custom formatting options. As of tibble 3.1.0, printing is handled entirely by the \pkg{pillar} package. If you implement a package that extends tibble, the printed output can be customized in various ways. See \code{vignette("extending", package = "pillar")} for details, and \link[pillar:pillar_options]{pillar::pillar_options} for options that control the display in the console. 
} \examples{ data(pbmc_small) print(pbmc_small) } ================================================ FILE: man/fragments/intro.Rmd ================================================ **Brings Seurat to the tidyverse!** website: [stemangiola.github.io/tidyseurat/](https://stemangiola.github.io/tidyseurat/) Please also have a look at - [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy single-cell RNA sequencing analysis - [tidySummarizedExperiment](https://tidyomics.github.io/tidySummarizedExperiment/) for tidy bulk RNA sequencing analysis - [tidybulk](https://tidyomics.github.io/tidybulk/) for tidy bulk RNA-seq analysis - [tidygate](https://github.com/stemangiola/tidygate/) for adding custom gate information to your tibble - [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles ```{r, echo=FALSE, include=FALSE, } library(knitr) knitr::opts_chunk$set(warning = FALSE, message = FALSE) ``` ![visual cue](`r visual_cue`) # Introduction tidyseurat provides a bridge between the Seurat single-cell package [@butler2018integrating; @stuart2019comprehensive] and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the Seurat object as a tidyverse tibble, and provides Seurat-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions. 
## Functions/utilities available Seurat-compatible Functions | Description ------------ | ------------- `all` | tidyverse Packages | Description ------------ | ------------- `dplyr` | All `dplyr` APIs like for any tibble `tidyr` | All `tidyr` APIs like for any tibble `ggplot2` | `ggplot` like for any tibble `plotly` | `plot_ly` like for any tibble Utilities | Description ------------ | ------------- `tidy` | Add `tidyseurat` invisible layer over a Seurat object `as_tibble` | Convert cell-wise information to a `tbl_df` `join_features` | Add feature-wise information, returns a `tbl_df` `aggregate_cells`| Aggregate cell gene-transcription abundance as pseudobulk tissue | ## Installation From CRAN ```{r eval=FALSE} install.packages("tidyseurat") ``` From Github (development) ```{r, eval=FALSE} devtools::install_github("stemangiola/tidyseurat") ``` ```{r} library(dplyr) library(tidyr) library(purrr) library(magrittr) library(ggplot2) library(Seurat) library(tidyseurat) ``` ## Create `tidyseurat`, the best of both worlds! This is a seurat object but it is evaluated as tibble. So it is fully compatible both with Seurat and tidyverse APIs. ```{r} pbmc_small = SeuratObject::pbmc_small ``` **It looks like a tibble** ```{r} pbmc_small ``` **But it is a Seurat object after all** ```{r} pbmc_small@assays ``` # Preliminary plots Set colours and theme for plots. 
```{r} # Use colourblind-friendly colours friendly_cols <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933", "#882255", "#661100", "#6699CC") # Set theme my_theme <- list( scale_fill_manual(values = friendly_cols), scale_color_manual(values = friendly_cols), theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size = 12), legend.position = "bottom", aspect.ratio = 1, strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)) ) ) ``` We can treat `pbmc_small` effectively as a normal tibble for plotting. Here we plot number of features per cell. ```{r plot1} pbmc_small %>% ggplot(aes(nFeature_RNA, fill = groups)) + geom_histogram() + my_theme ``` Here we plot total features per cell. ```{r plot2} pbmc_small %>% ggplot(aes(groups, nCount_RNA, fill = groups)) + geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.1) + my_theme ``` Here we plot abundance of two features for each group. ```{r} pbmc_small %>% join_features(features = c("HLA-DRA", "LYZ"), shape = "long") %>% ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) + geom_boxplot(outlier.shape = NA) + geom_jitter(aes(size = nCount_RNA), alpha = 0.5, width = 0.2) + scale_y_log10() + my_theme ``` # Preprocess the dataset Also you can treat the object as Seurat object and proceed with data processing. ```{r preprocess} pbmc_small_pca <- pbmc_small %>% SCTransform(verbose = FALSE) %>% FindVariableFeatures(verbose = FALSE) %>% RunPCA(verbose = FALSE) pbmc_small_pca ``` If a tool is not included in the tidyseurat collection, we can use `as_tibble` to permanently convert `tidyseurat` into tibble. 
```{r pc_plot} pbmc_small_pca %>% as_tibble() %>% select(contains("PC"), everything()) %>% GGally::ggpairs(columns = 1:5, ggplot2::aes(colour = groups)) + my_theme ``` # Identify clusters We proceed with cluster identification with Seurat. ```{r cluster} pbmc_small_cluster <- pbmc_small_pca %>% FindNeighbors(verbose = FALSE) %>% FindClusters(method = "igraph", verbose = FALSE) pbmc_small_cluster ``` Now we can interrogate the object as if it was a regular tibble data frame. ```{r cluster count} pbmc_small_cluster %>% count(groups, seurat_clusters) ``` We can identify cluster markers using Seurat. `r if (packageVersion("Seurat") >= package_version("4.0.0")) {""}` `r if (packageVersion("Seurat") < package_version("4.0.0")) {""}` # Reduce dimensions We can calculate the first 3 UMAP dimensions using the Seurat framework. ```{r umap, eval=FALSE} pbmc_small_UMAP <- pbmc_small_cluster %>% RunUMAP(reduction = "pca", dims = 1:15, n.components = 3L) ``` And we can plot them using 3D plot using plotly. ```{r umap plot, eval=FALSE} pbmc_small_UMAP %>% plot_ly( x = ~`UMAP_1`, y = ~`UMAP_2`, z = ~`UMAP_3`, color = ~seurat_clusters, colors = friendly_cols[1:4] ) ``` ![screenshot plotly](`r screenshot`) ## Cell type prediction We can infer cell type identities using *SingleR* [@aran2019reference] and manipulate the output using tidyverse. 
```{r eval=FALSE} # Get cell type reference data blueprint <- celldex::BlueprintEncodeData() # Infer cell identities cell_type_df <- GetAssayData(pbmc_small_UMAP, slot = 'counts', assay = "SCT") %>% log1p() %>% Matrix::Matrix(sparse = TRUE) %>% SingleR::SingleR( ref = blueprint, labels = blueprint$label.main, method = "single" ) %>% as.data.frame() %>% as_tibble(rownames = "cell") %>% select(cell, first.labels) ``` ```{r, eval=FALSE} # Join UMAP and cell type info pbmc_small_cell_type <- pbmc_small_UMAP %>% left_join(cell_type_df, by = "cell") # Reorder columns pbmc_small_cell_type %>% select(cell, first.labels, everything()) ``` We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification. ```{r, eval=FALSE} pbmc_small_cell_type %>% count(seurat_clusters, first.labels) ``` We can easily reshape the data for building information-rich faceted plots. ```{r eval=FALSE} pbmc_small_cell_type %>% # Reshape and add classifier column pivot_longer( cols = c(seurat_clusters, first.labels), names_to = "classifier", values_to = "label" ) %>% # UMAP plots for cell type and cluster ggplot(aes(UMAP_1, UMAP_2, color = label)) + geom_point() + facet_wrap(~classifier) + my_theme ``` We can easily plot gene correlation per cell category, adding multi-layer annotations. ```{r eval=FALSE} pbmc_small_cell_type %>% # Add some mitochondrial abundance values mutate(mitochondrial = rnorm(n())) %>% # Plot correlation join_features(features = c("CST3", "LYZ"), shape = "wide") %>% ggplot(aes(CST3 + 1, LYZ + 1, color = groups, size = mitochondrial)) + geom_point() + facet_wrap(~first.labels, scales = "free") + scale_x_log10() + scale_y_log10() + my_theme ``` # Nested analyses A powerful tool we can use with tidyseurat is `nest`. We can easily perform independent analyses on subsets of the dataset. 
First we classify cell types in lymphoid and myeloid; then, nest based on the new classification ```{r eval=FALSE} pbmc_small_nested <- pbmc_small_cell_type %>% filter(first.labels != "Erythrocytes") %>% mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>% nest(data = -cell_class) pbmc_small_nested ``` Now we can independently for the lymphoid and myeloid subsets (i) find variable features, (ii) reduce dimensions, and (iii) cluster using both tidyverse and Seurat seamlessly. ```{r eval=FALSE} pbmc_small_nested_reanalysed <- pbmc_small_nested %>% mutate(data = map( data, ~ .x %>% FindVariableFeatures(verbose = FALSE) %>% RunPCA(npcs = 10, verbose = FALSE) %>% FindNeighbors(verbose = FALSE) %>% FindClusters(method = "igraph", verbose = FALSE) %>% RunUMAP(reduction = "pca", dims = 1:10, n.components = 3L, verbose = FALSE) )) pbmc_small_nested_reanalysed ``` Now we can unnest and plot the new classification. ```{r eval=FALSE} pbmc_small_nested_reanalysed %>% # Convert to tibble otherwise Seurat drops reduced dimensions when unifying data sets. mutate(data = map(data, ~ .x %>% as_tibble())) %>% unnest(data) %>% # Define unique clusters unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>% # Plotting ggplot(aes(UMAP_1, UMAP_2, color = cluster)) + geom_point() + facet_wrap(~cell_class) + my_theme ``` # Aggregating cells Sometimes, it is necessary to aggregate the gene-transcript abundance from a group of cells into a single value. For example, when comparing groups of cells across different samples with fixed-effect models. In tidyseurat, cell aggregation can be achieved using the `aggregate_cells` function. 
```{r, eval=FALSE} pbmc_small %>% aggregate_cells(groups, assays = "RNA") ``` ================================================ FILE: man/full_join.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{full_join} \alias{full_join} \alias{full_join.Seurat} \title{Mutating joins} \usage{ \method{full_join}{Seurat}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with multiple expressions. For example, \code{join_by(a == b, c == d)} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between \code{x} and \code{y}, you can shorten this by listing only the variable names, like \code{join_by(a, c)}. \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on these types of joins. For simple equality joins, you can alternatively specify a character vector of variable names to join by. 
For example, \code{by = c("a", "b")} joins \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, see \code{\link[dplyr:cross_join]{cross_join()}}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{suffix}{If there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} \item{...}{Other parameters passed onto methods.} } \value{ An object of the same type as \code{x} (including the same groups). The order of the rows and columns of \code{x} is preserved as much as possible. The output has the following properties: \itemize{ \item The rows are affect by the join type. \itemize{ \item \code{inner_join()} returns matched \code{x} rows. \item \code{left_join()} returns all \code{x} rows. \item \code{right_join()} returns matched of \code{x} rows, followed by unmatched \code{y} rows. \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. } \item Output columns include all columns from \code{x} and all non-key columns from \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate these as well. \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their common type between \code{x} and \code{y}. 
} } \description{ Mutating joins add columns from \code{y} to \code{x}, matching observations based on the keys. There are four mutating joins: the inner join, and the three outer joins. \subsection{Inner join}{ An \code{inner_join()} only keeps observations from \code{x} that have a matching key in \code{y}. The most important property of an inner join is that unmatched rows in either input are not included in the result. This means that generally inner joins are not appropriate in most analyses, because it is too easy to lose observations. } \subsection{Outer joins}{ The three outer joins keep observations that appear in at least one of the data frames: \itemize{ \item A \code{left_join()} keeps all observations in \code{x}. \item A \code{right_join()} keeps all observations in \code{y}. \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. } } } \section{Many-to-many relationships}{ By default, dplyr guards against many-to-many relationships in equality joins by throwing a warning. These occur when both of the following are true: \itemize{ \item A row in \code{x} matches multiple rows in \code{y}. \item A row in \code{y} matches multiple rows in \code{x}. } This is typically surprising, as most joins involve a relationship of one-to-one, one-to-many, or many-to-one, and is often the result of an improperly specified join. Many-to-many relationships are particularly problematic because they can result in a Cartesian explosion of the number of rows returned from the join. If a many-to-many relationship is expected, silence this warning by explicitly setting \code{relationship = "many-to-many"}. In production code, it is best to preemptively set \code{relationship} to whatever relationship you expect to exist between the keys of \code{x} and \code{y}, as this forces an error to occur immediately if the data doesn't align with your expectations. 
Inequality joins typically result in many-to-many relationships by nature, so they don't warn on them by default, but you should still take extra care when specifying an inequality join, because they also have the capability to return a large number of rows. Rolling joins don't warn on many-to-many relationships either, but many rolling joins follow a many-to-one relationship, so it is often useful to set \code{relationship = "many-to-one"} to enforce this. Note that in SQL, most database providers won't let you specify a many-to-many relationship between two tables, instead requiring that you create a third \emph{junction table} that results in two one-to-many relationships instead. } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. 
} } \examples{ data(pbmc_small) tt <- pbmc_small tt |> full_join(tibble::tibble(groups="g1", other=1:4)) } \seealso{ Other joins: \code{\link[dplyr]{cross_join}()}, \code{\link[dplyr]{filter-joins}}, \code{\link[dplyr]{nest_join}()} } ================================================ FILE: man/get_abundance_sc_long.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utilities.R \name{get_abundance_sc_long} \alias{get_abundance_sc_long} \title{get abundance long} \usage{ get_abundance_sc_long( .data, features = NULL, all = FALSE, exclude_zeros = FALSE, assay = Assays(.data), slot = "data" ) } \arguments{ \item{.data}{A tidyseurat} \item{features}{A character} \item{all}{A boolean} \item{exclude_zeros}{A boolean} \item{assay}{assay name to extract feature abundance} \item{slot}{slot in the assay, e.g. `data` and `scale.data`} } \value{ A Seurat object } \description{ get abundance long } \examples{ data(pbmc_small) pbmc_small \%>\% get_abundance_sc_long(features=c("HLA-DRA", "LYZ")) } \keyword{internal} ================================================ FILE: man/get_abundance_sc_wide.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utilities.R \name{get_abundance_sc_wide} \alias{get_abundance_sc_wide} \title{get abundance wide} \usage{ get_abundance_sc_wide( .data, features = NULL, all = FALSE, assay = .data@active.assay, slot = "data", prefix = "" ) } \arguments{ \item{.data}{A tidyseurat} \item{features}{A character} \item{all}{A boolean} \item{assay}{assay name to extract feature abundance} \item{slot}{slot in the assay, e.g. 
`data` and `scale.data`} \item{prefix}{prefix for the feature names} } \value{ A Seurat object } \description{ get abundance wide } \examples{ data(pbmc_small) pbmc_small \%>\% get_abundance_sc_wide(features=c("HLA-DRA", "LYZ")) } \keyword{internal} ================================================ FILE: man/ggplot.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ggplot2_methods.R \name{ggplot} \alias{ggplot} \alias{ggplot.Seurat} \title{Create a new \code{ggplot} from a \code{tidyseurat}} \usage{ \method{ggplot}{Seurat}(data = NULL, mapping = aes(), ..., environment = parent.frame()) } \arguments{ \item{data}{Default dataset to use for plot. If not already a data.frame, will be converted to one by \code{\link[ggplot2:fortify]{fortify()}}. If not specified, must be supplied in each layer added to the plot.} \item{mapping}{Default list of aesthetic mappings to use for plot. If not specified, must be supplied in each layer added to the plot.} \item{...}{Other arguments passed on to methods. Not currently used.} \item{environment}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Used prior to tidy evaluation.} } \value{ `ggplot` } \description{ \code{ggplot()} initializes a ggplot object. It can be used to declare the input data frame for a graphic and to specify the set of aesthetic mappings for the plot, intended to be common throughout all subsequent layers unless specifically overridden. } \details{ \code{ggplot()} is used to construct the initial plot object, and is almost always followed by a plus sign (\code{+}) to add components to the plot. 
There are three common patterns used to invoke \code{ggplot()}: \itemize{ \item \verb{ggplot(data = df, mapping = aes(x, y, other aesthetics))} \item \code{ggplot(data = df)} \item \code{ggplot()} } The first pattern is recommended if all layers use the same data and the same set of aesthetics, although this method can also be used when adding a layer using data from another data frame. The second pattern specifies the default data frame to use for the plot, but no aesthetics are defined up front. This is useful when one data frame is used predominantly for the plot, but the aesthetics vary from one layer to another. The third pattern initializes a skeleton \code{ggplot} object, which is fleshed out as layers are added. This is useful when multiple data frames are used to produce different layers, as is often the case in complex graphics. The \verb{data =} and \verb{mapping =} specifications in the arguments are optional (and are often omitted in practice), so long as the data and the mapping values are passed into the function in the right order. In the examples below, however, they are left in place for clarity. } \examples{ library(ggplot2) data(pbmc_small) pbmc_small |> ggplot(aes(groups, nCount_RNA)) + geom_boxplot() } \seealso{ The \href{https://ggplot2-book.org/getting-started}{first steps chapter} of the online ggplot2 book. } ================================================ FILE: man/glimpse.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tibble_methods.R \name{glimpse} \alias{glimpse} \alias{glimpse.tidyseurat} \title{Get a glimpse of your data} \usage{ \method{glimpse}{tidyseurat}(x, width = NULL, ...) 
} \arguments{ \item{x}{An object to glimpse at.} \item{width}{Width of output: defaults to the setting of the \code{width} \link[pillar:pillar_options]{option} (if finite) or the width of the console.} \item{...}{Unused, for extensibility.} } \value{ x original x is (invisibly) returned, allowing \code{glimpse()} to be used within a data pipe line. } \description{ \code{glimpse()} is like a transposed version of \code{print()}: columns run down the page, and data runs across. This makes it possible to see every column in a data frame. It's a little like \code{\link[=str]{str()}} applied to a data frame but it tries to show you as much data as possible. (And it always shows the underlying data, even when applied to a remote data source.) See \code{\link[pillar:format_glimpse]{format_glimpse()}} for details on the formatting. } \section{S3 methods}{ \code{glimpse} is an S3 generic with a customised method for \code{tbl}s and \code{data.frames}, and a default method that calls \code{\link[=str]{str()}}. } \examples{ data(pbmc_small) pbmc_small |> glimpse() } ================================================ FILE: man/group_by.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{group_by} \alias{group_by} \alias{group_by.Seurat} \title{Group by one or more variables} \usage{ \method{group_by}{Seurat}(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data)) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> In \code{group_by()}, variables or computations to group by. Computations are always done on the ungrouped data frame. To perform computations on the grouped data, you need to use a separate \code{mutate()} step before the \code{group_by()}. 
Computations are not allowed in \code{nest_by()}. In \code{ungroup()}, variables to remove from the grouping.} \item{.add}{When \code{FALSE}, the default, \code{group_by()} will override existing groups. To add to the existing groups, use \code{.add = TRUE}.} \item{.drop}{Drop groups formed by factor levels that don't appear in the data? The default is \code{TRUE} except when \code{.data} has been previously grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.} } \value{ A grouped data frame with class \code{\link[dplyr]{grouped_df}}, unless the combination of \code{...} and \code{add} yields a empty set of grouping columns, in which case a tibble will be returned. } \description{ Most data operations are done on groups defined by variables. \code{group_by()} takes an existing tbl and converts it into a grouped tbl where operations are performed "by group". \code{ungroup()} removes grouping. } \section{Methods}{ These function are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{group_by()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}. \item \code{ungroup()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}. } } \section{Ordering}{ Currently, \code{group_by()} internally orders the groups in ascending order. This results in ordered output from functions that aggregate groups, such as \code{\link[dplyr:summarise]{summarise()}}. When used as grouping columns, character vectors are ordered in the C locale for performance and reproducibility across R sessions. 
If the resulting ordering of your grouped operation matters and is dependent on the locale, you should follow up the grouped operation with an explicit call to \code{\link[dplyr:arrange]{arrange()}} and set the \code{.locale} argument. For example: \if{html}{\out{
}}\preformatted{data |> group_by(chr) |> summarise(avg = mean(x)) |> arrange(chr, .locale = "en") }\if{html}{\out{
}} This is often useful as a preliminary step before generating content intended for humans, such as an HTML table. \subsection{Legacy behavior}{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Prior to dplyr 1.1.0, character vector grouping columns were ordered in the system locale. Setting the global option \code{dplyr.legacy_locale} to \code{TRUE} retains this legacy behavior, but this has been deprecated. Update existing code to explicitly call \code{arrange(.locale = )} instead. Run \code{Sys.getlocale("LC_COLLATE")} to determine your system locale, and compare that against the list in \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to find an appropriate value for \code{.locale}, i.e. for American English, \code{"en_US"}. } } \examples{ data("pbmc_small") pbmc_small |> group_by(groups) } \seealso{ Other grouping functions: \code{\link[dplyr]{group_map}()}, \code{\link[dplyr]{group_nest}()}, \code{\link[dplyr]{group_split}()}, \code{\link[dplyr]{group_trim}()} } ================================================ FILE: man/group_split.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{group_split} \alias{group_split} \alias{group_split.Seurat} \title{Split data frame by groups} \usage{ \method{group_split}{Seurat}(.tbl, ..., .keep = TRUE) } \arguments{ \item{.tbl}{A tbl.} \item{...}{If \code{.tbl} is an ungrouped data frame, a grouping specification, forwarded to \code{\link[dplyr:group_by]{group_by()}}.} \item{.keep}{Should the grouping columns be kept?} } \value{ A list of tibbles. Each tibble contains the rows of \code{.tbl} for the associated group and all the columns, including the grouping variables. 
Note that this returns a \link[vctrs:list_of]{list_of} which is slightly stricter than a simple list but is useful for representing lists where every element has the same type. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} \code{\link[dplyr:group_split]{group_split()}} works like \code{\link[base:split]{base::split()}} but: \itemize{ \item It uses the grouping structure from \code{\link[dplyr:group_by]{group_by()}} and therefore is subject to the data mask \item It does not name the elements of the list based on the grouping as this only works well for a single character grouping variable. Instead, use \code{\link[dplyr:group_keys]{group_keys()}} to access a data frame that defines the groups. } \code{group_split()} is primarily designed to work with grouped data frames. You can pass \code{...} to group and split an ungrouped data frame, but this is generally not very useful as you want have easy access to the group metadata. } \section{Lifecycle}{ \code{group_split()} is not stable because you can achieve very similar results by manipulating the nested column returned from \code{\link[tidyr:nest]{tidyr::nest(.by =)}}. That also retains the group keys all within a single data structure. \code{group_split()} may be deprecated in the future. 
} \examples{ data(pbmc_small) pbmc_small |> group_split(groups) } \seealso{ Other grouping functions: \code{\link[dplyr]{group_by}()}, \code{\link[dplyr]{group_map}()}, \code{\link[dplyr]{group_nest}()}, \code{\link[dplyr]{group_trim}()} } ================================================ FILE: man/inner_join.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{inner_join} \alias{inner_join} \alias{inner_join.Seurat} \title{Mutating joins} \usage{ \method{inner_join}{Seurat}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with multiple expressions. For example, \code{join_by(a == b, c == d)} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between \code{x} and \code{y}, you can shorten this by listing only the variable names, like \code{join_by(a, c)}. \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on these types of joins. 
For simple equality joins, you can alternatively specify a character vector of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, see \code{\link[dplyr:cross_join]{cross_join()}}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{suffix}{If there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} \item{...}{Other parameters passed onto methods.} } \value{ An object of the same type as \code{x} (including the same groups). The order of the rows and columns of \code{x} is preserved as much as possible. The output has the following properties: \itemize{ \item The rows are affect by the join type. \itemize{ \item \code{inner_join()} returns matched \code{x} rows. \item \code{left_join()} returns all \code{x} rows. \item \code{right_join()} returns matched of \code{x} rows, followed by unmatched \code{y} rows. \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. } \item Output columns include all columns from \code{x} and all non-key columns from \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate these as well. 
\item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their common type between \code{x} and \code{y}. } } \description{ Mutating joins add columns from \code{y} to \code{x}, matching observations based on the keys. There are four mutating joins: the inner join, and the three outer joins. \subsection{Inner join}{ An \code{inner_join()} only keeps observations from \code{x} that have a matching key in \code{y}. The most important property of an inner join is that unmatched rows in either input are not included in the result. This means that generally inner joins are not appropriate in most analyses, because it is too easy to lose observations. } \subsection{Outer joins}{ The three outer joins keep observations that appear in at least one of the data frames: \itemize{ \item A \code{left_join()} keeps all observations in \code{x}. \item A \code{right_join()} keeps all observations in \code{y}. \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. } } } \section{Many-to-many relationships}{ By default, dplyr guards against many-to-many relationships in equality joins by throwing a warning. These occur when both of the following are true: \itemize{ \item A row in \code{x} matches multiple rows in \code{y}. \item A row in \code{y} matches multiple rows in \code{x}. } This is typically surprising, as most joins involve a relationship of one-to-one, one-to-many, or many-to-one, and is often the result of an improperly specified join. Many-to-many relationships are particularly problematic because they can result in a Cartesian explosion of the number of rows returned from the join. If a many-to-many relationship is expected, silence this warning by explicitly setting \code{relationship = "many-to-many"}. 
In production code, it is best to preemptively set \code{relationship} to whatever relationship you expect to exist between the keys of \code{x} and \code{y}, as this forces an error to occur immediately if the data doesn't align with your expectations. Inequality joins typically result in many-to-many relationships by nature, so they don't warn on them by default, but you should still take extra care when specifying an inequality join, because they also have the capability to return a large number of rows. Rolling joins don't warn on many-to-many relationships either, but many rolling joins follow a many-to-one relationship, so it is often useful to set \code{relationship = "many-to-one"} to enforce this. Note that in SQL, most database providers won't let you specify a many-to-many relationship between two tables, instead requiring that you create a third \emph{junction table} that results in two one-to-many relationships instead. } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. 
} } \examples{ data(pbmc_small) tt <- pbmc_small tt |> inner_join(tt |> distinct(groups) |> mutate(new_column=1:2) |> slice(1)) } \seealso{ Other joins: \code{\link[dplyr]{cross_join}()}, \code{\link[dplyr]{filter-joins}}, \code{\link[dplyr]{nest_join}()} } ================================================ FILE: man/join_features.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{join_features} \alias{join_features} \alias{join_features,Seurat-method} \title{join_features} \usage{ \S4method{join_features}{Seurat}( .data, features = NULL, all = FALSE, exclude_zeros = FALSE, shape = "wide", assay = NULL, slot = "data", ... ) } \arguments{ \item{.data}{A tidyseurat object} \item{features}{A vector of feature identifiers to join} \item{all}{If TRUE return all} \item{exclude_zeros}{If TRUE exclude zero values} \item{shape}{Format of the returned table "long" or "wide"} \item{assay}{assay name to extract feature abundance} \item{slot}{slot name to extract feature abundance} \item{...}{Parameters to pass to join wide, i.e. assay name to extract feature abundance from and gene prefix, for shape="wide"} } \value{ A `tidyseurat` object containing information for the specified features. } \description{ join_features() extracts and joins information for specific features } \details{ This function extracts information for specified features and returns the information in either long or wide format. 
} \examples{ data(pbmc_small) pbmc_small \%>\% join_features( features=c("HLA-DRA", "LYZ")) } ================================================ FILE: man/join_transcripts.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods_DEPRECATED.R \name{join_transcripts} \alias{join_transcripts} \title{(DEPRECATED) Extract and join information for transcripts.} \usage{ join_transcripts( .data, transcripts = NULL, all = FALSE, exclude_zeros = FALSE, shape = "wide", ... ) } \arguments{ \item{.data}{A tidyseurat object} \item{transcripts}{A vector of transcript identifiers to join} \item{all}{If TRUE return all} \item{exclude_zeros}{If TRUE exclude zero values} \item{shape}{Format of the returned table "long" or "wide"} \item{...}{Parameters to pass to join wide, i.e. assay name to extract transcript abundance from} } \value{ A `tbl` containing the information for the specified transcripts } \description{ join_transcripts() extracts and joins information for specified transcripts } \details{ DEPRECATED, please use join_features() } \examples{ print("DEPRECATED") } ================================================ FILE: man/left_join.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{left_join} \alias{left_join} \alias{left_join.Seurat} \title{Mutating joins} \usage{ \method{left_join}{Seurat}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. 
A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with multiple expressions. For example, \code{join_by(a == b, c == d)} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between \code{x} and \code{y}, you can shorten this by listing only the variable names, like \code{join_by(a, c)}. \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on these types of joins. For simple equality joins, you can alternatively specify a character vector of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, see \code{\link[dplyr:cross_join]{cross_join()}}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{suffix}{If there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} \item{...}{Other parameters passed onto methods.} } \value{ An object of the same type as \code{x} (including the same groups). 
The order of the rows and columns of \code{x} is preserved as much as possible. The output has the following properties: \itemize{ \item The rows are affected by the join type. \itemize{ \item \code{inner_join()} returns matched \code{x} rows. \item \code{left_join()} returns all \code{x} rows. \item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. } \item Output columns include all columns from \code{x} and all non-key columns from \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate these as well. \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their common type between \code{x} and \code{y}. } } \description{ Mutating joins add columns from \code{y} to \code{x}, matching observations based on the keys. There are four mutating joins: the inner join, and the three outer joins. \subsection{Inner join}{ An \code{inner_join()} only keeps observations from \code{x} that have a matching key in \code{y}. The most important property of an inner join is that unmatched rows in either input are not included in the result. This means that generally inner joins are not appropriate in most analyses, because it is too easy to lose observations. } \subsection{Outer joins}{ The three outer joins keep observations that appear in at least one of the data frames: \itemize{ \item A \code{left_join()} keeps all observations in \code{x}. \item A \code{right_join()} keeps all observations in \code{y}. \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. 
} } } \section{Many-to-many relationships}{ By default, dplyr guards against many-to-many relationships in equality joins by throwing a warning. These occur when both of the following are true: \itemize{ \item A row in \code{x} matches multiple rows in \code{y}. \item A row in \code{y} matches multiple rows in \code{x}. } This is typically surprising, as most joins involve a relationship of one-to-one, one-to-many, or many-to-one, and is often the result of an improperly specified join. Many-to-many relationships are particularly problematic because they can result in a Cartesian explosion of the number of rows returned from the join. If a many-to-many relationship is expected, silence this warning by explicitly setting \code{relationship = "many-to-many"}. In production code, it is best to preemptively set \code{relationship} to whatever relationship you expect to exist between the keys of \code{x} and \code{y}, as this forces an error to occur immediately if the data doesn't align with your expectations. Inequality joins typically result in many-to-many relationships by nature, so they don't warn on them by default, but you should still take extra care when specifying an inequality join, because they also have the capability to return a large number of rows. Rolling joins don't warn on many-to-many relationships either, but many rolling joins follow a many-to-one relationship, so it is often useful to set \code{relationship = "many-to-one"} to enforce this. Note that in SQL, most database providers won't let you specify a many-to-many relationship between two tables, instead requiring that you create a third \emph{junction table} that results in two one-to-many relationships instead. } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. 
Methods available in currently loaded packages: \itemize{ \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. \item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. } } \examples{ data(pbmc_small) tt <- pbmc_small tt |> left_join(tt |> distinct(groups) |> mutate(new_column=1:2)) } \seealso{ Other joins: \code{\link[dplyr]{cross_join}()}, \code{\link[dplyr]{filter-joins}}, \code{\link[dplyr]{nest_join}()} } ================================================ FILE: man/mutate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{mutate} \alias{mutate} \alias{mutate.Seurat} \title{Create, modify, and delete columns} \usage{ \method{mutate}{Seurat}(.data, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs. The name gives the name of the column in the output. The value can be: \itemize{ \item A vector of length 1, which will be recycled to the correct length. \item A vector the same length as the current group (or the whole data frame if ungrouped). \item \code{NULL}, to remove the column. \item A data frame or tibble, to create multiple columns in the output. }} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Columns from \code{.data} will be preserved according to the \code{.keep} argument. \item Existing columns that are modified by \code{...} will always be returned in their original location. 
\item New columns created through \code{...} will be placed according to the \code{.before} and \code{.after} arguments. \item The number of rows is not affected. \item Columns given the value \code{NULL} will be removed. \item Groups will be recomputed if a grouping variable is mutated. \item Data frame attributes are preserved. } } \description{ \code{mutate()} creates new columns that are functions of existing variables. It can also modify (if the name is the same as an existing column) and delete columns (by setting their value to \code{NULL}). } \section{Useful mutate functions}{ \itemize{ \item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings \item \code{\link[dplyr:lead]{lead()}}, \code{\link[dplyr:lag]{lag()}} \item \code{\link[dplyr:dense_rank]{dense_rank()}}, \code{\link[dplyr:min_rank]{min_rank()}}, \code{\link[dplyr:percent_rank]{percent_rank()}}, \code{\link[dplyr:row_number]{row_number()}}, \code{\link[dplyr:cume_dist]{cume_dist()}}, \code{\link[dplyr:ntile]{ntile()}} \item \code{\link[=cumsum]{cumsum()}}, \code{\link[dplyr:cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[dplyr:cumany]{cumany()}}, \code{\link[dplyr:cumall]{cumall()}} \item \code{\link[dplyr:na_if]{na_if()}}, \code{\link[dplyr:coalesce]{coalesce()}} \item \code{\link[dplyr:if_else]{if_else()}}, \code{\link[dplyr:recode]{recode()}}, \code{\link[dplyr:case_when]{case_when()}} } } \section{Grouped tibbles}{ Because mutating expressions are computed within groups, they may yield different results on grouped tibbles. This will be the case as soon as an aggregating, lagging, or ranking function is involved. Compare this ungrouped mutate: \if{html}{\out{
}}\preformatted{starwars |> select(name, mass, species) |> mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) }\if{html}{\out{
}} With the grouped equivalent: \if{html}{\out{
}}\preformatted{starwars |> select(name, mass, species) |> group_by(species) |> mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) }\if{html}{\out{
}} The former normalises \code{mass} by the global average whereas the latter normalises by the averages within species levels. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}. } \examples{ data(pbmc_small) pbmc_small |> mutate(nFeature_RNA=1) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{rename}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} ================================================ FILE: man/nest.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidyr_methods.R \name{nest} \alias{nest} \alias{nest.Seurat} \title{Nest rows into a list-column of data frames} \usage{ \method{nest}{Seurat}(.data, ..., .names_sep = NULL) } \arguments{ \item{.data}{A data frame.} \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to nest; these will appear in the inner data frames. Specified using name-variable pairs of the form \code{new_col = c(col1, col2, col3)}. The right hand side can be any valid tidyselect expression. If not supplied, then \code{...} is derived as all columns \emph{not} selected by \code{.by}, and will use the column name from \code{.key}. \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: previously you could write \code{df |> nest(x, y, z)}. Convert to \code{df |> nest(data = c(x, y, z))}.} \item{.names_sep}{If \code{NULL}, the default, the inner names will come from the former outer names. 
If a string, the new inner names will use the outer names with \code{names_sep} automatically stripped. This makes \code{names_sep} roughly symmetric between nesting and unnesting.} } \value{ `tidyseurat_nested` } \description{ Nesting creates a list-column of data frames; unnesting flattens it back out into regular columns. Nesting is implicitly a summarising operation: you get one row for each group defined by the non-nested columns. This is useful in conjunction with other summaries that work with whole datasets, most notably models. Learn more in \code{vignette("nest")}. } \details{ If neither \code{...} nor \code{.by} are supplied, \code{nest()} will nest all variables, and will use the column name supplied through \code{.key}. } \section{New syntax}{ tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's designed to be more similar to other functions. Converting to the new syntax should be straightforward (guided by the message you'll receive) but if you just need to run an old analysis, you can easily revert to the previous behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows: \if{html}{\out{
}}\preformatted{library(tidyr) nest <- nest_legacy unnest <- unnest_legacy }\if{html}{\out{
}} } \section{Grouped data frames}{ \code{df |> nest(data = c(x, y))} specifies the columns to be nested; i.e. the columns that will appear in the inner data frame. \code{df |> nest(.by = c(x, y))} specifies the columns to nest \emph{by}; i.e. the columns that will remain in the outer data frame. An alternative way to achieve the latter is to \code{nest()} a grouped data frame created by \code{\link[dplyr:group_by]{dplyr::group_by()}}. The grouping variables remain in the outer data frame and the others are nested. The result preserves the grouping of the input. Variables supplied to \code{nest()} will override grouping variables so that \code{df |> group_by(x, y) |> nest(data = !z)} will be equivalent to \code{df |> nest(data = !z)}. You can't supply \code{.by} with a grouped data frame, as the groups already represent what you are nesting by. } \examples{ data(pbmc_small) pbmc_small |> nest(data=-groups) |> unnest(data) } ================================================ FILE: man/pbmc_small_nested_interactions.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.R \docType{data} \name{pbmc_small_nested_interactions} \alias{pbmc_small_nested_interactions} \title{Intercellular ligand-receptor interactions for 38 ligands from a single cell RNA-seq cluster.} \format{ A `tibble` containing 100 rows and 9 columns. Cells are a subsample of the PBMC dataset of 2,700 single cells. Cell interactions were identified with `SingleCellSignalR`. 
\describe{ \item{sample}{sample identifier} \item{ligand}{cluster and ligand identifier} \item{receptor}{cluster and receptor identifier} \item{ligand.name}{ligand name} \item{receptor.name}{receptor name} \item{origin}{cluster containing ligand} \item{destination}{cluster containing receptor} \item{interaction.type}{type of interaction, paracrine or autocrine} \item{LRscore}{interaction score} } } \source{ \url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html} } \usage{ data(pbmc_small_nested_interactions) } \value{ `tibble` } \description{ A dataset containing ligand-receptor interactions within a sample. There are 38 ligands from a single cell cluster versus 35 receptors in 6 other clusters. } \keyword{datasets} ================================================ FILE: man/pipe.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils-pipe.R \name{\%>\%} \alias{\%>\%} \title{Pipe operator} \usage{ lhs \%>\% rhs } \value{ void } \description{ See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. } \examples{ data(pbmc_small) pbmc_small \%>\% print() } \keyword{internal} ================================================ FILE: man/pivot_longer.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidyr_methods.R \name{pivot_longer} \alias{pivot_longer} \alias{pivot_longer.Seurat} \title{Pivot data from wide to long} \usage{ \method{pivot_longer}{Seurat}( data, cols, names_to = "name", names_prefix = NULL, names_sep = NULL, names_pattern = NULL, names_ptypes = NULL, names_transform = NULL, names_repair = "check_unique", values_to = "value", values_drop_na = FALSE, values_ptypes = NULL, values_transform = NULL, ... 
) } \arguments{ \item{data}{A data frame to pivot.} \item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to pivot into longer format.} \item{names_to}{A character vector specifying the new column or columns to create from the information stored in the column names of \code{data} specified by \code{cols}. \itemize{ \item If length 0, or if \code{NULL} is supplied, no columns will be created. \item If length 1, a single column will be created which will contain the column names specified by \code{cols}. \item If length >1, multiple columns will be created. In this case, one of \code{names_sep} or \code{names_pattern} must be supplied to specify how the column names should be split. There are also two additional character values you can take advantage of: \itemize{ \item \code{NA} will discard the corresponding component of the column name. \item \code{".value"} indicates that the corresponding component of the column name defines the name of the output column containing the cell values, overriding \code{values_to} entirely. } }} \item{names_prefix}{A regular expression used to remove matching text from the start of each variable name.} \item{names_sep, names_pattern}{If \code{names_to} contains multiple values, these arguments control how the column name is broken up. \code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either be a numeric vector (specifying positions to break on), or a single string (specifying a regular expression to split on). \code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular expression containing matching groups (\verb{()}). If these arguments do not give you enough control, use \code{pivot_longer_spec()} to create a spec object and process manually as needed.} \item{names_ptypes, values_ptypes}{Optionally, a list of column name-prototype pairs. 
Alternatively, a single empty prototype can be supplied, which will be applied to all columns. A prototype (or ptype for short) is a zero-length vector (like \code{integer()} or \code{numeric()}) that defines the type, class, and attributes of a vector. Use these arguments if you want to confirm that the created columns are the types that you expect. Note that if you want to change (instead of confirm) the types of specific columns, you should use \code{names_transform} or \code{values_transform} instead.} \item{names_transform, values_transform}{Optionally, a list of column name-function pairs. Alternatively, a single function can be supplied, which will be applied to all columns. Use these arguments if you need to change the types of specific columns. For example, \code{names_transform = list(week = as.integer)} would convert a character variable called \code{week} to an integer. If not specified, the type of the columns generated from \code{names_to} will be character, and the type of the variables generated from \code{values_to} will be the common type of the input columns used to generate them.} \item{names_repair}{What happens if the output has invalid column names? The default, \code{"check_unique"} is to error if the columns are duplicated. Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to de-duplicate by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more options.} \item{values_to}{A string specifying the name of the column to create from the data stored in cell values. If \code{names_to} is a character containing the special \code{.value} sentinel, this value will be ignored, and the name of the value column will be derived from part of the existing column names.} \item{values_drop_na}{If \code{TRUE}, will drop rows that contain only \code{NA}s in the \code{values_to} column. 
This effectively converts explicit missing values to implicit missing values, and should generally be used only when missing values in \code{data} were created by its structure.} \item{...}{Additional arguments passed on to methods.} } \value{ `tidyseurat` } \description{ \code{pivot_longer()} "lengthens" data, increasing the number of rows and decreasing the number of columns. The inverse transformation is \code{\link[tidyr:pivot_wider]{pivot_wider()}} Learn more in \code{vignette("pivot")}. } \details{ \code{pivot_longer()} is an updated approach to \code{\link[tidyr:gather]{gather()}}, designed to be both simpler to use and to handle more use cases. We recommend you use \code{pivot_longer()} for new code; \code{gather()} isn't going away but is no longer under active development. } \examples{ data(pbmc_small) pbmc_small |> pivot_longer( cols=c(orig.ident, groups), names_to="name", values_to="value") } ================================================ FILE: man/plotly.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plotly_methods.R \name{plotly} \alias{plotly} \alias{plot_ly} \alias{plot_ly.tbl_df} \alias{plot_ly.Seurat} \title{Initiate a plotly visualization} \usage{ plot_ly( data = data.frame(), ..., type = NULL, name = NULL, color = NULL, colors = NULL, alpha = NULL, stroke = NULL, strokes = NULL, alpha_stroke = 1, size = NULL, sizes = c(10, 100), span = NULL, spans = c(1, 20), symbol = NULL, symbols = NULL, linetype = NULL, linetypes = NULL, split = NULL, frame = NULL, width = NULL, height = NULL, source = "A" ) \method{plot_ly}{tbl_df}( data = data.frame(), ..., type = NULL, name = NULL, color = NULL, colors = NULL, alpha = NULL, stroke = NULL, strokes = NULL, alpha_stroke = 1, size = NULL, sizes = c(10, 100), span = NULL, spans = c(1, 20), symbol = NULL, symbols = NULL, linetype = NULL, linetypes = NULL, split = NULL, frame = NULL, width = NULL, height = NULL, source = "A" ) 
\method{plot_ly}{Seurat}( data = data.frame(), ..., type = NULL, name = NULL, color = NULL, colors = NULL, alpha = NULL, stroke = NULL, strokes = NULL, alpha_stroke = 1, size = NULL, sizes = c(10, 100), span = NULL, spans = c(1, 20), symbol = NULL, symbols = NULL, linetype = NULL, linetypes = NULL, split = NULL, frame = NULL, width = NULL, height = NULL, source = "A" ) } \arguments{ \item{data}{A data frame (optional) or \link[crosstalk:SharedData]{crosstalk::SharedData} object.} \item{...}{Arguments (i.e., attributes) passed along to the trace \code{type}. See \code{\link[plotly:schema]{schema()}} for a list of acceptable attributes for a given trace \code{type} (by going to \code{traces} -> \code{type} -> \code{attributes}). Note that attributes provided at this level may override other arguments (e.g. \code{plot_ly(x = 1:10, y = 1:10, color = I("red"), marker = list(color = "blue"))}).} \item{type}{A character string specifying the trace type (e.g. \code{"scatter"}, \code{"bar"}, \code{"box"}, etc). If specified, it \emph{always} creates a trace, otherwise} \item{name}{Values mapped to the trace's name attribute. Since a trace can only have one name, this argument acts very much like \code{split} in that it creates one trace for every unique value.} \item{color}{Values mapped to relevant 'fill-color' attribute(s) (e.g. \href{https://plotly.com/r/reference/#scatter-fillcolor}{fillcolor}, \href{https://plotly.com/r/reference/#scatter-marker-color}{marker.color}, \href{https://plotly.com/r/reference/#scatter-textfont-color}{textfont.color}, etc.). The mapping from data values to color codes may be controlled using \code{colors} and \code{alpha}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{color = I("red")}). Any color understood by \code{\link[grDevices:col2rgb]{grDevices::col2rgb()}} may be used in this way.} \item{colors}{Either a colorbrewer2.org palette name (e.g. 
"YlOrRd" or "Blues"), or a vector of colors to interpolate in hexadecimal "#RRGGBB" format, or a color interpolation function like \code{colorRamp()}.} \item{alpha}{A number between 0 and 1 specifying the alpha channel applied to \code{color}. Defaults to 0.5 when mapping to \href{https://plotly.com/r/reference/#scatter-fillcolor}{fillcolor} and 1 otherwise.} \item{stroke}{Similar to \code{color}, but values are mapped to relevant 'stroke-color' attribute(s) (e.g., \href{https://plotly.com/r/reference/#scatter-marker-line-color}{marker.line.color} and \href{https://plotly.com/r/reference/#scatter-line-color}{line.color} for filled polygons). If not specified, \code{stroke} inherits from \code{color}.} \item{strokes}{Similar to \code{colors}, but controls the \code{stroke} mapping.} \item{alpha_stroke}{Similar to \code{alpha}, but applied to \code{stroke}.} \item{size}{(Numeric) values mapped to relevant 'fill-size' attribute(s) (e.g., \href{https://plotly.com/r/reference/#scatter-marker-size}{marker.size}, \href{https://plotly.com/r/reference/#scatter-textfont-size}{textfont.size}, and \href{https://plotly.com/r/reference/#scatter-error_x-width}{error_x.width}). 
The mapping from data values to symbols may be controlled using \code{sizes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{size = I(30)}).} \item{sizes}{A numeric vector of length 2 used to scale \code{size} to pixels.} \item{span}{(Numeric) values mapped to relevant 'stroke-size' attribute(s) (e.g., \href{https://plotly.com/r/reference/#scatter-marker-line-width}{marker.line.width}, \href{https://plotly.com/r/reference/#scatter-line-width}{line.width} for filled polygons, and \href{https://plotly.com/r/reference/#scatter-error_x-thickness}{error_x.thickness}) The mapping from data values to symbols may be controlled using \code{spans}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{span = I(30)}).} \item{spans}{A numeric vector of length 2 used to scale \code{span} to pixels.} \item{symbol}{(Discrete) values mapped to \href{https://plotly.com/r/reference/#scatter-marker-symbol}{marker.symbol}. The mapping from data values to symbols may be controlled using \code{symbols}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{symbol = I("pentagon")}). Any \link{pch} value or \href{https://plotly.com/r/reference/#scatter-marker-symbol}{symbol name} may be used in this way.} \item{symbols}{A character vector of \link{pch} values or \href{https://plotly.com/r/reference/#scatter-marker-symbol}{symbol names}.} \item{linetype}{(Discrete) values mapped to \href{https://plotly.com/r/reference/#scatter-line-dash}{line.dash}. The mapping from data values to symbols may be controlled using \code{linetypes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{linetype = I("dash")}). 
Any \code{lty} (see \link{par}) value or \href{https://plotly.com/r/reference/#scatter-line-dash}{dash name} may be used in this way.} \item{linetypes}{A character vector of \code{lty} values or \href{https://plotly.com/r/reference/#scatter-line-dash}{dash names}} \item{split}{(Discrete) values used to create multiple traces (one trace per value).} \item{frame}{(Discrete) values used to create animation frames.} \item{width}{Width in pixels (optional, defaults to automatic sizing).} \item{height}{Height in pixels (optional, defaults to automatic sizing).} \item{source}{a character string of length 1. Match the value of this string with the source argument in \code{\link[plotly:event_data]{event_data()}} to retrieve the event data corresponding to a specific plot (shiny apps can have multiple plots).} } \value{ `plotly` } \description{ This function maps R objects to \href{https://plotly.com/javascript/}{plotly.js}, an (MIT licensed) web-based interactive charting library. It provides abstractions for doing common things (e.g. mapping data values to fill colors (via \code{color}) or creating \link[plotly]{animation}s (via \code{frame})) and sets some different defaults to make the interface feel more 'R-like' (i.e., closer to \code{\link[=plot]{plot()}} and \code{\link[ggplot2:qplot]{ggplot2::qplot()}}). } \details{ Unless \code{type} is specified, this function just initiates a plotly object with 'global' attributes that are passed onto downstream uses of \code{\link[plotly:add_trace]{add_trace()}} (or similar). A \link{formula} must always be used when referencing column name(s) in \code{data} (e.g. \code{plot_ly(mtcars, x = ~wt)}). 
Formulas are optional when supplying values directly, but they do help inform default axis/scale titles (e.g., \code{plot_ly(x = mtcars$wt)} vs \code{plot_ly(x = ~mtcars$wt)}) } \examples{ data(pbmc_small) plot_ly(pbmc_small) } \references{ \url{https://plotly-r.com/overview.html} } \seealso{ \itemize{ \item For initializing a plotly-geo object: \code{\link[plotly:plot_geo]{plot_geo()}} \item For initializing a plotly-mapbox object: \code{\link[plotly:plot_mapbox]{plot_mapbox()}} \item For translating a ggplot2 object to a plotly object: \code{\link[plotly:ggplotly]{ggplotly()}} \item For modifying any plotly object: \code{\link[plotly:layout]{layout()}}, \code{\link[plotly:add_trace]{add_trace()}}, \code{\link[plotly:style]{style()}} \item For linked brushing: \code{\link[plotly:highlight]{highlight()}} \item For arranging multiple plots: \code{\link[plotly:subplot]{subplot()}}, \code{\link[crosstalk:bscols]{crosstalk::bscols()}} \item For inspecting plotly objects: \code{\link[plotly:plotly_json]{plotly_json()}} \item For quick, accurate, and searchable plotly.js reference: \code{\link[plotly:schema]{schema()}} } } \author{ Carson Sievert } ================================================ FILE: man/pull.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{pull} \alias{pull} \alias{pull.Seurat} \title{Extract a single column} \usage{ \method{pull}{Seurat}(.data, var = -1, name = NULL, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{var}{A variable specified as: \itemize{ \item a literal variable name \item a positive integer, giving the position counting from the left \item a negative integer, giving the position counting from the right. 
} The default returns the last column (on the assumption that's the column you've created most recently). This argument is taken by expression and supports \link[rlang:topic-inject]{quasiquotation} (you can unquote column names and column locations).} \item{name}{An optional parameter that specifies the column to be used as names for a named vector. Specified in a similar manner as \code{var}.} \item{...}{For use by methods.} } \value{ A vector the same size as \code{.data}. } \description{ \code{pull()} is similar to \code{$}. It's mostly useful because it looks a little nicer in pipes, it also works with remote data frames, and it can optionally name the output. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}. } \examples{ data(pbmc_small) pbmc_small |> pull(groups) } ================================================ FILE: man/quo_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utilities.R \name{quo_names} \alias{quo_names} \title{Convert array of quosure (e.g. c(col_a, col_b)) into character vector} \usage{ quo_names(v) } \arguments{ \item{v}{A array of quosures (e.g. c(col_a, col_b))} } \value{ A character vector } \description{ Convert array of quosure (e.g. c(col_a, col_b)) into character vector } \keyword{internal} ================================================ FILE: man/rename.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{rename} \alias{rename} \alias{rename.Seurat} \title{Rename columns} \usage{ \method{rename}{Seurat}(.data, ...) 
} \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use \code{new_name = old_name} to rename selected variables. For \code{rename_with()}: additional arguments passed onto \code{.fn}.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are not affected. \item Column names are changed; column order is preserved. \item Data frame attributes are preserved. \item Groups are updated to reflect new names. } } \description{ \code{rename()} changes the names of individual variables using \code{new_name = old_name} syntax; \code{rename_with()} renames columns using a function. } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}. 
} \examples{ data(pbmc_small) pbmc_small |> rename(s_score=nFeature_RNA) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{mutate}()}, \code{\link{slice}()}, \code{\link{summarise}()} } \concept{single table verbs} ================================================ FILE: man/return_arguments_of.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utilities.R \name{return_arguments_of} \alias{return_arguments_of} \title{returns variables from an expression} \usage{ return_arguments_of(expression) } \arguments{ \item{expression}{an expression} } \value{ list of symbols } \description{ returns variables from an expression } ================================================ FILE: man/right_join.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{right_join} \alias{right_join} \alias{right_join.Seurat} \title{Mutating joins} \usage{ \method{right_join}{Seurat}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...) } \arguments{ \item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character vector of variables to join by. If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all variables in common across \code{x} and \code{y}. A message lists the variables so that you can check they're correct; suppress the message by supplying \code{by} explicitly. To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}} specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}. 
To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with multiple expressions. For example, \code{join_by(a == b, c == d)} will match \code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between \code{x} and \code{y}, you can shorten this by listing only the variable names, like \code{join_by(a, c)}. \code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on these types of joins. For simple equality joins, you can alternatively specify a character vector of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a} to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y}, use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}. To perform a cross-join, generating all combinations of \code{x} and \code{y}, see \code{\link[dplyr:cross_join]{cross_join()}}.} \item{copy}{If \code{x} and \code{y} are not from the same data source, and \code{copy} is \code{TRUE}, then \code{y} will be copied into the same src as \code{x}. This allows you to join tables across srcs, but it is a potentially expensive operation so you must opt into it.} \item{suffix}{If there are non-joined duplicate variables in \code{x} and \code{y}, these suffixes will be added to the output to disambiguate them. Should be a character vector of length 2.} \item{...}{Other parameters passed onto methods.} } \value{ An object of the same type as \code{x} (including the same groups). The order of the rows and columns of \code{x} is preserved as much as possible. The output has the following properties: \itemize{ \item The rows are affected by the join type. \itemize{ \item \code{inner_join()} returns matched \code{x} rows. \item \code{left_join()} returns all \code{x} rows. 
\item \code{right_join()} returns matched \code{x} rows, followed by unmatched \code{y} rows. \item \code{full_join()} returns all \code{x} rows, followed by unmatched \code{y} rows. } \item Output columns include all columns from \code{x} and all non-key columns from \code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well. \item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added to disambiguate these as well. \item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their common type between \code{x} and \code{y}. } } \description{ Mutating joins add columns from \code{y} to \code{x}, matching observations based on the keys. There are four mutating joins: the inner join, and the three outer joins. \subsection{Inner join}{ An \code{inner_join()} only keeps observations from \code{x} that have a matching key in \code{y}. The most important property of an inner join is that unmatched rows in either input are not included in the result. This means that generally inner joins are not appropriate in most analyses, because it is too easy to lose observations. } \subsection{Outer joins}{ The three outer joins keep observations that appear in at least one of the data frames: \itemize{ \item A \code{left_join()} keeps all observations in \code{x}. \item A \code{right_join()} keeps all observations in \code{y}. \item A \code{full_join()} keeps all observations in \code{x} and \code{y}. } } } \section{Many-to-many relationships}{ By default, dplyr guards against many-to-many relationships in equality joins by throwing a warning. These occur when both of the following are true: \itemize{ \item A row in \code{x} matches multiple rows in \code{y}. \item A row in \code{y} matches multiple rows in \code{x}. 
} This is typically surprising, as most joins involve a relationship of one-to-one, one-to-many, or many-to-one, and is often the result of an improperly specified join. Many-to-many relationships are particularly problematic because they can result in a Cartesian explosion of the number of rows returned from the join. If a many-to-many relationship is expected, silence this warning by explicitly setting \code{relationship = "many-to-many"}. In production code, it is best to preemptively set \code{relationship} to whatever relationship you expect to exist between the keys of \code{x} and \code{y}, as this forces an error to occur immediately if the data doesn't align with your expectations. Inequality joins typically result in many-to-many relationships by nature, so they don't warn on them by default, but you should still take extra care when specifying an inequality join, because they also have the capability to return a large number of rows. Rolling joins don't warn on many-to-many relationships either, but many rolling joins follow a many-to-one relationship, so it is often useful to set \code{relationship = "many-to-one"} to enforce this. Note that in SQL, most database providers won't let you specify a many-to-many relationship between two tables, instead requiring that you create a third \emph{junction table} that results in two one-to-many relationships instead. } \section{Methods}{ These functions are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}. \item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}. \item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}. 
\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}. } } \examples{ data(pbmc_small) tt <- pbmc_small tt |> right_join(tt |> distinct(groups) |> mutate(new_column=1:2) |> slice(1)) } \seealso{ Other joins: \code{\link[dplyr]{cross_join}()}, \code{\link[dplyr]{filter-joins}}, \code{\link[dplyr]{nest_join}()} } ================================================ FILE: man/rowwise.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{rowwise} \alias{rowwise} \alias{rowwise.Seurat} \title{Group input by rows} \usage{ \method{rowwise}{Seurat}(data, ...) } \arguments{ \item{data}{Input data frame.} \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Variables to be preserved when calling \code{\link[dplyr:summarise]{summarise()}}. This is typically a set of variables whose combination uniquely identify each row. \strong{NB}: unlike \code{group_by()} you can not create new variables here but instead you can select multiple variables with (e.g.) \code{everything()}.} } \value{ A row-wise data frame with class \code{rowwise_df}. Note that a \code{rowwise_df} is implicitly grouped by row, but is not a \code{grouped_df}. } \description{ \code{rowwise()} allows you to compute on a data frame a row-at-a-time. This is most useful when a vectorised function doesn't exist. Most dplyr verbs preserve row-wise grouping. The exception is \code{\link[dplyr:summarise]{summarise()}}, which returns a \link[dplyr]{grouped_df}. You can explicitly ungroup with \code{\link[dplyr:ungroup]{ungroup()}} or \code{\link[dplyr:as_tibble]{as_tibble()}}, or convert to a \link[dplyr]{grouped_df} with \code{\link[dplyr:group_by]{group_by()}}. } \section{List-columns}{ Because a rowwise has exactly one row per group it offers a small convenience for working with list-columns. 
Normally, \code{summarise()} and \code{mutate()} extract a group's worth of data with \code{[}. But when you index a list in this way, you get back another list. When you're working with a \code{rowwise} tibble, then dplyr will use \code{[[} instead of \code{[} to make your life a little easier. } \examples{ # TODO } \seealso{ \code{\link[dplyr:nest_by]{nest_by()}} for a convenient way of creating rowwise data frames with nested data. } ================================================ FILE: man/sample_n.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{sample_n} \alias{sample_n} \alias{sample_n.Seurat} \alias{sample_frac} \alias{sample_frac.Seurat} \title{Sample n rows from a table} \usage{ \method{sample_n}{Seurat}(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...) \method{sample_frac}{Seurat}(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...) } \arguments{ \item{tbl}{A data.frame.} \item{size}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> For \code{sample_n()}, the number of rows to select. For \code{sample_frac()}, the fraction of rows to select. If \code{tbl} is grouped, \code{size} applies to each group.} \item{replace}{Sample with or without replacement?} \item{weight}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. Weights are automatically standardised to sum to 1.} \item{.env}{DEPRECATED.} \item{...}{ignored} } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{sample_n()} and \code{sample_frac()} have been superseded in favour of \code{\link[dplyr:slice_sample]{slice_sample()}}. 
While they will not be deprecated in the near future, retirement means that we will only perform critical bug fixes, so we recommend moving to the newer alternative. These functions were superseded because we realised it was more convenient to have two mutually exclusive arguments to one function, rather than two separate functions. This also made it easier to clean up a few other smaller design issues with \code{sample_n()}/\code{sample_frac()}: \itemize{ \item The connection to \code{slice()} was not obvious. \item The name of the first argument, \code{tbl}, is inconsistent with other single table verbs which use \code{.data}. \item The \code{size} argument uses tidy evaluation, which is surprising and undocumented. \item It was easier to remove the deprecated \code{.env} argument. \item \code{...} was in a suboptimal position. } } \examples{ data(pbmc_small) pbmc_small |> sample_n(50) pbmc_small |> sample_frac(0.1) } ================================================ FILE: man/select.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{select} \alias{select} \alias{select.Seurat} \title{Keep or drop columns using their names and types} \usage{ \method{select}{Seurat}(.data, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted expressions separated by commas. Variable names can be used as if they were positions in the data frame, so expressions like \code{x:y} can be used to select a range of variables.} } \value{ An object of the same type as \code{.data}. The output has the following properties: \itemize{ \item Rows are not affected. \item Output columns are a subset of input columns, potentially with a different order. 
Columns will be renamed if \code{new_name = old_name} form is used. \item Data frame attributes are preserved. \item Groups are maintained; you can't select off grouping variables. } } \description{ Select (and optionally rename) variables in a data frame, using a concise mini-language that makes it easy to refer to variables based on their name (e.g. \code{a:f} selects all columns from \code{a} on the left to \code{f} on the right) or type (e.g. \code{where(is.numeric)} selects all numeric columns). \subsection{Overview of selection features}{ Tidyverse selections implement a dialect of R where operators make it easy to select variables: \itemize{ \item \code{:} for selecting a range of consecutive variables. \item \code{!} for taking the complement of a set of variables. \item \code{&} and \code{|} for selecting the intersection or the union of two sets of variables. \item \code{c()} for combining selections. } In addition, you can use \strong{selection helpers}. Some helpers select specific columns: \itemize{ \item \code{\link[tidyselect:everything]{everything()}}: Matches all variables. \item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset. \item \code{\link[dplyr:group_cols]{group_cols()}}: Select all grouping columns. } Other helpers select variables by matching patterns in their names: \itemize{ \item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix. \item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix. \item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string. \item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression. \item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03. } Or from variables stored in a character vector: \itemize{ \item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. 
All names must be present, otherwise an out-of-bounds error is thrown. \item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown for names that don't exist. } Or using a predicate function: \itemize{ \item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those for which the function returns \code{TRUE}. } } } \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}. } \section{Examples}{ Here we show the usage for the basic selection operators. See the specific help pages to learn about helpers like \code{\link[dplyr:starts_with]{starts_with()}}. The selection language can be used in functions like \code{dplyr::select()}. Let's first attach the tidyverse: \if{html}{\out{
}}\preformatted{library(tidyverse) # For better printing iris <- as_tibble(iris) }\if{html}{\out{
}} Select variables by name: \if{html}{\out{
}}\preformatted{starwars |> select(height) #> # A tibble: 87 x 1 #> height #> #> 1 172 #> 2 167 #> 3 96 #> 4 202 #> # i 83 more rows iris |> select(Sepal.Length) #> # A tibble: 150 x 1 #> Sepal.Length #> #> 1 5.1 #> 2 4.9 #> 3 4.7 #> 4 4.6 #> # i 146 more rows }\if{html}{\out{
}} Select multiple variables by separating them with commas. Note how the order of columns is determined by the order of inputs: \if{html}{\out{
}}\preformatted{starwars |> select(homeworld, height, mass) #> # A tibble: 87 x 3 #> homeworld height mass #> #> 1 Tatooine 172 77 #> 2 Tatooine 167 75 #> 3 Naboo 96 32 #> 4 Tatooine 202 136 #> # i 83 more rows iris |> select(Sepal.Length, Petal.Length) #> # A tibble: 150 x 2 #> Sepal.Length Petal.Length #> #> 1 5.1 1.4 #> 2 4.9 1.4 #> 3 4.7 1.3 #> 4 4.6 1.5 #> # i 146 more rows }\if{html}{\out{
}} If you use a named vector to select columns, the output will have its columns renamed: \if{html}{\out{
}}\preformatted{selection <- c( new_homeworld = "homeworld", new_height = "height", new_mass = "mass" ) starwars |> select(all_of(selection)) #> # A tibble: 87 x 3 #> new_homeworld new_height new_mass #> #> 1 Tatooine 172 77 #> 2 Tatooine 167 75 #> 3 Naboo 96 32 #> 4 Tatooine 202 136 #> # i 83 more rows }\if{html}{\out{
}} \subsection{Operators:}{ The \code{:} operator selects a range of consecutive variables: \if{html}{\out{
}}\preformatted{starwars |> select(name:mass) #> # A tibble: 87 x 3 #> name height mass #> #> 1 Luke Skywalker 172 77 #> 2 C-3PO 167 75 #> 3 R2-D2 96 32 #> 4 Darth Vader 202 136 #> # i 83 more rows }\if{html}{\out{
}} The \code{!} operator negates a selection: \if{html}{\out{
}}\preformatted{starwars |> select(!(name:mass)) #> # A tibble: 87 x 11 #> hair_color skin_color eye_color birth_year sex gender homeworld species #> #> 1 blond fair blue 19 male masculine Tatooine Human #> 2 gold yellow 112 none masculine Tatooine Droid #> 3 white, blue red 33 none masculine Naboo Droid #> 4 none white yellow 41.9 male masculine Tatooine Human #> # i 83 more rows #> # i 3 more variables: films , vehicles , starships iris |> select(!c(Sepal.Length, Petal.Length)) #> # A tibble: 150 x 3 #> Sepal.Width Petal.Width Species #> #> 1 3.5 0.2 setosa #> 2 3 0.2 setosa #> 3 3.2 0.2 setosa #> 4 3.1 0.2 setosa #> # i 146 more rows iris |> select(!ends_with("Width")) #> # A tibble: 150 x 3 #> Sepal.Length Petal.Length Species #> #> 1 5.1 1.4 setosa #> 2 4.9 1.4 setosa #> 3 4.7 1.3 setosa #> 4 4.6 1.5 setosa #> # i 146 more rows }\if{html}{\out{
}} \code{&} and \code{|} take the intersection or the union of two selections: \if{html}{\out{
}}\preformatted{iris |> select(starts_with("Petal") & ends_with("Width")) #> # A tibble: 150 x 1 #> Petal.Width #> #> 1 0.2 #> 2 0.2 #> 3 0.2 #> 4 0.2 #> # i 146 more rows iris |> select(starts_with("Petal") | ends_with("Width")) #> # A tibble: 150 x 3 #> Petal.Length Petal.Width Sepal.Width #> #> 1 1.4 0.2 3.5 #> 2 1.4 0.2 3 #> 3 1.3 0.2 3.2 #> 4 1.5 0.2 3.1 #> # i 146 more rows }\if{html}{\out{
}} To take the difference between two selections, combine the \code{&} and \code{!} operators: \if{html}{\out{
}}\preformatted{iris |> select(starts_with("Petal") & !ends_with("Width")) #> # A tibble: 150 x 1 #> Petal.Length #> #> 1 1.4 #> 2 1.4 #> 3 1.3 #> 4 1.5 #> # i 146 more rows }\if{html}{\out{
}} } } \examples{ data(pbmc_small) pbmc_small |> select(cell, orig.ident) } \seealso{ Other single table verbs: \code{\link[dplyr]{arrange}()}, \code{\link[dplyr]{filter}()}, \code{\link[dplyr]{mutate}()}, \code{\link[dplyr]{reframe}()}, \code{\link[dplyr]{rename}()}, \code{\link[dplyr]{slice}()}, \code{\link[dplyr]{summarise}()} } ================================================ FILE: man/separate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidyr_methods.R \name{separate} \alias{separate} \alias{separate.Seurat} \title{Separate a character column into multiple columns with a regular expression or numeric locations} \usage{ \method{separate}{Seurat}( data, col, into, sep = "[^[:alnum:]]+", remove = TRUE, convert = FALSE, extra = "warn", fill = "warn", ... ) } \arguments{ \item{data}{A data frame.} \item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.} \item{into}{Names of new variables to create as character vector. Use \code{NA} to omit the variable in the output.} \item{sep}{Separator between columns. If character, \code{sep} is interpreted as a regular expression. The default value is a regular expression that matches any sequence of non-alphanumeric values. If numeric, \code{sep} is interpreted as character positions to split at. Positive values start at 1 at the far-left of the string; negative value start at -1 at the far-right of the string. The length of \code{sep} should be one less than \code{into}.} \item{remove}{If \code{TRUE}, remove input column from output data frame.} \item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with \code{as.is = TRUE} on new columns. This is useful if the component columns are integer, numeric or logical. 
NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.} \item{extra}{If \code{sep} is a character vector, this controls what happens when there are too many pieces. There are three valid options: \itemize{ \item \code{"warn"} (the default): emit a warning and drop extra values. \item \code{"drop"}: drop any extra values without a warning. \item \code{"merge"}: only splits at most \code{length(into)} times }} \item{fill}{If \code{sep} is a character vector, this controls what happens when there are not enough pieces. There are three valid options: \itemize{ \item \code{"warn"} (the default): emit a warning and fill from the right \item \code{"right"}: fill with missing values on the right \item \code{"left"}: fill with missing values on the left }} \item{...}{Additional arguments passed on to methods.} } \value{ `tidyseurat` } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}} \code{separate()} has been superseded in favour of \code{\link[tidyr:separate_wider_position]{separate_wider_position()}} and \code{\link[tidyr:separate_wider_delim]{separate_wider_delim()}} because the two functions make the two uses more obvious, the API is more polished, and the handling of problems is better. Superseded functions will not go away, but will only receive critical bug fixes. Given either a regular expression or a vector of character positions, \code{separate()} turns a single character column into multiple columns. } \examples{ data(pbmc_small) un <- pbmc_small |> unite("new_col", c(orig.ident, groups)) un |> separate(new_col, c("orig.ident", "groups")) } \seealso{ \code{\link[tidyr:unite]{unite()}}, the complement, \code{\link[tidyr:extract]{extract()}} which uses regular expression capturing groups. 
} ================================================ FILE: man/slice.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{slice} \alias{slice} \alias{slice.Seurat} \alias{slice_head} \alias{slice_tail} \alias{slice_sample} \alias{slice_min} \alias{slice_max} \alias{slice_sample.Seurat} \alias{slice_head.Seurat} \alias{slice_tail.Seurat} \alias{slice_min.Seurat} \alias{slice_max.Seurat} \title{Subset rows using their positions} \usage{ \method{slice}{Seurat}(.data, ..., .by = NULL, .preserve = FALSE) \method{slice_sample}{Seurat}( .data, ..., n = NULL, prop = NULL, by = NULL, weight_by = NULL, replace = FALSE ) \method{slice_head}{Seurat}(.data, ..., n, prop, by = NULL) \method{slice_tail}{Seurat}(.data, ..., n, prop, by = NULL) \method{slice_min}{Seurat}( .data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE ) \method{slice_max}{Seurat}( .data, order_by, ..., n, prop, by = NULL, with_ties = TRUE, na_rm = FALSE ) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{For \code{slice()}: <\code{\link[rlang:args_data_masking]{data-masking}}> Integer row values. Provide either positive values to keep, or negative values to drop. The values provided must be either all positive or all negative. Indices beyond the number of rows in the input are silently ignored. For \verb{slice_*()}, these arguments are passed on to methods.} \item{.by, by}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.} \item{.preserve}{Relevant when the \code{.data} input is grouped. 
If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the resulting data, otherwise the grouping is kept as is.} \item{n, prop}{Provide either \code{n}, the number of rows, or \code{prop}, the proportion of rows to select. If neither are supplied, \code{n = 1} will be used. If \code{n} is greater than the number of rows in the group (or \code{prop > 1}), the result will be silently truncated to the group size. \code{prop} will be rounded towards zero to generate an integer number of rows. A negative value of \code{n} or \code{prop} will be subtracted from the group size. For example, \code{n = -2} with a group of 5 rows will select 5 - 2 = 3 rows; \code{prop = -0.25} with 8 rows will select 8 * (1 - 0.25) = 6 rows.} \item{weight_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Sampling weights. This must evaluate to a vector of non-negative numbers the same length as the input. Weights are automatically standardised to sum to 1. See the \code{Details} section for more technical details regarding these weights.} \item{replace}{Should sampling be performed with (\code{TRUE}) or without (\code{FALSE}, the default) replacement.} \item{order_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variable or function of variables to order by. To order by multiple variables, wrap them in a data frame or tibble.} \item{with_ties}{Should ties be kept together? The default, \code{TRUE}, may return more rows than you request. Use \code{FALSE} to ignore ties, and return the first \code{n} rows.} \item{na_rm}{Should missing values in \code{order_by} be removed from the result? If \code{FALSE}, \code{NA} values are sorted to the end (like in \code{\link[dplyr:arrange]{arrange()}}), so they will only be included if there are insufficient non-missing values to reach \code{n}/\code{prop}.} } \value{ An object of the same type as \code{.data}. 
The output has the following properties: \itemize{ \item Each row may appear 0, 1, or many times in the output. \item Columns are not modified. \item Groups are not modified. \item Data frame attributes are preserved. } } \description{ \code{slice()} lets you index rows by their (integer) locations. It allows you to select, remove, and duplicate rows. It is accompanied by a number of helpers for common use cases: \itemize{ \item \code{slice_head()} and \code{slice_tail()} select the first or last rows. \item \code{slice_sample()} randomly selects rows. \item \code{slice_min()} and \code{slice_max()} select rows with the smallest or largest values of a variable. } If \code{.data} is a \link[dplyr]{grouped_df}, the operation will be performed on each group, so that (e.g.) \code{slice_head(df, n = 5)} will select the first five rows in each group. } \details{ Slice does not work with relational databases because they have no intrinsic notion of row order. If you want to perform the equivalent operation, use \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:row_number]{row_number()}}. For \code{slice_sample()}, note that the weights provided in \code{weight_by} are passed through to the \code{prob} argument of \code{\link[base:sample]{base::sample.int()}}. This means they cannot be used to reconstruct summary statistics from the underlying population. See \href{https://stats.stackexchange.com/q/639211/}{this discussion} for more details. } \section{Methods}{ These function are \strong{generic}s, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. Methods available in currently loaded packages: \itemize{ \item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}. \item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}. 
\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
}
}
\examples{
data(pbmc_small)
pbmc_small |> slice(1)

# Slice group-wise using .by
pbmc_small |> slice(1:2, .by=groups)

# slice_sample() allows you to random select with or without replacement
pbmc_small |> slice_sample(n=5)

# if using replacement, and duplicate cells are returned, a tibble will be
# returned because duplicate cells cannot exist in Seurat objects
pbmc_small |> slice_sample(n=1, replace=TRUE) # returns Seurat
pbmc_small |> slice_sample(n=100, replace=TRUE) # returns tibble

# weight by a variable
pbmc_small |> slice_sample(n=5, weight_by=nCount_RNA)

# sample by group
pbmc_small |> slice_sample(n=5, by=groups)

# sample using proportions
pbmc_small |> slice_sample(prop=0.10)

# First rows based on existing order
pbmc_small |> slice_head(n=5)

# Last rows based on existing order
pbmc_small |> slice_tail(n=5)

# Rows with minimum and maximum values of a metadata variable
pbmc_small |> slice_min(nFeature_RNA, n=5)

# slice_min() and slice_max() may return more rows than requested
# in the presence of ties.
pbmc_small |> slice_min(nFeature_RNA, n=2) # Use with_ties=FALSE to return exactly n matches pbmc_small |> slice_min(nFeature_RNA, n=2, with_ties=FALSE) # Or use additional variables to break the tie: pbmc_small |> slice_min(tibble::tibble(nFeature_RNA, nCount_RNA), n=2) # Use by for group-wise operations pbmc_small |> slice_min(nFeature_RNA, n=5, by=groups) # Rows with minimum and maximum values of a metadata variable pbmc_small |> slice_max(nFeature_RNA, n=5) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{summarise}()} } \concept{single table verbs} ================================================ FILE: man/summarise.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dplyr_methods.R \name{summarise} \alias{summarise} \alias{summarise.Seurat} \alias{summarize} \alias{summarize.Seurat} \title{Summarise each group down to one row} \usage{ \method{summarise}{Seurat}(.data, ...) \method{summarize}{Seurat}(.data, ...) } \arguments{ \item{.data}{A data frame, data frame extension (e.g. a tibble), or a lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for more details.} \item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of summary functions. The name will be the name of the variable in the result. The value can be: \itemize{ \item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}. \item A data frame with 1 row, to add multiple columns from a single expression. }} } \value{ An object \emph{usually} of the same type as \code{.data}. \itemize{ \item The rows come from the underlying \code{\link[dplyr:group_keys]{group_keys()}}. \item The columns are a combination of the grouping keys and the summary expressions that you provide. 
\item The grouping structure is controlled by the \verb{.groups=} argument, the output may be another \link[dplyr]{grouped_df}, a \link[dplyr]{tibble} or a \link[dplyr]{rowwise} data frame. \item Data frame attributes are \strong{not} preserved, because \code{summarise()} fundamentally creates a new data frame. } } \description{ \code{summarise()} creates a new data frame. It returns one row for each combination of grouping variables; if there are no grouping variables, the output will have a single row summarising all observations in the input. It will contain one column for each grouping variable and one column for each of the summary statistics that you have specified. \code{summarise()} and \code{summarize()} are synonyms. } \section{Useful functions}{ \itemize{ \item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}} \item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}} \item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \item Position: \code{\link[dplyr:first]{first()}}, \code{\link[dplyr:last]{last()}}, \code{\link[dplyr:nth]{nth()}}, \item Count: \code{\link[dplyr:n]{n()}}, \code{\link[dplyr:n_distinct]{n_distinct()}} \item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}} } } \section{Backend variations}{ The data frame backend supports creating a variable and using it in the same summary. This means that previously created summary variables can be further transformed or combined within the summary, as in \code{\link[dplyr:mutate]{mutate()}}. However, it also means that summary variables with the same names as previous variables overwrite them, making those variables unavailable to later summary variables. This behaviour may not be supported in other backends. To avoid unexpected results, consider using new names for your summary variables, especially when creating multiple summaries. 
} \section{Methods}{ This function is a \strong{generic}, which means that packages can provide implementations (methods) for other classes. See the documentation of individual methods for extra arguments and differences in behaviour. The following methods are currently available in loaded packages: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}. } \examples{ data(pbmc_small) pbmc_small |> summarise(mean(nCount_RNA)) } \seealso{ Other single table verbs: \code{\link{arrange}()}, \code{\link{mutate}()}, \code{\link{rename}()}, \code{\link{slice}()} } \concept{single table verbs} ================================================ FILE: man/tbl_format_header.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/print_method.R \name{tbl_format_header} \alias{tbl_format_header} \alias{tbl_format_header.tidySeurat} \title{Format the header of a tibble} \usage{ \method{tbl_format_header}{tidySeurat}(x, setup, ...) } \arguments{ \item{x}{A tibble-like object.} \item{setup}{A setup object returned from \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.} \item{...}{These dots are for future extensions and must be empty.} } \value{ A character vector. } \description{ \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} For easier customization, the formatting of a tibble is split into three components: header, body, and footer. The \code{tbl_format_header()} method is responsible for formatting the header of a tibble. Override this method if you need to change the appearance of the entire header. If you only need to change or extend the components shown in the header, override or extend \code{\link[pillar:tbl_sum]{tbl_sum()}} for your class which is called by the default method. 
} \examples{ # TODO } ================================================ FILE: man/tidy.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/methods.R \name{tidy} \alias{tidy} \alias{tidy.Seurat} \title{tidy for Seurat objects} \usage{ \method{tidy}{Seurat}(x, ...) } \arguments{ \item{x}{A Seurat object} \item{...}{Additional arguments (not used)} } \value{ A tidyseurat object } \description{ tidy for Seurat objects } ================================================ FILE: man/unite.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidyr_methods.R \name{unite} \alias{unite} \alias{unite.Seurat} \title{Unite multiple columns into one by pasting strings together} \usage{ \method{unite}{Seurat}(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE) } \arguments{ \item{data}{A data frame.} \item{col}{The name of the new column, as a string or symbol. This argument is passed by expression and supports \link[rlang:topic-inject]{quasiquotation} (you can unquote strings and symbols). The name is captured from the expression with \code{\link[rlang:defusing-advanced]{rlang::ensym()}} (note that this kind of interface where symbols do not represent actual objects is now discouraged in the tidyverse; we support it here for backward compatibility).} \item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to unite} \item{sep}{Separator to use between values.} \item{remove}{If \code{TRUE}, remove input columns from output data frame.} \item{na.rm}{If \code{TRUE}, missing values will be removed prior to uniting each value.} } \value{ `tidyseurat` } \description{ Convenience function to paste together multiple columns into one. } \examples{ data(pbmc_small) pbmc_small |> unite( col="new_col", c("orig.ident", "groups")) } \seealso{ \code{\link[tidyr:separate]{separate()}}, the complement. 
} ================================================ FILE: man/unnest.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/tidyr_methods.R \name{unnest} \alias{unnest} \alias{unnest.tidyseurat_nested} \alias{unnest_seurat} \title{Unnest a list-column of data frames into rows and columns} \usage{ \method{unnest}{tidyseurat_nested}( data, cols, ..., keep_empty = FALSE, ptype = NULL, names_sep = NULL, names_repair = "check_unique", .drop, .id, .sep, .preserve ) unnest_seurat( data, cols, ..., keep_empty = FALSE, ptype = NULL, names_sep = NULL, names_repair = "check_unique", .drop, .id, .sep, .preserve ) } \arguments{ \item{data}{A data frame.} \item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> List-columns to unnest. When selecting multiple columns, values from the same row will be recycled to their common size.} \item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: previously you could write \code{df |> unnest(x, y, z)}. Convert to \code{df |> unnest(c(x, y, z))}. If you previously created a new variable in \code{unnest()} you'll now need to do it explicitly with \code{mutate()}. Convert \code{df |> unnest(y = fun(x, y, z))} to \code{df |> mutate(y = fun(x, y, z)) |> unnest(y)}.} \item{keep_empty}{By default, you get one row of output for each element of the list that you are unchopping/unnesting. This means that if there's a size-0 element (like \code{NULL} or an empty data frame or vector), then that entire row will be dropped from the output. If you want to preserve all rows, use \code{keep_empty = TRUE} to replace size-0 elements with a single row of missing values.} \item{ptype}{Optionally, a named list of column name-prototype pairs to coerce \code{cols} to, overriding the default that will be guessed from combining the individual values. 
Alternatively, a single empty ptype can be supplied, which will be applied to all \code{cols}.} \item{names_sep}{If \code{NULL}, the default, the outer names will come from the inner names. If a string, the outer names will be formed by pasting together the outer and the inner column names, separated by \code{names_sep}.} \item{names_repair}{Used to check that output data frame has valid names. Must be one of the following options: \itemize{ \item \verb{"minimal}": no name repair or checks, beyond basic existence, \item \verb{"unique}": make sure names are unique and not empty, \item \verb{"check_unique}": (the default), no name repair, but check they are unique, \item \verb{"universal}": make the names unique and syntactic \item a function: apply custom name repair. \item \link[tidyr]{tidyr_legacy}: use the name repair from tidyr 0.8. \item a formula: a purrr-style anonymous function (see \code{\link[rlang:as_function]{rlang::as_function()}}) } See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more details on these terms and the strategies used to enforce them.} \item{.drop, .preserve}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: all list-columns are now preserved; If there are any that you don't want in the output use \code{select()} to remove them prior to unnesting.} \item{.id}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: convert \code{df |> unnest(x, .id = "id")} to \verb{df |> mutate(id = names(x)) |> unnest(x))}.} \item{.sep}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}: use \code{names_sep} instead.} } \value{ `tidyseurat` } \description{ Unnest expands a list-column 
containing data frames into rows and columns. } \section{New syntax}{ tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's designed to be more similar to other functions. Converting to the new syntax should be straightforward (guided by the message you'll receive) but if you just need to run an old analysis, you can easily revert to the previous behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows: \if{html}{\out{
}}\preformatted{library(tidyr) nest <- nest_legacy unnest <- unnest_legacy }\if{html}{\out{
}} } \examples{ data(pbmc_small) pbmc_small |> nest(data=-groups) |> unnest(data) } \seealso{ Other rectangling: \code{\link[tidyr]{hoist}()}, \code{\link[tidyr]{unnest_longer}()}, \code{\link[tidyr]{unnest_wider}()} } ================================================ FILE: tests/testthat/test-dplyr.R ================================================ context('dplyr test') library(Seurat) data("pbmc_small") set.seed(42) test_that("arrange", { pbmc_small |> arrange(nFeature_RNA) |> expect_warning(regexp = "`arrange\\(\\)` was deprecated in tidyseurat .*") # pbmc_small_pca_arranged <- pbmc_small |> arrange(nFeature_RNA) |> Seurat::ScaleData() |> Seurat::FindVariableFeatures() |> Seurat::RunPCA() # pbmc_small_pca <- pbmc_small |> Seurat::ScaleData() |> Seurat::FindVariableFeatures() |> Seurat::RunPCA() # expect_equal( # Seurat::VariableFeatures(pbmc_small_pca_arranged), # Seurat::VariableFeatures(pbmc_small_pca) # ) # # Failing only for ATLAS CRAN, but succeding for the rest # expect_equal( # pbmc_small_pca_arranged[["pca"]]@cell.embeddings, # pbmc_small_pca[["pca"]]@cell.embeddings, # tolerance=0.1 # ) # expect_equal( # pbmc_small_pca_arranged |> as_tibble() |>dplyr::slice_head(n = 1), # pbmc_small_pca |> as_tibble() |> dplyr::slice_min(nFeature_RNA, n = 1) # ) }) test_that("bind_cols", { pbmc_small_bind <- pbmc_small |> select(nCount_RNA, nFeature_RNA) pbmc_small |> ttservice::bind_cols(pbmc_small_bind) |> select(nCount_RNA...2, nFeature_RNA...3) |> ncol() |> expect_equal(2) }) test_that("distinct", { expect_equal(pbmc_small |> distinct(groups) |> ncol(), 1) }) test_that("filter", { expect_equal( pbmc_small |> filter(groups == "g1") |> ncol(), sum(pbmc_small[[]]$groups == "g1") ) }) test_that("group_by", { expect_equal( pbmc_small |> group_by(groups) |> nrow(), nrow(pbmc_small[[]]) ) }) test_that("summarise", { expect_equal(pbmc_small |> summarise(mean(nCount_RNA)) |> nrow(), 1) }) test_that("mutate", { expect_equal(pbmc_small |> mutate(nFeature_RNA = 1) |> 
distinct(nFeature_RNA) |> nrow(), 1) }) test_that("rename", { expect_equal(pbmc_small |> rename(s_score = nFeature_RNA) |> select(s_score) |> ncol(), 1) }) test_that("left_join", { expect_equal( pbmc_small |> left_join(pbmc_small |> distinct(groups) |> mutate(new_column = 1:2) |> slice(1)) |> ncol(), nrow(pbmc_small[[]]) ) }) test_that("inner_join", { expect_equal( pbmc_small |> inner_join(pbmc_small |> distinct(groups) |> mutate(new_column = 1:2) |> slice(1)) |> ncol(), sum(pbmc_small[[]]$groups == "g2") ) }) test_that("right_join", { expect_equal( pbmc_small |> right_join(pbmc_small |> distinct(groups) |> mutate(new_column = 1:2) |> slice(1)) |> ncol(), sum(pbmc_small[[]]$groups == "g2") ) }) test_that("full_join", { expect_equal( pbmc_small |> full_join(tibble::tibble(groups = "g1", other = 1:4)) |> nrow(), sum(pbmc_small[[]]$groups == "g1") * 4 + sum(pbmc_small[[]]$groups == "g2") ) }) test_that("slice", { expect_equal(pbmc_small |> slice(1) |> ncol(), 1) expect_equal( pbmc_small |> slice(1:6) |> colnames(), colnames(pbmc_small) |> head(6)) }) test_that("sample_n", { expect_equal(pbmc_small |> sample_n(50) |> ncol(), 50) expect_equal( pbmc_small |> sample_n(500, replace = TRUE) |> ncol(), pbmc_small |> as_tibble() |> ncol() ) }) test_that("slice_sample", { pbmc_small |> slice_sample(n = 50) |> ncol() |> expect_equal(50) }) test_that("slice_head", { pbmc_small |> slice_head(n = 50) |> ncol() |> expect_equal(50) expect_equal( colnames(pbmc_small) |> head(n = 50), pbmc_small |> slice_head(n = 50) |> colnames() ) }) test_that("slice_tail", { pbmc_small |> slice_tail(n = 50) |> ncol() |> expect_equal(50) expect_equal( colnames(pbmc_small) |> tail(n = 50), pbmc_small |> slice_tail(n = 50) |> colnames() ) }) test_that("slice_min", { pbmc_small |> slice_min(nFeature_RNA, n = 5) |> ncol() |> expect_equal(5) # Arrange is deprecated # expect_equal( # pbmc_small |> as_tibble() |> arrange(nFeature_RNA) |> head(n = 5) %>% pull(.cell), # pbmc_small |> slice_min(nFeature_RNA, 
n = 5) |> colnames() # ) }) test_that("slice_max", { pbmc_small |> slice_max(nFeature_RNA, n = 5) |> ncol() |> expect_equal(5) # Arrange is deprecated # expect_equal( # pbmc_small |> as_tibble() |> arrange(desc(nFeature_RNA)) |> head(n = 5) %>% pull(.cell), # pbmc_small |> slice_max(nFeature_RNA, n = 5) |> colnames() # ) }) test_that("slice_min slice_max tibble input for order_by", { pbmc_small |> slice_min(tibble::tibble(nFeature_RNA, nCount_RNA), n = 5) |> ncol() |> expect_equal(5) pbmc_small |> slice_max(tibble::tibble(nFeature_RNA, nCount_RNA), n = 5) |> ncol() |> expect_equal(5) }) test_that("select", { expect_equal(pbmc_small |> select(cell, orig.ident) |> class() |> as.character(), "Seurat") expect_equal(pbmc_small |> select(orig.ident) |> class() |> as.character() |> purrr::pluck(1), "tbl_df") }) test_that("sample_frac", { expect_equal( pbmc_small |> sample_frac(0.1) |> ncol(), nrow(pbmc_small[[]]) * 0.1 ) expect_equal( pbmc_small |> sample_frac(10, replace = TRUE) |> ncol(), pbmc_small |> as_tibble() |> ncol() ) }) test_that("count", { expect_equal( pbmc_small |> count(groups) |> nrow(), pbmc_small[[]]$groups |> unique() |> length() ) }) test_that("add_count", { expect_equal( pbmc_small |> add_count(groups) |> nrow(), pbmc_small |> rownames() |> length() ) }) test_that("rowwise", { expect_equal( pbmc_small |> rowwise() |> mutate(m = mean(c(nCount_RNA, nFeature_RNA))) |> purrr::pluck("m", 1), ((pbmc_small[, 1]$nCount_RNA + pbmc_small[, 1]$nFeature_RNA) / 2) |> unname() ) }) test_that("group_split() works for one variable", { fd <- pbmc_small |> group_split(groups) expect_equal(length(fd), length(unique(pbmc_small$groups))) }) test_that("group_split() works for combination of variables", { fd <- pbmc_small |> group_split(groups, letter.idents) expect_equal(length(fd), length(unique(pbmc_small$groups)) * length(unique(pbmc_small$letter.idents))) }) test_that("group_split() works for one logical statement", { fd_log <- pbmc_small |> group_split(groups=="g1") 
fd_var <- pbmc_small |> group_split(groups=="g1")
# NOTE(review): fd_var is built with the exact same call as fd_log on the
# preceding line, so this expectation compares a computation with itself
# and can never fail. Presumably one side was meant to use a pre-computed
# variable inside the logical statement -- confirm the intended contrast.
expect_equal(lapply(fd_var, count), lapply(fd_log, count)) })

test_that("group_split() works for two logical statements", {
    # Split on a conjunction of a reduced-dimension coordinate (PC_1) and a
    # metadata column; the expected group sizes (75 and 5) are specific to
    # the bundled pbmc_small dataset.
    fd <- pbmc_small |> group_split(PC_1>0 & groups=="g1")
    fd_counts <- lapply(fd, count)
    expect_equal(c(fd_counts[[1]], fd_counts[[2]], use.names = FALSE), list(75, 5)) })

================================================
FILE: tests/testthat/test-ggplotly_methods.R
================================================
context('ggplot test')

data("pbmc_small")
# Test fixture: add one continuous (number) and one categorical (factor)
# cell-metadata column to plot against.
df <- pbmc_small
df$number <- rnorm(ncol(df))
df$factor <- sample(gl(3, 1, ncol(df)))

# ggplot() on a tidyseurat object is exercised with cell metadata, joined
# assay data, and reduced-dimension columns as aesthetics; expect_silent()
# also renders each plot via show() so draw-time warnings fail the test.
test_that("ggplot", {
    # cell metadata
    p <- ggplot(df, aes(factor, number))
    expect_silent(show(p))
    expect_s3_class(p, "ggplot")
    # assay data
    g <- sample(rownames(df), 1)
    fd <- join_features(df, g, shape="wide")
    p <- ggplot(fd, aes(factor, .data[[g]]))
    expect_silent(show(p))
    expect_s3_class(p, "ggplot")
    # reduced dimensions
    p <- ggplot(df, aes(PC_1, PC_2, col=factor))
    expect_silent(show(p))
    expect_s3_class(p, "ggplot") })

# The same three data sources exercised through plotly's plot_ly() interface.
test_that("plotly", {
    # cell metadata
    p <- plot_ly(df, x=~factor, y=~number, type="violin")
    expect_silent(show(p))
    expect_s3_class(p, "plotly")
    # assay data
    g <- sample(rownames(df), 1)
    fd <- join_features(df, g, shape="wide")
    p <- plot_ly(fd, x=~factor, y=g, type="violin")
    expect_silent(show(p))
    expect_s3_class(p, "plotly")
    # reduced dimensions
    p <- plot_ly(fd, x=~PC_1, y=~PC_2, type="scatter", mode="markers")
    expect_silent(show(p))
    expect_s3_class(p, "plotly") })

================================================
FILE: tests/testthat/test-methods.R
================================================
context('methods test')

data("pbmc_small")

test_that("join_features_long", {
    # Long shape: the feature value is read from the .abundance_RNA column;
    # 6.35 is the expected CD3D value for the first row of pbmc_small.
    pbmc_small |>
        join_features("CD3D", shape="long") |>
        slice(1) |>
        pull(.abundance_RNA) |>
        expect_equal(6.35, tolerance = 0.1) })

test_that("join_features_wide", {
    # Wide shape: the feature becomes a column named after the gene (CD3D),
    # with the same expected value as the long-shape test above.
    pbmc_small |>
        join_features("CD3D", shape="wide") |>
        slice(1) |>
        pull(CD3D) |>
        expect_equal(6.35, tolerance = 0.1) })
test_that("join_features_default_wide", { pbmc_small |> join_features("CD3D") |> slice(1) |> pull(CD3D) |> expect_equal(6.35, tolerance = 0.1) }) test_that("aggregate_cells() returns expected values", { # Create pseudo-bulk object for testing pbmc_pseudo_bulk <- pbmc_small |> aggregate_cells(c(groups, letter.idents), assays = "RNA") # Check row length is unchanged pbmc_pseudo_bulk |> distinct(.feature) |> nrow() |> expect_equal(pbmc_small |> nrow()) # Check column length is correctly modified pbmc_pseudo_bulk |> distinct(.sample) |> nrow() |> expect_equal(pbmc_small |> as_tibble() |> select(groups, letter.idents) |> unique() |> nrow() ) # Spot check for correctly aggregated count value of ACAP1 gene pbmc_pseudo_bulk |> filter(.feature == "ACAP1" & .sample == "g1___A") |> select(RNA) |> as.numeric() |> expect_equal( Seurat::DietSeurat(pbmc_small, assays = "RNA", features = "ACAP1")[, pbmc_small |> as_tibble() |> filter(groups == "g1", letter.idents == "A") |> pull(.cell)] |> LayerData() |> sum()) # Aggregate with tidyselect pbmc_small |> aggregate_cells(c(any_of("groups"), letter.idents), assays = "RNA") |> expect_no_error() }) test_that("get_abundance_sc_wide", { expect_equal( pbmc_small |> get_abundance_sc_wide() |> nrow(), pbmc_small[[]] |> nrow() ) expect_equal( pbmc_small |> get_abundance_sc_wide() |> pull("S100A9") |> sum(), pbmc_small |> FetchData("S100A9") |> sum(), tolerance = 0.1 ) }) test_that("get_abundance_sc_long", { expect_equal(pbmc_small |> get_abundance_sc_long() |> ncol(), 3) expect_equal( pbmc_small |> get_abundance_sc_long() |> filter(.feature == "S100A9") |> pull(".abundance_RNA") |> sum(), pbmc_small |> FetchData("S100A9") |> sum(), tolerance = 0.1 ) }) ================================================ FILE: tests/testthat/test-pillar.R ================================================ context('pillar test') test_string <- "A small string to test the function of pillar utilities." 
# Wrapping at width 20 turns the fixture into 5 "# "-prefixed comment lines.
test_that("pillar___format_comment", {
    test_string |>
        pillar___format_comment(width = 20) |>
        stringr::str_count("# ") |>
        expect_equal(5)
})

# With indent = 4, every wrapped line after the first carries the indent
# (counted here via the 4-space run).
test_that("pillar___strwrap2", {
    test_string |>
        pillar___strwrap2(width = 20, indent = 4) |>
        stringr::str_count("    ") |>
        expect_equal(c(0, 1, 1, 1, 1))
})

# pillar___wrap joins the wrapped lines with "\n": 4 lines -> 3 newlines.
test_that("pillar___wrap", {
    test_string |>
        pillar___wrap(width = 20) |>
        stringr::str_count("\n") |>
        expect_equal(3)
})

================================================ FILE: tests/testthat/test-print.R ================================================

context('print test')

data("pbmc_small")

test_that("print", {
    text <- capture.output(print(pbmc_small))
    # Header line comes first
    expect_equal(grep("Seurat-tibble abstraction", text), 1)
    # Feature/cell counts reported in the header match the object dimensions
    i <- grep(str <- ".*Features=([0-9]+).*", text)
    expect_equal(gsub(str, "\\1", text[i]), paste(nrow(pbmc_small)))
    i <- grep(str <- ".*Cells=([0-9]+).*", text)
    expect_equal(gsub(str, "\\1", text[i]), paste(ncol(pbmc_small)))
})

test_that("glimpse", {
    # Snapshot-style check: glimpse output for pbmc_small is 37 lines
    text <- capture.output(glimpse(pbmc_small))
    expect_equal(length(text), 37)
})

================================================ FILE: tests/testthat/test-tidyr.R ================================================

context('tidyr test')

data("pbmc_small")

# Fixture: rebuild a minimal Seurat object from raw counts and attach a
# two-level grouping column (g1/g2 alternating across cells).
tt <-
    GetAssayData(pbmc_small, layer = 'counts', assay = "RNA") |>
    CreateSeuratObject() |>
    mutate(groups = sprintf("g%s", rep(1:2, dplyr::n()/2)))

# nest() then unnest() must round-trip: the PCA computed on the
# round-tripped object matches the PCA computed on the original.
test_that("nest_unnest", {
    col_names <- colnames(tt[[]]) |> c("cell")

    x <- tt |>
        nest(data = -groups) |>
        unnest(data) |>
        Seurat::NormalizeData() |>
        Seurat::ScaleData() |>
        Seurat::FindVariableFeatures() |>
        Seurat::RunPCA()

    y <- tt |>
        Seurat::NormalizeData() |>
        Seurat::ScaleData() |>
        Seurat::FindVariableFeatures() |>
        Seurat::RunPCA()

    expect_equal(
        x[["pca"]]@cell.embeddings |>
            as_tibble(rownames = "cell") |>
            arrange(cell) |>
            pull(PC_1),
        y[["pca"]]@cell.embeddings |>
            as_tibble(rownames = "cell") |>
            arrange(cell) |>
            pull(PC_1)
    )
})

# Nesting on a duplicated column must give the same result as nesting on
# the single original column (exercises the fast vs. slow nest paths).
test_that("fast_vs_slow_nest", {
    expect_identical(
        tt |>
            mutate(groups2 = groups) |>
            nest(data = -c(groups, groups2)) |>
            select(-groups2),
        tt |> nest(data = -groups)
    )
})

# The first nested group unnests to exactly the g1 cells.
test_that("nest_unnest_slice_1", {
    expect_equal(
        tt |> nest(data = -groups) |> slice(1) |> unnest(data) |> ncol(),
        sum(tt[[]]$groups == "g1")
    )
})

# unite() then separate() round-trips the joined column.
test_that("unite separate", {
    un <- tt |> unite("new_col", c(orig.ident, groups))
    se <- un |> separate(col = new_col, into = c("orig.ident", "groups"))
    expect_equal(un |> select(new_col) |> slice(1) |> pull(new_col), "SeuratProject_g1")
    expect_equal(se |> select(orig.ident) |> ncol(), 1)
})

# extract() with convert = TRUE coerces the captured digit to integer.
test_that("extract", {
    expect_equal(
        tt |>
            extract(groups, into = "g", regex = "g([0-9])", convert = TRUE) |>
            pull(g) |>
            class(),
        "integer"
    )
})

# pivot_longer() drops down to a plain tibble (tbl_df).
test_that("pivot_longer", {
    expect_equal(
        tt |>
            pivot_longer(c(orig.ident, groups), names_to = "name", values_to = "value") |>
            class() |>
            magrittr::extract2(1),
        "tbl_df"
    )
})

================================================ FILE: tests/testthat/test-utilities.R ================================================

context('utilities test')

data("pbmc_small")

test_that("get_special_column_name_symbol", {
    # Returns both the symbol and the name form of the special column
    expect_equal(get_special_column_name_symbol(".cell")$symbol, rlang::sym(".cell"))
    expect_equal(get_special_column_name_symbol(".cell")$name, c(".cell"))
})

test_that("ping_old_special_column_into_metadata", {
    # The legacy (un-dotted) "cell" column becomes the first column
    ping_old_special_column_into_metadata(pbmc_small) |>
        as_tibble() |>
        colnames() |>
        purrr::pluck(1) |>
        expect_equal("cell")
})

================================================ FILE: tests/testthat.R ================================================

library(testthat)
library(tidyseurat)

test_check("tidyseurat")

================================================ FILE: vignettes/figures_article.Rmd ================================================

---
title: "Code for producing the figures in the article"
author: "Stefano Mangiola"
date: "`r Sys.Date()`"
package: tidyseurat
output:
  html_vignette:
    toc_float: true
vignette: >
  %\VignetteEngine{knitr::knitr}
  %\VignetteIndexEntry{Code for producing the figures in the article}
%\usepackage[UTF-8]{inputenc}
---

[![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html)

```{r include=FALSE}
# Set the path to the plotly screenshot. We don't run the plotly code chunk, as most servers do not have the JavaScript libraries needed for interactive plotting.
screenshot <- "../man/figures/plotly.png"

# The chunk below uses Rmd in man/fragments to avoid duplication, as the content is shared with the vignette and README, as suggested here: https://www.garrickadenbuie.com/blog/dry-vignette-and-readme/
visual_cue <- "../man/figures/logo_interaction-01.png"
```

```{r eval=FALSE}
# Article workflow
library(tidyverse)
library(Seurat)
library(SingleR)
library(plotly)
library(tidyHeatmap)
library(ggalluvial)
library(ggplot2)
library(tidyseurat)

options(future.globals.maxSize = 50068 * 1024^2)

# Use colourblind-friendly colours
friendly_cols <- dittoSeq::dittoColors()

# Set theme
custom_theme <- list(
  scale_fill_manual(values = friendly_cols),
  scale_color_manual(values = friendly_cols),
  theme_bw() +
    theme(
      panel.border = element_blank(),
      axis.line = element_line(),
      panel.grid.major = element_line(size = 0.2),
      panel.grid.minor = element_line(size = 0.1),
      text = element_text(size = 9),
      legend.position = "bottom",
      strip.background = element_blank(),
      axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
      axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
      axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)
    )
)

PBMC_clean_scaled_UMAP_cluster_cell_type <- readRDS("dev/PBMC_clean_scaled_UMAP_cluster_cell_type.rds")
```

```{r eval=FALSE}
p1 = PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  pivot_longer(
    c(mito.fraction, S.Score, G2M.Score),
    names_to="property", values_to="Value"
  ) %>%
  mutate(property = factor(property, levels = c("mito.fraction", "G2M.Score", "S.Score"))) %>%
  ggplot(aes(sample, Value)) +
  geom_boxplot(outlier.size = 0.5 ) +
facet_wrap(~property, scales = "free_y" ) + custom_theme + theme(aspect.ratio=1) ``` ```{r eval=FALSE} p2 = PBMC_clean_scaled_UMAP_cluster_cell_type %>% sample_n(20000) %>% ggplot(aes(UMAP_1, UMAP_2, color=seurat_clusters)) + geom_point(size=0.05, alpha=0.2) + custom_theme + theme(aspect.ratio=1) PBMC_clean_scaled_UMAP_cluster_cell_type %>% sample_n(20000) %>% plot_ly( x = ~`UMAP_1`, y = ~`UMAP_2`, z = ~`UMAP_3`, color = ~seurat_clusters, colors = friendly_cols[1:24],sizes = 50, size = 1 ) markers = readRDS("dev/PBMC_marker_df.rds") ``` ```{r eval=FALSE} p3 = PBMC_clean_scaled_UMAP_cluster_cell_type %>% arrange(first.labels) %>% mutate(seurat_clusters = fct_inorder(seurat_clusters)) %>% join_features(features=c("CD3D", "HLA-DRB1")) %>% ggplot(aes(y=seurat_clusters , x=.abundance_SCT, fill=first.labels)) + geom_density_ridges(bandwidth = 0.2) + facet_wrap(~ .feature, nrow = 2) + coord_flip() + custom_theme ``` ```{r eval=FALSE} # Plot heatmap p4 = PBMC_clean_scaled_UMAP_cluster_cell_type %>% sample_n(2000) %>% DoHeatmap( features = markers$gene, group.colors = friendly_cols ) ``` ```{r eval=FALSE} p5 = PBMC_clean_scaled_UMAP_cluster_cell_type %>% sample_n(1000) %>% join_features(features=markers$gene) %>% mutate(seurat_clusters = as.integer(seurat_clusters)) %>% filter(seurat_clusters<10) %>% group_by(seurat_clusters) %>% # Plot heatmap heatmap( .row = .feature, .column = .cell, .value = .abundance_SCT, palette_grouping = list(rep("black",9)), palette_value = circlize::colorRamp2(c(-1.5, 0, 1.5), c("purple", "black", "yellow")), # ComplexHeatmap parameters row_gap = unit(0.1, "mm"), column_gap = unit(0.1, "mm") ) %>% # Add annotation add_tile(sample, palette = friendly_cols[1:7]) %>% add_point(PC_1) ``` ```{r eval=FALSE} p6 = PBMC_clean_scaled_UMAP_cluster_cell_type %>% unite("cluster_cell_type", c(first.labels, seurat_clusters), remove=FALSE) %>% pivot_longer( c(seurat_clusters, first.labels_single), names_to = "classification", values_to = "value" ) %>% 
ggplot(aes(x = classification, stratum = value, alluvium = cell, fill = first.labels, label = value)) + scale_x_discrete(expand = c(1, 1)) + geom_flow() + geom_stratum(alpha = .5) + # geom_text(stat = "stratum", size = 3) + geom_text_repel(stat = "stratum", size = 3, nudge_x = 0.05, direction = "y", angle = 0, vjust = 0, segment.size = 0.2 ) + scale_fill_manual(values = friendly_cols) + #guides(fill = FALSE) + coord_flip() + theme_bw() + theme( panel.border = element_blank(), axis.line = element_line(), panel.grid.major = element_line(size = 0.2), panel.grid.minor = element_line(size = 0.1), text = element_text(size = 9), legend.position = "bottom", strip.background = element_blank(), axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)), axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1) ) ``` ================================================ FILE: vignettes/introduction.Rmd ================================================ --- title: "Overview of the tidyseurat package" author: "Stefano Mangiola" date: "`r Sys.Date()`" package: tidyseurat output: html_vignette: toc_float: true bibliography: tidyseurat.bib vignette: > %\VignetteEngine{knitr::knitr} %\VignetteIndexEntry{Overview of the tidyseurat package} %\usepackage[UTF-8]{inputenc} --- [![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) ```{r include=FALSE} # Set path to plotly screenshot. We don't run the plotly code chunk as most servers do not have javascript libraries needed for interactive plotting screenshot <- "../man/figures/plotly.png" # The chunk below uses Rmd in man/fragments to avoid duplication, as the content is shared with the vignette and README. 
As suggested here: https://www.garrickadenbuie.com/blog/dry-vignette-and-readme/ visual_cue <- "../man/figures/logo_interaction-01.png" ``` ```{r child="../man/fragments/intro.Rmd"} ``` # Session Info ```{r} sessionInfo() ``` # References ================================================ FILE: vignettes/tidyseurat.bib ================================================ @article{butler2018integrating, title={Integrating single-cell transcriptomic data across different conditions, technologies, and species}, author={Butler, Andrew and Hoffman, Paul and Smibert, Peter and Papalexi, Efthymia and Satija, Rahul}, journal={Nature biotechnology}, volume={36}, number={5}, pages={411--420}, year={2018}, publisher={Nature Publishing Group} } @article{stuart2019comprehensive, title={Comprehensive integration of single-cell data}, author={Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck III, William M and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul}, journal={Cell}, volume={177}, number={7}, pages={1888--1902}, year={2019}, publisher={Elsevier} } @article{aran2019reference, title={Reference-based analysis of lung single-cell sequencing reveals a transitional profibrotic macrophage}, author={Aran, Dvir and Looney, Agnieszka P and Liu, Leqian and Wu, Esther and Fong, Valerie and Hsu, Austin and Chak, Suzanna and Naikawadi, Ram P and Wolters, Paul J and Abate, Adam R and others}, journal={Nature immunology}, volume={20}, number={2}, pages={163--172}, year={2019}, publisher={Nature Publishing Group} } @article{cabello2020singlecellsignalr, title={SingleCellSignalR: inference of intercellular networks from single-cell transcriptomics}, author={Cabello-Aguilar, Simon and Alame, M{\'e}lissa and Kon-Sun-Tack, Fabien and Fau, Caroline and Lacroix, Matthieu and Colinge, Jacques}, journal={Nucleic acids research}, volume={48}, number={10}, pages={e55--e55}, year={2020}, publisher={Oxford University Press} } 
@article{wickham2019welcome, title={Welcome to the Tidyverse}, author={Wickham, Hadley and Averick, Mara and Bryan, Jennifer and Chang, Winston and McGowan, Lucy D'Agostino and Fran{\c{c}}ois, Romain and Grolemund, Garrett and Hayes, Alex and Henry, Lionel and Hester, Jim and others}, journal={Journal of Open Source Software}, volume={4}, number={43}, pages={1686}, year={2019} }