Repository: stemangiola/tidyseurat
Branch: master
Commit: 5a96f9573251
Files: 94
Total size: 278.9 KB

Directory structure:
gitextract_po_cxl1f/

├── .Rbuildignore
├── .coveralls.yml
├── .github/
│   ├── .gitignore
│   ├── ISSUE_TEMPLATE/
│   │   └── bug_report.md
│   └── workflows/
│       └── rworkflows.yml
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── NAMESPACE
├── R/
│   ├── attach.R
│   ├── data.R
│   ├── dplyr_methods.R
│   ├── ggplot2_methods.R
│   ├── methods.R
│   ├── methods_DEPRECATED.R
│   ├── pillar_utilities.R
│   ├── plotly_methods.R
│   ├── print_method.R
│   ├── tibble_methods.R
│   ├── tidyr_methods.R
│   ├── utilities.R
│   ├── utils-pipe.R
│   └── zzz.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── codecov.yml
├── data/
│   ├── cell_type_df.rda
│   └── pbmc_small_nested_interactions.rda
├── dev/
│   ├── code_comparison.Rmd
│   ├── plot_seurat_structure.R
│   ├── test_scrna_for_tidyseurat.rdata
│   ├── use_cases_BioCAsia2021.R
│   ├── workflow_article.R
│   ├── workflow_create_integrated_pbmc.R
│   └── workflow_figures.R
├── inst/
│   ├── CITATION
│   └── NEWS.rd
├── man/
│   ├── add_class.Rd
│   ├── aggregate_cells.Rd
│   ├── arrange.Rd
│   ├── as_tibble.Rd
│   ├── bind_rows.Rd
│   ├── cell_type_df.Rd
│   ├── count.Rd
│   ├── distinct.Rd
│   ├── drop_class.Rd
│   ├── extract.Rd
│   ├── filter.Rd
│   ├── formatting.Rd
│   ├── fragments/
│   │   └── intro.Rmd
│   ├── full_join.Rd
│   ├── get_abundance_sc_long.Rd
│   ├── get_abundance_sc_wide.Rd
│   ├── ggplot.Rd
│   ├── glimpse.Rd
│   ├── group_by.Rd
│   ├── group_split.Rd
│   ├── inner_join.Rd
│   ├── join_features.Rd
│   ├── join_transcripts.Rd
│   ├── left_join.Rd
│   ├── mutate.Rd
│   ├── nest.Rd
│   ├── pbmc_small_nested_interactions.Rd
│   ├── pipe.Rd
│   ├── pivot_longer.Rd
│   ├── plotly.Rd
│   ├── pull.Rd
│   ├── quo_names.Rd
│   ├── rename.Rd
│   ├── return_arguments_of.Rd
│   ├── right_join.Rd
│   ├── rowwise.Rd
│   ├── sample_n.Rd
│   ├── select.Rd
│   ├── separate.Rd
│   ├── slice.Rd
│   ├── summarise.Rd
│   ├── tbl_format_header.Rd
│   ├── tidy.Rd
│   ├── unite.Rd
│   └── unnest.Rd
├── tests/
│   ├── testthat/
│   │   ├── test-dplyr.R
│   │   ├── test-ggplotly_methods.R
│   │   ├── test-methods.R
│   │   ├── test-pillar.R
│   │   ├── test-print.R
│   │   ├── test-tidyr.R
│   │   └── test-utilities.R
│   └── testthat.R
└── vignettes/
    ├── figures_article.Rmd
    ├── introduction.Rmd
    └── tidyseurat.bib

================================================
FILE CONTENTS
================================================

================================================
FILE: .Rbuildignore
================================================
^.*\.Rproj$
^\.Rproj\.user$
^vignettes/introduction_cache$
^doc$
^Meta$
^codecov\.yml$
^dev$
^README_cache$
^README_files$
README.Rmd
^.git$
.coveralls.yml
.travis.yml
^.github$
^\.github$
_pkgdown.yml
^tidyseurat\.Rproj$


================================================
FILE: .coveralls.yml
================================================
service_name: travis-pro
repo_token: O4NscPehU4qrWznFtQRiyJJBIOyRgPzsB


================================================
FILE: .github/.gitignore
================================================
*.html


================================================
FILE: .github/ISSUE_TEMPLATE/bug_report.md
================================================
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''

---

Thanks for submitting an issue.

Please add the following information to the issue

1. Describe the issue/bug
2. Print out the input dataset immediately before the bug occurs
3. Paste the code immediately leading to the bug
4. Print out of the output, if any
5. Print out of the complete error/warning message, if any
6. sessionInfo()

Thanks!


================================================
FILE: .github/workflows/rworkflows.yml
================================================
name: rworkflows
'on':
  push:
    branches:
    - master
    - main
    - devel
    - RELEASE_**
  pull_request:
    branches:
    - master
    - main
    - devel
    - RELEASE_**
jobs:
  rworkflows:
    permissions: write-all
    runs-on: ${{ matrix.config.os }}
    name: ${{ matrix.config.os }} (${{ matrix.config.r }})
    container: ${{ matrix.config.cont }}
    strategy:
      fail-fast: ${{ false }}
      matrix:
        config:
        - os: ubuntu-latest
          bioc: devel
          r: auto
          cont: ghcr.io/bioconductor/bioconductor_docker:devel
          rspm: ~
        - os: macOS-latest
          bioc: release
          r: auto
          cont: ~
          rspm: ~
        - os: windows-latest
          bioc: release
          r: auto
          cont: ~
          rspm: ~
    steps:
    - uses: neurogenomics/rworkflows@master
      with:
        run_bioccheck: ${{ false }}
        run_rcmdcheck: ${{ true }}
        as_cran: ${{ true }}
        run_vignettes: ${{ true }}
        has_testthat: ${{ true }}
        run_covr: ${{ true }}
        run_pkgdown: ${{ true }}
        has_runit: ${{ false }}
        has_latex: ${{ false }}
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run_docker: ${{ false }}
        DOCKER_TOKEN: ${{ secrets.DOCKER_TOKEN }}
        runner_os: ${{ runner.os }}
        cache_version: cache-v1
        docker_registry: ghcr.io


================================================
FILE: .gitignore
================================================
.Rproj.user
.Rhistory
.RData
.Ruserdata
tidyseurat.Rproj
README_cache/*
vignettes/introduction_cache*
tidyseurat.Rproj
Meta
doc
dev/*csv
dev/*rds
dev/*rda
dev/*pdf
dev/dplyr-master/*
tidyseurat.Rproj
/doc/
/Meta/
..Rcheck/*


================================================
FILE: .travis.yml
================================================
# Adapted from https://github.com/hadley/testthat/blob/master/.travis.yml
# R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
language: r
cache: packages
r:
 - bioc-release
 - bioc-devel
env:
- R_QPDF=true

r_github_packages:
  - r-lib/covr

after_success:
    - tar -C .. -xf $PKG_TARBALL
    - xvfb-run Rscript -e 'covr::codecov(type=c("tests", "vignettes", "examples"))'


================================================
FILE: DESCRIPTION
================================================
Type: Package
Package: tidyseurat
Title: Brings Seurat to the Tidyverse 
Version: 0.8.9
Authors@R: c(person("Stefano", "Mangiola", email = "mangiolastefano@gmail.com",
                  role = c("aut", "cre")),
            person("Maria", "Doyle", email = "Maria.Doyle@petermac.org",
            role = c("ctb"))
                  )
Description: It creates an invisible layer that allow to see the 'Seurat' object 
    as tibble and interact seamlessly with the tidyverse.
License: GPL-3
Depends:
    R (>= 4.1.0),
    ttservice (>= 0.3.8),
    SeuratObject
Imports:
    Seurat (>= 4.3.0),
    tibble,
    dplyr (>= 1.1.4),
    magrittr,
    tidyr (>= 1.2.0),
    ggplot2,
    rlang (>= 1.0.0),
    purrr,
    lifecycle,
    methods,
    plotly,
    tidyselect,
    utils,
    vctrs,
    pillar,
    stringr,
    cli,
    fansi,
    Matrix,
    generics
Suggests:
    testthat,
    knitr,
    GGally,
    markdown,
    rbibutils
VignetteBuilder: 
    knitr
RdMacros:
    lifecycle
Biarch: true
biocViews: AssayDomain, Infrastructure, RNASeq, DifferentialExpression, GeneExpression, Normalization, Clustering, QualityControl, Sequencing, Transcription, Transcriptomics
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.3
URL: https://github.com/stemangiola/tidyseurat, https://stemangiola.github.io/tidyseurat/
BugReports: https://github.com/stemangiola/tidyseurat/issues


================================================
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand

S3method(add_count,Seurat)
S3method(add_count,default)
S3method(arrange,Seurat)
S3method(as_tibble,Seurat)
S3method(bind_cols,Seurat)
S3method(bind_rows,Seurat)
S3method(count,Seurat)
S3method(distinct,Seurat)
S3method(extract,Seurat)
S3method(filter,Seurat)
S3method(full_join,Seurat)
S3method(ggplot,Seurat)
S3method(glimpse,tidyseurat)
S3method(group_by,Seurat)
S3method(group_split,Seurat)
S3method(inner_join,Seurat)
S3method(join_transcripts,Seurat)
S3method(join_transcripts,default)
S3method(left_join,Seurat)
S3method(mutate,Seurat)
S3method(nest,Seurat)
S3method(pivot_longer,Seurat)
S3method(plot_ly,Seurat)
S3method(plot_ly,tbl_df)
S3method(print,Seurat)
S3method(pull,Seurat)
S3method(rename,Seurat)
S3method(right_join,Seurat)
S3method(rowwise,Seurat)
S3method(sample_frac,Seurat)
S3method(sample_n,Seurat)
S3method(select,Seurat)
S3method(separate,Seurat)
S3method(slice,Seurat)
S3method(slice_head,Seurat)
S3method(slice_max,Seurat)
S3method(slice_min,Seurat)
S3method(slice_sample,Seurat)
S3method(slice_tail,Seurat)
S3method(summarise,Seurat)
S3method(summarize,Seurat)
S3method(tbl_format_header,tidySeurat)
S3method(tidy,Seurat)
S3method(unite,Seurat)
S3method(unnest,tidyseurat_nested)
export("%>%")
export(add_count)
export(get_abundance_sc_long)
export(get_abundance_sc_wide)
export(join_transcripts)
export(plot_ly)
export(unnest_seurat)
exportMethods(join_features)
importFrom(Matrix,rowSums)
importFrom(Seurat,Assays)
importFrom(Seurat,DietSeurat)
importFrom(Seurat,GetAssayData)
importFrom(Seurat,SplitObject)
importFrom(Seurat,VariableFeatures)
importFrom(SeuratObject,"DefaultAssay<-")
importFrom(SeuratObject,DefaultAssay)
importFrom(dplyr,arrange)
importFrom(dplyr,contains)
importFrom(dplyr,count)
importFrom(dplyr,distinct)
importFrom(dplyr,distinct_at)
importFrom(dplyr,everything)
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,group_by_drop_default)
importFrom(dplyr,group_rows)
importFrom(dplyr,group_split)
importFrom(dplyr,inner_join)
importFrom(dplyr,left_join)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,right_join)
importFrom(dplyr,rowwise)
importFrom(dplyr,sample_frac)
importFrom(dplyr,sample_n)
importFrom(dplyr,select)
importFrom(dplyr,select_if)
importFrom(dplyr,slice)
importFrom(dplyr,slice_head)
importFrom(dplyr,slice_max)
importFrom(dplyr,slice_min)
importFrom(dplyr,slice_sample)
importFrom(dplyr,slice_tail)
importFrom(dplyr,summarise)
importFrom(dplyr,summarize)
importFrom(dplyr,vars)
importFrom(fansi,strwrap_ctl)
importFrom(generics,tidy)
importFrom(ggplot2,aes)
importFrom(ggplot2,ggplot)
importFrom(lifecycle,deprecate_warn)
importFrom(magrittr,"%$%")
importFrom(magrittr,"%>%")
importFrom(magrittr,equals)
importFrom(methods,.hasSlot)
importFrom(methods,getMethod)
importFrom(methods,is)
importFrom(pillar,align)
importFrom(pillar,get_extent)
importFrom(pillar,style_subtle)
importFrom(pillar,tbl_format_header)
importFrom(plotly,plot_ly)
importFrom(purrr,imap)
importFrom(purrr,map)
importFrom(purrr,map2)
importFrom(purrr,map_chr)
importFrom(purrr,map_int)
importFrom(purrr,reduce)
importFrom(purrr,when)
importFrom(rlang,":=")
importFrom(rlang,check_dots_used)
importFrom(rlang,dots_values)
importFrom(rlang,enexpr)
importFrom(rlang,enquo)
importFrom(rlang,enquos)
importFrom(rlang,expr)
importFrom(rlang,flatten_if)
importFrom(rlang,is_spliced)
importFrom(rlang,names2)
importFrom(rlang,quo_name)
importFrom(rlang,quo_squash)
importFrom(rlang,sym)
importFrom(stats,setNames)
importFrom(stringr,regex)
importFrom(stringr,str_detect)
importFrom(tibble,as_tibble)
importFrom(tibble,column_to_rownames)
importFrom(tibble,enframe)
importFrom(tibble,glimpse)
importFrom(tibble,rowid_to_column)
importFrom(tidyr,extract)
importFrom(tidyr,nest)
importFrom(tidyr,pivot_longer)
importFrom(tidyr,separate)
importFrom(tidyr,spread)
importFrom(tidyr,unite)
importFrom(tidyr,unnest)
importFrom(tidyselect,eval_select)
importFrom(ttservice,aggregate_cells)
importFrom(ttservice,bind_cols)
importFrom(ttservice,bind_rows)
importFrom(ttservice,join_features)
importFrom(utils,packageDescription)
importFrom(utils,tail)
importFrom(vctrs,new_data_frame)


================================================
FILE: R/attach.R
================================================
core <- c("dplyr", "tidyr", "ttservice", "ggplot2")

core_unloaded <- function() {
    search <- paste0("package:", core)
    core[!search %in% search()]
}

# Attach the package from the same library it was loaded from before.
# [source: https://github.com/tidy-biology/tidyverse/issues/171]
same_library <- function(pkg) {
    loc <- if (pkg %in% loadedNamespaces()) 
        dirname(getNamespaceInfo(pkg, "path"))
    library(pkg, lib.loc=loc, character.only=TRUE, warn.conflicts=FALSE)
}

tidyverse_attach <- function() {
    to_load <- core_unloaded()
    
    suppressPackageStartupMessages(
        lapply(to_load, same_library))
    
    invisible(to_load)
}


================================================
FILE: R/data.R
================================================
#' Cell types of 80 PBMC single cells
#' 
#' A dataset containing the barcodes and cell types of 80 PBMC single cells.
#'
#' @format A tibble containing 80 rows and 2 columns.
#'   Cells are a subsample of the Peripheral Blood Mononuclear Cells (PBMC) 
#'   dataset of 2,700 single cell. Cell types were identified with SingleR.
#' \describe{
#'   \item{cell}{cell identifier, barcode}
#'   \item{first.labels}{cell type}
#' }
#' @source \url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html}
#' @usage data(cell_type_df)
#' @return `tibble`
"cell_type_df"

#' Intercellular ligand-receptor interactions for 
#' 38 ligands from a single cell RNA-seq cluster.
#'
#' A dataset containing ligand-receptor interactions within a sample.
#' There are 38 ligands from a single cell cluster versus 35 receptors 
#' in 6 other clusters.
#'
#' @format A `tibble` containing 100 rows and 9 columns.
#'   Cells are a subsample of the PBMC dataset of 2,700 single cells. 
#'   Cell interactions were identified with `SingleCellSignalR`.
#' \describe{
#'   \item{sample}{sample identifier}
#'   \item{ligand}{cluster and ligand identifier}
#'   \item{receptor}{cluster and receptor identifier}
#'   \item{ligand.name}{ligand name}
#'   \item{receptor.name}{receptor name}
#'   \item{origin}{cluster containing ligand}
#'   \item{destination}{cluster containing receptor}
#'   \item{interaction.type}{type of interation, paracrine or autocrine}
#'   \item{LRscore}{interaction score}
#' }
#' @source \url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html}
#' @usage data(pbmc_small_nested_interactions)
#' @return `tibble`
"pbmc_small_nested_interactions"

================================================
FILE: R/dplyr_methods.R
================================================
#' @name arrange
#' @rdname arrange
#' @inherit dplyr::arrange
#' @family single table verbs
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |>
#'     arrange(nFeature_RNA)
#' 
#' @importFrom tibble as_tibble
#' @importFrom dplyr arrange
#' @export
arrange.Seurat <- function(.data, ..., .by_group=FALSE) {
  
  # DEPRECATE
  deprecate_warn(
    when="0.7.5",
    what="arrange()",
    details="tidyseurat says: arrange() is temporarly deprected as it is not clear that Seurat allows reordering of cells."
  )
  
    # .cell_ordered <-
    #     .data %>%
    #     as_tibble() %>%
    #     dplyr::arrange(  ..., .by_group=.by_group  ) %>%
    #    pull(!!c_(.data)$symbol)
    # 
    # .data[,.cell_ordered]
  
  .data
}

#' @name bind_rows
#' @rdname bind_rows
#' @inherit ttservice::bind_rows
#'
#' @examples
#' data(pbmc_small)
#' tt <- pbmc_small
#' ttservice::bind_rows(tt, tt)
#'
#' tt_bind <- tt |> select(nCount_RNA ,nFeature_RNA)
#' tt |> ttservice::bind_cols(tt_bind)
#' 
#' @importFrom rlang dots_values
#' @importFrom rlang flatten_if
#' @importFrom rlang is_spliced
#' @importFrom ttservice bind_rows
#' @export
bind_rows.Seurat <- function(..., .id=NULL,  add.cell.ids=NULL)
{
    tts <- flatten_if(dots_values(...), is_spliced)

    # Strange error for Seurat merge
    # GetResidualSCTModel
    # close to a line as such
    # slot(object=object[[assay]], name="SCTModel.list")
    # So I have to delete any sample of size 1 if I have calculated SCT
    # if()
    # GetAssayData(object, layer='SCTModel.list', assay="SCT") %>%
    #     map(~ .x@cell.attributes %>% nrow)

    # Check if cell with same name
    merge(tts[[1]], y=tts[[2]], add.cell.ids=add.cell.ids)
}

#' @importFrom rlang flatten_if
#' @importFrom rlang is_spliced
#' @importFrom rlang dots_values
#' @importFrom ttservice bind_cols
bind_cols_ <- function(..., .id=NULL){

    tts <- flatten_if(dots_values(...), is_spliced)

    tts[[1]]@meta.data <- bind_cols(tts[[1]][[]], tts[[2]], .id=.id)

    tts[[1]]
}

#' @rdname bind_rows
#' @aliases bind_cols
#' @export
bind_cols.Seurat <- bind_cols_

#' @name distinct
#' @rdname distinct
#' @inherit dplyr::distinct
#'
#' @examples
#' data("pbmc_small")
#' pbmc_small |> distinct(groups)
#'
#' @importFrom dplyr distinct
#' @export
distinct.Seurat <- function (.data, ..., .keep_all=FALSE)
{
    message(data_frame_returned_message)

    distinct_columns <-
        (enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist)

    # Deprecation of special column names
    if(is_sample_feature_deprecated_used(.data, distinct_columns)){
        .data= ping_old_special_column_into_metadata(.data)
    }

    .data %>%
        as_tibble() %>%
        dplyr::distinct(..., .keep_all=.keep_all)

}

#' @name filter
#' @rdname filter
#' @inherit dplyr::filter
#' 
#' @examples
#' data("pbmc_small")
#' pbmc_small |>  filter(groups == "g1")
#'
#' # Learn more in ?dplyr_eval
#' 
#' @importFrom purrr map
#' @importFrom dplyr filter
#' @export
filter.Seurat <- function (.data, ..., .preserve=FALSE)
{

    # Deprecation of special column names
    if(is_sample_feature_deprecated_used(
        .data,
        (enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist)
    )){
        .data= ping_old_special_column_into_metadata(.data)
    }

    new_meta <- .data %>%
        as_tibble() %>%
        dplyr::filter( ..., .preserve=.preserve) %>%
        as_meta_data(.data)

    # Error if size == 0
    if(nrow(new_meta) == 0) stop("tidyseurat says: the resulting data",
        " container is empty. Seurat does not allow for empty containers.")

    new_obj <-
        subset(.data, cells=rownames(new_meta)) %>%

    # Clean empty slots
    clean_seurat_object()

    new_obj

}

#' @name group_by
#' @rdname group_by
#' @inherit dplyr::group_by
#' 
#' @examples
#' data("pbmc_small")
#' pbmc_small |>  group_by(groups)
#'
#' @importFrom dplyr group_by_drop_default
#' @importFrom dplyr group_by
#' @export
group_by.Seurat <- function (.data, ..., .add=FALSE,
    .drop=group_by_drop_default(.data))
{
    message(data_frame_returned_message)

    # Deprecation of special column names
    if(is_sample_feature_deprecated_used(
        .data,
        (enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist)
    )){
        .data <- ping_old_special_column_into_metadata(.data)
    }

    .data %>%
        as_tibble() %>%
        dplyr::group_by( ..., .add=.add, .drop=.drop)

}

#' @name summarise
#' @aliases summarize
#' @inherit dplyr::summarise
#' @family single table verbs
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |> summarise(mean(nCount_RNA))
#'
#' @importFrom dplyr summarise
#' @importFrom purrr map
#' @export
summarise.Seurat <- function (.data, ...) {
    message(data_frame_returned_message)

    # Deprecation of special column names
    if(is_sample_feature_deprecated_used(
        .data,
        (enquos(..., .ignore_empty="all") %>% map(~ quo_name(.x)) %>% unlist)
    )){
        .data= ping_old_special_column_into_metadata(.data)
    }

    .data %>%
        as_tibble() %>%
        dplyr::summarise( ...)

}

#' @name summarise
#' @rdname summarise
#' @importFrom dplyr summarize
#' @export
summarize.Seurat <- summarise.Seurat

#' @name mutate
#' @rdname mutate
#' @inherit dplyr::mutate
#' @family single table verbs
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> mutate(nFeature_RNA=1)
#'
#' @importFrom rlang enquos
#' @importFrom dplyr mutate
#' @importFrom purrr map
#' @export
mutate.Seurat <- function(.data, ...) {

    # Check that we are not modifying a key column
    cols <- enquos(...) %>% names

    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>% 
        map(~ quo_name(.x)) %>% unlist()
    if (is_sample_feature_deprecated_used(.data, .cols)) {
        .data <- ping_old_special_column_into_metadata(.data)
    }

    .view_only_cols <- c(
        get_special_columns(.data),
        get_needed_columns(.data))
    
    .test <- cols |>
        intersect(.view_only_cols) |>
        length()

    if (.test) {
        stop("tidyseurat says:",
            " you are trying to mutate a column that is view only",
            " ", paste(.view_only_cols, collapse=", "),
            " (it is not present in the colData).",
            " If you want to mutate a view-only column, make a copy",
            " (e.g. mutate(new_column=", cols[1], ")) and mutate that one.")
    }

    .data@meta.data <-
        .data %>%
        as_tibble %>%
        dplyr::mutate( ...)  %>%
        as_meta_data(.data)

    .data
}

#' @name rename
#' @rdname rename
#' @inherit dplyr::rename
#' @family single table verbs
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> rename(s_score=nFeature_RNA)
#'
#' @importFrom Seurat DietSeurat
#' @importFrom tidyselect eval_select
#' @importFrom dplyr rename
#' @export
rename.Seurat <- function(.data, ...)
{

    # Check that we are not modifying a key column
    read_only_columns <- c(
        get_needed_columns(.data),
        get_special_columns(.data))

    # Small df to be more efficient
    df <- 
      DietSeurat(.data, features = rownames(.data)[1])[,1]  |>
      suppressWarnings() |> 
      as_tibble() 
    
    # What columns we are going to create
    cols_from <- tidyselect::eval_select(expr(c(...)), df) |> names()
    
    # What are the columns before renaming
    original_columns <- df |> colnames()
    
    # What the column after renaming would be
    new_colums <- df |> rename(...) |> colnames()
    
    # What column you are impacting
    changed_columns <- original_columns |> setdiff(new_colums)
    
    # Check that you are not impacting any read-only columns
    if (any(changed_columns %in% read_only_columns)) {
        stop("tidyseurat says:",
            " you are trying to rename a column that is view only",
            " ", paste(changed_columns, collapse=", "),
            " (it is not present in the colData).",
            " If you want to rename a view-only column, make a copy",
            " (e.g., mutate(", cols_from[1], "=",  changed_columns[1], ")).")
    }

    .data@meta.data <- dplyr::rename( .data[[]],  ...)

    .data
}

#' @name rowwise
#' @rdname rowwise
#' @inherit dplyr::rowwise
#'
#' @examples
#' # TODO
#'
#' @importFrom dplyr rowwise
#' @export
rowwise.Seurat <- function(data, ...) {
    message(data_frame_returned_message)

    data %>%
        as_tibble() %>%
        dplyr::rowwise(...)

}

#' @name left_join
#' @rdname left_join
#' @inherit dplyr::left_join
#'
#' @examples
#' data(pbmc_small)
#' tt <- pbmc_small
#' tt |> left_join(tt |>  
#'   distinct(groups) |> 
#'   mutate(new_column=1:2))
#'
#' @importFrom dplyr left_join
#' @importFrom dplyr count
#' @export
left_join.Seurat <- function (x, y, by=NULL, copy=FALSE,
    suffix=c(".x", ".y"), ...) {

    # Deprecation of special column names
    .cols <- if (!is.null(by)) by else colnames(y)
    if (is_sample_feature_deprecated_used(x, .cols)) {
        x <- ping_old_special_column_into_metadata(x)
    }

    x %>%
        as_tibble() %>%
        dplyr::left_join( y, by=by, copy=copy, suffix=suffix, ...) %>%

        when(

            # If duplicated cells returns tibble
            count(., !!c_(x)$symbol) %>% filter(n>1) %>% nrow %>% gt(0) ~ {
                message(duplicated_cell_names)
                (.)
            },

            # Otherwise return updated tidyseurat
            ~ {
                x@meta.data <- (.) %>% as_meta_data(x)
                x
            }
        )

}

#' @name inner_join
#' @rdname inner_join
#' @inherit dplyr::inner_join
#'
#' @examples
#' data(pbmc_small)
#' tt <- pbmc_small
#' tt |> inner_join(tt |> 
#'   distinct(groups) |>  
#'   mutate(new_column=1:2) |> 
#'   slice(1))
#'
#' @importFrom dplyr inner_join
#' @importFrom dplyr pull
#' @export
inner_join.Seurat <- function (x, y, by=NULL, copy=FALSE,
    suffix=c(".x", ".y"), ...) {

    # Deprecation of special column names
    .cols <- if (!is.null(by)) by else colnames(y)
    if (is_sample_feature_deprecated_used(x, .cols)) {
        x <- ping_old_special_column_into_metadata(x)
    }

    x %>%
        as_tibble() %>%
        dplyr::inner_join( y, by=by, copy=copy, suffix=suffix, ...)  %>%

        when(

            # If duplicated cells returns tibble
            count(., !!c_(x)$symbol) %>% filter(n>1) %>% nrow %>% gt(0) ~ {
                message(duplicated_cell_names)
                (.)
            },

            # Otherwise return updated tidyseurat
            ~ {
                new_obj <- subset(x, cells= pull(., c_(x)$name))
                new_obj@meta.data <- (.) %>% as_meta_data(new_obj)
                new_obj
            }
        )

}

#' @name right_join
#' @rdname right_join
#' @inherit dplyr::right_join
#'
#' @examples
#' data(pbmc_small)
#' tt <- pbmc_small
#' tt |> right_join(tt |> 
#'   distinct(groups) |> 
#'   mutate(new_column=1:2) |> 
#'   slice(1))
#'
#' @importFrom dplyr right_join
#' @importFrom dplyr pull
#' @export
right_join.Seurat <- function (x, y, by=NULL, copy=FALSE,
    suffix=c(".x", ".y"), ...) {

    # Deprecation of special column names
    .cols <- if (!is.null(by)) by else colnames(y)
    if (is_sample_feature_deprecated_used(x, .cols)) {
        x <- ping_old_special_column_into_metadata(x)
    }

    x %>%
        as_tibble() %>%
        dplyr::right_join( y, by=by, copy=copy, suffix=suffix, ...) %>%

        when(

            # If duplicated cells returns tibble
            count(., !!c_(x)$symbol) %>% filter(n>1) %>% nrow %>% gt(0) ~ {
                message(duplicated_cell_names)
                (.)
            },

            # Otherwise return updated tidyseurat
            ~ {
                new_obj <- subset(x, cells=(.) %>% pull(c_(x)$name))
                new_obj@meta.data <- (.) %>% as_meta_data(new_obj)
                new_obj
            }
        )

}

#' @name full_join
#' @rdname full_join
#' @inherit dplyr::full_join
#'
#' @examples
#' data(pbmc_small)
#' tt <- pbmc_small
#' tt |> full_join(tibble::tibble(groups="g1", other=1:4))
#'
#' @importFrom dplyr full_join
#' @export
full_join.Seurat <- function (x, y, by=NULL, copy=FALSE,
    suffix=c(".x", ".y"), ...) {

    # Deprecation of special column names
    .cols <- if (!is.null(by)) by else colnames(y)
    if (is_sample_feature_deprecated_used(x, .cols)) {
        x <- ping_old_special_column_into_metadata(x)
    }

    x %>%
        as_tibble() %>%
        dplyr::full_join( y, by=by, copy=copy, suffix=suffix, ...) %>%

        when(

            # If duplicated cells returns tibble
            count(., !!c_(x)$symbol) %>% filter(n>1) %>% nrow %>% gt(0) ~ {
                message(duplicated_cell_names)
                (.)
            },

            # Otherwise return updated tidyseurat
            ~ {
                x@meta.data <- (.) %>% as_meta_data(x)
                x
            }
        )

}

#' @name slice
#' @rdname slice
#' @aliases slice_head slice_tail 
#'   slice_sample slice_min slice_max
#' @inherit dplyr::slice
#' @family single table verbs
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |> slice(1)
#' 
#' # Slice group-wise using .by
#' pbmc_small |> slice(1:2, .by=groups)
#'
#' @importFrom dplyr slice
#' @importFrom tibble rowid_to_column
#' @export
slice.Seurat <- function (.data, ..., .by=NULL, .preserve=FALSE)
{
    row_number___ <- NULL
    idx <- .data[[]] |>
        select(-everything(), {{ .by }}) |>
        rowid_to_column(var='row_number___')  |>
        slice(..., .by={{ .by }}, .preserve=.preserve) |>
        pull(row_number___)

    if (length(idx) == 0) {
        stop("tidyseurat says: the resulting data container is empty.",
            " Seurat does not allow for empty containers.")
    }

    new_obj <- subset(.data,   cells=colnames(.data)[idx])
    new_obj
}

#' @name slice_sample
#' @rdname slice
#' @inherit dplyr::slice_sample
#' @examples
#'
#' # slice_sample() allows you to random select with or without replacement
#' pbmc_small |> slice_sample(n=5)
#'
#' # if using replacement, and duplicate cells are returned, a tibble will be
#' # returned because duplicate cells cannot exist in Seurat objects
#' pbmc_small |> slice_sample(n=1, replace=TRUE) # returns Seurat
#' pbmc_small |> slice_sample(n=100, replace=TRUE) # returns tibble
#'
#' # weight by a variable
#' pbmc_small |> slice_sample(n=5, weight_by=nCount_RNA)
#'
#' # sample by group
#' pbmc_small |> slice_sample(n=5, by=groups)
#'
#' # sample using proportions
#' pbmc_small |> slice_sample(prop=0.10)
#'
#' @importFrom dplyr slice_sample
#' @export
slice_sample.Seurat <- function(.data, ..., n=NULL,
    prop=NULL, by=NULL, weight_by=NULL, replace=FALSE) {

    # Solve CRAN NOTES
    cell <- NULL
    . <- NULL

    lifecycle::signal_superseded("1.0.0", "sample_n()", "slice_sample()")

    if (!is.null(n))
        new_meta <-
            .data[[]] |>
            as_tibble(rownames=c_(.data)$name) |>
            select(-everything(), c_(.data)$name, {{ by }}, {{ weight_by }}) |>
            slice_sample(..., n=n, by={{ by }},
                weight_by={{ weight_by }}, replace=replace)
    else if (!is.null(prop))
        new_meta <-
            .data[[]] |>
            as_tibble(rownames=c_(.data)$name) |>
            select(-everything(), c_(.data)$name, {{ by }}, {{ weight_by }}) |>
            slice_sample(..., prop=prop, by={{ by }},
                weight_by={{ weight_by }}, replace=replace)
    else
        stop("tidyseurat says: you should provide `n` or `prop` arguments")

    count_cells <- new_meta %>%
        select(!!c_(.data)$symbol) %>%
        count(!!c_(.data)$symbol)
    .max_cell_count <- ifelse(nrow(count_cells)==0, 0, max(count_cells$n))

    # If repeated cells due to replacement
    if (.max_cell_count |> gt(1)){
        message("tidyseurat says: When sampling with replacement",
            " a data frame is returned for independent data analysis.")
        .data |>
            as_tibble()  |>
            right_join(new_meta %>% 
                select(!!c_(.data)$symbol), by=c_(.data)$name)
    } else {
        new_obj <- subset(.data, cells=new_meta %>% pull(!!c_(.data)$symbol))
        new_obj
    }
}

#' @name slice_head
#' @rdname slice
#' @inherit dplyr::slice_head
#' @examples
#'
#' # First rows based on existing order
#' pbmc_small |> slice_head(n=5)
#' 
#' @importFrom dplyr slice_head
#' @importFrom tibble rowid_to_column
#' @export
slice_head.Seurat <- function(.data, ..., n, prop, by=NULL) {
    row_number___ <- NULL
    idx <- .data[[]] |>
        select(-everything(), {{ by }}) |>
        rowid_to_column(var='row_number___')  |>
        slice_head(..., n=n, prop=prop, by={{ by }}) |>
        pull(row_number___)

    if (length(idx) == 0) {
        stop("tidyseurat says: the resulting data container is empty.",
            " Seurat does not allow for empty containers.")
    }
    new_obj <- subset(.data, cells=colnames(.data)[idx])
    new_obj
}

#' @name slice_tail
#' @rdname slice
#' @inherit dplyr::slice_tail
#' @examples
#'
#' # Last rows based on existing order
#' pbmc_small |> slice_tail(n=5)
#' 
#' @importFrom dplyr slice_tail
#' @importFrom tibble rowid_to_column
#' @export
slice_tail.Seurat <- function(.data, ..., n, prop, by=NULL) {
    row_number___ <- NULL
    idx <- .data[[]] |>
        select(-everything(), {{ by }}) |>
        rowid_to_column(var='row_number___')  |>
        slice_tail(..., n=n, prop=prop, by={{ by }}) |>
        pull(row_number___)

    if (length(idx) == 0) {
        stop("tidyseurat says: the resulting data container is empty.",
            " Seurat does not allow for empty containers.")
    }

    new_obj <- subset(.data, cells=colnames(.data)[idx])
    new_obj
}

#' @name slice_min
#' @rdname slice
#' @inherit dplyr::slice_min
#' @examples
#'
#' # Rows with minimum and maximum values of a metadata variable
#' pbmc_small |> slice_min(nFeature_RNA, n=5)
#'
#' # slice_min() and slice_max() may return more rows than requested
#' # in the presence of ties.
#' pbmc_small |>  slice_min(nFeature_RNA, n=2)
#'
#' # Use with_ties=FALSE to return exactly n matches
#' pbmc_small |> slice_min(nFeature_RNA, n=2, with_ties=FALSE)
#'
#' # Or use additional variables to break the tie:
#' pbmc_small |> slice_min(tibble::tibble(nFeature_RNA, nCount_RNA), n=2)
#'
#' # Use by for group-wise operations
#' pbmc_small |> slice_min(nFeature_RNA, n=5, by=groups)
#'
#' @importFrom dplyr slice_min
#' @importFrom tibble rowid_to_column
#' @export
slice_min.Seurat <- function(.data, order_by, ..., n, prop,
    by=NULL, with_ties=TRUE, na_rm=FALSE) {
    row_number___ <- NULL
    order_by_variables <- return_arguments_of(!!enexpr(order_by))

    idx <- .data[[]] |>
        select(-everything(), !!!order_by_variables, {{ by }}) |>
        rowid_to_column(var ='row_number___')  |>
        slice_min(
            order_by={{ order_by }}, ..., n=n, prop=prop, by={{ by }},
            with_ties=with_ties, na_rm=na_rm
        ) |>
        pull(row_number___)

    if (length(idx) == 0) {
        stop("tidyseurat says: the resulting data container is empty.",
            " Seurat does not allow for empty containers.")
    }

    new_obj <- subset(.data, cells=colnames(.data)[idx])
    new_obj
}

#' @name slice_max
#' @rdname slice
#' @inherit dplyr::slice_max
#' @examples
#'
#' # Rows with minimum and maximum values of a metadata variable
#' pbmc_small |> slice_max(nFeature_RNA, n=5)
#' 
#' @importFrom dplyr slice_max
#' @importFrom tibble rowid_to_column
#' @export
slice_max.Seurat <- function(.data, order_by, ..., n, prop,
    by=NULL, with_ties=TRUE, na_rm=FALSE) {
    row_number___ <- NULL

    order_by_variables <- return_arguments_of(!!enexpr(order_by))

    idx <- .data[[]] |>
        select(-everything(), !!!order_by_variables, {{ by }}) |>
        rowid_to_column(var ='row_number___')  |>
        slice_max(
            order_by={{ order_by }}, ..., n=n, prop=prop, by={{ by }},
            with_ties=with_ties, na_rm=na_rm
        ) |>
        pull(row_number___)

    if (length(idx) == 0) {
        stop("tidyseurat says: the resulting data container is empty.",
            " Seurat does not allow for empty containers.")
    }

    new_obj <- subset(.data, cells=colnames(.data)[idx])
    new_obj
}

#' @name select
#' @rdname select
#' @inherit dplyr::select
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> select(cell, orig.ident)
#' 
#' @importFrom dplyr select
#' @export
select.Seurat <- function (.data, ...)
{
    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>% 
        map(~ quo_name(.x)) %>% unlist()
    if (is_sample_feature_deprecated_used(.data, .cols)) {
        .data <- ping_old_special_column_into_metadata(.data)
    }

    .data %>%
        as_tibble() %>%
        select_helper(...) %>%
        when(

            # If key columns are missing
            (get_needed_columns(.data) %in% colnames(.)) %>% all %>% `!` ~ {
                message("tidyseurat says: Key columns are missing.",
                    " A data frame is returned for independent data analysis.")
                (.)
            },

            # If valid seurat meta data
            ~ {
                .data@meta.data <- (.) %>% as_meta_data(.data)
                .data
            }
        )

}

#' @name sample_n
#' @rdname sample_n
#' @aliases sample_frac
#' @inherit dplyr::sample_n
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |> sample_n(50)
#' pbmc_small |> sample_frac(0.1)
#' 
#' @importFrom dplyr sample_n
#' @export
sample_n.Seurat <- function(tbl, size, replace=FALSE,
                                weight=NULL, .env=NULL, ...) {
    # Solve CRAN NOTES
    cell <- NULL
    . <- NULL

    lifecycle::signal_superseded("1.0.0", "sample_n()", "slice_sample()")

    new_meta <- tbl[[]] %>%
        as_tibble(rownames=c_(tbl)$name) %>%
        dplyr::sample_n(size, replace=replace, weight=weight, .env=.env, ...)

    count_cells <- new_meta %>%
        select(!!c_(tbl)$symbol) %>%
        count(!!c_(tbl)$symbol)

    # If repeted cells
    if (count_cells$n %>% max() %>% gt(1)){
        message("tidyseurat says: When sampling with replacement",
            " a data frame is returned for independent data analysis.")
        tbl %>%
            as_tibble() %>%
            right_join(new_meta %>% select(!!c_(tbl)$symbol),  by=c_(tbl)$name)
    } else {
        new_obj <- subset(tbl, cells=new_meta %>% pull(!!c_(tbl)$symbol))
        new_obj@meta.data <-
            new_meta %>%
            data.frame(row.names=pull(.,!!c_(tbl)$symbol),
                check.names=FALSE) %>%
            select(- !!c_(tbl)$symbol)
        new_obj
    }
}

#' @rdname sample_n
#' @importFrom dplyr sample_frac
#' @export
sample_frac.Seurat <- function(tbl, size=1, replace=FALSE,
                                   weight=NULL, .env=NULL, ...) {

    # Solve CRAN NOTES
    cell <- NULL
    . <- NULL

    lifecycle::signal_superseded("1.0.0", "sample_frac()", "slice_sample()")

    new_meta <- tbl[[]] %>% 
        as_tibble(rownames=c_(tbl)$name) %>%
        dplyr::sample_frac(size, replace=replace,
            weight=weight, .env=.env, ...)

    count_cells <- new_meta %>%
        select(!!c_(tbl)$symbol) %>%
        count(!!c_(tbl)$symbol)

    # If repeted cells
    if (count_cells$n %>% max() %>% gt(1)){
        message("tidyseurat says: When sampling with replacement",
            " a data frame is returned for independent data analysis.")
        tbl %>%
            as_tibble() %>%
            right_join(new_meta %>% select(!!c_(tbl)$symbol),  by=c_(tbl)$name)
    } else {
        new_obj <- subset(tbl, cells=new_meta %>% pull(!!c_(tbl)$symbol))
        new_obj@meta.data <-
            new_meta %>%
            data.frame(row.names=pull(.,!!c_(tbl)$symbol),
                check.names=FALSE) %>%
            select(- !!c_(tbl)$symbol)
        new_obj
    }
}

#' Count observations by group
#'
#' @description
#' `count()` lets you quickly count the unique values of one or more variables:
#' `df %>% count(a, b)` is roughly equivalent to
#' `df %>% group_by(a, b) %>% summarise(n = n())`.
#' `count()` is paired with `tally()`, a lower-level helper that is equivalent
#' to `df %>% summarise(n = n())`. Supply `wt` to perform weighted counts,
#' switching the summary from `n = n()` to `n = sum(wt)`.
#'
#' `add_count()` and `add_tally()` are equivalents to `count()` and `tally()`
#' but use `mutate()` instead of `summarise()` so that they add a new column
#' with group-wise counts.
#'
#' @param x A data frame, data frame extension (e.g. a tibble), or a
#'   lazy data frame (e.g. from dbplyr or dtplyr).
#' @param ... <[`data-masking`][dplyr_data_masking]> Variables to group by.
#' @param wt <[`data-masking`][dplyr_data_masking]> Frequency weights.
#'   Can be `NULL` or a variable:
#'
#'   * If `NULL` (the default), counts the number of rows in each group.
#'   * If a variable, computes `sum(wt)` for each group.
#' @param sort If `TRUE`, will show the largest groups at the top.
#' @param name The name of the new column in the output.
#'
#'   If omitted, it will default to `n`. If there's already a column called `n`,
#'   it will error, and require you to specify the name.
#' @param .drop For `count()`: if `FALSE` will include counts for empty groups
#'   (i.e. for levels of factors that don't exist in the data).
#' @return
#' An object of the same type as `.data`. `count()` and `add_count()`
#' group transiently, so the output has the same groups as the input.
#' @name count
#' @rdname count
#' @examples
#' data(pbmc_small)
#' pbmc_small |> count(groups)
#'     
#' @importFrom dplyr count
#' @export
count.Seurat <- function(x, ..., wt=NULL, sort=FALSE,
    name=NULL, .drop=group_by_drop_default(x)) {

    message("tidyseurat says: A data frame is",
        " returned for independent data analysis.")

    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>% 
        map(~ quo_name(.x)) %>% unlist()
    if (is_sample_feature_deprecated_used(x, .cols)) {
        x <- ping_old_special_column_into_metadata(x)
    }

    x %>%
        as_tibble() %>%
        dplyr::count(..., wt=!!enquo(wt), sort=sort, name=name, .drop=.drop)
}

#' @export
#' @rdname count
add_count <- function(x, ..., wt=NULL, sort=FALSE, name=NULL) {
    UseMethod("add_count")
}

#' @export
#' @rdname count
add_count.default <- function(x, ..., wt=NULL, sort=FALSE, name=NULL) {
    if (is.null(name)) name <- "n"
    .out <- x %>%
        dplyr::group_by(..., .add = TRUE) %>%
        dplyr::mutate(!!rlang::sym(name) := if (is.null(wt)) dplyr::n() else sum(!!enquo(wt), na.rm = TRUE)) %>%
        dplyr::ungroup()
    if (sort) .out <- dplyr::arrange(.out, dplyr::desc(!!rlang::sym(name)))
    .out
}

#' @rdname count
#' @aliases add_count
#' @importFrom rlang sym
#' @export
add_count.Seurat <- function(x, ..., wt=NULL, sort=FALSE, name=NULL) {

    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>%
        map(~ quo_name(.x)) %>% unlist()
    if (is_sample_feature_deprecated_used(x, .cols)) {
        x <- ping_old_special_column_into_metadata(x)
    }

    if (is.null(name)) name <- "n"
    .out <- x %>%
        as_tibble %>%
        dplyr::group_by(..., .add = TRUE) %>%
        dplyr::mutate(!!sym(name) := if (is.null(wt)) dplyr::n() else sum(!!enquo(wt), na.rm = TRUE)) %>%
        dplyr::ungroup()
    if (sort) .out <- dplyr::arrange(.out, dplyr::desc(!!sym(name)))
    x@meta.data <- .out %>% as_meta_data(x)

    x
}

#' @name pull
#' @rdname pull
#' @inherit dplyr::pull
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |> pull(groups)
#' 
#' @importFrom dplyr pull
#' @export
pull.Seurat <- function(.data, var=-1, name=NULL, ...) {
    var <- enquo(var)
    name <- enquo(name)

    message("tidyseurat says: A data frame is",
        " returned for independent data analysis.")

    # Deprecation of special column names
    if(is_sample_feature_deprecated_used(
        .data,
        quo_name(var)
    )){
        .data <- ping_old_special_column_into_metadata(.data)
    }

    .data %>%
        as_tibble() %>%
        dplyr::pull( var=!!var, name=!!name, ...)
}

#' @name group_split
#' @rdname group_split
#' @inherit dplyr::group_split
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |> group_split(groups)
#' 
#' @importFrom rlang check_dots_used
#' @importFrom dplyr group_by
#' @importFrom dplyr group_rows
#' @importFrom dplyr group_split
#' @export
group_split.Seurat <- function(.tbl, ..., .keep = TRUE) {
  
  var_list <- enquos(...)
  
  group_list <- .tbl |> 
    as_tibble() |> 
    dplyr::group_by(!!!var_list)
  
  groups <- group_list |> 
    dplyr::group_rows()
  
  v <- vector(mode = "list", length = length(groups))
  
  for (i in seq_along(v)) {
    v[[i]] <- .tbl[,groups[[i]]]
    
    if(.keep == FALSE) {
      v[[i]] <- select(v[[i]], !(!!!var_list))
    }
  }
  
  v
  
}


================================================
FILE: R/ggplot2_methods.R
================================================
#' @name ggplot
#' @rdname ggplot
#' @inherit ggplot2::ggplot
#' @title Create a new \code{ggplot} from a \code{tidyseurat}
#' @return `ggplot`
#'
#' @examples
#' library(ggplot2)
#' data(pbmc_small)
#' pbmc_small |> 
#'   ggplot(aes(groups, nCount_RNA)) +
#'   geom_boxplot()
#' 
#' @importFrom purrr map
#' @importFrom rlang quo_name
#' @importFrom ggplot2 aes ggplot
#' @export
ggplot.Seurat <- function(data=NULL, mapping=aes(),
    ..., environment=parent.frame()) {
  
    # Deprecation of special column names
    .cols <- mapping %>% 
        unlist() %>% map(~ quo_name(.x)) %>% 
        unlist() %>% as.character()
    if (is_sample_feature_deprecated_used(data, .cols)) {
        data <- ping_old_special_column_into_metadata(data)
    }
  
    data %>%
        as_tibble() %>%
        ggplot2::ggplot(mapping=mapping)
}


================================================
FILE: R/methods.R
================================================
#' @importFrom methods getMethod
setMethod(
    f="show",
    signature="Seurat",
    definition=function(object) {
        if (isTRUE(x=getOption(x="restore_Seurat_show", default=FALSE))) {
            f <- getMethod(
                f="show",
                signature="Seurat",
                where=asNamespace(ns="SeuratObject"))
            f(object=object)
        } else { print(object) }
    }
)

setClass("tidyseurat", contains="Seurat")

#' @importFrom generics tidy

#' @title tidy for Seurat objects
#' @name tidy
#' @description tidy for Seurat objects
#' @param x A Seurat object
#' @param ... Additional arguments (not used)
#' @return A tidyseurat object
#' @importFrom lifecycle deprecate_warn
#' @export
tidy.Seurat <- function(x, ...){ 
  
    # DEPRECATE
    deprecate_warn(
        when="0.2.0",
        what="tidy()",
        details="tidyseurat says: tidy() is not needed anymore."
    )
  
    return(x)
}


#' @name join_features
#' @rdname join_features
#' @inherit ttservice::join_features
#' @aliases join_features,Seurat-method
#'
#' @param .data A tidyseurat object
#' @param assay assay name to extract feature abundance
#' @param slot slot name to extract feature abundance
#' 
#' @return A `tidyseurat` object
#'   containing information for the specified features.
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small %>% join_features(
#'   features=c("HLA-DRA", "LYZ"))
#'
#' @importFrom magrittr "%>%"
#' @importFrom dplyr contains
#' @importFrom dplyr everything
#' @importFrom ttservice join_features
#' @export
setMethod("join_features", "Seurat", function(.data,
    features=NULL, all=FALSE, exclude_zeros=FALSE, shape="wide",
    assay=NULL, slot="data", ...) {
  
  .feature = NULL
  
  if(shape == "long")
    .data |> 
    left_join(
      get_abundance_sc_long(
        .data=.data,
        features=features,
        all=all,
        exclude_zeros=exclude_zeros,
        assay=assay,
        slot=slot,
        ...
      ),
      by=c_(.data)$name
    ) %>%
    select(!!c_(.data)$symbol, .feature,
           contains(".abundance"), everything())
  else
    .data |> 
    left_join(
      get_abundance_sc_wide(
        .data=.data,
        features=features,
        all=all,
        assay=assay,
        slot=slot,
        ...
      ),
      by=c_(.data)$name
    ) 

})

#' @name aggregate_cells
#' @rdname aggregate_cells
#' @inherit ttservice::aggregate_cells
#' @aliases aggregate_cells,Seurat-method
#' 
#' @param .data A tidyseurat object
#' 
#' @examples 
#' data(pbmc_small)
#' pbmc_small_pseudo_bulk <- pbmc_small |>
#'   aggregate_cells(c(groups, letter.idents), assays="RNA")
#'
#' @importFrom rlang enquo
#' @importFrom magrittr "%>%"
#' @importFrom tibble enframe
#' @importFrom Matrix rowSums
#' @importFrom ttservice aggregate_cells
#' @importFrom SeuratObject DefaultAssay
#' @importFrom Seurat DietSeurat
#' @importFrom Seurat GetAssayData
#' @importFrom purrr map_int
setMethod("aggregate_cells", "Seurat",  function(.data,
    .sample=NULL, slot="data", assays=NULL,
    aggregation_function=Matrix::rowSums, ...){
    # Solve NOTE  
    data <- NULL
    .feature <- NULL
  
    .sample <- enquo(.sample)

    # Subset only wanted assays
    if(!is.null(assays)){
        DefaultAssay(.data) <- assays[1]
        .data = .data |> DietSeurat(assays = assays)
    }

    .data %>%
        nest(data=-!!.sample) %>%
        mutate(.aggregated_cells=map_int(data, ~ ncol(.x))) %>% 
        mutate(
            data=map(data, ~ 
                # Loop over assays
                map2(.x@assays, names(.x@assays),
                    # Get counts
                    ~ GetAssayData_robust(.x, layer=slot) %>%
                        aggregation_function(na.rm=T) %>%
                        tibble::enframe(
                            name=".feature",
                            value=sprintf("%s", .y)
                        ) %>%
                        mutate(.feature=as.character(.feature)) 
                ) %>%
                Reduce(function(...) full_join(..., by=c(".feature")), .), 
                .progress = TRUE          
        )) %>%
        left_join(
            .data %>%
                as_tibble() %>%
                subset_tidyseurat(!!.sample)) %>%
        unnest(data) %>%
        tidyr::unite(".sample", !!.sample,  sep="___", remove=FALSE) |> 
        select(.feature, .sample, names(.data@assays), everything()) |> 
        drop_class("tidyseurat_nested") 
})

================================================
FILE: R/methods_DEPRECATED.R
================================================
#' (DEPRECATED) Extract and join information for transcripts.
#'
#'
#' @description join_transcripts() extracts and joins information for specified transcripts
#'
#' @importFrom rlang enquo
#' @importFrom magrittr "%>%"
#'
#' @name join_transcripts
#' @rdname join_transcripts
#'
#' @param .data A tidyseurat object
#' @param transcripts A vector of transcript identifiers to join
#' @param all If TRUE return all
#' @param exclude_zeros If TRUE exclude zero values
#' @param shape Format of the returned table "long" or "wide"
#' @param ... Parameters to pass to join wide, i.e. assay name to extract transcript abundance from
#'
#' @details DEPRECATED, please use join_features()
#'
#' @return A `tbl` containing the information.for the specified transcripts
#' 
#' @examples
#'
#' print("DEPRECATED")
#'
#'
#' @export
#'
join_transcripts <- function(.data,
                          transcripts = NULL,
                          all = FALSE,
                          exclude_zeros = FALSE,
                          shape = "wide", ...) {
  UseMethod("join_transcripts", .data)
}
#' @export
join_transcripts.default <-
  function(.data,
           transcripts = NULL,
           all = FALSE,
           exclude_zeros = FALSE,
           shape = "wide", ...)
  {
    print("tidyseurat says: This function cannot be applied to this object")
  }
#' @export
join_transcripts.Seurat <-
  function(.data,
           transcripts = NULL,
           all = FALSE,
           exclude_zeros = FALSE,
           shape = "wide", ...)
  {
    
    deprecate_warn("0.2.1", "join_transcripts()", "tidyseurat::join_features()")
    
    
    .data %>%
      join_features(features = transcripts,
                       all = all,
                       exclude_zeros = exclude_zeros,
                       shape = shape, ...)
    
  }

================================================
FILE: R/pillar_utilities.R
================================================
NBSP <- "\U00A0"

pillar___format_comment <- function (x, width)
{
    if (length(x) == 0L) {
        return(character())
    }
    map_chr(x, pillar___wrap, prefix="# ",
        width=min(width, cli::console_width()))
}

#' @importFrom fansi strwrap_ctl
pillar___strwrap2 <- function (x, width, indent)
{
    fansi::strwrap_ctl(x, width=max(width, 0),
        indent=indent, exdent=indent + 2)
}


pillar___wrap <- function (..., indent=0, prefix="", width)
{
    x <- paste0(..., collapse="")
    wrapped <- pillar___strwrap2(x, width - get_extent(prefix), indent)
    wrapped <- paste0(prefix, wrapped)
    wrapped <- gsub(NBSP, " ", wrapped)
    paste0(wrapped, collapse="\n")
}


================================================
FILE: R/plotly_methods.R
================================================
#' @name plotly
#' @rdname plotly
#' @inherit plotly::plot_ly
#' @return `plotly`
#' 
#' @examples
#' data(pbmc_small)
#' plot_ly(pbmc_small)
#' 
#' @importFrom plotly plot_ly
#' @export
plot_ly <- function(data=data.frame(), ..., type=NULL, name=NULL,
    color=NULL, colors=NULL, alpha=NULL,
    stroke=NULL, strokes=NULL, alpha_stroke=1,
    size=NULL, sizes=c(10, 100),
    span=NULL, spans=c(1, 20),
    symbol=NULL, symbols=NULL,
    linetype=NULL, linetypes=NULL,
    split=NULL, frame=NULL,
    width=NULL, height=NULL, source="A") {
    UseMethod("plot_ly")
}

#' @rdname plotly
#' @export
plot_ly.tbl_df <- function(data=data.frame(), ..., type=NULL, name=NULL,
    color=NULL, colors=NULL, alpha=NULL,
    stroke=NULL, strokes=NULL, alpha_stroke=1,
    size=NULL, sizes=c(10, 100),
    span=NULL, spans=c(1, 20),
    symbol=NULL, symbols=NULL,
    linetype=NULL, linetypes=NULL,
    split=NULL, frame=NULL,
    width=NULL, height=NULL, source="A") {
    data %>%
        # This is a trick to not loop the call
        drop_class("tbl_df") %>%
        plotly::plot_ly(...,
            type=type, name=name,
            color=color, colors=colors, alpha=alpha,
            stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke,
            size=size, sizes=sizes,
            span=span, spans=spans,
            symbol=symbol, symbols=symbols,
            linetype=linetype, linetypes=linetypes,
            split=split, frame=frame,
            width=width, height=height, source=source)
}

#' @rdname plotly
#' @export
plot_ly.Seurat <- function(data=data.frame(), ..., type=NULL, name=NULL,
    color=NULL, colors=NULL, alpha=NULL, 
    stroke=NULL, strokes=NULL, alpha_stroke=1,
    size=NULL, sizes=c(10, 100), 
    span=NULL, spans=c(1, 20),
    symbol=NULL, symbols=NULL,
    linetype=NULL, linetypes=NULL,
    split=NULL, frame=NULL, 
    width=NULL, height=NULL, source="A") {
  
    data %>%
        # This is a trick to not loop the call
        as_tibble() %>%
        plot_ly(..., type=type, name=name,
	        color=color, colors=colors, alpha=alpha, 
	        stroke=stroke, strokes=strokes, alpha_stroke=alpha_stroke,
	        size=size, sizes=sizes, 
	        span=span, spans=spans,
	        symbol=symbol, symbols=symbols, 
	        linetype=linetype, linetypes=linetypes,
	        split=split, frame=frame, 
	        width=width, height=height, source=source)    
}


================================================
FILE: R/print_method.R
================================================
# This file is a replacement of the unexported functions in the tibble
# package, in order to specify "tibble abstraction in the header"

#' @name tbl_format_header
#' @rdname tbl_format_header
#' @inherit pillar::tbl_format_header
#' 
#' @examples
#' # TODO
#' 
#' @importFrom rlang names2
#' @importFrom pillar align
#' @importFrom pillar get_extent
#' @importFrom pillar style_subtle
#' @importFrom pillar tbl_format_header
#' @export
tbl_format_header.tidySeurat <- function(x, setup, ...){
  
    number_of_features <- x |> attr("number_of_features")
    assay_names <- x |> attr("assay_names")
    active_assay <- x |> attr("active_assay")
  
    named_header <- setup$tbl_sum
  
    # Change name
    names(named_header) <- "A Seurat-tibble abstraction"
  
    if (all(names2(named_header) == "")) {
        header <- named_header
    } else {
        header <- paste0(
        align(paste0(names2(named_header), ":"), space=NBSP),
        " ", named_header) %>%
        # Add further info single-cell
        append(sprintf(
            "\033[90m Features=%s | Cells=%s | Active assay=%s | Assays=%s\033[39m",
            number_of_features,
            nrow(x),
            active_assay,
            assay_names %>% paste(collapse=", ")
        ), after=1)
    }
    style_subtle(pillar___format_comment(header, width=setup$width))
}


#' @name formatting
#' @rdname formatting
#' @aliases print
#' @inherit tibble::formatting
#' @return Prints a message to the console describing
#'   the contents of the `tidyseurat`.
#'
#' @param ... Passed on to \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.
#' @param n_extra Number of extra columns to print abbreviated information for,
#'   if the width is too small for the entire tibble. If `NULL`, the default,
#'   will print information about at most `tibble.max_extra_cols` extra columns.
#' @examples
#' data(pbmc_small)
#' print(pbmc_small)
#' 
#' @importFrom vctrs new_data_frame
#' @importFrom Seurat GetAssayData
#' @importFrom Seurat Assays
#' @export
print.Seurat <- function(x, ..., n=NULL, width=NULL, n_extra=NULL) {

    x |>
        as_tibble(n_dimensions_to_return=5) |>
        new_data_frame(class=c("tidySeurat", "tbl")) %>%
        add_attr(GetAssayData(x) %>% nrow,  "number_of_features") %>%
        add_attr(Assays(x) , "assay_names") %>%
        add_attr(x@active.assay , "active_assay") %>%
        print()
    invisible(x)
}


================================================
FILE: R/tibble_methods.R
================================================
#' @name as_tibble
#' @rdname as_tibble
#' @inherit tibble::as_tibble
#' @return `tibble`
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |> as_tibble()
#' 
#' @importFrom tibble as_tibble
#' @importFrom purrr reduce
#' @importFrom purrr map
#' @importFrom tidyr spread
#' @importFrom tibble enframe
#' @export
as_tibble.Seurat <- function(x, ...,
    .name_repair=c("check_unique", "unique", "universal", "minimal"),
    rownames=NULL){

    x[[]] %>%
        tibble::as_tibble(rownames=c_(x)$name) %>%

        # Attach reduced dimensions
        when(
            # Only if I have reduced dimensions and special datasets
            length(x@reductions) > 0 ~ (.) %>%
                left_join(
                    get_special_datasets(x, ...) %>%
                    map(~ .x %>% when(
                        # If row == 1 do a trick
                        dim(.) %>% is.null ~ {
                            (.) %>% tibble::enframe() %>%
                                spread(name, value) %>%
                                mutate(!!c_(x)$symbol := rownames(x[[]]))
                        },

                        # Otherwise continue normally
                        ~ as_tibble(., rownames=c_(x)$name)
                    )) %>%
                    reduce(left_join, by=c_(x)$name),
                by=c_(x)$name
                ),
            # Otherwise skip
            ~ (.)
        )
}

#' @name glimpse
#' @rdname glimpse
#' @inherit pillar::glimpse
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> glimpse()
#' 
#' @importFrom tibble glimpse
#' @export
glimpse.tidyseurat <- function(x, width=NULL, ...){
    x %>%
        as_tibble() %>%
        tibble::glimpse(width=width, ...)
}


================================================
FILE: R/tidyr_methods.R
================================================
#' @name unnest
#' @rdname unnest
#' @inherit tidyr::unnest
#' @aliases unnest_seurat
#' @return `tidyseurat`
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |> 
#'     nest(data=-groups) |> 
#'     unnest(data)
#'
#' @importFrom rlang quo_name
#' @importFrom purrr imap
#' @importFrom tidyr unnest
#' @export
unnest.tidyseurat_nested <- function(data, cols, ...,
    keep_empty=FALSE, ptype=NULL, names_sep=NULL, 
    names_repair="check_unique", .drop, .id, .sep, .preserve) {

    cols <- enquo(cols)

    unnest_seurat(data, !!cols, ...,
        keep_empty=keep_empty, ptype=ptype,
        names_sep=names_sep, names_repair=names_repair)

}

#' @rdname unnest
#' @importFrom tidyr unnest
#' @importFrom purrr when
#' @importFrom rlang quo_name
#' @importFrom purrr imap
#' @export
unnest_seurat  <-  function(data, cols, ...,
    keep_empty=FALSE, ptype=NULL,
    names_sep=NULL, names_repair="check_unique",
    .drop, .id, .sep, .preserve) {
    # Need this otherwise crashes map
    .data_ <- data
  
    cols <- enquo(cols)
  
    .data_ %>% 
        when(
      
            # If my only column to unnest is tidyseurat
            pull(., !!cols) %>% .[[1]] %>% is("Seurat") %>% any ~  
            {
                # Do my trick to unnest
                list_seurat <- mutate(.,
                    !!cols := imap(
                        !!cols, ~ .x %>%
                            bind_cols_(
                                .data_ %>%
                                    select(-!!cols) %>%
                                    slice(rep(.y, nrow(as_tibble(.x))))
                            )
                        )) %>%
                    pull(!!cols)
                list_seurat[[1]] %>%
                    # Bind only if length list > 1
                    when(
                        length(list_seurat)>1 ~ bind_rows(.,
                            list_seurat[2:length(list_seurat)]),
                        ~ (.)
                    )
            },
      
            # Else do normal stuff
            ~ (.) %>% 
                drop_class("tidyseurat_nested") %>%
                tidyr::unnest(!!cols, ..., keep_empty=keep_empty,
                    ptype=ptype, names_sep=names_sep,
                    names_repair=names_repair) %>%
                add_class("tidyseurat_nested"))
}


#' @name nest
#' @rdname nest
#' @inherit tidyr::nest
#' @return `tidyseurat_nested`
#'
#' @examples
#' data(pbmc_small)
#' pbmc_small |> 
#'     nest(data=-groups) |> 
#'     unnest(data)
#' 
#' @importFrom tidyr nest
#' @importFrom magrittr equals
#' @importFrom rlang enquos
#' @importFrom Seurat SplitObject
#' @importFrom Seurat DietSeurat
#' @importFrom rlang :=
#' @export
nest.Seurat <- function (.data, ..., .names_sep=NULL)
{
    cols <- enquos(...)
    col_name_data  <- names(cols)

    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>%
        map(~ quo_name(.x)) %>% unlist()
    if (is_sample_feature_deprecated_used(.data, .cols)) {
        .data <- ping_old_special_column_into_metadata(.data)
    }
  
    my_data__ <- .data 
  
    # This is for getting the column names
    dummy_nested <- 
        my_data__ |> 
        DietSeurat(features = rownames(my_data__)[1:2], assays = DefaultAssay(my_data__)) |>
        suppressWarnings() |> 
        to_tib() %>%
        tidyr::nest(...)
  
    split_by_column <- 
        dummy_nested |> 
        select(-col_name_data) |>
        colnames()
  
    # If nesting on one group use the fast split
    if (split_by_column |> length() |> identical(1L))
  
        my_data__ |> 
            SplitObject(split.by=split_by_column) |>
            map(~ .x |> select(-split_by_column)) |> 
            enframe(name=split_by_column, value=col_name_data) |>
            # Coerce to tidyseurat_nested for unnesting
            add_class("tidyseurat_nested")
  
    # If arbitrary nest is needed use the slow one
    else
        my_data__ %>%
        # This is needed otherwise nest goes into loop and fails
            to_tib %>%
            tidyr::nest(...) %>%
      
            mutate(
                !!as.symbol(col_name_data) := map(
                    !!as.symbol(col_name_data),
                    ~ my_data__ %>% 
                    # Subset cells
                    filter(!!c_(.data)$symbol %in% 
                        pull(.x, !!c_(.data)$symbol)) %>%
                    # Subset columns
                    select(colnames(.x))
                )) |>
      
            # Coerce to tidyseurat_nested for unnesting
            add_class("tidyseurat_nested")
}

#' @name extract
#' @rdname extract
#' @inherit tidyr::extract
#' @return `tidyseurat`
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |>
#'   extract(groups, 
#'     into="g", 
#'     regex="g([0-9])", 
#'     convert=TRUE)
#' 
#' @importFrom tidyr extract
#' @export
extract.Seurat <- function  (data, col, into,
    regex="([[:alnum:]]+)", remove=TRUE, convert=FALSE, ...) {
	
    col <- enquo(col)
	
	# Deprecation of special column names
	if (is_sample_feature_deprecated_used(
	    data, 
	    c(quo_name(col), into)
	)) {
	    data= ping_old_special_column_into_metadata(data)
	}
	
	data@meta.data <- 
	    data %>%
	    as_tibble() %>%
	    tidyr::extract(col=!!col, into=into, regex=regex,
            remove=remove, convert=convert, ...) %>%
	    as_meta_data(data)

	data
}

#' @name pivot_longer
#' @rdname pivot_longer
#' @inherit tidyr::pivot_longer
#' @return `tidyseurat`
#' 
#' @export
#' @examples
#' data(pbmc_small)
#' pbmc_small |> pivot_longer(
#'   cols=c(orig.ident, groups),
#'   names_to="name", values_to="value")
#' 
#' @importFrom rlang check_dots_used
#' @importFrom tidyr pivot_longer
#' @export
pivot_longer.Seurat <- function(data,
    cols, names_to="name", names_prefix=NULL,
    names_sep=NULL, names_pattern=NULL, names_ptypes=NULL,
    names_transform=NULL, names_repair="check_unique",
    values_to="value", values_drop_na=FALSE,
    values_ptypes=NULL, values_transform=NULL, ...) {
    cols <- enquo(cols) 
  
    message(data_frame_returned_message)
  
    # Deprecation of special column names
    if (is_sample_feature_deprecated_used(
        data, 
        c(quo_names(cols))
    )) {
        data= ping_old_special_column_into_metadata(data)
    }
  
    data %>%
        as_tibble() %>%
        tidyr::pivot_longer(!!cols, names_to=names_to,
            names_prefix=names_prefix, names_sep=names_sep,
            names_pattern=names_pattern, names_ptypes=names_ptypes,
            names_transform=names_transform, names_repair=names_repair,
            values_to=values_to, values_drop_na=values_drop_na,
            values_ptypes=values_ptypes, values_transform=values_transform,
            ...)
}

#' @name unite
#' @rdname unite
#' @inherit tidyr::unite
#' @return `tidyseurat`
#' 
#' @examples
#' data(pbmc_small)
#' pbmc_small |> unite(
#'   col="new_col", 
#'   c("orig.ident", "groups"))
#'     
#' @importFrom rlang enquo enquos quo_name
#' @importFrom tidyr unite
#' @export
unite.Seurat <- function(data, col,
    ..., sep="_", remove=TRUE, na.rm=FALSE) {
  
    # Check that we are not modifying a key column
    cols <- enquo(col) 
  
    # Deprecation of special column names
    .cols <- enquos(..., .ignore_empty="all") %>% 
        map(~ quo_name(.x)) %>% unlist()
    if (is_sample_feature_deprecated_used(data, .cols)) {
        data <- ping_old_special_column_into_metadata(data)
    }

    .view_only_cols <- c(
        get_special_columns(data),
        get_needed_columns(data))
    
    .test <- intersect(
        quo_names(cols), 
        .view_only_cols)

    if (remove && length(.test)) {
        stop("tidyseurat says:",
            " you are trying to rename a column",
            " that is view only ", 
            paste(.view_only_cols, collapse=", "),
            " (it is not present in the colData).",
            " If you want to mutate a view-only column,",
            " make a copy and mutate that one.")
    }
  
    data@meta.data <- data %>%
        as_tibble() %>%
        tidyr::unite(!!cols, ..., sep=sep,
            remove=remove, na.rm=na.rm) %>%
        as_meta_data(data)
  
    data
}

#' @name separate
#' @rdname separate
#' @inherit tidyr::separate
#' @return `tidyseurat`
#' 
#' @examples
#' data(pbmc_small)
#' un <- pbmc_small |> unite("new_col", c(orig.ident, groups))
#' un |> separate(new_col, c("orig.ident", "groups"))
#' 
#' @importFrom tidyr separate
#' @export
separate.Seurat <- function(data, col, into,
    sep="[^[:alnum:]]+", remove=TRUE, convert=FALSE,
    extra="warn", fill="warn", ...) {
  
    # Check that we are not modifying a key column
    cols <- enquo(col)
  
    # Deprecation of special column names
    if(is_sample_feature_deprecated_used(
        data, 
        c(quo_names(cols))
    )) {
        data= ping_old_special_column_into_metadata(data)
    }

    .view_only_cols <- c(
        get_special_columns(data),
        get_needed_columns(data))
    
    .test <- intersect(
        quo_names(cols), 
        .view_only_cols)

    if (remove && length(.test)) {
        stop("tidyseurat says:",
            " you are trying to rename a column",
            " that is view only ",
            paste(.view_only_cols, collapse=", "),
            "(it is not present in the colData).",
            " If you want to mutate a view-only column,",
            " make a copy and mutate that one.")
    }
   
    data@meta.data =
        data %>%
        as_tibble() %>% 
        tidyr::separate(!!cols, into=into, sep=sep, remove=remove, 
            convert=convert, extra=extra, fill=fill, ...) %>%
        as_meta_data(data)
    data
}

================================================
FILE: R/utilities.R
================================================
#' @importFrom tibble as_tibble
#'
#' @keywords internal
#'
#' @param .data A tidyseurat
#' 
#' @noRd
to_tib <- function(.data) {
    .data[[]] %>%
        as_tibble(rownames=c_(.data)$name)
}

# Greater than
gt <- function(a, b) {
    a > b
}

# Smaller than
st <- function(a, b) {
    a < b
}

# Negation
not <- function(is) {
    !is
}

# Raise to the power
pow <- function(a, b) {
    a^b
}

# Equals
eq <- function(a, b) {
    a == b
}

prepend <- function(x, values, before=1) {
    n <- length(x)
    stopifnot(before > 0 && before <= n)
    if (before == 1) {
        c(values, x)
    } else {
        c(x[seq_len(before-1)], values, x[seq(before, n)])
    }
}

#' Add class to abject
#'
#' @keywords internal
#'
#' @param var A tibble
#' @param name A character name of the attribute
#'
#' @return A tibble with an additional attribute
add_class <- function(var, name) {
    if (!name %in% class(var)) 
        class(var) <- prepend(class(var), name)
    return(var)
}

#' Remove class to abject
#'
#' @keywords internal
#'
#' @param var A tibble
#' @param name A character name of the class
#'
#' @return A tibble with an additional attribute
#' @keywords internal
drop_class <- function(var, name) {
    class(var) <- class(var)[!class(var) %in% name]
    return(var)
}

#' get abundance wide
#'
#' @keywords internal
#'
#' @importFrom magrittr "%$%"
#' @importFrom utils tail
#' @importFrom Seurat GetAssayData
#' @importFrom Seurat DietSeurat
#' @importFrom SeuratObject DefaultAssay<-
#' @importFrom stats setNames
#'
#' @param .data A tidyseurat
#' @param features A character
#' @param all A boolean
#' @param assay assay name to extract feature abundance
#' @param slot slot in the assay, e.g. `data` and `scale.data`
#' @param prefix prefix for the feature names
#'
#' @return A Seurat object
#' @examples
#' data(pbmc_small)
#' pbmc_small %>%
#'   get_abundance_sc_wide(features=c("HLA-DRA", "LYZ"))
#'
#' @export
get_abundance_sc_wide <- function(.data, features=NULL, all=FALSE,
    assay=.data@active.assay, slot="data", prefix="") {

    # Solve CRAN warnings
    . <- NULL
    assays <- NULL
    counts <- NULL
  
    if (is.null(assay)) {
  	    assay <- .data@active.assay
    }

    # Check if output would be too big without forcing
    if(
        length(VariableFeatures(.data)) == 0  &
        is.null(features) &
        all == FALSE
    ) {
        stop("Your object do not contain variable trancript labels,\n",
			 " feature argument is empty and all argument is set to FALSE.\n",
			 " Either:\n",
			 " 1. use detect_variable_features() to select variable feature\n",
			 " 2. pass an array of features names\n",
			 " 3. set all=TRUE (this will output a very large object;",
             " does your computer have enough RAM?)\n")
    }

    # Get variable features if existing
    if(
        length(VariableFeatures(.data)) > 0  &
        is.null(features) &
        all == FALSE
    ) variable_genes <- VariableFeatures(.data)
    # Else
    else variable_genes <- NULL

    # Eliminate unneeded assays.
    # This because if a gene is not in an assay I am not interested about
    # this could cause an unneeded error
    DefaultAssay(.data) <- assay
    .data = .data |> DietSeurat(assays = assay)

    # Just grub last assay
    .data |> 
      GetAssayData(assay = assay, layer=slot) %>% 
        when(
            variable_genes %>% is.null %>% `!` ~ 
                (.)[ toupper(rownames(.)) %in% toupper(variable_genes),,drop=FALSE],
            features %>% is.null %>% `!` ~ 
                (.)[ toupper(rownames(.)) %in% toupper(features),,drop=FALSE],
            ~ stop("tidyseurat says: It is not convenient to",
                " extract all genes, you should have either variable",
                " features or feature list to extract.")
        ) |> 
        as.matrix() |> 
        t() |> 
        as_tibble(rownames=c_(.data)$name) %>%

        # Add prefix
        setNames(c(c_(.data)$name, sprintf("%s%s", prefix, colnames(.)[-1])))
}

#' get abundance long
#'
#' @keywords internal
#'
#' @importFrom magrittr "%$%"
#' @importFrom Seurat VariableFeatures
#' @importFrom tidyr pivot_longer
#' @importFrom tibble as_tibble
#' @importFrom purrr when
#' @importFrom purrr map2
#'
#' @param .data A tidyseurat
#' @param features A character
#' @param all A boolean
#' @param exclude_zeros A boolean
#' @param assay assay name to extract feature abundance
#' @param slot slot in the assay, e.g. `data` and `scale.data`
#'
#' @return A Seurat object
#' @examples
#' data(pbmc_small)
#' pbmc_small %>%
#'   get_abundance_sc_long(features=c("HLA-DRA", "LYZ"))
#'
#' @export
get_abundance_sc_long <- function(.data, features=NULL, all=FALSE,
    exclude_zeros=FALSE, assay=Assays(.data), slot="data"){

    # Solve CRAN warnings
    . <- NULL
  
    if (is.null(assay)) {
  	    assay <- Assays(.data)
    }
  
    # Check if output would be too big without forcing
    if (
        length(VariableFeatures(.data)) == 0  &
        is.null(features) &
        all == FALSE
    ) {
        stop("Your object do not contain variable trancript labels,\n",
        " feature argument is empty and all argument is set to FALSE.\n",
        " Either:\n",
        " 1. use detect_variable_features() to select variable feature\n",
        " 2. pass an array of features names\n",
        " 3. set all=TRUE (this will output a very large object;",
        " does your computer have enough RAM?)\n")
    }


    # Get variable features if existing
    if(
        length(VariableFeatures(.data)) > 0  &
        is.null(features) &
        all == FALSE
    ) variable_genes <- VariableFeatures(.data)
    # Else
    else variable_genes <- NULL

    .data@assays %>%
  	    .[assay] %>%
        # Take active assay
        map2(assay,
            ~ .x %>%
                GetAssayData(layer = slot) %>%
                when(
                    variable_genes %>% is.null %>% `!` ~
                        (.)[variable_genes,, drop=FALSE],
                    features %>% is.null %>% `!` ~ 
                        (.)[ toupper(rownames((.))) %in% 
                            toupper(features), , drop=FALSE],
                    all ~ (.),
                    ~ stop("tidyseurat says: It is not convenient to",
                        " extract all genes, you should have either variable",
                        " features or feature list to extract.")
                ) %>%

                # Replace 0 with NA
                when(exclude_zeros ~ 
                        (.) %>%
                        { x=(.); x[x == 0] <- NA; x }, ~ (.)) %>%
                        data.frame(check.names=FALSE) %>%
                        as_tibble(rownames=".feature") %>%
                        tidyr::pivot_longer(
                            cols= - .feature,
                            names_to=c_(.data)$name,
                            values_to=".abundance" %>% paste(.y, sep="_"),
                            values_drop_na=TRUE
                        ) #%>%
                #mutate_if(is.character, as.factor) %>%
        ) %>%
        Reduce(function(...)
            full_join(..., by=c(".feature", c_(.data)$name)), .)
}

#' @importFrom dplyr select_if
#' @importFrom tibble column_to_rownames
#'
#' @keywords internal
#'
#' @param .data A tibble
#' @param seurat_object A tidyseurat
#'
#' @noRd
as_meta_data <- function(.data, seurat_object){

    # Solve CRAN warnings
    . <- NULL

    col_to_exclude <- get_special_columns(seurat_object)

    .data %>%
        select_if(!colnames(.) %in% col_to_exclude) %>%
        #select(-one_of(col_to_exclude)) %>%
        column_to_rownames(c_(seurat_object)$name)
}

#' @importFrom purrr map_chr
#'
#' @keywords internal
#'
#' @param seurat_object A tidyseurat
#'
#' @noRd
get_special_columns <- function(seurat_object){
    get_special_datasets(seurat_object) %>%
        map(~ .x %>% colnames  ) %>%
        unlist %>%
        as.character
}

get_special_datasets <- function(seurat_object, n_dimensions_to_return=Inf){
    seurat_object@reductions %>%
        map(~ .x@cell.embeddings[,
            1:min(n_dimensions_to_return, ncol(.x@cell.embeddings)),
            drop=FALSE])
}

get_needed_columns <- function(.data){
    c(c_(.data)$name)
}

#' Convert array of quosure (e.g. c(col_a, col_b)) into character vector
#'
#' @keywords internal
#'
#' @importFrom rlang quo_name
#' @importFrom rlang quo_squash
#'
#' @param v A array of quosures (e.g. c(col_a, col_b))
#'
#' @return A character vector
quo_names <- function(v) {
    v <- quo_name(quo_squash(v))
    gsub('^c\\(|`|\\)$', '', v) %>%
        strsplit(', ') %>%
        unlist
}


#' returns variables from an expression
#' @param expression an expression
#' @importFrom rlang enexpr
#' @return list of symbols
return_arguments_of <- function(expression){
    variables <- enexpr(expression) |> as.list()
    if(length(variables) > 1) {
        variables <- variables[-1] # removes first element which is function
    }
    variables
}

#' @importFrom purrr when
#' @importFrom dplyr select
#' @importFrom rlang expr
select_helper <- function(.data, ...){
    loc <- tidyselect::eval_select(expr(c(...)), .data)
    dplyr::select( .data, loc)
}

data_frame_returned_message <- paste(
    "tidyseurat says:",
    "A data frame is returned for independent data analysis.")

duplicated_cell_names <- paste(
    "tidyseurat says:",
    "This operation lead to duplicated cell names.",
    "A data frame is returned for independent data analysis.")

#' @importFrom methods .hasSlot
clean_seurat_object <- function(.data){

    . <- NULL

    if (.hasSlot(.data, "images"))
        .data@images <-
            map(.data@images,
                ~ .x %>% when((.)@coordinates %>% nrow() %>% gt(0) ~ (.))) %>%

                # Drop NULL
                Filter(Negate(is.null), .)

        .data@assays <- .data@assays %>%
            map(~ {
                my_assay=.x
                if (.hasSlot(., "SCTModel.list"))
                    my_assay@SCTModel.list  =
                    map(my_assay@SCTModel.list,
                        ~ .x %>%
                            when((.)@cell.attributes %>%
                                nrow() %>% gt(0) ~ (.))) %>%

                            # Drop NULL
                            Filter(Negate(is.null), .)
                my_assay
            })

    .data
}


# This function is used for the change of special sample column to .sample
# Check if "sample" is included in the query and
# is not part of any other existing annotation
#' @importFrom stringr str_detect
#' @importFrom stringr regex
is_sample_feature_deprecated_used <- function(.data, 
    user_columns, use_old_special_names=FALSE) {
    
    cell <- any(str_detect(user_columns, regex("\\bcell\\b")))
    .cell <- any(str_detect(user_columns, regex("\\W*(\\.cell)\\W*")))
    
    old_standard_is_used <- 
        !"cell" %in% colnames(.data@meta.data) &&
        ("cell" %in% user_columns || (cell && !.cell))
    
    if (old_standard_is_used) {
        warning("tidyseurat says:",
            " from version 1.3.1, the special columns including",
            " cell id (colnames(se)) has changed to \".cell\".",
            " This dataset is returned with the old-style vocabulary (cell),",
            " however, we suggest to update your workflow",
            " to reflect the new vocabulary (.cell).")
        use_old_special_names <- TRUE
    }
    use_old_special_names
}

get_special_column_name_symbol <- function(name){
    list(name=name, symbol=as.symbol(name))
}

# Key column names
ping_old_special_column_into_metadata <- function(.data){
    .data@misc$cell__ <- get_special_column_name_symbol("cell")
    .data
}

get_special_column_name_cell <- function(name){
    list(name=name, symbol=as.symbol(name))
}

cell__ <- get_special_column_name_symbol(".cell")

c_ <- function(x){
    # Check if old deprecated columns are used
    if("cell__" %in% names(x@misc)) cell__ <- x@misc$cell__
    return(cell__)
}

#' Add attribute to abject
#'
#' @keywords internal
#' @noRd
#'
#' @importFrom dplyr vars
#'
#' @param var A tibble
#' @param attribute An object
#' @param name A character name of the attribute
#'
#' @return A tibble with an additional attribute
add_attr <- function(var, attribute, name) {
    attr(var, name) <- attribute
    var
}

#' Get specific annotation columns
#'
#' @keywords internal
#' @noRd
#' 
#' @importFrom rlang enquo
#' @importFrom purrr map
#' @importFrom dplyr distinct_at
#' @importFrom magrittr equals
#' @importFrom dplyr vars
#' 
#' @param .data A tibble
#' @param .col A vector of column names
#' 
#' @return A character
get_specific_annotation_columns <- function(.data, .col) {
    
    # Comply with CRAN NOTES
    . <- NULL
    
    # Make col names
    .col <- enquo(.col)
    
    # x-annotation df
    n_x <- .data |> distinct_at(vars(!!.col)) |> nrow()
    
    # element wise columns
    .data |>
        select(-!!.col) |>
        colnames() |>
        map(~ {
            n_.x <- .data |> distinct_at(vars(!!.col, .x)) |> nrow()
            if (n_.x == n_x) .x else NULL
        }) %>%
        # Drop NULL
        { (.)[lengths((.)) != 0] } |>
        unlist()
}

subset_tidyseurat <- function(.data, .column) {
    # Make col names
    .column <- enquo(.column)

    # Check if column present
    if (.data |> select(!!.column) |> colnames() %in% colnames(.data) %>% all %>% `!`)
        stop("tidyseurat says: some of the .column specified",
            " do not exist in the input data frame.")


    .data %>%
    # Selecting the right columns
        select(!!.column, get_specific_annotation_columns(.data, !!.column)) %>%
        distinct()
}

#' @importFrom Seurat GetAssayData
#' @importFrom methods is
GetAssayData_robust = function(seurat_assay, layer = NULL){
  
  if(
    seurat_assay |> is("Assay5") & 
    seurat_assay |> ncol() == 1
  ){
    m = seurat_assay@layers[[layer]] |> as.matrix()
    rownames(m) = rownames(seurat_assay)
    colnames(m) = colnames(seurat_assay)
    m
  }
    
  else 
    GetAssayData(seurat_assay, layer=layer)
}


================================================
FILE: R/utils-pipe.R
================================================
#' Pipe operator
#'
#' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
#'
#' @name %>%
#' @rdname pipe
#' @keywords internal
#' @export
#' @importFrom magrittr %>%
#' @usage lhs \%>\% rhs
#' @examples
#' data(pbmc_small)
#' pbmc_small %>% print()
#' @return void
NULL


================================================
FILE: R/zzz.R
================================================
#' @importFrom utils packageDescription
.onAttach = function(libname, pkgname) {
	version = packageDescription(pkgname, fields = "Version")
	
	msg = paste0("========================================
", pkgname, " version ", version, "
If you use TIDYSEURAT in published research, please cite:

Mangiola et al. Interfacing Seurat with the R tidy universe. Bioinformatics 2021.

This message can be suppressed by:
  suppressPackageStartupMessages(library(tidyseurat))
  
To restore the Seurat default display use options(\"restore_Seurat_show\" = TRUE) 
========================================
")	
	
	packageStartupMessage(msg)
    # Attach tidyverse
    attached <- tidyverse_attach()
}

# rv = R.Version()

# if(getRversion() >= "4.0.0" && as.numeric(rv$`svn rev`) >= 77889) {
# 	unitType = get("unitType", envir = asNamespace("grid"))
# } else {
# 	unitType = function(x, recurse = TRUE) attr(x, "unit")
# }

================================================
FILE: README.Rmd
================================================
---
title: "tidyseurat - part of tidytranscriptomics"
output: github_document
always_allow_html: true
---

<!-- badges: start -->
[![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html) [![R build status](https://github.com/stemangiola/tidyseurat/workflows/R-CMD-check/badge.svg)](https://github.com/stemangiola/tidyseurat/actions/)
<!-- badges: end -->

<a href="https://www.youtube.com/watch?feature=player_embedded&v=wKnyocRCvW4" target="_blank">
 <img src="https://img.youtube.com/vi/wKnyocRCvW4/mqdefault.jpg" alt="Watch the video" width="280" height="180" border="10" />
</a>

```{r echo=FALSE}
knitr::opts_chunk$set( fig.path = "man/figures/")
```

```{r include=FALSE}
# Set path to plotly screenshot. We don't run the plotly code chunk as most servers do not have javascript libraries needed for interactive plotting
screenshot <- "man/figures/plotly.png"

# The chunk below uses Rmd in man/fragments to avoid duplication, as the content is shared with the vignette and README. As suggested here: https://www.garrickadenbuie.com/blog/dry-vignette-and-readme/

visual_cue <- "man/figures/logo_interaction-01.png"

```

```{r child="man/fragments/intro.Rmd"}
```


================================================
FILE: README.md
================================================
tidyseurat - part of tidytranscriptomics
================

<!-- badges: start -->

[![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html)
[![R build
status](https://github.com/stemangiola/tidyseurat/workflows/R-CMD-check/badge.svg)](https://github.com/stemangiola/tidyseurat/actions/)
<!-- badges: end -->

<a href="https://www.youtube.com/watch?feature=player_embedded&v=wKnyocRCvW4" target="_blank">
<img src="https://img.youtube.com/vi/wKnyocRCvW4/mqdefault.jpg" alt="Watch the video" width="280" height="180" border="10" />
</a>

**Brings Seurat to the tidyverse!**

website:
[stemangiola.github.io/tidyseurat/](https://stemangiola.github.io/tidyseurat/)

Please also have a look at

- [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy
  single-cell RNA sequencing analysis
- [tidySummarizedExperiment](https://tidyomics.github.io/tidySummarizedExperiment/)
  for tidy bulk RNA sequencing analysis
- [tidybulk](https://tidyomics.github.io/tidybulk/) for tidy bulk
  RNA-seq analysis
- [tidygate](https://github.com/stemangiola/tidygate/) for adding custom
  gate information to your tibble
- [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps
  produced with tidy principles

<figure>
<img src="man/figures/logo_interaction-01.png" alt="visual cue" />
<figcaption aria-hidden="true">visual cue</figcaption>
</figure>

# Introduction

tidyseurat provides a bridge between the Seurat single-cell package
\[@butler2018integrating; @stuart2019comprehensive\] and the tidyverse
\[@wickham2019welcome\]. It creates an invisible layer that enables
viewing the Seurat object as a tidyverse tibble, and provides
Seurat-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions.

## Functions/utilities available

| Seurat-compatible Functions | Description |
|-----------------------------|-------------|
| `all`                       |             |

| tidyverse Packages | Description                          |
|--------------------|--------------------------------------|
| `dplyr`            | All `dplyr` APIs like for any tibble |
| `tidyr`            | All `tidyr` APIs like for any tibble |
| `ggplot2`          | `ggplot` like for any tibble         |
| `plotly`           | `plot_ly` like for any tibble        |

| Utilities | Description |
|----|----|
| `tidy` | Add `tidyseurat` invisible layer over a Seurat object |
| `as_tibble` | Convert cell-wise information to a `tbl_df` |
| `join_features` | Add feature-wise information, returns a `tbl_df` |
| `aggregate_cells` | Aggregate cell gene-transcription abundance as pseudobulk tissue |

## Installation

From CRAN

``` r
install.packages("tidyseurat")
```

From Github (development)

``` r
devtools::install_github("stemangiola/tidyseurat")
```

``` r
library(dplyr)
library(tidyr)
library(purrr)
library(magrittr)
library(ggplot2)
library(Seurat)
library(tidyseurat)
```

## Create `tidyseurat`, the best of both worlds!

This is a seurat object but it is evaluated as tibble. So it is fully
compatible both with Seurat and tidyverse APIs.

``` r
pbmc_small = SeuratObject::pbmc_small
```

**It looks like a tibble**

``` r
pbmc_small
```

    ## # A Seurat-tibble abstraction: 80 × 15
    ## # [90mFeatures=230 | Cells=80 | Active assay=RNA | Assays=RNA[0m
    ##    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
    ##    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
    ##  1 ATGC… SeuratPro…         70           47 0               A             g2    
    ##  2 CATG… SeuratPro…         85           52 0               A             g1    
    ##  3 GAAC… SeuratPro…         87           50 1               B             g2    
    ##  4 TGAC… SeuratPro…        127           56 0               A             g2    
    ##  5 AGTC… SeuratPro…        173           53 0               A             g2    
    ##  6 TCTG… SeuratPro…         70           48 0               A             g1    
    ##  7 TGGT… SeuratPro…         64           36 0               A             g1    
    ##  8 GCAG… SeuratPro…         72           45 0               A             g1    
    ##  9 GATA… SeuratPro…         52           36 0               A             g1    
    ## 10 AATG… SeuratPro…        100           41 0               A             g1    
    ## # ℹ 70 more rows
    ## # ℹ 8 more variables: RNA_snn_res.1 <fct>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>,
    ## #   PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>

**But it is a Seurat object after all**

``` r
pbmc_small@assays
```

    ## $RNA
    ## Assay data with 230 features for 80 cells
    ## Top 10 variable features:
    ##  PPBP, IGLL5, VDAC3, CD1C, AKR1C3, PF4, MYL9, GNLY, TREML1, CA2

# Preliminary plots

Set colours and theme for plots.

``` r
# Use colourblind-friendly colours
friendly_cols <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933", "#882255", "#661100", "#6699CC")

# Set theme
my_theme <-
  list(
    scale_fill_manual(values = friendly_cols),
    scale_color_manual(values = friendly_cols),
    theme_bw() +
      theme(
        panel.border = element_blank(),
        axis.line = element_line(),
        panel.grid.major = element_line(size = 0.2),
        panel.grid.minor = element_line(size = 0.1),
        text = element_text(size = 12),
        legend.position = "bottom",
        aspect.ratio = 1,
        strip.background = element_blank(),
        axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10))
      )
  )
```

We can treat `pbmc_small` effectively as a normal tibble for plotting.

Here we plot number of features per cell.

``` r
pbmc_small %>%
  ggplot(aes(nFeature_RNA, fill = groups)) +
  geom_histogram() +
  my_theme
```

![](man/figures/plot1-1.png)<!-- -->

Here we plot total features per cell.

``` r
pbmc_small %>%
  ggplot(aes(groups, nCount_RNA, fill = groups)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  my_theme
```

![](man/figures/plot2-1.png)<!-- -->

Here we plot abundance of two features for each group.

``` r
pbmc_small %>%
  join_features(features = c("HLA-DRA", "LYZ"), shape = "long") %>%
  ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(aes(size = nCount_RNA), alpha = 0.5, width = 0.2) +
  scale_y_log10() +
  my_theme
```

![](man/figures/unnamed-chunk-15-1.png)<!-- -->

# Preprocess the dataset

Also you can treat the object as Seurat object and proceed with data
processing.

``` r
pbmc_small_pca <-
  pbmc_small %>%
  SCTransform(verbose = FALSE) %>%
  FindVariableFeatures(verbose = FALSE) %>%
  RunPCA(verbose = FALSE)

pbmc_small_pca
```

    ## # A Seurat-tibble abstraction: 80 × 17
    ## # [90mFeatures=220 | Cells=80 | Active assay=SCT | Assays=RNA, SCT[0m
    ##    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
    ##    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
    ##  1 ATGC… SeuratPro…         70           47 0               A             g2    
    ##  2 CATG… SeuratPro…         85           52 0               A             g1    
    ##  3 GAAC… SeuratPro…         87           50 1               B             g2    
    ##  4 TGAC… SeuratPro…        127           56 0               A             g2    
    ##  5 AGTC… SeuratPro…        173           53 0               A             g2    
    ##  6 TCTG… SeuratPro…         70           48 0               A             g1    
    ##  7 TGGT… SeuratPro…         64           36 0               A             g1    
    ##  8 GCAG… SeuratPro…         72           45 0               A             g1    
    ##  9 GATA… SeuratPro…         52           36 0               A             g1    
    ## 10 AATG… SeuratPro…        100           41 0               A             g1    
    ## # ℹ 70 more rows
    ## # ℹ 10 more variables: RNA_snn_res.1 <fct>, nCount_SCT <dbl>,
    ## #   nFeature_SCT <int>, PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>, PC_4 <dbl>,
    ## #   PC_5 <dbl>, tSNE_1 <dbl>, tSNE_2 <dbl>

If a tool is not included in the tidyseurat collection, we can use
`as_tibble` to permanently convert `tidyseurat` into tibble.

``` r
pbmc_small_pca %>%
  as_tibble() %>%
  select(contains("PC"), everything()) %>%
  GGally::ggpairs(columns = 1:5, ggplot2::aes(colour = groups)) +
  my_theme
```

![](man/figures/pc_plot-1.png)<!-- -->

# Identify clusters

We proceed with cluster identification with Seurat.

``` r
pbmc_small_cluster <-
  pbmc_small_pca %>%
  FindNeighbors(verbose = FALSE) %>%
  FindClusters(method = "igraph", verbose = FALSE)

pbmc_small_cluster
```

    ## # A Seurat-tibble abstraction: 80 × 19
    ## # [90mFeatures=220 | Cells=80 | Active assay=SCT | Assays=RNA, SCT[0m
    ##    .cell orig.ident nCount_RNA nFeature_RNA RNA_snn_res.0.8 letter.idents groups
    ##    <chr> <fct>           <dbl>        <int> <fct>           <fct>         <chr> 
    ##  1 ATGC… SeuratPro…         70           47 0               A             g2    
    ##  2 CATG… SeuratPro…         85           52 0               A             g1    
    ##  3 GAAC… SeuratPro…         87           50 1               B             g2    
    ##  4 TGAC… SeuratPro…        127           56 0               A             g2    
    ##  5 AGTC… SeuratPro…        173           53 0               A             g2    
    ##  6 TCTG… SeuratPro…         70           48 0               A             g1    
    ##  7 TGGT… SeuratPro…         64           36 0               A             g1    
    ##  8 GCAG… SeuratPro…         72           45 0               A             g1    
    ##  9 GATA… SeuratPro…         52           36 0               A             g1    
    ## 10 AATG… SeuratPro…        100           41 0               A             g1    
    ## # ℹ 70 more rows
    ## # ℹ 12 more variables: RNA_snn_res.1 <fct>, nCount_SCT <dbl>,
    ## #   nFeature_SCT <int>, SCT_snn_res.0.8 <fct>, seurat_clusters <fct>,
    ## #   PC_1 <dbl>, PC_2 <dbl>, PC_3 <dbl>, PC_4 <dbl>, PC_5 <dbl>, tSNE_1 <dbl>,
    ## #   tSNE_2 <dbl>

Now we can interrogate the object as if it was a regular tibble data
frame.

``` r
pbmc_small_cluster %>%
  count(groups, seurat_clusters)
```

    ## # A tibble: 6 × 3
    ##   groups seurat_clusters     n
    ##   <chr>  <fct>           <int>
    ## 1 g1     0                  23
    ## 2 g1     1                  17
    ## 3 g1     2                   4
    ## 4 g2     0                  17
    ## 5 g2     1                  13
    ## 6 g2     2                   6

We can identify cluster markers using Seurat.

<!-- If this is Seurat v4, comment out the v3 markers -->

<!--
&#10;
``` r
# Identify top 10 markers per cluster
markers <-
  pbmc_small_cluster %>%
  mutate(orig.ident = seurat_clusters) %>% 
  FindAllMarkers(only.pos = TRUE) %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC)
&#10;# Plot heatmap
pbmc_small_cluster %>%
  DoHeatmap(
    features = markers$gene,
    group.colors = friendly_cols
  )
```
&#10;-->

<!-- If this is Seurat v3, comment out the v4 markers -->

``` r
# Identify top 10 markers per cluster
markers <-
  pbmc_small_cluster %>%
  FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>%
  group_by(cluster) %>%
  top_n(10, avg_log2FC)

# Plot heatmap
pbmc_small_cluster %>%
  DoHeatmap(
    features = markers$gene,
    group.colors = friendly_cols
  )
```

# Reduce dimensions

We can calculate the first 3 UMAP dimensions using the Seurat framework.

``` r
pbmc_small_UMAP <-
  pbmc_small_cluster %>%
  RunUMAP(reduction = "pca", dims = 1:15, n.components = 3L)
```

And we can plot them using 3D plot using plotly.

``` r
pbmc_small_UMAP %>%
  plot_ly(
    x = ~`UMAP_1`,
    y = ~`UMAP_2`,
    z = ~`UMAP_3`,
    color = ~seurat_clusters,
    colors = friendly_cols[1:4]
  )
```

<figure>
<img src="man/figures/plotly.png" alt="screenshot plotly" />
<figcaption aria-hidden="true">screenshot plotly</figcaption>
</figure>

## Cell type prediction

We can infer cell type identities using *SingleR* \[@aran2019reference\]
and manipulate the output using tidyverse.

``` r
# Get cell type reference data
blueprint <- celldex::BlueprintEncodeData()

# Infer cell identities
cell_type_df <-
  GetAssayData(pbmc_small_UMAP, slot = 'counts', assay = "SCT") %>%
  log1p() %>%
  Matrix::Matrix(sparse = TRUE) %>%
  SingleR::SingleR(
    ref = blueprint,
    labels = blueprint$label.main,
    method = "single"
  ) %>%
  as.data.frame() %>%
  as_tibble(rownames = "cell") %>%
  select(cell, first.labels)
```

``` r
# Join UMAP and cell type info
pbmc_small_cell_type <-
  pbmc_small_UMAP %>%
  left_join(cell_type_df, by = "cell")

# Reorder columns
pbmc_small_cell_type %>%
  select(cell, first.labels, everything())
```

We can easily summarise the results. For example, we can see how cell
type classification overlaps with cluster classification.

``` r
pbmc_small_cell_type %>%
  count(seurat_clusters, first.labels)
```

We can easily reshape the data for building information-rich faceted
plots.

``` r
pbmc_small_cell_type %>%

  # Reshape and add classifier column
  pivot_longer(
    cols = c(seurat_clusters, first.labels),
    names_to = "classifier", values_to = "label"
  ) %>%

  # UMAP plots for cell type and cluster
  ggplot(aes(UMAP_1, UMAP_2, color = label)) +
  geom_point() +
  facet_wrap(~classifier) +
  my_theme
```

We can easily plot gene correlation per cell category, adding
multi-layer annotations.

``` r
pbmc_small_cell_type %>%

  # Add some mitochondrial abundance values
  mutate(mitochondrial = rnorm(n())) %>%

  # Plot correlation
  join_features(features = c("CST3", "LYZ"), shape = "wide") %>%
  ggplot(aes(CST3 + 1, LYZ + 1, color = groups, size = mitochondrial)) +
  geom_point() +
  facet_wrap(~first.labels, scales = "free") +
  scale_x_log10() +
  scale_y_log10() +
  my_theme
```

# Nested analyses

A powerful tool we can use with tidyseurat is `nest`. We can easily
perform independent analyses on subsets of the dataset. First we
classify cell types in lymphoid and myeloid; then, nest based on the new
classification

``` r
pbmc_small_nested <-
  pbmc_small_cell_type %>%
  filter(first.labels != "Erythrocytes") %>%
  mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
  nest(data = -cell_class)

pbmc_small_nested
```

Now we can independently for the lymphoid and myeloid subsets (i) find
variable features, (ii) reduce dimensions, and (iii) cluster using both
tidyverse and Seurat seamlessly.

``` r
pbmc_small_nested_reanalysed <-
  pbmc_small_nested %>%
  mutate(data = map(
    data, ~ .x %>%
      FindVariableFeatures(verbose = FALSE) %>%
      RunPCA(npcs = 10, verbose = FALSE) %>%
      FindNeighbors(verbose = FALSE) %>%
      FindClusters(method = "igraph", verbose = FALSE) %>%
      RunUMAP(reduction = "pca", dims = 1:10, n.components = 3L, verbose = FALSE)
  ))

pbmc_small_nested_reanalysed
```

Now we can unnest and plot the new classification.

``` r
pbmc_small_nested_reanalysed %>%

  # Convert to tibble otherwise Seurat drops reduced dimensions when unifying data sets.
  mutate(data = map(data, ~ .x %>% as_tibble())) %>%
  unnest(data) %>%

  # Define unique clusters
  unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>%

  # Plotting
  ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
  geom_point() +
  facet_wrap(~cell_class) +
  my_theme
```

# Aggregating cells

Sometimes, it is necessary to aggregate the gene-transcript abundance
from a group of cells into a single value. For example, when comparing
groups of cells across different samples with fixed-effect models.

In tidyseurat, cell aggregation can be achieved using the
`aggregate_cells` function.

``` r
pbmc_small %>%
  aggregate_cells(groups, assays = "RNA")
```


================================================
FILE: _pkgdown.yml
================================================
template:
  bootstrap: 5


================================================
FILE: codecov.yml
================================================
comment: false

coverage:
  status:
    project:
      default:
        target: auto
        threshold: 1%
    patch:
      default:
        target: auto
        threshold: 1%


================================================
FILE: dev/code_comparison.Rmd
================================================
---
title: "Code comparison with Seurat"
author: "Stefano Mangiola"
date: "`r Sys.Date()`"
package: tidyseurat
output:
  html_vignette:
    toc_float: true
vignette: >
  %\VignetteEngine{knitr::knitr}
  %\VignetteIndexEntry{Code comparison with Seurat}
  %\usepackage[UTF-8]{inputenc}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

# Case study code comparison


## Calculate gamma-delta signature and plot

tidyseurat

```{r}
seurat_obj <- readRDS("dev/PBMC_tidy_clean_scaled_UMAP_cluster_cell_type.rds")

seurat_obj = 
  seurat_obj %>%
  filter(first.labels == "T_cells") %>%
  RunPCA() %>% 
  RunUMAP(dims=1:30) %>%
  mutate(type=case_when(sample %in% c("GSE115189", "SRR11038995", "SRR7244582") ~ "A", TRUE ~ "B"))

```

```{r}

library(tidygate)
library(ggplot2)
library(purrr)
library(patchwork)

# Calculate gamma delta signature
seurat_obj_sig = 

	seurat_obj %>%
	join_features(
	  features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"), 
	  shape = "wide", 
	  assay = "SCT"
	) %>%
	

	mutate(signature_score = 
	         scales::rescale(CD3D + TRDC + TRGC1 + TRGC2, to=c(0,1)) - 
	         scales::rescale(CD8A + CD8B, to=c(0,1))
	) 

p1 = seurat_obj_sig %>%
  
  
  # Subsample
  add_count(sample, name = "tot_cells") %>%
  mutate(min_cells = min(tot_cells)) %>%
  group_by(sample) %>%
  sample_n(min_cells) %>%
  
  # Plot
  pivot_longer(cols=c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B", "signature_score")) %>%
  mutate(value = case_when(value>0 ~ value)) %>%
  group_by(name) %>%
  mutate(value = scale(value)) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=value)) +
  geom_point(shape=".") +
  facet_grid(type~name) +
  scale_color_viridis_c() +
  custom_theme

# Test differential abundance
p2 = seurat_obj_sig %>%
  
  # Gating
  mutate(gamma_delta = gate_chr(
		UMAP_1,
		UMAP_2, 
		.color =  signature_score, 
		.size=0.1
	)) %>%

  # Calculate proportions
  add_count(sample, name = "tot_cells") %>%
  count(sample, type, tot_cells, gamma_delta) %>%
  mutate(frac = n/tot_cells) %>%
  filter(gamma_delta == 1) %>%
  
  # Plot
  ggplot(aes(type, frac)) + 
  geom_boxplot() + 
  geom_point() +
  custom_theme

p = p1 / (p2  | plot_spacer()) +  plot_layout(guides = "collect")

ggsave("dev/summary_statistics.pdf", p,  device = "pdf", width = 183, height = 150, units = "mm", useDingbats=FALSE)

```

Seurat

```{r}
library(Seurat)
library(gatepoints)
library(dplyr)

# Calculate gamma delta signature
signature_score_1 = 
  seurat_obj[c("CD3D", "TRDC", "TRGC1", "TRGC2"),] %>%
  GetAssayData(assay="SCT", slot="data") %>%
  colSums() %>%
  scales::rescale(to=c(0,1))

signature_score_2 = 
  seurat_obj[c("CD8A", "CD8B"),] %>%
  GetAssayData(assay="SCT", slot="data") %>%
  colSums() %>%
  scales::rescale(to=c(0,1))
seurat_obj$signature_score = signature_score_1 - signature_score_2

# Subsample
splits = colnames(seurat_obj) %>% split(seurat_obj$sample) 
min_size = splits %>% sapply(length) %>%  min()
cell_subset = splits %>%  lapply(function(x) sample(x, min_size)) %>%  unlist()
seurat_obj = seurat_obj[,cell_subset]

# Plot
DefaultAssay(seurat_obj) = "SCT"

seurat_obj %>%
FeaturePlot(
  features = c("signature_score", "CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),
  split.by = "type",
  min.cutoff = 0.1
) 
  

# Gating
p = FeaturePlot(seurat_obj, features = "signature_score")
seurat_obj$within_gate = colnames(seurat_obj) %in% CellSelector(plot = p)

# Calculate proportions
seurat_obj[[]] %>%
  add_count(sample, name = "tot_cells") %>%
  count(sample, type, tot_cells, within_gate) %>%
  mutate(frac = n/tot_cells) %>%
  filter(within_gate == T) %>%
  
  # Plot
  ggplot(aes(type, frac)) + 
  geom_boxplot() + 
  geom_point()
```

================================================
FILE: dev/plot_seurat_structure.R
================================================

library( DataExplorer )

plot_str(pbmc_small, type = "r" )
plot_str(pbmc_small , type="d")

plot_str(pbmc_small %>%  join_features("CD3G"), type = "r" )
plot_str(pbmc_small %>%  join_features("CD3G"), type = "d" )


================================================
FILE: dev/use_cases_BioCAsia2021.R
================================================
library(tidyverse)
library(glue)

tibble(
  observation = glue("observation {1:100}"),
  variable_1 = rep("...", 100),
  variable_2 = rep("...", 100),
  variable_3 = map(1:100, ~ tibble(a = 1:10, b = 1:10)), 
  variable_4 = map(1:100, ~ ggplot()),
  variable_5 = map(1:100, ~ lm(y ~ x, data = data.frame(x=1:10, y=1:10))),
  variable_6 = map(1:100, ~ pbmc_small),
  variable_7 = map(1:100, ~ tidySingleCellExperiment::pbmc_small)
)


my_vector = seq(1, 20); 

# Imperative
my_vector_modified = c()
for(i in 1:length(my_vector)) {
  my_vector_modified[i] = my_vector[i] * 2L
}
# Functional
my_vector_modified = my_vector |> map_int(~ .x * 2L)

df = data.frame(a= rep("a", ncol(SeuratObject::pbmc_small)), b= rep("b", ncol(SeuratObject::pbmc_small)))
rownames(df)  = colnames(SeuratObject::pbmc_small)

info = rep(1, ncol(SeuratObject::pbmc_small))
SeuratObject::pbmc_small |>
  AddMetaData(info, "info")

colData(tidySingleCellExperiment::pbmc_small) |> cbind()


# Subsampling

single_cell_data |>
  add_count(sample, name = "tot_cells") |>
  mutate(median_cells = min(tot_cells)) |>
  nest(data = -c(sample, median_cells)) |>
  mutate(data = map2(data, median_cells, ~ sample_n(.x, .y, replace = TRUE))) |>
  unnest(data)

# Define cell categories for analysis plotting

single_cell_data |>
  
  mutate(cell_differentiation = 
           case_when(
             curated_cell_type_pretty %in% c("B immature", "B mem") ~ "B",
             curated_cell_type_pretty %in% c("pDC") ~ "pDC",
             cell_differentiation == "lymphoid" ~ "T+NK",
             cell_differentiation == "myeloid" ~ "Myeloid"
           )
  ) |> 
  
  mutate(
    curated_cell_type_pretty = if_else(
      curated_cell_type_pretty %in% c("T gd1",  "T gd2"), 
      "gamma_delta" , 
      curated_cell_type_pretty
    )
  ) 


# Quality control


# Gating gamma delta
seurat_obj_sig = seurat_obj |>
  
  
  join_features(
    features = c("CD3D", "TRDC", "TRGC1", "TRGC2", "CD8A", "CD8B"),
    shape = "wide",
    assay = "SCT"
    
  ) |>
  
  mutate(signature_score =
           scales::rescale(CD3D + TRDC + TRGC1+ TRGC2, to=c(0,1)) -
           scales::rescale(CD8A + CD8B, to=c(0,1))
  ) |>
  
  Seurat::FeaturePlot(signature_score) |>
  mutate( gate = tidygate::gate_int(UMAP_1, UMAP_2) ) |> 
  
  filter(gate == 1) %>%
  
  NormalizeData() |> 
  FindVariableFeatures( nfeatures = 100)

  split_group(sample) %>% 
  RunFastMNN() |> 
  RunUMAP(reduction = "mnn", dims = 1:20) |> 
  FindNeighbors( dims = 1:20, reduction = "mnn") |> 
  FindClusters( resolution = 0.3) |>


# gamma_delta_df = 
#   readRDS("cancer_only_analyses/integrated_counts_curated.rds")  |> 
#   #	{.x = (.); DefaultAssay(.x) = "RNA"; .x} |> 
#   filter(curated_cell_type_pretty %in% c("T gd1",  "T gd2")) |> 
#   
#   {
#     .x= (.)
#     DefaultAssay(.x) = "RNA"
#     .x[["SCT"]] = NULL
#     .x[["integrated"]] = NULL
#     .x
#   } |> 
#   NormalizeData() |> 
#   FindVariableFeatures( nfeatures = 100) |> 
#   mutate(batch_to_eliminate = sample) |> 
#   nest(data = -batch_to_eliminate) |>
#   pull(data) |> 
#   RunFastMNN() |> 
#   RunUMAP(reduction = "mnn", dims = 1:20) |> 
#   FindNeighbors( dims = 1:20, reduction = "mnn") |> 
#   FindClusters( resolution = 0.3) |>
#   mutate(gate = tidygate::gate_int(UMAP_1, UMAP_2, how_many_gates = 2, gate_list = readRDS("file66175abbca44.rds"))) |> 
#   tidysc::adjust_abundance(~ 1) |>
#   mutate(gamma_delta = case_when(
#     gate == 0 ~ "T gd vd2",
#     gate == 1 ~ "T gd vd1 LGALS1",
#     gate == 2 ~ "T gd vd1",
#   ))

================================================
FILE: dev/workflow_article.R
================================================
# Article workflow
library(tidyverse)
library(Seurat)
library(SingleR)
library(plotly)
library(tidyHeatmap)
library(tidyseurat)
options(future.globals.maxSize = 50068 * 1024^2)

PBMC <- readRDS("dev/PBMC_integrated.rds")

# Polishing

PBMC_clean <-
  PBMC_

  # Clean groups
  mutate(Phase = Phase %>% str_remove("^phase_")) %>%

  # Extract sample
  extract(sample, "sample", "./data/seurat/outs/([a-zA-Z0-9]+)")

# PBMC_clean = PBMC_clean %>% nest(data = -sample) %>% mutate(data = map(data, ~ .x %>% sample_n(200))) %>% unnest(data)

# Scaling
# PBMC_clean_scaled <-
#   PBMC_clean %>%
#    SCTransform(verbose = FALSE) %>%
#   FindVariableFeatures(verbose = FALSE)


# Dimensionality reduction
PBMC_clean_scaled_UMAP <-
  PBMC_clean %>%
  RunPCA(verbose = FALSE) %>%
  RunUMAP(reduction = "pca", dims = 1:15, n.components = 3L)

# Clustering
PBMC_clean_scaled_UMAP_cluster <-
  PBMC_clean_scaled_UMAP %>%
  FindNeighbors(verbose = FALSE) %>%
  FindClusters(method = "igraph", verbose = FALSE)

# Cell_type classification Manual
markers <-
  PBMC_clean_scaled_UMAP_cluster %>%
  FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC)

# Cell_type classification Automatic

# Get cell type reference data
hpca <- HumanPrimaryCellAtlasData()

# Infer cell identities
cell_type_df <-

  # extracting counts from Seurat object
  GetAssayData(PBMC_clean_scaled_UMAP_cluster, layer = 'counts', assay = "SCT") %>%
  log1p() %>%

  # SingleR
  SingleR(
    ref = hpca,
    labels = hpca$label.main,
    method = "cluster",
    clusters = PBMC_clean_scaled_UMAP_cluster %>% pull(seurat_clusters)
  ) %>%

  # Formatting results
  as.data.frame() %>%
  as_tibble(rownames = "seurat_clusters") %>%
  select(seurat_clusters, first.labels)


# Infer cell identities - cell wise
cell_type_df_single <-

  # extracting counts from Seurat object
  GetAssayData(PBMC_clean_scaled_UMAP_cluster, layer = 'counts', assay = "SCT") %>%
  log1p() %>%

  # SingleR
  SingleR(
    ref = hpca,
    labels = hpca$label.main,
    method = "single"
  ) %>%

  # Formatting results
  as.data.frame() %>%
  as_tibble(rownames = "cell") %>%
  select(cell, first.labels_single = first.labels)

# Join UMAP and cell type info
PBMC_clean_scaled_UMAP_cluster_cell_type <-
  PBMC_clean_scaled_UMAP_cluster %>%
  left_join(
    cell_type_df,
    by = "seurat_clusters"
  ) %>%
  left_join(
    cell_type_df_single,
    by = "cell"
  )


# Markers
PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC) %>%
  saveRDS("dev/PBMC_marker_df.rds")

# Nesting
PBMC_clean_scaled_UMAP_cluster_cell_type  %>%
  sample_n(1000) %>%

  # Label lymphoid and myeloid
  tidyseurat::filter(first.labels != "Platelets") %>%
  tidyseurat::mutate(cell_class =
                       if_else(
                         `first.labels` %in% c("Macrophage", "Monocyte"),
                         "myeloid",
                         "lymphoid"
                       )
  ) %>%

  # Nesting
  nest(data = -cell_class) %>%

  # Identification of variable gene features
  mutate(variable_genes = map_chr(
    data, ~ .x %>%
      FindVariableFeatures() %>%
      RunPCA(verbose = FALSE) %>%
      FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>%
      pull(gene) %>%
      head() %>%
      paste(collapse=", ")
  ))

# # Reorder columns
# PBMC_clean_scaled_UMAP_cluster_cell_type %>%
#   count(seurat_clusters, first.labels_cluster = first.labels)

saveRDS(PBMC_clean_scaled_UMAP_cluster_cell_type, "dev/PBMC_clean_scaled_UMAP_cluster_cell_type.rds")


================================================
FILE: dev/workflow_create_integrated_pbmc.R
================================================
# Article workflow

library(tidyverse)
library(Seurat)
library(SingleR)
library(plotly)
# library(future)
# plan(multisession, workers=10)
options(future.globals.maxSize = 50068 * 1024^2)
library(tidyseurat)
friendly_cols <- dittoSeq::dittoColors()

# PBMC = PBMC %>% 
#   select(1:11, -old.ident) %>%
#   mutate(sample = sprintf("./data/seurat/outs/%s", sample)) %>% 
#   mutate(Phase = sprintf("phase_%s", Phase))

PBMC <- readRDS("dev/PBMC.rds")

PBMC_clean_scaled <-
  PBMC_
  mutate(grouping = sample) %>%
  nest(sample_df = -grouping) %>%
  mutate(sample_df = map( sample_df,~  SCTransform(.x)))

my_features =
  PBMC_clean_scaled$sample_df %>%
  SelectIntegrationFeatures(nfeatures = 2000)

PBMC_integrated = 
  
  PBMC_clean_scaled$sample_df %>%
  PrepSCTIntegration(anchor.features = my_features) %>%
  FindIntegrationAnchors(
    normalization.method = "SCT",
    anchor.features = my_features
  ) %>%
  IntegrateData(normalization.method = "SCT")

PBMC_integrated %>% saveRDS("dev/PBMC_integrated.rds")

================================================
FILE: dev/workflow_figures.R
================================================
# Article workflow

library(tidyverse)
library(Seurat)
library(SingleR)
library(plotly)
library(tidyHeatmap)
library(ggalluvial)
library(ggplot2)
library(tidyseurat)
options(future.globals.maxSize = 50068 * 1024^2)

# Use colourblind-friendly colours
friendly_cols <- dittoSeq::dittoColors()

# Set theme
custom_theme <-
  list(
    scale_fill_manual(values = friendly_cols),
    scale_color_manual(values = friendly_cols),
    theme_bw() +
      theme(
        panel.border = element_blank(),
        axis.line = element_line(),
        panel.grid.major = element_line(size = 0.2),
        panel.grid.minor = element_line(size = 0.1),
        text = element_text(size = 9),
        legend.position = "bottom",
        strip.background = element_blank(),
        axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)
      )
  )


PBMC_clean_scaled_UMAP_cluster_cell_type <- readRDS("dev/PBMC_tidy_clean_scaled_UMAP_cluster_cell_type.rds")

p1 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  pivot_longer(
    c(mito.fraction, S.Score, G2M.Score), 
    names_to="property", 
    values_to="Value"
  ) %>%
  mutate(property =  factor(property, levels = c("mito.fraction", "G2M.Score", "S.Score"))) %>%
  ggplot(aes(sample, Value)) + 
  geom_boxplot(outlier.size = 0.5 ) + 
  facet_wrap(~property, scales = "free_y" ) +
  custom_theme +
  theme(aspect.ratio=1)

p2 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  sample_n(20000) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=seurat_clusters)) +
  geom_point(size=0.05, alpha=0.2) +
  custom_theme +
  theme(aspect.ratio=1)

PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  sample_n(20000) %>%
  plot_ly(
    x = ~`UMAP_1`,
    y = ~`UMAP_2`,
    z = ~`UMAP_3`,
    color = ~seurat_clusters,
    colors = friendly_cols[1:24],sizes = 50, size = 1
  )

markers = readRDS("dev/PBMC_marker_df.rds")

p3 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  arrange(first.labels) %>%
  mutate(seurat_clusters = fct_inorder(seurat_clusters)) %>%
  join_features(features=c("CD3D", "HLA-DRB1")) %>%
  ggplot(aes(y=seurat_clusters , x=.abundance_SCT, fill=first.labels)) +
  geom_density_ridges(bandwidth = 0.2) +
  facet_wrap(~ .feature, nrow = 2) +
  coord_flip() +
  custom_theme

# Plot heatmap
p4 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  sample_n(2000) %>%
  DoHeatmap(
    features = markers$gene,
    group.colors = friendly_cols
  )

p5 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  sample_n(1000) %>%
  join_features(features=markers$gene) %>%
  mutate(seurat_clusters = as.integer(seurat_clusters)) %>%
  filter(seurat_clusters<10) %>%
  group_by(seurat_clusters) %>%
  
  # Plot heatmap
  heatmap(
    .row = .feature,
    .column = .cell, 
    .value = .abundance_SCT, 
    palette_grouping = list(rep("black",9)), 
    palette_value = circlize::colorRamp2(c(-1.5, 0, 1.5), c("purple", "black", "yellow")),
    
    # ComplexHeatmap parameters
    row_gap = unit(0.1, "mm"), column_gap = unit(0.1, "mm")
  ) %>%
    
  # Add annotation
  add_tile(sample, palette = friendly_cols[1:7]) %>%
  add_point(PC_1) 
  
p6 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  tidyseurat::unite("cluster_cell_type", c(first.labels, seurat_clusters), remove=FALSE) %>%
  pivot_longer(
    c(seurat_clusters, first.labels_single),
    names_to = "classification", values_to = "value"
  ) %>%
  
  ggplot(aes(x = classification, stratum = value, alluvium = cell,
           fill = first.labels, label = value)) +
  scale_x_discrete(expand = c(1, 1)) +
  geom_flow() +
  geom_stratum(alpha = .5) +
  # geom_text(stat = "stratum", size = 3) +
  geom_text_repel(stat = "stratum", size = 3,
             nudge_x      = 0.05,
             direction    = "y",
             angle        = 0,
             vjust        = 0,
             segment.size = 0.2
         ) +
  scale_fill_manual(values = friendly_cols) +
  #guides(fill = FALSE) +
  coord_flip() +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.line = element_line(),
    panel.grid.major = element_line(size = 0.2),
    panel.grid.minor = element_line(size = 0.1),
    text = element_text(size = 9),
    legend.position = "bottom",
    strip.background = element_blank(),
    axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
    axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
    axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)
  )

ggsave("dev/summary_statistics.pdf", p1,  device = "pdf", width = 183/3, height = 50, units = "mm", useDingbats=FALSE)
ggsave("dev/UMAP_2D.pdf", p2,  device = "pdf", width = 89, height = 100, units = "mm", useDingbats=FALSE)
ggsave("dev/violin.pdf", p3,  device = "pdf", width = 89, height = 100, units = "mm", useDingbats=FALSE)
save_pdf(p5, filename = "dev/UMAPheatmap.pdf", width = 183+50, height = 150, units = "mm")
ggsave("dev/alluvial.pdf", p6,  device = "pdf", width = 89, height = 100, units = "mm", useDingbats=FALSE)


================================================
FILE: inst/CITATION
================================================
citHeader("To cite tidyseurat in publications use:")

 bibentry(
   bibtype = "Article",
   title = "Interfacing Seurat with the R tidy universe",
   author = as.person(" Stefano Mangiola [aut], Maria A Doyle [aut], Anthony T Papenfuss Anthony [aut]"),
   journal  = "Bioinformatics",
   year = "2021",
   volume   = "btab404",
   publisher = "Oxford Press",
   url = "https://doi.org/10.1093/bioinformatics/btab404"
 )


================================================
FILE: inst/NEWS.rd
================================================
\name{NEWS}
\title{News for Package \pkg{tidyseurat}}

\section{Changes in version 0.8.9}{
\itemize{
    \item CRAN fix: \code{add_count()} now uses \code{count(..., .add = TRUE)} instead of \code{dplyr::add_count()}, avoiding the defunct \code{.drop} argument (dplyr 1.0.0+).
}}

\section{Changes in version 0.8.8}{
\itemize{
    \item Removed deprecated \code{.drop} argument from \code{add_count.Seurat()} to align with dplyr's API changes
    \item Added generic methods for \code{add_count()} including a default method
}}

\section{Changes in version 0.5.1, Development}{
\itemize{
    \item Change default shape parameter in join_features() and join_transcripts() from "long" to "wide", resulting in a return type of Seurat by default
    \item Update documentation and tests accordingly
}}

\section{Changes in version 0.5.0, CRAN Release}{
\itemize{
    \item Rely of ttservice package for shared function with tidySingleCellExperiment to avoid clash
    \item Use .cell for cell column name to avoid errors when cell column is defined by the user
}}


================================================
FILE: man/add_class.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{add_class}
\alias{add_class}
\title{Add class to abject}
\usage{
add_class(var, name)
}
\arguments{
\item{var}{A tibble}

\item{name}{A character name of the attribute}
}
\value{
A tibble with an additional attribute
}
\description{
Add class to abject
}
\keyword{internal}


================================================
FILE: man/aggregate_cells.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{aggregate_cells}
\alias{aggregate_cells}
\alias{aggregate_cells,Seurat-method}
\title{Aggregate cells}
\usage{
\S4method{aggregate_cells}{Seurat}(
  .data,
  .sample = NULL,
  slot = "data",
  assays = NULL,
  aggregation_function = Matrix::rowSums,
  ...
)
}
\arguments{
\item{.data}{A tidyseurat object}

\item{.sample}{A vector of variables by which cells are aggregated}

\item{slot}{The slot to which the function is applied}

\item{assays}{The assay to which the function is applied}

\item{aggregation_function}{The method of cell-feature value aggregation}

\item{...}{Used for future extendibility}
}
\value{
A tibble object
}
\description{
Combine cells into groups based on shared variables and aggregate feature counts.
}
\examples{
data(pbmc_small)
pbmc_small_pseudo_bulk <- pbmc_small |>
  aggregate_cells(c(groups, letter.idents), assays="RNA")

}


================================================
FILE: man/arrange.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{arrange}
\alias{arrange}
\alias{arrange.Seurat}
\title{Order rows using column values}
\usage{
\method{arrange}{Seurat}(.data, ..., .by_group = FALSE)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variables, or
functions of variables. Use \code{\link[dplyr:desc]{desc()}} to sort a variable in descending
order.}

\item{.by_group}{If \code{TRUE}, will sort first by grouping variable. Applies to
grouped data frames only.}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item All rows appear in the output, but (usually) in a different place.
\item Columns are not modified.
\item Groups are not modified.
\item Data frame attributes are preserved.
}
}
\description{
\code{arrange()} orders the rows of a data frame by the values of selected
columns.

Unlike other dplyr verbs, \code{arrange()} largely ignores grouping; you
need to explicitly mention grouping variables (or use  \code{.by_group = TRUE})
in order to group by them, and functions of variables are evaluated
once per data frame, not once per group.
}
\details{
\subsection{Missing values}{

Unlike base sorting with \code{sort()}, \code{NA} are:
\itemize{
\item always sorted to the end for local data, even when wrapped with \code{desc()}.
\item treated differently for remote data, depending on the backend.
}
}
}
\section{Methods}{


This function is a \strong{generic}, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

The following methods are currently available in loaded packages:
\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("arrange")}.

}

\examples{
data(pbmc_small)
pbmc_small |>
    arrange(nFeature_RNA)

}
\seealso{
Other single table verbs: 
\code{\link{mutate}()},
\code{\link{rename}()},
\code{\link{slice}()},
\code{\link{summarise}()}
}
\concept{single table verbs}


================================================
FILE: man/as_tibble.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tibble_methods.R
\name{as_tibble}
\alias{as_tibble}
\alias{as_tibble.Seurat}
\title{Coerce lists, matrices, and more to data frames}
\usage{
\method{as_tibble}{Seurat}(
  x,
  ...,
  .name_repair = c("check_unique", "unique", "universal", "minimal"),
  rownames = NULL
)
}
\arguments{
\item{x}{A data frame, list, matrix, or other object that could reasonably be
coerced to a tibble.}

\item{...}{Unused, for extensibility.}

\item{.name_repair}{Treatment of problematic column names:
\itemize{
\item \code{"minimal"}: No name repair or checks, beyond basic existence,
\item \code{"unique"}: Make sure names are unique and not empty,
\item \code{"check_unique"}: (default value), no name repair, but check they are
\code{unique},
\item \code{"universal"}: Make the names \code{unique} and syntactic
\item \code{"unique_quiet"}: Same as \code{"unique"}, but "quiet"
\item \code{"universal_quiet"}: Same as \code{"universal"}, but "quiet"
\item a function: apply custom name repair (e.g., \code{.name_repair = make.names}
for names in the style of base R).
\item A purrr-style anonymous function, see \code{\link[rlang:as_function]{rlang::as_function()}}
}

This argument is passed on as \code{repair} to \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
See there for more details on these terms and the strategies used
to enforce them.}

\item{rownames}{How to treat existing row names of a data frame or matrix:
\itemize{
\item \code{NULL}: remove row names. This is the default.
\item \code{NA}: keep row names.
\item A string: the name of a new column. Existing rownames are transferred
into this column and the \code{row.names} attribute is deleted.
No name repair is applied to the new column name, even if \code{x} already contains
a column of that name.
Use \code{as_tibble(rownames_to_column(...))} to safeguard against this case.
}

Read more in \link[tibble]{rownames}.}
}
\value{
`tibble`
}
\description{
\code{as_tibble()} turns an existing object, such as a data frame or
matrix, into a so-called tibble, a data frame with class \code{\link[tibble]{tbl_df}}. This is
in contrast with \code{\link[tibble:tibble]{tibble()}}, which builds a tibble from individual columns.
\code{as_tibble()} is to \code{\link[tibble:tibble]{tibble()}} as \code{\link[base:as.data.frame]{base::as.data.frame()}} is to
\code{\link[base:data.frame]{base::data.frame()}}.

\code{as_tibble()} is an S3 generic, with methods for:
\itemize{
\item \code{\link[base:data.frame]{data.frame}}: Thin wrapper around the \code{list} method
that implements tibble's treatment of \link[tibble]{rownames}.
\item \code{\link[base:matrix]{matrix}}, \code{\link[stats:poly]{poly}},
\code{\link[stats:ts]{ts}}, \code{\link[base:table]{table}}
\item Default: Other inputs are first coerced with \code{\link[base:as.data.frame]{base::as.data.frame()}}.
}

\code{as_tibble_row()} converts a vector to a tibble with one row.
If the input is a list, all elements must have size one.

\code{as_tibble_col()} converts a vector to a tibble with one column.
}
\section{Row names}{


The default behavior is to silently remove row names.

New code should explicitly convert row names to a new column using the
\code{rownames} argument.

For existing code that relies on the retention of row names, call
\code{pkgconfig::set_config("tibble::rownames" = NA)} in your script or in your
package's \code{\link[=.onLoad]{.onLoad()}}  function.

}

\section{Life cycle}{


Using \code{as_tibble()} for vectors is superseded as of version 3.0.0,
prefer the more expressive \code{as_tibble_row()} and
\code{as_tibble_col()} variants for new code.

}

\examples{
data(pbmc_small)
pbmc_small |> as_tibble()

}
\seealso{
\code{\link[tibble:tibble]{tibble()}} constructs a tibble from individual columns. \code{\link[tibble:enframe]{enframe()}}
converts a named vector to a tibble with a column of names and column of
values. Name repair is implemented using \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}.
}


================================================
FILE: man/bind_rows.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{bind_rows}
\alias{bind_rows}
\alias{bind_rows.Seurat}
\alias{bind_cols.Seurat}
\alias{bind_cols}
\title{Efficiently bind multiple data frames by row and column}
\usage{
\method{bind_rows}{Seurat}(..., .id = NULL, add.cell.ids = NULL)

\method{bind_cols}{Seurat}(..., .id = NULL)
}
\arguments{
\item{...}{Data frames to combine.

  Each argument can either be a data frame, a list that could be a data
  frame, or a list of data frames.

  When row-binding, columns are matched by name, and any missing
  columns will be filled with NA.

  When column-binding, rows are matched by position, so all data
  frames must have the same number of rows. To match by value, not
  position, see mutate-joins.}

\item{.id}{Data frame identifier.

  When `.id` is supplied, a new column of identifiers is
  created to link each row to its original data frame. The labels
  are taken from the named arguments to `bind_rows()`. When a
  list of data frames is supplied, the labels are taken from the
  names of the list. If no names are found a numeric sequence is
  used instead.}

\item{add.cell.ids}{from Seurat 3.0 A character vector of length(x = c(x, y)). Appends the corresponding values to the start of each objects' cell names.}
}
\value{
`bind_rows()` and `bind_cols()` return the same type as
  the first input, either a data frame, `tbl_df`, or `grouped_df`.

`bind_rows()` and `bind_cols()` return the same type as
  the first input, either a data frame, `tbl_df`, or `grouped_df`.
}
\description{
This is an efficient implementation of the common pattern of
`do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many
data frames into one.

This is an efficient implementation of the common pattern of
`do.call(rbind, dfs)` or `do.call(cbind, dfs)` for binding many
data frames into one.
}
\details{
The output of `bind_rows()` will contain a column if that column
appears in any of the inputs.

The output of `bind_rows()` will contain a column if that column
appears in any of the inputs.
}
\examples{
data(pbmc_small)
tt <- pbmc_small
ttservice::bind_rows(tt, tt)

tt_bind <- tt |> select(nCount_RNA ,nFeature_RNA)
tt |> ttservice::bind_cols(tt_bind)

}


================================================
FILE: man/cell_type_df.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\docType{data}
\name{cell_type_df}
\alias{cell_type_df}
\title{Cell types of 80 PBMC single cells}
\format{
A tibble containing 80 rows and 2 columns.
  Cells are a subsample of the Peripheral Blood Mononuclear Cells (PBMC) 
  dataset of 2,700 single cell. Cell types were identified with SingleR.
\describe{
  \item{cell}{cell identifier, barcode}
  \item{first.labels}{cell type}
}
}
\source{
\url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html}
}
\usage{
data(cell_type_df)
}
\value{
`tibble`
}
\description{
A dataset containing the barcodes and cell types of 80 PBMC single cells.
}
\keyword{datasets}


================================================
FILE: man/count.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{count}
\alias{count}
\alias{count.Seurat}
\alias{add_count}
\alias{add_count.default}
\alias{add_count.Seurat}
\title{Count observations by group}
\usage{
\method{count}{Seurat}(
  x,
  ...,
  wt = NULL,
  sort = FALSE,
  name = NULL,
  .drop = group_by_drop_default(x)
)

add_count(x, ..., wt = NULL, sort = FALSE, name = NULL)

\method{add_count}{default}(x, ..., wt = NULL, sort = FALSE, name = NULL)

\method{add_count}{Seurat}(x, ..., wt = NULL, sort = FALSE, name = NULL)
}
\arguments{
\item{x}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr).}

\item{...}{<[`data-masking`][dplyr_data_masking]> Variables to group by.}

\item{wt}{<[`data-masking`][dplyr_data_masking]> Frequency weights.
  Can be `NULL` or a variable:

  * If `NULL` (the default), counts the number of rows in each group.
  * If a variable, computes `sum(wt)` for each group.}

\item{sort}{If `TRUE`, will show the largest groups at the top.}

\item{name}{The name of the new column in the output.

  If omitted, it will default to `n`. If there's already a column called `n`,
  it will error, and require you to specify the name.}

\item{.drop}{For `count()`: if `FALSE` will include counts for empty groups
(i.e. for levels of factors that don't exist in the data).}
}
\value{
An object of the same type as `.data`. `count()` and `add_count()`
group transiently, so the output has the same groups as the input.
}
\description{
`count()` lets you quickly count the unique values of one or more variables:
`df %>% count(a, b)` is roughly equivalent to
`df %>% group_by(a, b) %>% summarise(n = n())`.
`count()` is paired with `tally()`, a lower-level helper that is equivalent
to `df %>% summarise(n = n())`. Supply `wt` to perform weighted counts,
switching the summary from `n = n()` to `n = sum(wt)`.

`add_count()` and `add_tally()` are equivalents to `count()` and `tally()`
but use `mutate()` instead of `summarise()` so that they add a new column
with group-wise counts.
}
\examples{
data(pbmc_small)
pbmc_small |> count(groups)
    
}


================================================
FILE: man/distinct.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{distinct}
\alias{distinct}
\alias{distinct.Seurat}
\title{Keep distinct/unique rows}
\usage{
\method{distinct}{Seurat}(.data, ..., .keep_all = FALSE)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Optional variables to
use when determining uniqueness. If there are multiple rows for a given
combination of inputs, only the first row will be preserved. If omitted,
will use all variables in the data frame.}

\item{.keep_all}{If \code{TRUE}, keep all variables in \code{.data}.
If a combination of \code{...} is not distinct, this keeps the
first row of values.}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item Rows are a subset of the input but appear in the same order.
\item Columns are not modified if \code{...} is empty or \code{.keep_all} is \code{TRUE}.
Otherwise, \code{distinct()} first calls \code{mutate()} to create new columns.
\item Groups are not modified.
\item Data frame attributes are preserved.
}
}
\description{
Keep only unique/distinct rows from a data frame. This is similar
to \code{\link[=unique.data.frame]{unique.data.frame()}} but considerably faster.
}
\section{Methods}{


This function is a \strong{generic}, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

The following methods are currently available in loaded packages:
\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("distinct")}.

}

\examples{
data("pbmc_small")
pbmc_small |> distinct(groups)

}


================================================
FILE: man/drop_class.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{drop_class}
\alias{drop_class}
\title{Remove class to abject}
\usage{
drop_class(var, name)
}
\arguments{
\item{var}{A tibble}

\item{name}{A character name of the class}
}
\value{
A tibble with an additional attribute
}
\description{
Remove class to abject
}
\keyword{internal}


================================================
FILE: man/extract.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidyr_methods.R
\name{extract}
\alias{extract}
\alias{extract.Seurat}
\title{Extract a character column into multiple columns using regular
expression groups}
\usage{
\method{extract}{Seurat}(
  data,
  col,
  into,
  regex = "([[:alnum:]]+)",
  remove = TRUE,
  convert = FALSE,
  ...
)
}
\arguments{
\item{data}{A data frame.}

\item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.}

\item{into}{Names of new variables to create as character vector.
Use \code{NA} to omit the variable in the output.}

\item{regex}{A string representing a regular expression used to extract the
desired values. There should be one group (defined by \verb{()}) for each
element of \code{into}.}

\item{remove}{If \code{TRUE}, remove input column from output data frame.}

\item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with
\code{as.is = TRUE} on new columns. This is useful if the component
columns are integer, numeric or logical.

NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.}

\item{...}{Additional arguments passed on to methods.}
}
\value{
`tidyseurat`
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}

\code{extract()} has been superseded in favour of \code{\link[tidyr:separate_wider_regex]{separate_wider_regex()}}
because it has a more polished API and better handling of problems.
Superseded functions will not go away, but will only receive critical bug
fixes.

Given a regular expression with capturing groups, \code{extract()} turns
each group into a new column. If the groups don't match, or the input
is NA, the output will be NA.
}
\examples{
data(pbmc_small)
pbmc_small |>
  extract(groups, 
    into="g", 
    regex="g([0-9])", 
    convert=TRUE)

}
\seealso{
\code{\link[tidyr:separate]{separate()}} to split up by a separator.
}


================================================
FILE: man/filter.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{filter}
\alias{filter}
\alias{filter.Seurat}
\title{Keep or drop rows that match a condition}
\usage{
\method{filter}{Seurat}(.data, ..., .preserve = FALSE)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Expressions that
return a logical vector, defined in terms of the variables in \code{.data}. If
multiple expressions are included, they are combined with the \code{&} operator.
To combine expressions using \code{|} instead, wrap them in \code{\link[dplyr:when_any]{when_any()}}. Only
rows for which all expressions evaluate to \code{TRUE} are kept (for \code{filter()})
or dropped (for \code{filter_out()}).}

\item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the
resulting data, otherwise the grouping is kept as is.}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item Rows are a subset of the input, but appear in the same order.
\item Columns are not modified.
\item The number of groups may be reduced (if \code{.preserve} is not \code{TRUE}).
\item Data frame attributes are preserved.
}
}
\description{
These functions are used to subset a data frame, applying the expressions in
\code{...} to determine which rows should be kept (for \code{filter()}) or dropped (
for \code{filter_out()}).

Multiple conditions can be supplied separated by a comma. These will be
combined with the \code{&} operator. To combine comma separated conditions using
\code{|} instead, wrap them in \code{\link[dplyr:when_any]{when_any()}}.

Both \code{filter()} and \code{filter_out()} treat \code{NA} like \code{FALSE}. This subtle
behavior can impact how you write your conditions when missing values are
involved. See the section on \verb{Missing values} for important details and
examples.
}
\section{Missing values}{


Both \code{filter()} and \code{filter_out()} treat \code{NA} like \code{FALSE}. This results in
the following behavior:
\itemize{
\item \code{filter()} \emph{drops} both \code{NA} and \code{FALSE}.
\item \code{filter_out()} \emph{keeps} both \code{NA} and \code{FALSE}.
}

This means that \verb{filter(data, <conditions>) + filter_out(data, <conditions>)}
captures every row within \code{data} exactly once.

The \code{NA} handling of these functions has been designed to match your
\emph{intent}. When your intent is to keep rows, use \code{filter()}. When your intent
is to drop rows, use \code{filter_out()}.

For example, if your goal with this \code{cars} data is to "drop rows where the
\code{class} is suv", then you might write this in one of two ways:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{cars <- tibble(class = c("suv", NA, "coupe"))
cars
#> # A tibble: 3 x 1
#>   class
#>   <chr>
#> 1 suv  
#> 2 <NA> 
#> 3 coupe
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode r">}}\preformatted{cars |> filter(class != "suv")
#> # A tibble: 1 x 1
#>   class
#>   <chr>
#> 1 coupe
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode r">}}\preformatted{cars |> filter_out(class == "suv")
#> # A tibble: 2 x 1
#>   class
#>   <chr>
#> 1 <NA> 
#> 2 coupe
}\if{html}{\out{</div>}}

Note how \code{filter()} drops the \code{NA} rows even though our goal was only to drop
\code{"suv"} rows, but \code{filter_out()} matches our intuition.

To generate the correct result with \code{filter()}, you'd need to use:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{cars |> filter(class != "suv" | is.na(class))
#> # A tibble: 2 x 1
#>   class
#>   <chr>
#> 1 <NA> 
#> 2 coupe
}\if{html}{\out{</div>}}

This quickly gets unwieldy when multiple conditions are involved.

In general, if you find yourself:
\itemize{
\item Using "negative" operators like \code{!=} or \code{!}
\item Adding in \code{NA} handling like \verb{| is.na(col)} or \verb{& !is.na(col)}
}

then you should consider if swapping to the other filtering variant would
make your conditions simpler.
\subsection{Comparison to base subsetting}{

Base subsetting with \code{[} doesn't treat \code{NA} like \code{TRUE} or \code{FALSE}. Instead,
it generates a fully missing row, which is different from how both \code{filter()}
and \code{filter_out()} work.

\if{html}{\out{<div class="sourceCode r">}}\preformatted{cars <- tibble(class = c("suv", NA, "coupe"), mpg = c(10, 12, 14))
cars
#> # A tibble: 3 x 2
#>   class   mpg
#>   <chr> <dbl>
#> 1 suv      10
#> 2 <NA>     12
#> 3 coupe    14
}\if{html}{\out{</div>}}

\if{html}{\out{<div class="sourceCode r">}}\preformatted{cars[cars$class == "suv",]
#> # A tibble: 2 x 2
#>   class   mpg
#>   <chr> <dbl>
#> 1 suv      10
#> 2 <NA>     NA

cars |> filter(class == "suv")
#> # A tibble: 1 x 2
#>   class   mpg
#>   <chr> <dbl>
#> 1 suv      10
}\if{html}{\out{</div>}}
}

}

\section{Useful filter functions}{


There are many functions and operators that are useful when constructing the
expressions used to filter the data:
\itemize{
\item \code{\link{==}}, \code{\link{>}}, \code{\link{>=}} etc
\item \code{\link{&}}, \code{\link{|}}, \code{\link{!}}, \code{\link[=xor]{xor()}}
\item \code{\link[=is.na]{is.na()}}
\item \code{\link[dplyr:between]{between()}}, \code{\link[dplyr:near]{near()}}
\item \code{\link[dplyr:when_any]{when_any()}}, \code{\link[dplyr:when_all]{when_all()}}
}

}

\section{Grouped tibbles}{


Because filtering expressions are computed within groups, they may yield
different results on grouped tibbles. This will be the case as soon as an
aggregating, lagging, or ranking function is involved. Compare this ungrouped
filtering:

\if{html}{\out{<div class="sourceCode">}}\preformatted{starwars |> filter(mass > mean(mass, na.rm = TRUE))
}\if{html}{\out{</div>}}

With the grouped equivalent:

\if{html}{\out{<div class="sourceCode">}}\preformatted{starwars |> filter(mass > mean(mass, na.rm = TRUE), .by = gender)
}\if{html}{\out{</div>}}

In the ungrouped version, \code{filter()} compares the value of \code{mass} in each row
to the global average (taken over the whole data set), keeping only the rows
with \code{mass} greater than this global average. In contrast, the grouped
version calculates the average mass separately for each \code{gender} group, and
keeps rows with \code{mass} greater than the relevant within-gender average.

}

\section{Methods}{


This function is a \strong{generic}, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

The following methods are currently available in loaded packages:
\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("filter")}.

}

\examples{
data("pbmc_small")
pbmc_small |>  filter(groups == "g1")

# Learn more in ?dplyr_eval

}
\seealso{
Other single table verbs: 
\code{\link[dplyr]{arrange}()},
\code{\link[dplyr]{mutate}()},
\code{\link[dplyr]{reframe}()},
\code{\link[dplyr]{rename}()},
\code{\link[dplyr]{select}()},
\code{\link[dplyr]{slice}()},
\code{\link[dplyr]{summarise}()}
}


================================================
FILE: man/formatting.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print_method.R
\name{formatting}
\alias{formatting}
\alias{print.Seurat}
\alias{print}
\title{Printing tibbles}
\usage{
\method{print}{Seurat}(x, ..., n = NULL, width = NULL, n_extra = NULL)
}
\arguments{
\item{x}{Object to format or print.}

\item{...}{Passed on to \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.}

\item{n}{Number of rows to show. If \code{NULL}, the default, will print all rows
if less than the \code{print_max} \link[pillar:pillar_options]{option}.
Otherwise, will print as many rows as specified by the
\code{print_min} \link[pillar:pillar_options]{option}.}

\item{width}{Width of text output to generate. This defaults to \code{NULL}, which
means use the \code{width} \link[pillar:pillar_options]{option}.}

\item{n_extra}{Number of extra columns to print abbreviated information for,
if the width is too small for the entire tibble. If `NULL`, the default,
will print information about at most `tibble.max_extra_cols` extra columns.}
}
\value{
Prints a message to the console describing
  the contents of the `tidyseurat`.
}
\description{
One of the main features of the \code{tbl_df} class is the printing:
\itemize{
\item Tibbles only print as many rows and columns as fit on one screen,
supplemented by a summary of the remaining rows and columns.
\item Tibble reveals the type of each column, which keeps the user informed about
whether a variable is, e.g., \verb{<chr>} or \verb{<fct>} (character versus factor).
See \code{vignette("types")} for an overview of common
type abbreviations.
}

Printing can be tweaked for a one-off call by calling \code{print()} explicitly
and setting arguments like \code{n} and \code{width}. More persistent control is
available by setting the options described in \link[pillar:pillar_options]{pillar::pillar_options}.
See also \code{vignette("digits")} for a comparison to base options,
and \code{vignette("numbers")} that showcases \code{\link[tibble:num]{num()}} and \code{\link[tibble:char]{char()}}
for creating columns with custom formatting options.

As of tibble 3.1.0, printing is handled entirely by the \pkg{pillar} package.
If you implement a package that extends tibble,
the printed output can be customized in various ways.
See \code{vignette("extending", package = "pillar")} for details,
and \link[pillar:pillar_options]{pillar::pillar_options} for options that control the display in the console.
}
\examples{
data(pbmc_small)
print(pbmc_small)

}


================================================
FILE: man/fragments/intro.Rmd
================================================

**Brings Seurat to the tidyverse!**

website: [stemangiola.github.io/tidyseurat/](https://stemangiola.github.io/tidyseurat/)

Please also have a look at

- [tidyseurat](https://stemangiola.github.io/tidyseurat/) for tidy single-cell RNA sequencing analysis
- [tidySummarizedExperiment](https://tidyomics.github.io/tidySummarizedExperiment/) for tidy bulk RNA sequencing analysis
- [tidybulk](https://tidyomics.github.io/tidybulk/) for tidy bulk RNA-seq analysis
- [tidygate](https://github.com/stemangiola/tidygate/) for adding custom gate information to your tibble
- [tidyHeatmap](https://stemangiola.github.io/tidyHeatmap/) for heatmaps produced with tidy principles


```{r, echo=FALSE, include=FALSE, }
library(knitr)
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
```

![visual cue](`r visual_cue`)

# Introduction

tidyseurat provides a bridge between the Seurat single-cell package [@butler2018integrating; @stuart2019comprehensive] and the tidyverse [@wickham2019welcome]. It creates an invisible layer that enables viewing the
Seurat object as a tidyverse tibble, and provides Seurat-compatible *dplyr*, *tidyr*, *ggplot* and *plotly* functions.


## Functions/utilities available

Seurat-compatible Functions | Description
------------ | -------------
`all` | 

tidyverse Packages | Description
------------ | -------------
`dplyr` | All `dplyr` APIs like for any tibble
`tidyr` | All `tidyr` APIs like for any tibble
`ggplot2` | `ggplot` like for any tibble
`plotly` | `plot_ly` like for any tibble

Utilities | Description
------------ | -------------
`tidy` | Add `tidyseurat` invisible layer over a Seurat object
`as_tibble` | Convert cell-wise information to a `tbl_df`
`join_features` | Add feature-wise information, returns a `tbl_df`
`aggregate_cells`| Aggregate cell gene-transcription abundance as pseudobulk tissue                  |

## Installation

From CRAN
```{r eval=FALSE}
install.packages("tidyseurat")
```

From Github (development)
```{r, eval=FALSE}
devtools::install_github("stemangiola/tidyseurat")
```

```{r}
library(dplyr)
library(tidyr)
library(purrr)
library(magrittr)
library(ggplot2)
library(Seurat)
library(tidyseurat)
```


## Create `tidyseurat`, the best of both worlds!

This is a seurat object but it is evaluated as tibble. So it is fully compatible both with Seurat and tidyverse APIs.

```{r}
pbmc_small = SeuratObject::pbmc_small
```

**It looks like a tibble**

```{r}
pbmc_small
```

**But it is a Seurat object after all**

```{r}
pbmc_small@assays
```

# Preliminary plots

Set colours and theme for plots.

```{r}
# Use colourblind-friendly colours
friendly_cols <- c("#88CCEE", "#CC6677", "#DDCC77", "#117733", "#332288", "#AA4499", "#44AA99", "#999933", "#882255", "#661100", "#6699CC")

# Set theme
my_theme <-
  list(
    scale_fill_manual(values = friendly_cols),
    scale_color_manual(values = friendly_cols),
    theme_bw() +
      theme(
        panel.border = element_blank(),
        axis.line = element_line(),
        panel.grid.major = element_line(size = 0.2),
        panel.grid.minor = element_line(size = 0.1),
        text = element_text(size = 12),
        legend.position = "bottom",
        aspect.ratio = 1,
        strip.background = element_blank(),
        axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10))
      )
  )
```

We can treat `pbmc_small` effectively as a normal tibble for plotting.

Here we plot number of features per cell.

```{r plot1}
pbmc_small %>%
  ggplot(aes(nFeature_RNA, fill = groups)) +
  geom_histogram() +
  my_theme
```

Here we plot total features per cell.

```{r plot2}
pbmc_small %>%
  ggplot(aes(groups, nCount_RNA, fill = groups)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.1) +
  my_theme
```

Here we plot abundance of two features for each group.

```{r}
pbmc_small %>%
  join_features(features = c("HLA-DRA", "LYZ"), shape = "long") %>%
  ggplot(aes(groups, .abundance_RNA + 1, fill = groups)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(aes(size = nCount_RNA), alpha = 0.5, width = 0.2) +
  scale_y_log10() +
  my_theme
```

# Preprocess the dataset

Also you can treat the object as Seurat object and proceed with data processing.

```{r preprocess}
pbmc_small_pca <-
  pbmc_small %>%
  SCTransform(verbose = FALSE) %>%
  FindVariableFeatures(verbose = FALSE) %>%
  RunPCA(verbose = FALSE)

pbmc_small_pca
```


If a tool is not included in the tidyseurat collection, we can use `as_tibble` to permanently convert `tidyseurat` into tibble.

```{r pc_plot}
pbmc_small_pca %>%
  as_tibble() %>%
  select(contains("PC"), everything()) %>%
  GGally::ggpairs(columns = 1:5, ggplot2::aes(colour = groups)) +
  my_theme
```

# Identify clusters

We proceed with cluster identification with Seurat.

```{r cluster}
pbmc_small_cluster <-
  pbmc_small_pca %>%
  FindNeighbors(verbose = FALSE) %>%
  FindClusters(method = "igraph", verbose = FALSE)

pbmc_small_cluster
```

Now we can interrogate the object as if it was a regular tibble data frame.

```{r cluster count}
pbmc_small_cluster %>%
  count(groups, seurat_clusters)
```

We can identify cluster markers using Seurat.

<!-- If this is Seurat v4, comment out the v3 markers -->

`r if (packageVersion("Seurat") >= package_version("4.0.0")) {"<!--"}`

```{r markers_v3, eval=(packageVersion("Seurat") < package_version("4.0.0"))}
# Identify top 10 markers per cluster
markers <-
  pbmc_small_cluster %>%
  mutate(orig.ident = seurat_clusters) %>% 
  FindAllMarkers(only.pos = TRUE) %>%
  group_by(cluster) %>%
  top_n(10, avg_logFC)

# Plot heatmap
pbmc_small_cluster %>%
  DoHeatmap(
    features = markers$gene,
    group.colors = friendly_cols
  )
```

`r if (packageVersion("Seurat") >= package_version("4.0.0")) {"-->"}`

<!-- If this is Seurat v3, comment out the v4 markers -->

`r if (packageVersion("Seurat") < package_version("4.0.0")) {"<!--"}`

```{r markers_v4, eval=FALSE}
# Identify top 10 markers per cluster
markers <-
  pbmc_small_cluster %>%
  FindAllMarkers(only.pos = TRUE, min.pct = 0.25, thresh.use = 0.25) %>%
  group_by(cluster) %>%
  top_n(10, avg_log2FC)

# Plot heatmap
pbmc_small_cluster %>%
  DoHeatmap(
    features = markers$gene,
    group.colors = friendly_cols
  )
```

`r if (packageVersion("Seurat") < package_version("4.0.0")) {"-->"}`

# Reduce dimensions

We can calculate the first 3 UMAP dimensions using the Seurat framework.

```{r umap, eval=FALSE}
pbmc_small_UMAP <-
  pbmc_small_cluster %>%
  RunUMAP(reduction = "pca", dims = 1:15, n.components = 3L)
```

And we can plot them using 3D plot using plotly.

```{r umap plot, eval=FALSE}
pbmc_small_UMAP %>%
  plot_ly(
    x = ~`UMAP_1`,
    y = ~`UMAP_2`,
    z = ~`UMAP_3`,
    color = ~seurat_clusters,
    colors = friendly_cols[1:4]
  )
```
![screenshot plotly](`r screenshot`)


## Cell type prediction

We can infer cell type identities using *SingleR* [@aran2019reference] and manipulate the output using tidyverse.

```{r eval=FALSE}
# Get cell type reference data
blueprint <- celldex::BlueprintEncodeData()

# Infer cell identities
cell_type_df <-
  GetAssayData(pbmc_small_UMAP, slot = 'counts', assay = "SCT") %>%
  log1p() %>%
  Matrix::Matrix(sparse = TRUE) %>%
  SingleR::SingleR(
    ref = blueprint,
    labels = blueprint$label.main,
    method = "single"
  ) %>%
  as.data.frame() %>%
  as_tibble(rownames = "cell") %>%
  select(cell, first.labels)
```

```{r, eval=FALSE}
# Join UMAP and cell type info
pbmc_small_cell_type <-
  pbmc_small_UMAP %>%
  left_join(cell_type_df, by = "cell")

# Reorder columns
pbmc_small_cell_type %>%
  select(cell, first.labels, everything())
```

We can easily summarise the results. For example, we can see how cell type classification overlaps with cluster classification.

```{r, eval=FALSE}
pbmc_small_cell_type %>%
  count(seurat_clusters, first.labels)
```

We can easily reshape the data for building information-rich faceted plots.

```{r eval=FALSE}
pbmc_small_cell_type %>%

  # Reshape and add classifier column
  pivot_longer(
    cols = c(seurat_clusters, first.labels),
    names_to = "classifier", values_to = "label"
  ) %>%

  # UMAP plots for cell type and cluster
  ggplot(aes(UMAP_1, UMAP_2, color = label)) +
  geom_point() +
  facet_wrap(~classifier) +
  my_theme
```

We can easily plot gene correlation per cell category, adding multi-layer annotations.

```{r eval=FALSE}
pbmc_small_cell_type %>%

  # Add some mitochondrial abundance values
  mutate(mitochondrial = rnorm(n())) %>%

  # Plot correlation
  join_features(features = c("CST3", "LYZ"), shape = "wide") %>%
  ggplot(aes(CST3 + 1, LYZ + 1, color = groups, size = mitochondrial)) +
  geom_point() +
  facet_wrap(~first.labels, scales = "free") +
  scale_x_log10() +
  scale_y_log10() +
  my_theme
```

#  Nested analyses

A powerful tool we can use with tidyseurat is `nest`. We can easily perform independent analyses on subsets of the dataset. First we classify cell types in lymphoid and myeloid; then, nest based on the new classification

```{r eval=FALSE}
pbmc_small_nested <-
  pbmc_small_cell_type %>%
  filter(first.labels != "Erythrocytes") %>%
  mutate(cell_class = if_else(`first.labels` %in% c("Macrophages", "Monocytes"), "myeloid", "lymphoid")) %>%
  nest(data = -cell_class)

pbmc_small_nested
```

Now we can independently for the lymphoid and myeloid subsets (i) find variable features, (ii) reduce dimensions, and (iii) cluster using both tidyverse and Seurat seamlessly.

```{r eval=FALSE}
pbmc_small_nested_reanalysed <-
  pbmc_small_nested %>%
  mutate(data = map(
    data, ~ .x %>%
      FindVariableFeatures(verbose = FALSE) %>%
      RunPCA(npcs = 10, verbose = FALSE) %>%
      FindNeighbors(verbose = FALSE) %>%
      FindClusters(method = "igraph", verbose = FALSE) %>%
      RunUMAP(reduction = "pca", dims = 1:10, n.components = 3L, verbose = FALSE)
  ))

pbmc_small_nested_reanalysed
```

Now we can unnest and plot the new classification.

```{r eval=FALSE}
pbmc_small_nested_reanalysed %>%

  # Convert to tibble otherwise Seurat drops reduced dimensions when unifying data sets.
  mutate(data = map(data, ~ .x %>% as_tibble())) %>%
  unnest(data) %>%

  # Define unique clusters
  unite("cluster", c(cell_class, seurat_clusters), remove = FALSE) %>%

  # Plotting
  ggplot(aes(UMAP_1, UMAP_2, color = cluster)) +
  geom_point() +
  facet_wrap(~cell_class) +
  my_theme
```

#  Aggregating cells 

Sometimes, it is necessary to aggregate the gene-transcript abundance from a group of cells into a single value. For example, when comparing groups of cells across different samples with fixed-effect models.

In tidyseurat, cell aggregation can be achieved using the `aggregate_cells` function.
 
 
```{r, eval=FALSE}
pbmc_small %>%
  aggregate_cells(groups, assays = "RNA")
```

   
================================================
FILE: man/full_join.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{full_join}
\alias{full_join}
\alias{full_join.Seurat}
\title{Mutating joins}
\usage{
\method{full_join}{Seurat}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
}
\arguments{
\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
vector of variables to join by.

If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so
that you can check they're correct; suppress the message by supplying \code{by}
explicitly.

To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.

To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
multiple expressions. For example, \code{join_by(a == b, c == d)} will match
\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
\code{x} and \code{y}, you can shorten this by listing only the variable names, like
\code{join_by(a, c)}.

\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
these types of joins.

For simple equality joins, you can alternatively specify a character vector
of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.

To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
\code{\link[dplyr:cross_join]{cross_join()}}.}

\item{copy}{If \code{x} and \code{y} are not from the same data source,
and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
same src as \code{x}.  This allows you to join tables across srcs, but
it is a potentially expensive operation so you must opt into it.}

\item{suffix}{If there are non-joined duplicate variables in \code{x} and
\code{y}, these suffixes will be added to the output to disambiguate them.
Should be a character vector of length 2.}

\item{...}{Other parameters passed onto methods.}
}
\value{
An object of the same type as \code{x} (including the same groups). The order of
the rows and columns of \code{x} is preserved as much as possible. The output has
the following properties:
\itemize{
\item The rows are affect by the join type.
\itemize{
\item \code{inner_join()} returns matched \code{x} rows.
\item \code{left_join()} returns all \code{x} rows.
\item \code{right_join()}  returns matched of \code{x} rows, followed by unmatched \code{y} rows.
\item \code{full_join()}  returns all \code{x} rows, followed by unmatched \code{y} rows.
}
\item Output columns include all columns from \code{x} and all non-key columns from
\code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well.
\item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added
to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have
the same name, \code{suffix}es are added to disambiguate these as well.
\item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their
common type between \code{x} and \code{y}.
}
}
\description{
Mutating joins add columns from \code{y} to \code{x}, matching observations based on
the keys. There are four mutating joins: the inner join, and the three outer
joins.
\subsection{Inner join}{

An \code{inner_join()} only keeps observations from \code{x} that have a matching key
in \code{y}.

The most important property of an inner join is that unmatched rows in either
input are not included in the result. This means that generally inner joins
are not appropriate in most analyses, because it is too easy to lose
observations.
}

\subsection{Outer joins}{

The three outer joins keep observations that appear in at least one of the
data frames:
\itemize{
\item A \code{left_join()} keeps all observations in \code{x}.
\item A \code{right_join()} keeps all observations in \code{y}.
\item A \code{full_join()} keeps all observations in \code{x} and \code{y}.
}
}
}
\section{Many-to-many relationships}{


By default, dplyr guards against many-to-many relationships in equality joins
by throwing a warning. These occur when both of the following are true:
\itemize{
\item A row in \code{x} matches multiple rows in \code{y}.
\item A row in \code{y} matches multiple rows in \code{x}.
}

This is typically surprising, as most joins involve a relationship of
one-to-one, one-to-many, or many-to-one, and is often the result of an
improperly specified join. Many-to-many relationships are particularly
problematic because they can result in a Cartesian explosion of the number of
rows returned from the join.

If a many-to-many relationship is expected, silence this warning by
explicitly setting \code{relationship = "many-to-many"}.

In production code, it is best to preemptively set \code{relationship} to whatever
relationship you expect to exist between the keys of \code{x} and \code{y}, as this
forces an error to occur immediately if the data doesn't align with your
expectations.

Inequality joins typically result in many-to-many relationships by nature, so
they don't warn on them by default, but you should still take extra care when
specifying an inequality join, because they also have the capability to
return a large number of rows.

Rolling joins don't warn on many-to-many relationships either, but many
rolling joins follow a many-to-one relationship, so it is often useful to
set \code{relationship = "many-to-one"} to enforce this.

Note that in SQL, most database providers won't let you specify a
many-to-many relationship between two tables, instead requiring that you
create a third \emph{junction table} that results in two one-to-many relationships
instead.

}

\section{Methods}{


These functions are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
\item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
}

}

\examples{
data(pbmc_small)
tt <- pbmc_small
tt |> full_join(tibble::tibble(groups="g1", other=1:4))

}
\seealso{
Other joins: 
\code{\link[dplyr]{cross_join}()},
\code{\link[dplyr]{filter-joins}},
\code{\link[dplyr]{nest_join}()}
}


================================================
FILE: man/get_abundance_sc_long.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{get_abundance_sc_long}
\alias{get_abundance_sc_long}
\title{get abundance long}
\usage{
get_abundance_sc_long(
  .data,
  features = NULL,
  all = FALSE,
  exclude_zeros = FALSE,
  assay = Assays(.data),
  slot = "data"
)
}
\arguments{
\item{.data}{A tidyseurat}

\item{features}{A character}

\item{all}{A boolean}

\item{exclude_zeros}{A boolean}

\item{assay}{assay name to extract feature abundance}

\item{slot}{slot in the assay, e.g. `data` and `scale.data`}
}
\value{
A Seurat object
}
\description{
get abundance long
}
\examples{
data(pbmc_small)
pbmc_small \%>\%
  get_abundance_sc_long(features=c("HLA-DRA", "LYZ"))

}
\keyword{internal}


================================================
FILE: man/get_abundance_sc_wide.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{get_abundance_sc_wide}
\alias{get_abundance_sc_wide}
\title{get abundance wide}
\usage{
get_abundance_sc_wide(
  .data,
  features = NULL,
  all = FALSE,
  assay = .data@active.assay,
  slot = "data",
  prefix = ""
)
}
\arguments{
\item{.data}{A tidyseurat}

\item{features}{A character}

\item{all}{A boolean}

\item{assay}{assay name to extract feature abundance}

\item{slot}{slot in the assay, e.g. `data` and `scale.data`}

\item{prefix}{prefix for the feature names}
}
\value{
A Seurat object
}
\description{
get abundance wide
}
\examples{
data(pbmc_small)
pbmc_small \%>\%
  get_abundance_sc_wide(features=c("HLA-DRA", "LYZ"))

}
\keyword{internal}


================================================
FILE: man/ggplot.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/ggplot2_methods.R
\name{ggplot}
\alias{ggplot}
\alias{ggplot.Seurat}
\title{Create a new \code{ggplot} from a \code{tidyseurat}}
\usage{
\method{ggplot}{Seurat}(data = NULL, mapping = aes(), ..., environment = parent.frame())
}
\arguments{
\item{data}{Default dataset to use for plot. If not already a data.frame,
will be converted to one by \code{\link[ggplot2:fortify]{fortify()}}. If not specified,
must be supplied in each layer added to the plot.}

\item{mapping}{Default list of aesthetic mappings to use for plot.
If not specified, must be supplied in each layer added to the plot.}

\item{...}{Other arguments passed on to methods. Not currently used.}

\item{environment}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Used prior to tidy
evaluation.}
}
\value{
`ggplot`
}
\description{
\code{ggplot()} initializes a ggplot object. It can be used to
declare the input data frame for a graphic and to specify the
set of aesthetic mappings for the plot, intended to be common throughout all
subsequent layers unless specifically overridden.
}
\details{
\code{ggplot()} is used to construct the initial plot object,
and is almost always followed by a plus sign (\code{+}) to add
components to the plot.

There are three common patterns used to invoke \code{ggplot()}:
\itemize{
\item \verb{ggplot(data = df, mapping = aes(x, y, other aesthetics))}
\item \code{ggplot(data = df)}
\item \code{ggplot()}
}

The first pattern is recommended if all layers use the same
data and the same set of aesthetics, although this method
can also be used when adding a layer using data from another
data frame.

The second pattern specifies the default data frame to use
for the plot, but no aesthetics are defined up front. This
is useful when one data frame is used predominantly for the
plot, but the aesthetics vary from one layer to another.

The third pattern initializes a skeleton \code{ggplot} object, which
is fleshed out as layers are added. This is useful when
multiple data frames are used to produce different layers, as
is often the case in complex graphics.

The \verb{data =} and \verb{mapping =} specifications in the arguments are optional
(and are often omitted in practice), so long as the data and the mapping
values are passed into the function in the right order. In the examples
below, however, they are left in place for clarity.
}
\examples{
library(ggplot2)
data(pbmc_small)
pbmc_small |> 
  ggplot(aes(groups, nCount_RNA)) +
  geom_boxplot()

}
\seealso{
The \href{https://ggplot2-book.org/getting-started}{first steps chapter} of the online ggplot2 book.
}


================================================
FILE: man/glimpse.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tibble_methods.R
\name{glimpse}
\alias{glimpse}
\alias{glimpse.tidyseurat}
\title{Get a glimpse of your data}
\usage{
\method{glimpse}{tidyseurat}(x, width = NULL, ...)
}
\arguments{
\item{x}{An object to glimpse at.}

\item{width}{Width of output: defaults to the setting of the
\code{width} \link[pillar:pillar_options]{option} (if finite)
or the width of the console.}

\item{...}{Unused, for extensibility.}
}
\value{
x original x is (invisibly) returned, allowing \code{glimpse()} to be
used within a data pipe line.
}
\description{
\code{glimpse()} is like a transposed version of \code{print()}:
columns run down the page, and data runs across.
This makes it possible to see every column in a data frame.
It's a little like \code{\link[=str]{str()}} applied to a data frame
but it tries to show you as much data as possible.
(And it always shows the underlying data, even when applied
to a remote data source.)

See \code{\link[pillar:format_glimpse]{format_glimpse()}} for details on the formatting.
}
\section{S3 methods}{


\code{glimpse} is an S3 generic with a customised method for \code{tbl}s and
\code{data.frames}, and a default method that calls \code{\link[=str]{str()}}.

}

\examples{
data(pbmc_small)
pbmc_small |> glimpse()

}


================================================
FILE: man/group_by.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{group_by}
\alias{group_by}
\alias{group_by.Seurat}
\title{Group by one or more variables}
\usage{
\method{group_by}{Seurat}(.data, ..., .add = FALSE, .drop = group_by_drop_default(.data))
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> In \code{group_by()},
variables or computations to group by. Computations are always done on the
ungrouped data frame. To perform computations on the grouped data, you need
to use a separate \code{mutate()} step before the \code{group_by()}.
Computations are not allowed in \code{nest_by()}.
In \code{ungroup()}, variables to remove from the grouping.}

\item{.add}{When \code{FALSE}, the default, \code{group_by()} will
override existing groups. To add to the existing groups, use
\code{.add = TRUE}.}

\item{.drop}{Drop groups formed by factor levels that don't appear in the
data? The default is \code{TRUE} except when \code{.data} has been previously
grouped with \code{.drop = FALSE}. See \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} for details.}
}
\value{
A grouped data frame with class \code{\link[dplyr]{grouped_df}},
unless the combination of \code{...} and \code{add} yields a empty set of
grouping columns, in which case a tibble will be returned.
}
\description{
Most data operations are done on groups defined by variables.
\code{group_by()} takes an existing tbl and converts it into a grouped tbl
where operations are performed "by group". \code{ungroup()} removes grouping.
}
\section{Methods}{


These function are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{group_by()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("group_by")}.
\item \code{ungroup()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("ungroup")}.
}

}

\section{Ordering}{


Currently, \code{group_by()} internally orders the groups in ascending order. This
results in ordered output from functions that aggregate groups, such as
\code{\link[dplyr:summarise]{summarise()}}.

When used as grouping columns, character vectors are ordered in the C locale
for performance and reproducibility across R sessions. If the resulting
ordering of your grouped operation matters and is dependent on the locale,
you should follow up the grouped operation with an explicit call to
\code{\link[dplyr:arrange]{arrange()}} and set the \code{.locale} argument. For example:

\if{html}{\out{<div class="sourceCode">}}\preformatted{data |>
  group_by(chr) |>
  summarise(avg = mean(x)) |>
  arrange(chr, .locale = "en")
}\if{html}{\out{</div>}}

This is often useful as a preliminary step before generating content intended
for humans, such as an HTML table.
\subsection{Legacy behavior}{

\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}

Prior to dplyr 1.1.0, character vector grouping columns were ordered in the
system locale. Setting the global option \code{dplyr.legacy_locale} to \code{TRUE}
retains this legacy behavior, but this has been deprecated. Update existing
code to explicitly call \code{arrange(.locale = )} instead. Run
\code{Sys.getlocale("LC_COLLATE")} to determine your system locale, and compare
that against the list in \code{\link[stringi:stri_locale_list]{stringi::stri_locale_list()}} to find an appropriate
value for \code{.locale}, i.e. for American English, \code{"en_US"}.
}

}

\examples{
data("pbmc_small")
pbmc_small |>  group_by(groups)

}
\seealso{
Other grouping functions: 
\code{\link[dplyr]{group_map}()},
\code{\link[dplyr]{group_nest}()},
\code{\link[dplyr]{group_split}()},
\code{\link[dplyr]{group_trim}()}
}


================================================
FILE: man/group_split.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{group_split}
\alias{group_split}
\alias{group_split.Seurat}
\title{Split data frame by groups}
\usage{
\method{group_split}{Seurat}(.tbl, ..., .keep = TRUE)
}
\arguments{
\item{.tbl}{A tbl.}

\item{...}{If \code{.tbl} is an ungrouped data frame, a grouping specification,
forwarded to \code{\link[dplyr:group_by]{group_by()}}.}

\item{.keep}{Should the grouping columns be kept?}
}
\value{
A list of tibbles. Each tibble contains the rows of \code{.tbl} for the
associated group and all the columns, including the grouping variables.
Note that this returns a \link[vctrs:list_of]{list_of} which is slightly
stricter than a simple list but is useful for representing lists where
every element has the same type.
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}

\code{\link[dplyr:group_split]{group_split()}} works like \code{\link[base:split]{base::split()}} but:
\itemize{
\item It uses the grouping structure from \code{\link[dplyr:group_by]{group_by()}} and therefore is subject
to the data mask
\item It does not name the elements of the list based on the grouping as this
only works well for a single character grouping variable. Instead,
use \code{\link[dplyr:group_keys]{group_keys()}} to access a data frame that defines the groups.
}

\code{group_split()} is primarily designed to work with grouped data frames.
You can pass \code{...} to group and split an ungrouped data frame, but this
is generally not very useful as you want have easy access to the group
metadata.
}
\section{Lifecycle}{


\code{group_split()} is not stable because you can achieve very similar results by
manipulating the nested column returned from
\code{\link[tidyr:nest]{tidyr::nest(.by =)}}. That also retains the group keys all
within a single data structure. \code{group_split()} may be deprecated in the
future.

}

\examples{
data(pbmc_small)
pbmc_small |> group_split(groups)

}
\seealso{
Other grouping functions: 
\code{\link[dplyr]{group_by}()},
\code{\link[dplyr]{group_map}()},
\code{\link[dplyr]{group_nest}()},
\code{\link[dplyr]{group_trim}()}
}


================================================
FILE: man/inner_join.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{inner_join}
\alias{inner_join}
\alias{inner_join.Seurat}
\title{Mutating joins}
\usage{
\method{inner_join}{Seurat}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
}
\arguments{
\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
vector of variables to join by.

If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so
that you can check they're correct; suppress the message by supplying \code{by}
explicitly.

To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.

To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
multiple expressions. For example, \code{join_by(a == b, c == d)} will match
\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
\code{x} and \code{y}, you can shorten this by listing only the variable names, like
\code{join_by(a, c)}.

\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
these types of joins.

For simple equality joins, you can alternatively specify a character vector
of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.

To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
\code{\link[dplyr:cross_join]{cross_join()}}.}

\item{copy}{If \code{x} and \code{y} are not from the same data source,
and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
same src as \code{x}.  This allows you to join tables across srcs, but
it is a potentially expensive operation so you must opt into it.}

\item{suffix}{If there are non-joined duplicate variables in \code{x} and
\code{y}, these suffixes will be added to the output to disambiguate them.
Should be a character vector of length 2.}

\item{...}{Other parameters passed onto methods.}
}
\value{
An object of the same type as \code{x} (including the same groups). The order of
the rows and columns of \code{x} is preserved as much as possible. The output has
the following properties:
\itemize{
\item The rows are affect by the join type.
\itemize{
\item \code{inner_join()} returns matched \code{x} rows.
\item \code{left_join()} returns all \code{x} rows.
\item \code{right_join()}  returns matched of \code{x} rows, followed by unmatched \code{y} rows.
\item \code{full_join()}  returns all \code{x} rows, followed by unmatched \code{y} rows.
}
\item Output columns include all columns from \code{x} and all non-key columns from
\code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well.
\item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added
to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have
the same name, \code{suffix}es are added to disambiguate these as well.
\item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their
common type between \code{x} and \code{y}.
}
}
\description{
Mutating joins add columns from \code{y} to \code{x}, matching observations based on
the keys. There are four mutating joins: the inner join, and the three outer
joins.
\subsection{Inner join}{

An \code{inner_join()} only keeps observations from \code{x} that have a matching key
in \code{y}.

The most important property of an inner join is that unmatched rows in either
input are not included in the result. This means that generally inner joins
are not appropriate in most analyses, because it is too easy to lose
observations.
}

\subsection{Outer joins}{

The three outer joins keep observations that appear in at least one of the
data frames:
\itemize{
\item A \code{left_join()} keeps all observations in \code{x}.
\item A \code{right_join()} keeps all observations in \code{y}.
\item A \code{full_join()} keeps all observations in \code{x} and \code{y}.
}
}
}
\section{Many-to-many relationships}{


By default, dplyr guards against many-to-many relationships in equality joins
by throwing a warning. These occur when both of the following are true:
\itemize{
\item A row in \code{x} matches multiple rows in \code{y}.
\item A row in \code{y} matches multiple rows in \code{x}.
}

This is typically surprising, as most joins involve a relationship of
one-to-one, one-to-many, or many-to-one, and is often the result of an
improperly specified join. Many-to-many relationships are particularly
problematic because they can result in a Cartesian explosion of the number of
rows returned from the join.

If a many-to-many relationship is expected, silence this warning by
explicitly setting \code{relationship = "many-to-many"}.

In production code, it is best to preemptively set \code{relationship} to whatever
relationship you expect to exist between the keys of \code{x} and \code{y}, as this
forces an error to occur immediately if the data doesn't align with your
expectations.

Inequality joins typically result in many-to-many relationships by nature, so
they don't warn on them by default, but you should still take extra care when
specifying an inequality join, because they also have the capability to
return a large number of rows.

Rolling joins don't warn on many-to-many relationships either, but many
rolling joins follow a many-to-one relationship, so it is often useful to
set \code{relationship = "many-to-one"} to enforce this.

Note that in SQL, most database providers won't let you specify a
many-to-many relationship between two tables, instead requiring that you
create a third \emph{junction table} that results in two one-to-many relationships
instead.

}

\section{Methods}{


These functions are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
\item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
}

}

\examples{
data(pbmc_small)
tt <- pbmc_small
tt |> inner_join(tt |> 
  distinct(groups) |>  
  mutate(new_column=1:2) |> 
  slice(1))

}
\seealso{
Other joins: 
\code{\link[dplyr]{cross_join}()},
\code{\link[dplyr]{filter-joins}},
\code{\link[dplyr]{nest_join}()}
}


================================================
FILE: man/join_features.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{join_features}
\alias{join_features}
\alias{join_features,Seurat-method}
\title{join_features}
\usage{
\S4method{join_features}{Seurat}(
  .data,
  features = NULL,
  all = FALSE,
  exclude_zeros = FALSE,
  shape = "wide",
  assay = NULL,
  slot = "data",
  ...
)
}
\arguments{
\item{.data}{A tidyseurat object}

\item{features}{A vector of feature identifiers to join}

\item{all}{If TRUE return all}

\item{exclude_zeros}{If TRUE exclude zero values}

\item{shape}{Format of the returned table "long" or "wide"}

\item{assay}{assay name to extract feature abundance}

\item{slot}{slot name to extract feature abundance}

\item{...}{Parameters to pass to join wide, i.e. assay name to extract feature abundance from and gene prefix, for shape="wide"}
}
\value{
A `tidyseurat` object
  containing information for the specified features.
}
\description{
join_features() extracts and joins information for specific
  features
}
\details{
This function extracts information for specified features and
  returns the information in either long or wide format.
}
\examples{
data(pbmc_small)
pbmc_small \%>\% join_features(
  features=c("HLA-DRA", "LYZ"))

}


================================================
FILE: man/join_transcripts.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods_DEPRECATED.R
\name{join_transcripts}
\alias{join_transcripts}
\title{(DEPRECATED) Extract and join information for transcripts.}
\usage{
join_transcripts(
  .data,
  transcripts = NULL,
  all = FALSE,
  exclude_zeros = FALSE,
  shape = "wide",
  ...
)
}
\arguments{
\item{.data}{A tidyseurat object}

\item{transcripts}{A vector of transcript identifiers to join}

\item{all}{If TRUE return all}

\item{exclude_zeros}{If TRUE exclude zero values}

\item{shape}{Format of the returned table "long" or "wide"}

\item{...}{Parameters to pass to join wide, i.e. assay name to extract transcript abundance from}
}
\value{
A `tbl` containing the information.for the specified transcripts
}
\description{
join_transcripts() extracts and joins information for specified transcripts
}
\details{
DEPRECATED, please use join_features()
}
\examples{

print("DEPRECATED")


}


================================================
FILE: man/left_join.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{left_join}
\alias{left_join}
\alias{left_join.Seurat}
\title{Mutating joins}
\usage{
\method{left_join}{Seurat}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
}
\arguments{
\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
vector of variables to join by.

If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so
that you can check they're correct; suppress the message by supplying \code{by}
explicitly.

To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.

To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
multiple expressions. For example, \code{join_by(a == b, c == d)} will match
\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
\code{x} and \code{y}, you can shorten this by listing only the variable names, like
\code{join_by(a, c)}.

\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
these types of joins.

For simple equality joins, you can alternatively specify a character vector
of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.

To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
\code{\link[dplyr:cross_join]{cross_join()}}.}

\item{copy}{If \code{x} and \code{y} are not from the same data source,
and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
same src as \code{x}.  This allows you to join tables across srcs, but
it is a potentially expensive operation so you must opt into it.}

\item{suffix}{If there are non-joined duplicate variables in \code{x} and
\code{y}, these suffixes will be added to the output to disambiguate them.
Should be a character vector of length 2.}

\item{...}{Other parameters passed onto methods.}
}
\value{
An object of the same type as \code{x} (including the same groups). The order of
the rows and columns of \code{x} is preserved as much as possible. The output has
the following properties:
\itemize{
\item The rows are affect by the join type.
\itemize{
\item \code{inner_join()} returns matched \code{x} rows.
\item \code{left_join()} returns all \code{x} rows.
\item \code{right_join()}  returns matched of \code{x} rows, followed by unmatched \code{y} rows.
\item \code{full_join()}  returns all \code{x} rows, followed by unmatched \code{y} rows.
}
\item Output columns include all columns from \code{x} and all non-key columns from
\code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well.
\item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added
to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have
the same name, \code{suffix}es are added to disambiguate these as well.
\item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their
common type between \code{x} and \code{y}.
}
}
\description{
Mutating joins add columns from \code{y} to \code{x}, matching observations based on
the keys. There are four mutating joins: the inner join, and the three outer
joins.
\subsection{Inner join}{

An \code{inner_join()} only keeps observations from \code{x} that have a matching key
in \code{y}.

The most important property of an inner join is that unmatched rows in either
input are not included in the result. This means that generally inner joins
are not appropriate in most analyses, because it is too easy to lose
observations.
}

\subsection{Outer joins}{

The three outer joins keep observations that appear in at least one of the
data frames:
\itemize{
\item A \code{left_join()} keeps all observations in \code{x}.
\item A \code{right_join()} keeps all observations in \code{y}.
\item A \code{full_join()} keeps all observations in \code{x} and \code{y}.
}
}
}
\section{Many-to-many relationships}{


By default, dplyr guards against many-to-many relationships in equality joins
by throwing a warning. These occur when both of the following are true:
\itemize{
\item A row in \code{x} matches multiple rows in \code{y}.
\item A row in \code{y} matches multiple rows in \code{x}.
}

This is typically surprising, as most joins involve a relationship of
one-to-one, one-to-many, or many-to-one, and is often the result of an
improperly specified join. Many-to-many relationships are particularly
problematic because they can result in a Cartesian explosion of the number of
rows returned from the join.

If a many-to-many relationship is expected, silence this warning by
explicitly setting \code{relationship = "many-to-many"}.

In production code, it is best to preemptively set \code{relationship} to whatever
relationship you expect to exist between the keys of \code{x} and \code{y}, as this
forces an error to occur immediately if the data doesn't align with your
expectations.

Inequality joins typically result in many-to-many relationships by nature, so
they don't warn on them by default, but you should still take extra care when
specifying an inequality join, because they also have the capability to
return a large number of rows.

Rolling joins don't warn on many-to-many relationships either, but many
rolling joins follow a many-to-one relationship, so it is often useful to
set \code{relationship = "many-to-one"} to enforce this.

Note that in SQL, most database providers won't let you specify a
many-to-many relationship between two tables, instead requiring that you
create a third \emph{junction table} that results in two one-to-many relationships
instead.

}

\section{Methods}{


These functions are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
\item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
}

}

\examples{
data(pbmc_small)
tt <- pbmc_small
tt |> left_join(tt |>  
  distinct(groups) |> 
  mutate(new_column=1:2))

}
\seealso{
Other joins: 
\code{\link[dplyr]{cross_join}()},
\code{\link[dplyr]{filter-joins}},
\code{\link[dplyr]{nest_join}()}
}


================================================
FILE: man/mutate.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{mutate}
\alias{mutate}
\alias{mutate.Seurat}
\title{Create, modify, and delete columns}
\usage{
\method{mutate}{Seurat}(.data, ...)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs.
The name gives the name of the column in the output.

The value can be:
\itemize{
\item A vector of length 1, which will be recycled to the correct length.
\item A vector the same length as the current group (or the whole data frame
if ungrouped).
\item \code{NULL}, to remove the column.
\item A data frame or tibble, to create multiple columns in the output.
}}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item Columns from \code{.data} will be preserved according to the \code{.keep} argument.
\item Existing columns that are modified by \code{...} will always be returned in
their original location.
\item New columns created through \code{...} will be placed according to the
\code{.before} and \code{.after} arguments.
\item The number of rows is not affected.
\item Columns given the value \code{NULL} will be removed.
\item Groups will be recomputed if a grouping variable is mutated.
\item Data frame attributes are preserved.
}
}
\description{
\code{mutate()} creates new columns that are functions of existing variables.
It can also modify (if the name is the same as an existing
column) and delete columns (by setting their value to \code{NULL}).
}
\section{Useful mutate functions}{


\itemize{
\item \code{\link{+}}, \code{\link{-}}, \code{\link[=log]{log()}}, etc., for their usual mathematical meanings
\item \code{\link[dplyr:lead]{lead()}}, \code{\link[dplyr:lag]{lag()}}
\item \code{\link[dplyr:dense_rank]{dense_rank()}}, \code{\link[dplyr:min_rank]{min_rank()}}, \code{\link[dplyr:percent_rank]{percent_rank()}}, \code{\link[dplyr:row_number]{row_number()}},
\code{\link[dplyr:cume_dist]{cume_dist()}}, \code{\link[dplyr:ntile]{ntile()}}
\item \code{\link[=cumsum]{cumsum()}}, \code{\link[dplyr:cummean]{cummean()}}, \code{\link[=cummin]{cummin()}}, \code{\link[=cummax]{cummax()}}, \code{\link[dplyr:cumany]{cumany()}}, \code{\link[dplyr:cumall]{cumall()}}
\item \code{\link[dplyr:na_if]{na_if()}}, \code{\link[dplyr:coalesce]{coalesce()}}
\item \code{\link[dplyr:if_else]{if_else()}}, \code{\link[dplyr:recode]{recode()}}, \code{\link[dplyr:case_when]{case_when()}}
}

}

\section{Grouped tibbles}{


Because mutating expressions are computed within groups, they may
yield different results on grouped tibbles. This will be the case
as soon as an aggregating, lagging, or ranking function is
involved. Compare this ungrouped mutate:

\if{html}{\out{<div class="sourceCode">}}\preformatted{starwars |>
  select(name, mass, species) |>
  mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
}\if{html}{\out{</div>}}

With the grouped equivalent:

\if{html}{\out{<div class="sourceCode">}}\preformatted{starwars |>
  select(name, mass, species) |>
  group_by(species) |>
  mutate(mass_norm = mass / mean(mass, na.rm = TRUE))
}\if{html}{\out{</div>}}

The former normalises \code{mass} by the global average whereas the
latter normalises by the averages within species levels.

}

\section{Methods}{


This function is a \strong{generic}, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("mutate")}.

}

\examples{
data(pbmc_small)
pbmc_small |> mutate(nFeature_RNA=1)

}
\seealso{
Other single table verbs: 
\code{\link{arrange}()},
\code{\link{rename}()},
\code{\link{slice}()},
\code{\link{summarise}()}
}
\concept{single table verbs}


================================================
FILE: man/nest.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidyr_methods.R
\name{nest}
\alias{nest}
\alias{nest.Seurat}
\title{Nest rows into a list-column of data frames}
\usage{
\method{nest}{Seurat}(.data, ..., .names_sep = NULL)
}
\arguments{
\item{.data}{A data frame.}

\item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to nest; these will
appear in the inner data frames.

Specified using name-variable pairs of the form
\code{new_col = c(col1, col2, col3)}. The right hand side can be any valid
tidyselect expression.

If not supplied, then \code{...} is derived as all columns \emph{not} selected by
\code{.by}, and will use the column name from \code{.key}.

\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
previously you could write \code{df |> nest(x, y, z)}.
Convert to \code{df |> nest(data = c(x, y, z))}.}

\item{.names_sep}{If \code{NULL}, the default, the inner names will come from
the former outer names. If a string, the  new inner names will use the
outer names with \code{names_sep} automatically stripped. This makes
\code{names_sep} roughly symmetric between nesting and unnesting.}
}
\value{
`tidyseurat_nested`
}
\description{
Nesting creates a list-column of data frames; unnesting flattens it back out
into regular columns. Nesting is implicitly a summarising operation: you
get one row for each group defined by the non-nested columns. This is useful
in conjunction with other summaries that work with whole datasets, most
notably models.

Learn more in \code{vignette("nest")}.
}
\details{
If neither \code{...} nor \code{.by} are supplied, \code{nest()} will nest all variables,
and will use the column name supplied through \code{.key}.
}
\section{New syntax}{


tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's
designed to be more similar to other functions. Converting to the new syntax
should be straightforward (guided by the message you'll receive) but if
you just need to run an old analysis, you can easily revert to the previous
behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows:

\if{html}{\out{<div class="sourceCode">}}\preformatted{library(tidyr)
nest <- nest_legacy
unnest <- unnest_legacy
}\if{html}{\out{</div>}}

}

\section{Grouped data frames}{


\code{df |> nest(data = c(x, y))} specifies the columns to be nested; i.e. the
columns that will appear in the inner data frame. \code{df |> nest(.by = c(x, y))} specifies the columns to nest \emph{by}; i.e. the columns that will remain in
the outer data frame. An alternative way to achieve the latter is to \code{nest()}
a grouped data frame created by \code{\link[dplyr:group_by]{dplyr::group_by()}}. The grouping variables
remain in the outer data frame and the others are nested. The result
preserves the grouping of the input.

Variables supplied to \code{nest()} will override grouping variables so that
\code{df |> group_by(x, y) |> nest(data = !z)} will be equivalent to
\code{df |> nest(data = !z)}.

You can't supply \code{.by} with a grouped data frame, as the groups already
represent what you are nesting by.

}

\examples{
data(pbmc_small)
pbmc_small |> 
    nest(data=-groups) |> 
    unnest(data)

}


================================================
FILE: man/pbmc_small_nested_interactions.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/data.R
\docType{data}
\name{pbmc_small_nested_interactions}
\alias{pbmc_small_nested_interactions}
\title{Intercellular ligand-receptor interactions for 
38 ligands from a single cell RNA-seq cluster.}
\format{
A `tibble` containing 100 rows and 9 columns.
  Cells are a subsample of the PBMC dataset of 2,700 single cells. 
  Cell interactions were identified with `SingleCellSignalR`.
\describe{
  \item{sample}{sample identifier}
  \item{ligand}{cluster and ligand identifier}
  \item{receptor}{cluster and receptor identifier}
  \item{ligand.name}{ligand name}
  \item{receptor.name}{receptor name}
  \item{origin}{cluster containing ligand}
  \item{destination}{cluster containing receptor}
  \item{interaction.type}{type of interation, paracrine or autocrine}
  \item{LRscore}{interaction score}
}
}
\source{
\url{https://satijalab.org/seurat/v3.1/pbmc3k_tutorial.html}
}
\usage{
data(pbmc_small_nested_interactions)
}
\value{
`tibble`
}
\description{
A dataset containing ligand-receptor interactions within a sample.
There are 38 ligands from a single cell cluster versus 35 receptors 
in 6 other clusters.
}
\keyword{datasets}


================================================
FILE: man/pipe.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils-pipe.R
\name{\%>\%}
\alias{\%>\%}
\title{Pipe operator}
\usage{
lhs \%>\% rhs
}
\value{
void
}
\description{
See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
}
\examples{
data(pbmc_small)
pbmc_small \%>\% print()
}
\keyword{internal}


================================================
FILE: man/pivot_longer.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidyr_methods.R
\name{pivot_longer}
\alias{pivot_longer}
\alias{pivot_longer.Seurat}
\title{Pivot data from wide to long}
\usage{
\method{pivot_longer}{Seurat}(
  data,
  cols,
  names_to = "name",
  names_prefix = NULL,
  names_sep = NULL,
  names_pattern = NULL,
  names_ptypes = NULL,
  names_transform = NULL,
  names_repair = "check_unique",
  values_to = "value",
  values_drop_na = FALSE,
  values_ptypes = NULL,
  values_transform = NULL,
  ...
)
}
\arguments{
\item{data}{A data frame to pivot.}

\item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to pivot into
longer format.}

\item{names_to}{A character vector specifying the new column or columns to
create from the information stored in the column names of \code{data} specified
by \code{cols}.
\itemize{
\item If length 0, or if \code{NULL} is supplied, no columns will be created.
\item If length 1, a single column will be created which will contain the
column names specified by \code{cols}.
\item If length >1, multiple columns will be created. In this case, one of
\code{names_sep} or \code{names_pattern} must be supplied to specify how the
column names should be split. There are also two additional character
values you can take advantage of:
\itemize{
\item \code{NA} will discard the corresponding component of the column name.
\item \code{".value"} indicates that the corresponding component of the column
name defines the name of the output column containing the cell values,
overriding \code{values_to} entirely.
}
}}

\item{names_prefix}{A regular expression used to remove matching text
from the start of each variable name.}

\item{names_sep, names_pattern}{If \code{names_to} contains multiple values,
these arguments control how the column name is broken up.

\code{names_sep} takes the same specification as \code{\link[tidyr:separate]{separate()}}, and can either
be a numeric vector (specifying positions to break on), or a single string
(specifying a regular expression to split on).

\code{names_pattern} takes the same specification as \code{\link[tidyr:extract]{extract()}}, a regular
expression containing matching groups (\verb{()}).

If these arguments do not give you enough control, use
\code{pivot_longer_spec()} to create a spec object and process manually as
needed.}

\item{names_ptypes, values_ptypes}{Optionally, a list of column name-prototype
pairs. Alternatively, a single empty prototype can be supplied, which will
be applied to all columns. A prototype (or ptype for short) is a
zero-length vector (like \code{integer()} or \code{numeric()}) that defines the type,
class, and attributes of a vector. Use these arguments if you want to
confirm that the created columns are the types that you expect. Note that
if you want to change (instead of confirm) the types of specific columns,
you should use \code{names_transform} or \code{values_transform} instead.}

\item{names_transform, values_transform}{Optionally, a list of column
name-function pairs. Alternatively, a single function can be supplied,
which will be applied to all columns. Use these arguments if you need to
change the types of specific columns. For example, \code{names_transform = list(week = as.integer)} would convert a character variable called \code{week}
to an integer.

If not specified, the type of the columns generated from \code{names_to} will
be character, and the type of the variables generated from \code{values_to}
will be the common type of the input columns used to generate them.}

\item{names_repair}{What happens if the output has invalid column names?
The default, \code{"check_unique"} is to error if the columns are duplicated.
Use \code{"minimal"} to allow duplicates in the output, or \code{"unique"} to
de-duplicated by adding numeric suffixes. See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}}
for more options.}

\item{values_to}{A string specifying the name of the column to create
from the data stored in cell values. If \code{names_to} is a character
containing the special \code{.value} sentinel, this value will be ignored,
and the name of the value column will be derived from part of the
existing column names.}

\item{values_drop_na}{If \code{TRUE}, will drop rows that contain only \code{NA}s
in the \code{values_to} column. This effectively converts explicit missing values
to implicit missing values, and should generally be used only when missing
values in \code{data} were created by its structure.}

\item{...}{Additional arguments passed on to methods.}
}
\value{
`tidyseurat`
}
\description{
\code{pivot_longer()} "lengthens" data, increasing the number of rows and
decreasing the number of columns. The inverse transformation is
\code{\link[tidyr:pivot_wider]{pivot_wider()}}

Learn more in \code{vignette("pivot")}.
}
\details{
\code{pivot_longer()} is an updated approach to \code{\link[tidyr:gather]{gather()}}, designed to be both
simpler to use and to handle more use cases. We recommend you use
\code{pivot_longer()} for new code; \code{gather()} isn't going away but is no longer
under active development.
}
\examples{
data(pbmc_small)
pbmc_small |> pivot_longer(
  cols=c(orig.ident, groups),
  names_to="name", values_to="value")

}


================================================
FILE: man/plotly.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plotly_methods.R
\name{plotly}
\alias{plotly}
\alias{plot_ly}
\alias{plot_ly.tbl_df}
\alias{plot_ly.Seurat}
\title{Initiate a plotly visualization}
\usage{
plot_ly(
  data = data.frame(),
  ...,
  type = NULL,
  name = NULL,
  color = NULL,
  colors = NULL,
  alpha = NULL,
  stroke = NULL,
  strokes = NULL,
  alpha_stroke = 1,
  size = NULL,
  sizes = c(10, 100),
  span = NULL,
  spans = c(1, 20),
  symbol = NULL,
  symbols = NULL,
  linetype = NULL,
  linetypes = NULL,
  split = NULL,
  frame = NULL,
  width = NULL,
  height = NULL,
  source = "A"
)

\method{plot_ly}{tbl_df}(
  data = data.frame(),
  ...,
  type = NULL,
  name = NULL,
  color = NULL,
  colors = NULL,
  alpha = NULL,
  stroke = NULL,
  strokes = NULL,
  alpha_stroke = 1,
  size = NULL,
  sizes = c(10, 100),
  span = NULL,
  spans = c(1, 20),
  symbol = NULL,
  symbols = NULL,
  linetype = NULL,
  linetypes = NULL,
  split = NULL,
  frame = NULL,
  width = NULL,
  height = NULL,
  source = "A"
)

\method{plot_ly}{Seurat}(
  data = data.frame(),
  ...,
  type = NULL,
  name = NULL,
  color = NULL,
  colors = NULL,
  alpha = NULL,
  stroke = NULL,
  strokes = NULL,
  alpha_stroke = 1,
  size = NULL,
  sizes = c(10, 100),
  span = NULL,
  spans = c(1, 20),
  symbol = NULL,
  symbols = NULL,
  linetype = NULL,
  linetypes = NULL,
  split = NULL,
  frame = NULL,
  width = NULL,
  height = NULL,
  source = "A"
)
}
\arguments{
\item{data}{A data frame (optional) or \link[crosstalk:SharedData]{crosstalk::SharedData} object.}

\item{...}{Arguments (i.e., attributes) passed along to the trace \code{type}.
See \code{\link[plotly:schema]{schema()}} for a list of acceptable attributes for a given trace \code{type}
(by going to \code{traces} -> \code{type} -> \code{attributes}). Note that attributes
provided at this level may override other arguments
(e.g. \code{plot_ly(x = 1:10, y = 1:10, color = I("red"), marker = list(color = "blue"))}).}

\item{type}{A character string specifying the trace type (e.g. \code{"scatter"}, \code{"bar"}, \code{"box"}, etc).
If specified, it \emph{always} creates a trace, otherwise}

\item{name}{Values mapped to the trace's name attribute. Since a trace can
only have one name, this argument acts very much like \code{split} in that it
creates one trace for every unique value.}

\item{color}{Values mapped to relevant 'fill-color' attribute(s)
(e.g. \href{https://plotly.com/r/reference/#scatter-fillcolor}{fillcolor},
\href{https://plotly.com/r/reference/#scatter-marker-color}{marker.color},
\href{https://plotly.com/r/reference/#scatter-textfont-color}{textfont.color}, etc.).
The mapping from data values to color codes may be controlled using
\code{colors} and \code{alpha}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{color = I("red")}).
Any color understood by \code{\link[grDevices:col2rgb]{grDevices::col2rgb()}} may be used in this way.}

\item{colors}{Either a colorbrewer2.org palette name (e.g. "YlOrRd" or "Blues"),
or a vector of colors to interpolate in hexadecimal "#RRGGBB" format,
or a color interpolation function like \code{colorRamp()}.}

\item{alpha}{A number between 0 and 1 specifying the alpha channel applied to \code{color}.
Defaults to 0.5 when mapping to \href{https://plotly.com/r/reference/#scatter-fillcolor}{fillcolor} and 1 otherwise.}

\item{stroke}{Similar to \code{color}, but values are mapped to relevant 'stroke-color' attribute(s)
(e.g., \href{https://plotly.com/r/reference/#scatter-marker-line-color}{marker.line.color}
and \href{https://plotly.com/r/reference/#scatter-line-color}{line.color}
for filled polygons). If not specified, \code{stroke} inherits from \code{color}.}

\item{strokes}{Similar to \code{colors}, but controls the \code{stroke} mapping.}

\item{alpha_stroke}{Similar to \code{alpha}, but applied to \code{stroke}.}

\item{size}{(Numeric) values mapped to relevant 'fill-size' attribute(s)
(e.g., \href{https://plotly.com/r/reference/#scatter-marker-size}{marker.size},
\href{https://plotly.com/r/reference/#scatter-textfont-size}{textfont.size},
and \href{https://plotly.com/r/reference/#scatter-error_x-width}{error_x.width}).
The mapping from data values to symbols may be controlled using
\code{sizes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{size = I(30)}).}

\item{sizes}{A numeric vector of length 2 used to scale \code{size} to pixels.}

\item{span}{(Numeric) values mapped to relevant 'stroke-size' attribute(s)
(e.g.,
\href{https://plotly.com/r/reference/#scatter-marker-line-width}{marker.line.width},
\href{https://plotly.com/r/reference/#scatter-line-width}{line.width} for filled polygons,
and \href{https://plotly.com/r/reference/#scatter-error_x-thickness}{error_x.thickness})
The mapping from data values to symbols may be controlled using
\code{spans}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{span = I(30)}).}

\item{spans}{A numeric vector of length 2 used to scale \code{span} to pixels.}

\item{symbol}{(Discrete) values mapped to \href{https://plotly.com/r/reference/#scatter-marker-symbol}{marker.symbol}.
The mapping from data values to symbols may be controlled using
\code{symbols}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{symbol = I("pentagon")}).
Any \link{pch} value or \href{https://plotly.com/r/reference/#scatter-marker-symbol}{symbol name} may be used in this way.}

\item{symbols}{A character vector of \link{pch} values or \href{https://plotly.com/r/reference/#scatter-marker-symbol}{symbol names}.}

\item{linetype}{(Discrete) values mapped to \href{https://plotly.com/r/reference/#scatter-line-dash}{line.dash}.
The mapping from data values to symbols may be controlled using
\code{linetypes}, or avoided altogether via \code{\link[=I]{I()}} (e.g., \code{linetype = I("dash")}).
Any \code{lty} (see \link{par}) value or \href{https://plotly.com/r/reference/#scatter-line-dash}{dash name} may be used in this way.}

\item{linetypes}{A character vector of \code{lty} values or \href{https://plotly.com/r/reference/#scatter-line-dash}{dash names}}

\item{split}{(Discrete) values used to create multiple traces (one trace per value).}

\item{frame}{(Discrete) values used to create animation frames.}

\item{width}{Width in pixels (optional, defaults to automatic sizing).}

\item{height}{Height in pixels (optional, defaults to automatic sizing).}

\item{source}{a character string of length 1. Match the value of this string
with the source argument in \code{\link[plotly:event_data]{event_data()}} to retrieve the
event data corresponding to a specific plot (shiny apps can have multiple plots).}
}
\value{
`plotly`
}
\description{
This function maps R objects to \href{https://plotly.com/javascript/}{plotly.js},
an (MIT licensed) web-based interactive charting library. It provides
abstractions for doing common things (e.g. mapping data values to
fill colors (via \code{color}) or creating \link[plotly]{animation}s (via \code{frame})) and sets
some different defaults to make the interface feel more 'R-like'
(i.e., closer to \code{\link[=plot]{plot()}} and \code{\link[ggplot2:qplot]{ggplot2::qplot()}}).
}
\details{
Unless \code{type} is specified, this function just initiates a plotly
object with 'global' attributes that are passed onto downstream uses of
\code{\link[plotly:add_trace]{add_trace()}} (or similar). A \link{formula} must always be used when
referencing column name(s) in \code{data} (e.g. \code{plot_ly(mtcars, x = ~wt)}).
Formulas are optional when supplying values directly, but they do
help inform default axis/scale titles
(e.g., \code{plot_ly(x = mtcars$wt)} vs \code{plot_ly(x = ~mtcars$wt)})
}
\examples{
data(pbmc_small)
plot_ly(pbmc_small)

}
\references{
\url{https://plotly-r.com/overview.html}
}
\seealso{
\itemize{
\item For initializing a plotly-geo object: \code{\link[plotly:plot_geo]{plot_geo()}}
\item For initializing a plotly-mapbox object: \code{\link[plotly:plot_mapbox]{plot_mapbox()}}
\item For translating a ggplot2 object to a plotly object: \code{\link[plotly:ggplotly]{ggplotly()}}
\item For modifying any plotly object: \code{\link[plotly:layout]{layout()}}, \code{\link[plotly:add_trace]{add_trace()}}, \code{\link[plotly:style]{style()}}
\item For linked brushing: \code{\link[plotly:highlight]{highlight()}}
\item For arranging multiple plots: \code{\link[plotly:subplot]{subplot()}}, \code{\link[crosstalk:bscols]{crosstalk::bscols()}}
\item For inspecting plotly objects: \code{\link[plotly:plotly_json]{plotly_json()}}
\item For quick, accurate, and searchable plotly.js reference: \code{\link[plotly:schema]{schema()}}
}
}
\author{
Carson Sievert
}


================================================
FILE: man/pull.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{pull}
\alias{pull}
\alias{pull.Seurat}
\title{Extract a single column}
\usage{
\method{pull}{Seurat}(.data, var = -1, name = NULL, ...)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{var}{A variable specified as:
\itemize{
\item a literal variable name
\item a positive integer, giving the position counting from the left
\item a negative integer, giving the position counting from the right.
}

The default returns the last column (on the assumption that's the
column you've created most recently).

This argument is taken by expression and supports
\link[rlang:topic-inject]{quasiquotation} (you can unquote column
names and column locations).}

\item{name}{An optional parameter that specifies the column to be used
as names for a named vector. Specified in a similar manner as \code{var}.}

\item{...}{For use by methods.}
}
\value{
A vector the same size as \code{.data}.
}
\description{
\code{pull()} is similar to \code{$}. It's mostly useful because it looks a little
nicer in pipes, it also works with remote data frames, and it can optionally
name the output.
}
\section{Methods}{


This function is a \strong{generic}, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

The following methods are currently available in loaded packages:
\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("pull")}.

}

\examples{
data(pbmc_small)
pbmc_small |> pull(groups)

}


================================================
FILE: man/quo_names.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{quo_names}
\alias{quo_names}
\title{Convert array of quosure (e.g. c(col_a, col_b)) into character vector}
\usage{
quo_names(v)
}
\arguments{
\item{v}{A array of quosures (e.g. c(col_a, col_b))}
}
\value{
A character vector
}
\description{
Convert array of quosure (e.g. c(col_a, col_b)) into character vector
}
\keyword{internal}


================================================
FILE: man/rename.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{rename}
\alias{rename}
\alias{rename.Seurat}
\title{Rename columns}
\usage{
\method{rename}{Seurat}(.data, ...)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{For \code{rename()}: <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Use
\code{new_name = old_name} to rename selected variables.

For \code{rename_with()}: additional arguments passed onto \code{.fn}.}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item Rows are not affected.
\item Column names are changed; column order is preserved.
\item Data frame attributes are preserved.
\item Groups are updated to reflect new names.
}
}
\description{
\code{rename()} changes the names of individual variables using
\code{new_name = old_name} syntax; \code{rename_with()} renames columns using a
function.
}
\section{Methods}{


This function is a \strong{generic}, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

The following methods are currently available in loaded packages:
\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("rename")}.

}

\examples{
data(pbmc_small)
pbmc_small |> rename(s_score=nFeature_RNA)

}
\seealso{
Other single table verbs: 
\code{\link{arrange}()},
\code{\link{mutate}()},
\code{\link{slice}()},
\code{\link{summarise}()}
}
\concept{single table verbs}


================================================
FILE: man/return_arguments_of.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utilities.R
\name{return_arguments_of}
\alias{return_arguments_of}
\title{returns variables from an expression}
\usage{
return_arguments_of(expression)
}
\arguments{
\item{expression}{an expression}
}
\value{
list of symbols
}
\description{
returns variables from an expression
}


================================================
FILE: man/right_join.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{right_join}
\alias{right_join}
\alias{right_join.Seurat}
\title{Mutating joins}
\usage{
\method{right_join}{Seurat}(x, y, by = NULL, copy = FALSE, suffix = c(".x", ".y"), ...)
}
\arguments{
\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
vector of variables to join by.

If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
variables in common across \code{x} and \code{y}. A message lists the variables so
that you can check they're correct; suppress the message by supplying \code{by}
explicitly.

To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.

To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
multiple expressions. For example, \code{join_by(a == b, c == d)} will match
\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
\code{x} and \code{y}, you can shorten this by listing only the variable names, like
\code{join_by(a, c)}.

\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
these types of joins.

For simple equality joins, you can alternatively specify a character vector
of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.

To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
\code{\link[dplyr:cross_join]{cross_join()}}.}

\item{copy}{If \code{x} and \code{y} are not from the same data source,
and \code{copy} is \code{TRUE}, then \code{y} will be copied into the
same src as \code{x}.  This allows you to join tables across srcs, but
it is a potentially expensive operation so you must opt into it.}

\item{suffix}{If there are non-joined duplicate variables in \code{x} and
\code{y}, these suffixes will be added to the output to disambiguate them.
Should be a character vector of length 2.}

\item{...}{Other parameters passed onto methods.}
}
\value{
An object of the same type as \code{x} (including the same groups). The order of
the rows and columns of \code{x} is preserved as much as possible. The output has
the following properties:
\itemize{
\item The rows are affect by the join type.
\itemize{
\item \code{inner_join()} returns matched \code{x} rows.
\item \code{left_join()} returns all \code{x} rows.
\item \code{right_join()}  returns matched of \code{x} rows, followed by unmatched \code{y} rows.
\item \code{full_join()}  returns all \code{x} rows, followed by unmatched \code{y} rows.
}
\item Output columns include all columns from \code{x} and all non-key columns from
\code{y}. If \code{keep = TRUE}, the key columns from \code{y} are included as well.
\item If non-key columns in \code{x} and \code{y} have the same name, \code{suffix}es are added
to disambiguate. If \code{keep = TRUE} and key columns in \code{x} and \code{y} have
the same name, \code{suffix}es are added to disambiguate these as well.
\item If \code{keep = FALSE}, output columns included in \code{by} are coerced to their
common type between \code{x} and \code{y}.
}
}
\description{
Mutating joins add columns from \code{y} to \code{x}, matching observations based on
the keys. There are four mutating joins: the inner join, and the three outer
joins.
\subsection{Inner join}{

An \code{inner_join()} only keeps observations from \code{x} that have a matching key
in \code{y}.

The most important property of an inner join is that unmatched rows in either
input are not included in the result. This means that generally inner joins
are not appropriate in most analyses, because it is too easy to lose
observations.
}

\subsection{Outer joins}{

The three outer joins keep observations that appear in at least one of the
data frames:
\itemize{
\item A \code{left_join()} keeps all observations in \code{x}.
\item A \code{right_join()} keeps all observations in \code{y}.
\item A \code{full_join()} keeps all observations in \code{x} and \code{y}.
}
}
}
\section{Many-to-many relationships}{


By default, dplyr guards against many-to-many relationships in equality joins
by throwing a warning. These occur when both of the following are true:
\itemize{
\item A row in \code{x} matches multiple rows in \code{y}.
\item A row in \code{y} matches multiple rows in \code{x}.
}

This is typically surprising, as most joins involve a relationship of
one-to-one, one-to-many, or many-to-one, and is often the result of an
improperly specified join. Many-to-many relationships are particularly
problematic because they can result in a Cartesian explosion of the number of
rows returned from the join.

If a many-to-many relationship is expected, silence this warning by
explicitly setting \code{relationship = "many-to-many"}.

In production code, it is best to preemptively set \code{relationship} to whatever
relationship you expect to exist between the keys of \code{x} and \code{y}, as this
forces an error to occur immediately if the data doesn't align with your
expectations.

Inequality joins typically result in many-to-many relationships by nature, so
they don't warn on them by default, but you should still take extra care when
specifying an inequality join, because they also have the capability to
return a large number of rows.

Rolling joins don't warn on many-to-many relationships either, but many
rolling joins follow a many-to-one relationship, so it is often useful to
set \code{relationship = "many-to-one"} to enforce this.

Note that in SQL, most database providers won't let you specify a
many-to-many relationship between two tables, instead requiring that you
create a third \emph{junction table} that results in two one-to-many relationships
instead.

}

\section{Methods}{


These functions are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{inner_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("inner_join")}.
\item \code{left_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("left_join")}.
\item \code{right_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("right_join")}.
\item \code{full_join()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("full_join")}.
}

}

\examples{
data(pbmc_small)
tt <- pbmc_small
tt |> right_join(tt |> 
  distinct(groups) |> 
  mutate(new_column=1:2) |> 
  slice(1))

}
\seealso{
Other joins: 
\code{\link[dplyr]{cross_join}()},
\code{\link[dplyr]{filter-joins}},
\code{\link[dplyr]{nest_join}()}
}


================================================
FILE: man/rowwise.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{rowwise}
\alias{rowwise}
\alias{rowwise.Seurat}
\title{Group input by rows}
\usage{
\method{rowwise}{Seurat}(data, ...)
}
\arguments{
\item{data}{Input data frame.}

\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Variables to be preserved
when calling \code{\link[dplyr:summarise]{summarise()}}. This is typically a set of variables whose
combination uniquely identify each row.

\strong{NB}: unlike \code{group_by()} you can not create new variables here but
instead you can select multiple variables with (e.g.) \code{everything()}.}
}
\value{
A row-wise data frame with class \code{rowwise_df}. Note that a
\code{rowwise_df} is implicitly grouped by row, but is not a \code{grouped_df}.
}
\description{
\code{rowwise()} allows you to compute on a data frame a row-at-a-time.
This is most useful when a vectorised function doesn't exist.

Most dplyr verbs preserve row-wise grouping. The exception is \code{\link[dplyr:summarise]{summarise()}},
which return a \link[dplyr]{grouped_df}. You can explicitly ungroup with \code{\link[dplyr:ungroup]{ungroup()}}
or \code{\link[dplyr:as_tibble]{as_tibble()}}, or convert to a \link[dplyr]{grouped_df} with \code{\link[dplyr:group_by]{group_by()}}.
}
\section{List-columns}{


Because a rowwise has exactly one row per group it offers a small
convenience for working with list-columns. Normally, \code{summarise()} and
\code{mutate()} extract a groups worth of data with \code{[}. But when you index
a list in this way, you get back another list. When you're working with
a \code{rowwise} tibble, then dplyr will use \code{[[} instead of \code{[} to make your
life a little easier.

}

\examples{
# TODO

}
\seealso{
\code{\link[dplyr:nest_by]{nest_by()}} for a convenient way of creating rowwise data frames
with nested data.
}


================================================
FILE: man/sample_n.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{sample_n}
\alias{sample_n}
\alias{sample_n.Seurat}
\alias{sample_frac}
\alias{sample_frac.Seurat}
\title{Sample n rows from a table}
\usage{
\method{sample_n}{Seurat}(tbl, size, replace = FALSE, weight = NULL, .env = NULL, ...)

\method{sample_frac}{Seurat}(tbl, size = 1, replace = FALSE, weight = NULL, .env = NULL, ...)
}
\arguments{
\item{tbl}{A data.frame.}

\item{size}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}>
For \code{sample_n()}, the number of rows to select.
For \code{sample_frac()}, the fraction of rows to select.
If \code{tbl} is grouped, \code{size} applies to each group.}

\item{replace}{Sample with or without replacement?}

\item{weight}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Sampling weights.
This must evaluate to a vector of non-negative numbers the same length as
the input. Weights are automatically standardised to sum to 1.}

\item{.env}{DEPRECATED.}

\item{...}{ignored}
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}
\code{sample_n()} and \code{sample_frac()} have been superseded in favour of
\code{\link[dplyr:slice_sample]{slice_sample()}}. While they will not be deprecated in the near future,
retirement means that we will only perform critical bug fixes, so we recommend
moving to the newer alternative.

These functions were superseded because we realised it was more convenient to
have two mutually exclusive arguments to one function, rather than two
separate functions. This also made it to clean up a few other smaller
design issues with \code{sample_n()}/\code{sample_frac}:
\itemize{
\item The connection to \code{slice()} was not obvious.
\item The name of the first argument, \code{tbl}, is inconsistent with other
single table verbs which use \code{.data}.
\item The \code{size} argument uses tidy evaluation, which is surprising and
undocumented.
\item It was easier to remove the deprecated \code{.env} argument.
\item \code{...} was in a suboptimal position.
}
}
\examples{
data(pbmc_small)
pbmc_small |> sample_n(50)
pbmc_small |> sample_frac(0.1)

}


================================================
FILE: man/select.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{select}
\alias{select}
\alias{select.Seurat}
\title{Keep or drop columns using their names and types}
\usage{
\method{select}{Seurat}(.data, ...)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> One or more unquoted
expressions separated by commas. Variable names can be used as if they
were positions in the data frame, so expressions like \code{x:y} can
be used to select a range of variables.}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item Rows are not affected.
\item Output columns are a subset of input columns, potentially with a different
order. Columns will be renamed if \code{new_name = old_name} form is used.
\item Data frame attributes are preserved.
\item Groups are maintained; you can't select off grouping variables.
}
}
\description{
Select (and optionally rename) variables in a data frame, using a concise
mini-language that makes it easy to refer to variables based on their name
(e.g. \code{a:f} selects all columns from \code{a} on the left to \code{f} on the
right) or type (e.g. \code{where(is.numeric)} selects all numeric columns).
\subsection{Overview of selection features}{

Tidyverse selections implement a dialect of R where operators make
it easy to select variables:
\itemize{
\item \code{:} for selecting a range of consecutive variables.
\item \code{!} for taking the complement of a set of variables.
\item \code{&} and \code{|} for selecting the intersection or the union of two
sets of variables.
\item \code{c()} for combining selections.
}

In addition, you can use \strong{selection helpers}. Some helpers select specific
columns:
\itemize{
\item \code{\link[tidyselect:everything]{everything()}}: Matches all variables.
\item \code{\link[tidyselect:everything]{last_col()}}: Select last variable, possibly with an offset.
\item \code{\link[dplyr:group_cols]{group_cols()}}: Select all grouping columns.
}

Other helpers select variables by matching patterns in their names:
\itemize{
\item \code{\link[tidyselect:starts_with]{starts_with()}}: Starts with a prefix.
\item \code{\link[tidyselect:starts_with]{ends_with()}}: Ends with a suffix.
\item \code{\link[tidyselect:starts_with]{contains()}}: Contains a literal string.
\item \code{\link[tidyselect:starts_with]{matches()}}: Matches a regular expression.
\item \code{\link[tidyselect:starts_with]{num_range()}}: Matches a numerical range like x01, x02, x03.
}

Or from variables stored in a character vector:
\itemize{
\item \code{\link[tidyselect:all_of]{all_of()}}: Matches variable names in a character vector. All
names must be present, otherwise an out-of-bounds error is
thrown.
\item \code{\link[tidyselect:all_of]{any_of()}}: Same as \code{all_of()}, except that no error is thrown
for names that don't exist.
}

Or using a predicate function:
\itemize{
\item \code{\link[tidyselect:where]{where()}}: Applies a function to all variables and selects those
for which the function returns \code{TRUE}.
}
}
}
\section{Methods}{


This function is a \strong{generic}, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

The following methods are currently available in loaded packages:
\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("select")}.

}

\section{Examples}{


Here we show the usage for the basic selection operators. See the
specific help pages to learn about helpers like \code{\link[dplyr:starts_with]{starts_with()}}.

The selection language can be used in functions like
\code{dplyr::select()}. Let's first attach
the tidyverse:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{library(tidyverse)

# For better printing
iris <- as_tibble(iris)
}\if{html}{\out{</div>}}

Select variables by name:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{starwars |> select(height)
#> # A tibble: 87 x 1
#>   height
#>    <int>
#> 1    172
#> 2    167
#> 3     96
#> 4    202
#> # i 83 more rows

iris |> select(Sepal.Length)
#> # A tibble: 150 x 1
#>   Sepal.Length
#>          <dbl>
#> 1          5.1
#> 2          4.9
#> 3          4.7
#> 4          4.6
#> # i 146 more rows
}\if{html}{\out{</div>}}

Select multiple variables by separating them with commas. Note how
the order of columns is determined by the order of inputs:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{starwars |> select(homeworld, height, mass)
#> # A tibble: 87 x 3
#>   homeworld height  mass
#>   <chr>      <int> <dbl>
#> 1 Tatooine     172    77
#> 2 Tatooine     167    75
#> 3 Naboo         96    32
#> 4 Tatooine     202   136
#> # i 83 more rows

iris |> select(Sepal.Length, Petal.Length)
#> # A tibble: 150 x 2
#>   Sepal.Length Petal.Length
#>          <dbl>        <dbl>
#> 1          5.1          1.4
#> 2          4.9          1.4
#> 3          4.7          1.3
#> 4          4.6          1.5
#> # i 146 more rows
}\if{html}{\out{</div>}}

If you use a named vector to select columns, the output will have
its columns renamed:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{selection <- c(
  new_homeworld = "homeworld",
  new_height = "height",
  new_mass = "mass"
)
starwars |> select(all_of(selection))
#> # A tibble: 87 x 3
#>   new_homeworld new_height new_mass
#>   <chr>              <int>    <dbl>
#> 1 Tatooine             172       77
#> 2 Tatooine             167       75
#> 3 Naboo                 96       32
#> 4 Tatooine             202      136
#> # i 83 more rows
}\if{html}{\out{</div>}}
\subsection{Operators:}{

The \code{:} operator selects a range of consecutive variables:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{starwars |> select(name:mass)
#> # A tibble: 87 x 3
#>   name           height  mass
#>   <chr>           <int> <dbl>
#> 1 Luke Skywalker    172    77
#> 2 C-3PO             167    75
#> 3 R2-D2              96    32
#> 4 Darth Vader       202   136
#> # i 83 more rows
}\if{html}{\out{</div>}}

The \code{!} operator negates a selection:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{starwars |> select(!(name:mass))
#> # A tibble: 87 x 11
#>   hair_color skin_color  eye_color birth_year sex   gender    homeworld species
#>   <chr>      <chr>       <chr>          <dbl> <chr> <chr>     <chr>     <chr>  
#> 1 blond      fair        blue            19   male  masculine Tatooine  Human  
#> 2 <NA>       gold        yellow         112   none  masculine Tatooine  Droid  
#> 3 <NA>       white, blue red             33   none  masculine Naboo     Droid  
#> 4 none       white       yellow          41.9 male  masculine Tatooine  Human  
#> # i 83 more rows
#> # i 3 more variables: films <list>, vehicles <list>, starships <list>

iris |> select(!c(Sepal.Length, Petal.Length))
#> # A tibble: 150 x 3
#>   Sepal.Width Petal.Width Species
#>         <dbl>       <dbl> <fct>  
#> 1         3.5         0.2 setosa 
#> 2         3           0.2 setosa 
#> 3         3.2         0.2 setosa 
#> 4         3.1         0.2 setosa 
#> # i 146 more rows

iris |> select(!ends_with("Width"))
#> # A tibble: 150 x 3
#>   Sepal.Length Petal.Length Species
#>          <dbl>        <dbl> <fct>  
#> 1          5.1          1.4 setosa 
#> 2          4.9          1.4 setosa 
#> 3          4.7          1.3 setosa 
#> 4          4.6          1.5 setosa 
#> # i 146 more rows
}\if{html}{\out{</div>}}

\code{&} and \code{|} take the intersection or the union of two selections:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{iris |> select(starts_with("Petal") & ends_with("Width"))
#> # A tibble: 150 x 1
#>   Petal.Width
#>         <dbl>
#> 1         0.2
#> 2         0.2
#> 3         0.2
#> 4         0.2
#> # i 146 more rows

iris |> select(starts_with("Petal") | ends_with("Width"))
#> # A tibble: 150 x 3
#>   Petal.Length Petal.Width Sepal.Width
#>          <dbl>       <dbl>       <dbl>
#> 1          1.4         0.2         3.5
#> 2          1.4         0.2         3  
#> 3          1.3         0.2         3.2
#> 4          1.5         0.2         3.1
#> # i 146 more rows
}\if{html}{\out{</div>}}

To take the difference between two selections, combine the \code{&} and
\code{!} operators:

\if{html}{\out{<div class="sourceCode r">}}\preformatted{iris |> select(starts_with("Petal") & !ends_with("Width"))
#> # A tibble: 150 x 1
#>   Petal.Length
#>          <dbl>
#> 1          1.4
#> 2          1.4
#> 3          1.3
#> 4          1.5
#> # i 146 more rows
}\if{html}{\out{</div>}}
}

}

\examples{
data(pbmc_small)
pbmc_small |> select(cell, orig.ident)

}
\seealso{
Other single table verbs: 
\code{\link[dplyr]{arrange}()},
\code{\link[dplyr]{filter}()},
\code{\link[dplyr]{mutate}()},
\code{\link[dplyr]{reframe}()},
\code{\link[dplyr]{rename}()},
\code{\link[dplyr]{slice}()},
\code{\link[dplyr]{summarise}()}
}


================================================
FILE: man/separate.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidyr_methods.R
\name{separate}
\alias{separate}
\alias{separate.Seurat}
\title{Separate a character column into multiple columns with a regular
expression or numeric locations}
\usage{
\method{separate}{Seurat}(
  data,
  col,
  into,
  sep = "[^[:alnum:]]+",
  remove = TRUE,
  convert = FALSE,
  extra = "warn",
  fill = "warn",
  ...
)
}
\arguments{
\item{data}{A data frame.}

\item{col}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Column to expand.}

\item{into}{Names of new variables to create as character vector.
Use \code{NA} to omit the variable in the output.}

\item{sep}{Separator between columns.

If character, \code{sep} is interpreted as a regular expression. The default
value is a regular expression that matches any sequence of
non-alphanumeric values.

If numeric, \code{sep} is interpreted as character positions to split at. Positive
values start at 1 at the far-left of the string; negative value start at -1 at
the far-right of the string. The length of \code{sep} should be one less than
\code{into}.}

\item{remove}{If \code{TRUE}, remove input column from output data frame.}

\item{convert}{If \code{TRUE}, will run \code{\link[=type.convert]{type.convert()}} with
\code{as.is = TRUE} on new columns. This is useful if the component
columns are integer, numeric or logical.

NB: this will cause string \code{"NA"}s to be converted to \code{NA}s.}

\item{extra}{If \code{sep} is a character vector, this controls what
happens when there are too many pieces. There are three valid options:
\itemize{
\item \code{"warn"} (the default): emit a warning and drop extra values.
\item \code{"drop"}: drop any extra values without a warning.
\item \code{"merge"}: only splits at most \code{length(into)} times
}}

\item{fill}{If \code{sep} is a character vector, this controls what
happens when there are not enough pieces. There are three valid options:
\itemize{
\item \code{"warn"} (the default): emit a warning and fill from the right
\item \code{"right"}: fill with missing values on the right
\item \code{"left"}: fill with missing values on the left
}}

\item{...}{Additional arguments passed on to methods.}
}
\value{
`tidyseurat`
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#superseded}{\figure{lifecycle-superseded.svg}{options: alt='[Superseded]'}}}{\strong{[Superseded]}}

\code{separate()} has been superseded in favour of \code{\link[tidyr:separate_wider_position]{separate_wider_position()}}
and \code{\link[tidyr:separate_wider_delim]{separate_wider_delim()}} because the two functions make the two uses
more obvious, the API is more polished, and the handling of problems is
better. Superseded functions will not go away, but will only receive
critical bug fixes.

Given either a regular expression or a vector of character positions,
\code{separate()} turns a single character column into multiple columns.
}
\examples{
data(pbmc_small)
un <- pbmc_small |> unite("new_col", c(orig.ident, groups))
un |> separate(new_col, c("orig.ident", "groups"))

}
\seealso{
\code{\link[tidyr:unite]{unite()}}, the complement, \code{\link[tidyr:extract]{extract()}} which uses regular
expression capturing groups.
}


================================================
FILE: man/slice.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{slice}
\alias{slice}
\alias{slice.Seurat}
\alias{slice_head}
\alias{slice_tail}
\alias{slice_sample}
\alias{slice_min}
\alias{slice_max}
\alias{slice_sample.Seurat}
\alias{slice_head.Seurat}
\alias{slice_tail.Seurat}
\alias{slice_min.Seurat}
\alias{slice_max.Seurat}
\title{Subset rows using their positions}
\usage{
\method{slice}{Seurat}(.data, ..., .by = NULL, .preserve = FALSE)

\method{slice_sample}{Seurat}(
  .data,
  ...,
  n = NULL,
  prop = NULL,
  by = NULL,
  weight_by = NULL,
  replace = FALSE
)

\method{slice_head}{Seurat}(.data, ..., n, prop, by = NULL)

\method{slice_tail}{Seurat}(.data, ..., n, prop, by = NULL)

\method{slice_min}{Seurat}(
  .data,
  order_by,
  ...,
  n,
  prop,
  by = NULL,
  with_ties = TRUE,
  na_rm = FALSE
)

\method{slice_max}{Seurat}(
  .data,
  order_by,
  ...,
  n,
  prop,
  by = NULL,
  with_ties = TRUE,
  na_rm = FALSE
)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{For \code{slice()}: <\code{\link[rlang:args_data_masking]{data-masking}}>
Integer row values.

Provide either positive values to keep, or negative values to drop.
The values provided must be either all positive or all negative.
Indices beyond the number of rows in the input are silently ignored.

For \verb{slice_*()}, these arguments are passed on to methods.}

\item{.by, by}{<\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> Optionally, a selection of columns to
group by for just this operation, functioning as an alternative to \code{\link[dplyr:group_by]{group_by()}}. For
details and examples, see \link[dplyr:dplyr_by]{?dplyr_by}.}

\item{.preserve}{Relevant when the \code{.data} input is grouped. If \code{.preserve = FALSE} (the default), the grouping structure is recalculated based on the
resulting data, otherwise the grouping is kept as is.}

\item{n, prop}{Provide either \code{n}, the number of rows, or \code{prop}, the
proportion of rows to select. If neither are supplied, \code{n = 1} will be
used. If \code{n} is greater than the number of rows in the group
(or \code{prop > 1}), the result will be silently truncated to the group size.
\code{prop} will be rounded towards zero to generate an integer number of
rows.

A negative value of \code{n} or \code{prop} will be subtracted from the group
size. For example, \code{n = -2} with a group of 5 rows will select 5 - 2 = 3
rows; \code{prop = -0.25} with 8 rows will select 8 * (1 - 0.25) = 6 rows.}

\item{weight_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Sampling
weights. This must evaluate to a vector of non-negative numbers the same
length as the input. Weights are automatically standardised to sum to 1.
See the \code{Details} section for more technical details regarding these
weights.}

\item{replace}{Should sampling be performed with (\code{TRUE}) or without
(\code{FALSE}, the default) replacement.}

\item{order_by}{<\code{\link[rlang:args_data_masking]{data-masking}}> Variable or
function of variables to order by. To order by multiple variables, wrap
them in a data frame or tibble.}

\item{with_ties}{Should ties be kept together? The default, \code{TRUE},
may return more rows than you request. Use \code{FALSE} to ignore ties,
and return the first \code{n} rows.}

\item{na_rm}{Should missing values in \code{order_by} be removed from the result?
If \code{FALSE}, \code{NA} values are sorted to the end (like in \code{\link[dplyr:arrange]{arrange()}}), so
they will only be included if there are insufficient non-missing values to
reach \code{n}/\code{prop}.}
}
\value{
An object of the same type as \code{.data}. The output has the following
properties:
\itemize{
\item Each row may appear 0, 1, or many times in the output.
\item Columns are not modified.
\item Groups are not modified.
\item Data frame attributes are preserved.
}
}
\description{
\code{slice()} lets you index rows by their (integer) locations. It allows you
to select, remove, and duplicate rows. It is accompanied by a number of
helpers for common use cases:
\itemize{
\item \code{slice_head()} and \code{slice_tail()} select the first or last rows.
\item \code{slice_sample()} randomly selects rows.
\item \code{slice_min()} and \code{slice_max()} select rows with the smallest or largest
values of a variable.
}

If \code{.data} is a \link[dplyr]{grouped_df}, the operation will be performed on each group,
so that (e.g.) \code{slice_head(df, n = 5)} will select the first five rows in
each group.
}
\details{
Slice does not work with relational databases because they have no
intrinsic notion of row order. If you want to perform the equivalent
operation, use \code{\link[dplyr:filter]{filter()}} and \code{\link[dplyr:row_number]{row_number()}}.

For \code{slice_sample()}, note that the weights provided in \code{weight_by} are
passed through to the \code{prob} argument of \code{\link[base:sample]{base::sample.int()}}. This means
they cannot be used to reconstruct summary statistics from the underlying
population. See \href{https://stats.stackexchange.com/q/639211/}{this discussion}
for more details.
}
\section{Methods}{


These function are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}.
\item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
}


These function are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}.
\item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
}


These function are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}.
\item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
}


These function are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}.
\item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
}


These function are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}.
\item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
}


These function are \strong{generic}s, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

Methods available in currently loaded packages:
\itemize{
\item \code{slice()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice")}.
\item \code{slice_head()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_head")}.
\item \code{slice_tail()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_tail")}.
\item \code{slice_min()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_min")}.
\item \code{slice_max()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_max")}.
\item \code{slice_sample()}: \Sexpr[stage=render,results=rd]{dplyr:::methods_rd("slice_sample")}.
}

}

\examples{
data(pbmc_small)
pbmc_small |> slice(1)

# Slice group-wise using .by
pbmc_small |> slice(1:2, .by=groups)


# slice_sample() allows you to random select with or without replacement
pbmc_small |> slice_sample(n=5)

# if using replacement, and duplicate cells are returned, a tibble will be
# returned because duplicate cells cannot exist in Seurat objects
pbmc_small |> slice_sample(n=1, replace=TRUE) # returns Seurat
pbmc_small |> slice_sample(n=100, replace=TRUE) # returns tibble

# weight by a variable
pbmc_small |> slice_sample(n=5, weight_by=nCount_RNA)

# sample by group
pbmc_small |> slice_sample(n=5, by=groups)

# sample using proportions
pbmc_small |> slice_sample(prop=0.10)


# First rows based on existing order
pbmc_small |> slice_head(n=5)


# Last rows based on existing order
pbmc_small |> slice_tail(n=5)


# Rows with minimum and maximum values of a metadata variable
pbmc_small |> slice_min(nFeature_RNA, n=5)

# slice_min() and slice_max() may return more rows than requested
# in the presence of ties.
pbmc_small |>  slice_min(nFeature_RNA, n=2)

# Use with_ties=FALSE to return exactly n matches
pbmc_small |> slice_min(nFeature_RNA, n=2, with_ties=FALSE)

# Or use additional variables to break the tie:
pbmc_small |> slice_min(tibble::tibble(nFeature_RNA, nCount_RNA), n=2)

# Use by for group-wise operations
pbmc_small |> slice_min(nFeature_RNA, n=5, by=groups)


# Rows with minimum and maximum values of a metadata variable
pbmc_small |> slice_max(nFeature_RNA, n=5)

}
\seealso{
Other single table verbs: 
\code{\link{arrange}()},
\code{\link{mutate}()},
\code{\link{rename}()},
\code{\link{summarise}()}
}
\concept{single table verbs}


================================================
FILE: man/summarise.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dplyr_methods.R
\name{summarise}
\alias{summarise}
\alias{summarise.Seurat}
\alias{summarize}
\alias{summarize.Seurat}
\title{Summarise each group down to one row}
\usage{
\method{summarise}{Seurat}(.data, ...)

\method{summarize}{Seurat}(.data, ...)
}
\arguments{
\item{.data}{A data frame, data frame extension (e.g. a tibble), or a
lazy data frame (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
more details.}

\item{...}{<\code{\link[rlang:args_data_masking]{data-masking}}> Name-value pairs of
summary functions. The name will be the name of the variable in the result.

The value can be:
\itemize{
\item A vector of length 1, e.g. \code{min(x)}, \code{n()}, or \code{sum(is.na(y))}.
\item A data frame with 1 row, to add multiple columns from a single expression.
}}
}
\value{
An object \emph{usually} of the same type as \code{.data}.
\itemize{
\item The rows come from the underlying \code{\link[dplyr:group_keys]{group_keys()}}.
\item The columns are a combination of the grouping keys and the summary
expressions that you provide.
\item The grouping structure is controlled by the \verb{.groups=} argument, the
output may be another \link[dplyr]{grouped_df}, a \link[dplyr]{tibble} or a \link[dplyr]{rowwise} data frame.
\item Data frame attributes are \strong{not} preserved, because \code{summarise()}
fundamentally creates a new data frame.
}
}
\description{
\code{summarise()} creates a new data frame. It returns one row for each
combination of grouping variables; if there are no grouping variables, the
output will have a single row summarising all observations in the input. It
will contain one column for each grouping variable and one column for each of
the summary statistics that you have specified.

\code{summarise()} and \code{summarize()} are synonyms.
}
\section{Useful functions}{


\itemize{
\item Center: \code{\link[=mean]{mean()}}, \code{\link[=median]{median()}}
\item Spread: \code{\link[=sd]{sd()}}, \code{\link[=IQR]{IQR()}}, \code{\link[=mad]{mad()}}
\item Range: \code{\link[=min]{min()}}, \code{\link[=max]{max()}},
\item Position: \code{\link[dplyr:first]{first()}}, \code{\link[dplyr:last]{last()}}, \code{\link[dplyr:nth]{nth()}},
\item Count: \code{\link[dplyr:n]{n()}}, \code{\link[dplyr:n_distinct]{n_distinct()}}
\item Logical: \code{\link[=any]{any()}}, \code{\link[=all]{all()}}
}

}

\section{Backend variations}{


The data frame backend supports creating a variable and using it in the
same summary. This means that previously created summary variables can be
further transformed or combined within the summary, as in \code{\link[dplyr:mutate]{mutate()}}.
However, it also means that summary variables with the same names as previous
variables overwrite them, making those variables unavailable to later summary
variables.

This behaviour may not be supported in other backends. To avoid unexpected
results, consider using new names for your summary variables, especially when
creating multiple summaries.

}

\section{Methods}{


This function is a \strong{generic}, which means that packages can provide
implementations (methods) for other classes. See the documentation of
individual methods for extra arguments and differences in behaviour.

The following methods are currently available in loaded packages:
\Sexpr[stage=render,results=rd]{dplyr:::methods_rd("summarise")}.

}

\examples{
data(pbmc_small)
pbmc_small |> summarise(mean(nCount_RNA))

}
\seealso{
Other single table verbs: 
\code{\link{arrange}()},
\code{\link{mutate}()},
\code{\link{rename}()},
\code{\link{slice}()}
}
\concept{single table verbs}


================================================
FILE: man/tbl_format_header.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/print_method.R
\name{tbl_format_header}
\alias{tbl_format_header}
\alias{tbl_format_header.tidySeurat}
\title{Format the header of a tibble}
\usage{
\method{tbl_format_header}{tidySeurat}(x, setup, ...)
}
\arguments{
\item{x}{A tibble-like object.}

\item{setup}{A setup object returned from \code{\link[pillar:tbl_format_setup]{tbl_format_setup()}}.}

\item{...}{These dots are for future extensions and must be empty.}
}
\value{
A character vector.
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}

For easier customization, the formatting of a tibble is split
into three components: header, body, and footer.
The \code{tbl_format_header()} method is responsible for formatting the header
of a tibble.

Override this method if you need to change the appearance
of the entire header.
If you only need to change or extend the components shown in the header,
override or extend \code{\link[pillar:tbl_sum]{tbl_sum()}} for your class which is called by the
default method.
}
\examples{
# TODO

}


================================================
FILE: man/tidy.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{tidy}
\alias{tidy}
\alias{tidy.Seurat}
\title{tidy for Seurat objects}
\usage{
\method{tidy}{Seurat}(x, ...)
}
\arguments{
\item{x}{A Seurat object}

\item{...}{Additional arguments (not used)}
}
\value{
A tidyseurat object
}
\description{
tidy for Seurat objects
}


================================================
FILE: man/unite.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidyr_methods.R
\name{unite}
\alias{unite}
\alias{unite.Seurat}
\title{Unite multiple columns into one by pasting strings together}
\usage{
\method{unite}{Seurat}(data, col, ..., sep = "_", remove = TRUE, na.rm = FALSE)
}
\arguments{
\item{data}{A data frame.}

\item{col}{The name of the new column, as a string or symbol.

This argument is passed by expression and supports
\link[rlang:topic-inject]{quasiquotation} (you can unquote strings
and symbols). The name is captured from the expression with
\code{\link[rlang:defusing-advanced]{rlang::ensym()}} (note that this kind of interface where
symbols do not represent actual objects is now discouraged in the
tidyverse; we support it here for backward compatibility).}

\item{...}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> Columns to unite}

\item{sep}{Separator to use between values.}

\item{remove}{If \code{TRUE}, remove input columns from output data frame.}

\item{na.rm}{If \code{TRUE}, missing values will be removed prior to uniting
each value.}
}
\value{
`tidyseurat`
}
\description{
Convenience function to paste together multiple columns into one.
}
\examples{
data(pbmc_small)
pbmc_small |> unite(
  col="new_col", 
  c("orig.ident", "groups"))
    
}
\seealso{
\code{\link[tidyr:separate]{separate()}}, the complement.
}


================================================
FILE: man/unnest.Rd
================================================
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tidyr_methods.R
\name{unnest}
\alias{unnest}
\alias{unnest.tidyseurat_nested}
\alias{unnest_seurat}
\title{Unnest a list-column of data frames into rows and columns}
\usage{
\method{unnest}{tidyseurat_nested}(
  data,
  cols,
  ...,
  keep_empty = FALSE,
  ptype = NULL,
  names_sep = NULL,
  names_repair = "check_unique",
  .drop,
  .id,
  .sep,
  .preserve
)

unnest_seurat(
  data,
  cols,
  ...,
  keep_empty = FALSE,
  ptype = NULL,
  names_sep = NULL,
  names_repair = "check_unique",
  .drop,
  .id,
  .sep,
  .preserve
)
}
\arguments{
\item{data}{A data frame.}

\item{cols}{<\code{\link[tidyr:tidyr_tidy_select]{tidy-select}}> List-columns to unnest.

When selecting multiple columns, values from the same row will be recycled
to their common size.}

\item{...}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
previously you could write \code{df |> unnest(x, y, z)}.
Convert to \code{df |> unnest(c(x, y, z))}. If you previously created a new
variable in \code{unnest()} you'll now need to do it explicitly with \code{mutate()}.
Convert \code{df |> unnest(y = fun(x, y, z))}
to \code{df |> mutate(y = fun(x, y, z)) |> unnest(y)}.}

\item{keep_empty}{By default, you get one row of output for each element
of the list that you are unchopping/unnesting. This means that if there's a
size-0 element (like \code{NULL} or an empty data frame or vector), then that
entire row will be dropped from the output. If you want to preserve all
rows, use \code{keep_empty = TRUE} to replace size-0 elements with a single row
of missing values.}

\item{ptype}{Optionally, a named list of column name-prototype pairs to
coerce \code{cols} to, overriding the default that will be guessed from
combining the individual values. Alternatively, a single empty ptype
can be supplied, which will be applied to all \code{cols}.}

\item{names_sep}{If \code{NULL}, the default, the outer names will come from the
inner names. If a string, the outer names will be formed by pasting
together the outer and the inner column names, separated by \code{names_sep}.}

\item{names_repair}{Used to check that output data frame has valid
names. Must be one of the following options:
\itemize{
\item \verb{"minimal}": no name repair or checks, beyond basic existence,
\item \verb{"unique}": make sure names are unique and not empty,
\item \verb{"check_unique}": (the default), no name repair, but check they are unique,
\item \verb{"universal}": make the names unique and syntactic
\item a function: apply custom name repair.
\item \link[tidyr]{tidyr_legacy}: use the name repair from tidyr 0.8.
\item a formula: a purrr-style anonymous function (see \code{\link[rlang:as_function]{rlang::as_function()}})
}

See \code{\link[vctrs:vec_as_names]{vctrs::vec_as_names()}} for more details on these terms and the
strategies used to enforce them.}

\item{.drop, .preserve}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
all list-columns are now preserved; If there are any that you
don't want in the output use \code{select()} to remove them prior to
unnesting.}

\item{.id}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
convert \code{df |> unnest(x, .id = "id")} to \verb{df |> mutate(id = names(x)) |> unnest(x))}.}

\item{.sep}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}}:
use \code{names_sep} instead.}
}
\value{
`tidyseurat`
}
\description{
Unnest expands a list-column containing data frames into rows and columns.
}
\section{New syntax}{


tidyr 1.0.0 introduced a new syntax for \code{nest()} and \code{unnest()} that's
designed to be more similar to other functions. Converting to the new syntax
should be straightforward (guided by the message you'll receive) but if
you just need to run an old analysis, you can easily revert to the previous
behaviour using \code{\link[tidyr:nest_legacy]{nest_legacy()}} and \code{\link[tidyr:unnest_legacy]{unnest_legacy()}} as follows:

\if{html}{\out{<div class="sourceCode">}}\preformatted{library(tidyr)
nest <- nest_legacy
unnest <- unnest_legacy
}\if{html}{\out{</div>}}

}

\examples{
data(pbmc_small)
pbmc_small |> 
    nest(data=-groups) |> 
    unnest(data)

}
\seealso{
Other rectangling: 
\code{\link[tidyr]{hoist}()},
\code{\link[tidyr]{unnest_longer}()},
\code{\link[tidyr]{unnest_wider}()}
}


================================================
FILE: tests/testthat/test-dplyr.R
================================================
context('dplyr test')

library(Seurat)
data("pbmc_small")
set.seed(42)

test_that("arrange", {
  
  pbmc_small |> 
    arrange(nFeature_RNA) |> 
    expect_warning(regexp = "`arrange\\(\\)` was deprecated in tidyseurat .*")
  
  # pbmc_small_pca_arranged <- pbmc_small |> arrange(nFeature_RNA) |> Seurat::ScaleData() |> Seurat::FindVariableFeatures() |> Seurat::RunPCA()
  # pbmc_small_pca <- pbmc_small |> Seurat::ScaleData() |> Seurat::FindVariableFeatures() |> Seurat::RunPCA()
  # expect_equal(
  #   Seurat::VariableFeatures(pbmc_small_pca_arranged),
  #   Seurat::VariableFeatures(pbmc_small_pca)
  # )
  
  # # Failing only for ATLAS CRAN, but succeding for the rest
  # expect_equal(
  #   pbmc_small_pca_arranged[["pca"]]@cell.embeddings,
  #   pbmc_small_pca[["pca"]]@cell.embeddings,
  #   tolerance=0.1
  # )
  # expect_equal(
  #   pbmc_small_pca_arranged |> as_tibble() |>dplyr::slice_head(n = 1),
  #   pbmc_small_pca |> as_tibble() |> dplyr::slice_min(nFeature_RNA, n = 1)
  # )

})

test_that("bind_cols", {
  pbmc_small_bind <- pbmc_small |> select(nCount_RNA, nFeature_RNA)
  pbmc_small |>
    ttservice::bind_cols(pbmc_small_bind) |>
    select(nCount_RNA...2, nFeature_RNA...3) |>
    ncol() |>
    expect_equal(2)
})

test_that("distinct", {
  expect_equal(pbmc_small |> distinct(groups) |> ncol(), 1)
})

test_that("filter", {
  expect_equal(
    pbmc_small |> filter(groups == "g1") |> ncol(),
    sum(pbmc_small[[]]$groups == "g1")
  )
})

test_that("group_by", {
  expect_equal(
    pbmc_small |> group_by(groups) |> nrow(),
    nrow(pbmc_small[[]])
  )
})

test_that("summarise", {
  expect_equal(pbmc_small |> summarise(mean(nCount_RNA)) |> nrow(), 1)
})

test_that("mutate", {
  expect_equal(pbmc_small |> mutate(nFeature_RNA = 1) |> distinct(nFeature_RNA) |> nrow(), 1)
})

test_that("rename", {
  expect_equal(pbmc_small |> rename(s_score = nFeature_RNA) |> select(s_score) |> ncol(), 1)
})

test_that("left_join", {
  expect_equal(
    pbmc_small |> left_join(pbmc_small |> distinct(groups) |> mutate(new_column = 1:2) |> slice(1)) |> ncol(),
    nrow(pbmc_small[[]])
  )
})

test_that("inner_join", {
  expect_equal(
    pbmc_small |> inner_join(pbmc_small |> distinct(groups) |> mutate(new_column = 1:2) |> slice(1)) |> ncol(),
    sum(pbmc_small[[]]$groups == "g2")
  )
})

test_that("right_join", {
  expect_equal(
    pbmc_small |> right_join(pbmc_small |> distinct(groups) |> mutate(new_column = 1:2) |> slice(1)) |> ncol(),
    sum(pbmc_small[[]]$groups == "g2")
  )
})

test_that("full_join", {
  expect_equal(
    pbmc_small |> full_join(tibble::tibble(groups = "g1", other = 1:4)) |> nrow(),
    sum(pbmc_small[[]]$groups == "g1") * 4 + sum(pbmc_small[[]]$groups == "g2")
  )
})

test_that("slice", {
  expect_equal(pbmc_small |> slice(1) |> ncol(), 1)
  expect_equal(
    pbmc_small |> slice(1:6) |> colnames(),
    colnames(pbmc_small) |> head(6))
})

test_that("sample_n", {
  expect_equal(pbmc_small |> sample_n(50) |> ncol(), 50)
  expect_equal(
    pbmc_small |> sample_n(500, replace = TRUE) |> ncol(),
    pbmc_small |> as_tibble() |> ncol()
  )
})

test_that("slice_sample", {
  pbmc_small |>
    slice_sample(n = 50) |>
    ncol() |>
    expect_equal(50)
})

test_that("slice_head", {
  pbmc_small |>
    slice_head(n = 50) |>
    ncol() |>
    expect_equal(50)
  expect_equal(
    colnames(pbmc_small) |> head(n = 50),
    pbmc_small |> slice_head(n = 50) |> colnames()
  )
})

test_that("slice_tail", {
  pbmc_small |>
    slice_tail(n = 50) |>
    ncol() |>
    expect_equal(50)
  expect_equal(
    colnames(pbmc_small) |> tail(n = 50),
    pbmc_small |> slice_tail(n = 50) |> colnames()
  )
})

test_that("slice_min", {
  pbmc_small |>
    slice_min(nFeature_RNA, n = 5) |>
    ncol() |>
    expect_equal(5)
  
  # Arrange is deprecated
  # expect_equal(
  #   pbmc_small |> as_tibble() |> arrange(nFeature_RNA) |> head(n = 5) %>% pull(.cell),
  #   pbmc_small |> slice_min(nFeature_RNA, n = 5) |> colnames()
  # )
})

test_that("slice_max", {
  pbmc_small |>
    slice_max(nFeature_RNA, n = 5) |>
    ncol() |>
    expect_equal(5)
  
  # Arrange is deprecated
  # expect_equal(
  #   pbmc_small |> as_tibble() |> arrange(desc(nFeature_RNA)) |> head(n = 5) %>% pull(.cell),
  #   pbmc_small |> slice_max(nFeature_RNA, n = 5) |> colnames()
  # )
})

test_that("slice_min slice_max tibble input for order_by", {
  pbmc_small |>
    slice_min(tibble::tibble(nFeature_RNA, nCount_RNA), n = 5) |>
    ncol() |>
    expect_equal(5)
  pbmc_small |>
    slice_max(tibble::tibble(nFeature_RNA, nCount_RNA), n = 5) |>
    ncol() |>
    expect_equal(5)
})

test_that("select", {
  expect_equal(pbmc_small |> select(cell, orig.ident) |> class() |> as.character(), "Seurat")
  expect_equal(pbmc_small |> select(orig.ident) |> class() |> as.character() |> purrr::pluck(1), "tbl_df")
})

test_that("sample_frac", {
  expect_equal(
    pbmc_small |> sample_frac(0.1) |> ncol(),
    nrow(pbmc_small[[]]) * 0.1
  )
  expect_equal(
    pbmc_small |> sample_frac(10, replace = TRUE) |> ncol(),
    pbmc_small |> as_tibble() |> ncol()
  )
})

test_that("count", {
  expect_equal(
    pbmc_small |> count(groups) |> nrow(),
    pbmc_small[[]]$groups |> unique() |> length()
  )
})

test_that("add_count", {
  expect_equal(
    pbmc_small |> add_count(groups) |> nrow(),
    pbmc_small |> rownames() |> length()
  )
})

test_that("rowwise", {
  expect_equal(
    pbmc_small |> rowwise() |> mutate(m = mean(c(nCount_RNA, nFeature_RNA))) |> purrr::pluck("m", 1),
    ((pbmc_small[, 1]$nCount_RNA + pbmc_small[, 1]$nFeature_RNA) / 2) |> unname()
  )
})

test_that("group_split() works for one variable", {
  fd <- pbmc_small |> 
    group_split(groups)
  expect_equal(length(fd), length(unique(pbmc_small$groups)))
})

test_that("group_split() works for combination of variables", {
  fd <- pbmc_small |> 
    group_split(groups, letter.idents)
  expect_equal(length(fd), length(unique(pbmc_small$groups)) *
                 length(unique(pbmc_small$letter.idents)))
})

test_that("group_split() works for one logical statement", {
  fd_log <- pbmc_small |> 
    group_split(groups=="g1")
  fd_var <- pbmc_small |> 
    group_split(groups=="g1")
  expect_equal(lapply(fd_var, count), lapply(fd_log, count))
})

test_that("group_split() works for two logical statements", {
  fd <- pbmc_small |>
    group_split(PC_1>0 & groups=="g1")
  fd_counts <- lapply(fd, count)
  expect_equal(c(fd_counts[[1]], fd_counts[[2]], use.names = FALSE), 
               list(75, 5))
})


================================================
FILE: tests/testthat/test-ggplotly_methods.R
================================================
context('ggplot test')

data("pbmc_small")

df <- pbmc_small
df$number <- rnorm(ncol(df))
df$factor <- sample(gl(3, 1, ncol(df)))

test_that("ggplot", {
  # cell metadata
  p <- ggplot(df, aes(factor, number)) 
  expect_silent(show(p))
  expect_s3_class(p, "ggplot")
  # assay data
  g <- sample(rownames(df), 1)
  fd <- join_features(df, g, shape="wide")
  p <- ggplot(fd, aes(factor, .data[[g]]))
  expect_silent(show(p))
  expect_s3_class(p, "ggplot")
  # reduced dimensions
  p <- ggplot(df, aes(PC_1, PC_2, col=factor))
  expect_silent(show(p))
  expect_s3_class(p, "ggplot")
})

test_that("plotly", {
  # cell metadata
  p <- plot_ly(df, x=~factor, y=~number, type="violin") 
  expect_silent(show(p))
  expect_s3_class(p, "plotly")
  # assay data
  g <- sample(rownames(df), 1)
  fd <- join_features(df, g, shape="wide")
  p <- plot_ly(fd, x=~factor, y=g, type="violin") 
  expect_silent(show(p))
  expect_s3_class(p, "plotly")
  # reduced dimensions
  p <- plot_ly(fd, x=~PC_1, y=~PC_2, type="scatter", mode="markers") 
  expect_silent(show(p))
  expect_s3_class(p, "plotly")
})


================================================
FILE: tests/testthat/test-methods.R
================================================
context('methods test')

data("pbmc_small")

test_that("join_features_long", {
  pbmc_small |> 
    join_features("CD3D", shape="long") |> 
    slice(1) |>
    pull(.abundance_RNA) |>
    expect_equal(6.35, tolerance = 0.1)
})

test_that("join_features_wide", {
  pbmc_small |> 
    join_features("CD3D", shape="wide") |> 
    slice(1) |>
    pull(CD3D) |>
    expect_equal(6.35, tolerance = 0.1)
})

test_that("join_features_default_wide", {
  pbmc_small |> 
    join_features("CD3D") |> 
    slice(1) |>
    pull(CD3D) |>
    expect_equal(6.35, tolerance = 0.1)
})

test_that("aggregate_cells() returns expected values", {
  # Create pseudo-bulk object for testing
  pbmc_pseudo_bulk <-
    pbmc_small |>
    aggregate_cells(c(groups, letter.idents), assays = "RNA")
  
  # Check row length is unchanged
  pbmc_pseudo_bulk |>
    distinct(.feature) |> 
    nrow() |>
    expect_equal(pbmc_small |> nrow())
  
  # Check column length is correctly modified
  pbmc_pseudo_bulk |> 
    distinct(.sample) |> 
    nrow() |>
    expect_equal(pbmc_small |>
                   as_tibble() |>
                   select(groups, letter.idents) |>
                   unique() |>
                   nrow()
    )
  # Spot check for correctly aggregated count value of ACAP1 gene
  pbmc_pseudo_bulk |> 
    filter(.feature == "ACAP1" & .sample == "g1___A") |> 
    select(RNA) |> 
    as.numeric() |> 
    expect_equal(
      Seurat::DietSeurat(pbmc_small, assays = "RNA", features = "ACAP1")[, pbmc_small |>
                                  as_tibble() |>
                                  filter(groups == "g1", letter.idents == "A") |>
                                  pull(.cell)] |>
        LayerData() |> 
        sum())
  
  # Aggregate with tidyselect
  pbmc_small |>
    aggregate_cells(c(any_of("groups"), letter.idents), assays = "RNA") |> 
    expect_no_error()
})

test_that("get_abundance_sc_wide", {
  expect_equal(
    pbmc_small |> get_abundance_sc_wide() |> nrow(),
    pbmc_small[[]] |> nrow()
  )
  expect_equal(
    pbmc_small |> get_abundance_sc_wide() |> pull("S100A9") |> sum(),
    pbmc_small |> FetchData("S100A9") |> sum(), 
    tolerance = 0.1
 )
})

test_that("get_abundance_sc_long", {
  expect_equal(pbmc_small |> get_abundance_sc_long() |> ncol(), 3)
  expect_equal(
    pbmc_small |> get_abundance_sc_long() |> filter(.feature == "S100A9") |> pull(".abundance_RNA") |> sum(),
    pbmc_small |> FetchData("S100A9") |> sum(),
    tolerance = 0.1
  )
})


================================================
FILE: tests/testthat/test-pillar.R
================================================
context('pillar test')

test_string <- "A small string to test the function of pillar utilities."

test_that("pillar___format_comment", {
  test_string |>
    pillar___format_comment(width = 20) |>
    stringr::str_count("# ") |> 
    expect_equal(5)
})

test_that("pillar___strwrap2", {
  test_string |>
    pillar___strwrap2(width = 20, indent = 4) |>
    stringr::str_count("      ") |> 
    expect_equal(c(0, 1, 1, 1, 1))
})

test_that("pillar___wrap", {
  test_string |>
    pillar___wrap(width = 20) |>
    stringr::str_count("\n") |> 
    expect_equal(3)
})

================================================
FILE: tests/testthat/test-print.R
================================================
context('print test')

data("pbmc_small")

test_that("print", {
  text <- capture.output(print(pbmc_small))
  expect_equal(grep("Seurat-tibble abstraction", text), 1)
  i <- grep(str <- ".*Features=([0-9]+).*", text)
  expect_equal(gsub(str, "\\1", text[i]), paste(nrow(pbmc_small)))
  i <- grep(str <- ".*Cells=([0-9]+).*", text)
  expect_equal(gsub(str, "\\1", text[i]), paste(ncol(pbmc_small)))
})

test_that("glimpse", {
  text <- capture.output(glimpse(pbmc_small))
  expect_equal(length(text), 37)
})


================================================
FILE: tests/testthat/test-tidyr.R
================================================
context('tidyr test')

data("pbmc_small")
tt <- GetAssayData(pbmc_small, layer = 'counts', assay = "RNA") |> CreateSeuratObject() |> mutate(groups = sprintf("g%s", rep(1:2, dplyr::n()/2)))

test_that("nest_unnest", {
  col_names <- colnames(tt[[]]) |> c("cell")
  x <- tt |> nest(data = -groups) |> unnest(data) |> Seurat::NormalizeData() |>  Seurat::ScaleData() |> Seurat::FindVariableFeatures() |> Seurat::RunPCA()
  y <- tt |> Seurat::NormalizeData() |> Seurat::ScaleData() |> Seurat::FindVariableFeatures() |> Seurat::RunPCA()
  expect_equal(
    x[["pca"]]@cell.embeddings |> as_tibble(rownames = "cell") |> arrange(cell) |> pull(PC_1),
    y[["pca"]]@cell.embeddings |> as_tibble(rownames = "cell") |> arrange(cell) |> pull(PC_1)
  )
})

test_that("fast_vs_slow_nest", {
  expect_identical(
    tt |> mutate(groups2 = groups) |> nest(data = -c(groups, groups2)) |> select(-groups2),
    tt |> nest(data = -groups) 
  )
})

test_that("nest_unnest_slice_1", {
  expect_equal(
    tt |> nest(data = -groups) |> slice(1) |> unnest(data) |> ncol(),
    sum(tt[[]]$groups == "g1")
  )
})
test_that("unite separate", {
  un <- tt |> unite("new_col", c(orig.ident, groups))
  se <- un |> separate(col = new_col, into = c("orig.ident", "groups"))
  expect_equal(un |> select(new_col) |> slice(1) |> pull(new_col), "SeuratProject_g1")
  expect_equal(se |> select(orig.ident) |> ncol(), 1)
})

test_that("extract", {
  expect_equal(
    tt |> extract(groups, into = "g", regex = "g([0-9])", convert = TRUE) |> pull(g) |> class(),
    "integer"
  )
})

test_that("pivot_longer", {
  expect_equal(
    tt |> pivot_longer(c(orig.ident, groups), names_to = "name", values_to = "value")  |> class() |> magrittr::extract2(1),
    "tbl_df"
  )
})


================================================
FILE: tests/testthat/test-utilities.R
================================================
context('utilities test')

data("pbmc_small")

test_that("get_special_column_name_symbol", {
  expect_equal(get_special_column_name_symbol(".cell")$symbol, rlang::sym(".cell"))
  expect_equal(get_special_column_name_symbol(".cell")$name, c(".cell"))
})

test_that("ping_old_special_column_into_metadata", {
  ping_old_special_column_into_metadata(pbmc_small) |>
    as_tibble() |>
    colnames() |>
    purrr::pluck(1) |>
    expect_equal("cell")
})


================================================
FILE: tests/testthat.R
================================================
library(testthat)
library(tidyseurat)

test_check("tidyseurat")


================================================
FILE: vignettes/figures_article.Rmd
================================================
---
title: "Code for producing the figures in the article"
author: "Stefano Mangiola"
date: "`r Sys.Date()`"
package: tidyseurat
output:
  html_vignette:
    toc_float: true
vignette: >
  %\VignetteEngine{knitr::knitr}
  %\VignetteIndexEntry{Code for producing the figures in the article}
  %\usepackage[UTF-8]{inputenc}
---

<!-- badges: start -->
[![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html)
<!-- badges: end -->

```{r include=FALSE}
# Set path to plotly screenshot. We don't run the plotly code chunk as most servers do not have javascript libraries needed for interactive plotting
screenshot <- "../man/figures/plotly.png"

# The chunk below uses Rmd in man/fragments to avoid duplication, as the content is shared with the vignette and README. As suggested here: https://www.garrickadenbuie.com/blog/dry-vignette-and-readme/

visual_cue <- "../man/figures/logo_interaction-01.png"

```

```{r eval=FALSE}
# Article workflow

library(tidyverse)
library(Seurat)
library(SingleR)
library(plotly)
library(tidyHeatmap)
library(ggalluvial)
library(ggplot2)
library(tidyseurat)
options(future.globals.maxSize = 50068 * 1024^2)

# Use colourblind-friendly colours
friendly_cols <- dittoSeq::dittoColors()

# Set theme
custom_theme <-
  list(
    scale_fill_manual(values = friendly_cols),
    scale_color_manual(values = friendly_cols),
    theme_bw() +
      theme(
        panel.border = element_blank(),
        axis.line = element_line(),
        panel.grid.major = element_line(size = 0.2),
        panel.grid.minor = element_line(size = 0.1),
        text = element_text(size = 9),
        legend.position = "bottom",
        strip.background = element_blank(),
        axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
        axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)
      )
  )

PBMC_clean_scaled_UMAP_cluster_cell_type <- readRDS("dev/PBMC_clean_scaled_UMAP_cluster_cell_type.rds")
```

```{r eval=FALSE}
p1 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  pivot_longer(
    c(mito.fraction, S.Score, G2M.Score), 
    names_to="property", 
    values_to="Value"
  ) %>%
  mutate(property =  factor(property, levels = c("mito.fraction", "G2M.Score", "S.Score"))) %>%
  ggplot(aes(sample, Value)) + 
  geom_boxplot(outlier.size = 0.5 ) + 
  facet_wrap(~property, scales = "free_y" ) +
  custom_theme +
  theme(aspect.ratio=1)
```

```{r eval=FALSE}
p2 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  sample_n(20000) %>%
  ggplot(aes(UMAP_1, UMAP_2, color=seurat_clusters)) +
  geom_point(size=0.05, alpha=0.2) +
  custom_theme +
  theme(aspect.ratio=1)

PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  sample_n(20000) %>%
  plot_ly(
    x = ~`UMAP_1`,
    y = ~`UMAP_2`,
    z = ~`UMAP_3`,
    color = ~seurat_clusters,
    colors = friendly_cols[1:24],sizes = 50, size = 1
  )

markers = readRDS("dev/PBMC_marker_df.rds")
```

```{r eval=FALSE}
p3 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  arrange(first.labels) %>%
  mutate(seurat_clusters = fct_inorder(seurat_clusters)) %>%
  join_features(features=c("CD3D", "HLA-DRB1")) %>%
  ggplot(aes(y=seurat_clusters , x=.abundance_SCT, fill=first.labels)) +
  geom_density_ridges(bandwidth = 0.2) +
  facet_wrap(~ .feature, nrow = 2) +
  coord_flip() +
  custom_theme
```

```{r eval=FALSE}
# Plot heatmap
p4 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  sample_n(2000) %>%
  DoHeatmap(
    features = markers$gene,
    group.colors = friendly_cols
  )
```

```{r eval=FALSE}
p5 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  sample_n(1000) %>%
  join_features(features=markers$gene) %>%
  mutate(seurat_clusters = as.integer(seurat_clusters)) %>%
  filter(seurat_clusters<10) %>%
  group_by(seurat_clusters) %>%
  
  # Plot heatmap
  heatmap(
    .row = .feature,
    .column = .cell, 
    .value = .abundance_SCT, 
    palette_grouping = list(rep("black",9)), 
    palette_value = circlize::colorRamp2(c(-1.5, 0, 1.5), c("purple", "black", "yellow")),
    
    # ComplexHeatmap parameters
    row_gap = unit(0.1, "mm"), column_gap = unit(0.1, "mm")
  ) %>%
    
  # Add annotation
  add_tile(sample, palette = friendly_cols[1:7]) %>%
  add_point(PC_1) 
```

```{r eval=FALSE}
p6 = 
  PBMC_clean_scaled_UMAP_cluster_cell_type %>%
  unite("cluster_cell_type", c(first.labels, seurat_clusters), remove=FALSE) %>%
  pivot_longer(
    c(seurat_clusters, first.labels_single),
    names_to = "classification", values_to = "value"
  ) %>%
  
  ggplot(aes(x = classification, stratum = value, alluvium = cell,
           fill = first.labels, label = value)) +
  scale_x_discrete(expand = c(1, 1)) +
  geom_flow() +
  geom_stratum(alpha = .5) +
  # geom_text(stat = "stratum", size = 3) +
  geom_text_repel(stat = "stratum", size = 3,
             nudge_x      = 0.05,
             direction    = "y",
             angle        = 0,
             vjust        = 0,
             segment.size = 0.2
         ) +
  scale_fill_manual(values = friendly_cols) +
  #guides(fill = FALSE) +
  coord_flip() +
  theme_bw() +
  theme(
    panel.border = element_blank(),
    axis.line = element_line(),
    panel.grid.major = element_line(size = 0.2),
    panel.grid.minor = element_line(size = 0.1),
    text = element_text(size = 9),
    legend.position = "bottom",
    strip.background = element_blank(),
    axis.title.x = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
    axis.title.y = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
    axis.text.x = element_text(angle = 30, hjust = 1, vjust = 1)
  )
```


================================================
FILE: vignettes/introduction.Rmd
================================================
---
title: "Overview of the tidyseurat package"
author: "Stefano Mangiola"
date: "`r Sys.Date()`"
package: tidyseurat
output:
  html_vignette:
    toc_float: true
bibliography: tidyseurat.bib
vignette: >
  %\VignetteEngine{knitr::knitr}
  %\VignetteIndexEntry{Overview of the tidyseurat package}
  %\usepackage[UTF-8]{inputenc}
---

<!-- badges: start -->
[![Lifecycle:maturing](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://lifecycle.r-lib.org/articles/stages.html)
<!-- badges: end -->

```{r include=FALSE}
# Set path to plotly screenshot. We don't run the plotly code chunk as most servers do not have javascript libraries needed for interactive plotting
screenshot <- "../man/figures/plotly.png"

# The chunk below uses Rmd in man/fragments to avoid duplication, as the content is shared with the vignette and README. As suggested here: https://www.garrickadenbuie.com/blog/dry-vignette-and-readme/

visual_cue <- "../man/figures/logo_interaction-01.png"

```

```{r child="../man/fragments/intro.Rmd"}
```

# Session Info

```{r}
sessionInfo()
```


# References

================================================
FILE: vignettes/tidyseurat.bib
================================================
@article{butler2018integrating,
  title={Integrating single-cell transcriptomic data across different conditions, technologies, and species},
  author={Butler, Andrew and Hoffman, Paul and Smibert, Peter and Papalexi, Efthymia and Satija, Rahul},
  journal={Nature biotechnology},
  volume={36},
  number={5},
  pages={411--420},
  year={2018},
  publisher={Nature Publishing Group}
}

@article{stuart2019comprehensive,
  title={Comprehensive integration of single-cell data},
  author={Stuart, Tim and Butler, Andrew and Hoffman, Paul and Hafemeister, Christoph and Papalexi, Efthymia and Mauck III, William M and Hao, Yuhan and Stoeckius, Marlon and Smibert, Peter and Satija, Rahul},
  journal={Cell},
  volume={177},
  number={7},
  pages={1888--1902},
  year={2019},
  publisher={Elsevier}
}

@article{aran2019reference,
  title={Reference-based analysis of lung single-cell sequencing reveals a transitional profibrotic macrophage},
  author={Aran, Dvir and Looney, Agnieszka P and Liu, Leqian and Wu, Esther and Fong, Valerie and Hsu, Austin and Chak, Suzanna and Naikawadi, Ram P and Wolters, Paul J and Abate, Adam R and others},
  journal={Nature immunology},
  volume={20},
  number={2},
  pages={163--172},
  year={2019},
  publisher={Nature Publishing Group}
}

@article{cabello2020singlecellsignalr,
  title={SingleCellSignalR: inference of intercellular networks from single-cell transcriptomics},
  author={Cabello-Aguilar, Simon and Alame, M{\'e}lissa and Kon-Sun-Tack, Fabien and Fau, Caroline and Lacroix, Matthieu and Colinge, Jacques},
  journal={Nucleic acids research},
  volume={48},
  number={10},
  pages={e55--e55},
  year={2020},
  publisher={Oxford University Press}
}

@article{wickham2019welcome,
  title={Welcome to the Tidyverse},
  author={Wickham, Hadley and Averick, Mara and Bryan, Jennifer and Chang, Winston and McGowan, Lucy D'Agostino and Fran{\c{c}}ois, Romain and Grolemund, Garrett and Hayes, Alex and Henry, Lionel and Hester, Jim and others},
  journal={Journal of Open Source Software},
  volume={4},
  number={43},
  pages={1686},
  year={2019}
}