Full Code of campbio/celda for AI

master d9a104fb0959 cached

301 files

3.4 MB

915.5k tokens

51 symbols

1 requests

Download .txt

Showing preview only (3,659K chars total). Download the full file or copy to clipboard to get everything.

Repository: campbio/celda
Branch: master
Commit: d9a104fb0959
Files: 301
Total size: 3.4 MB

Directory structure:
gitextract_ojjfe_ko/

├── .Rbuildignore
├── .github/
│   ├── .gitignore
│   └── workflows/
│       ├── BioC-check.yaml
│       └── check-standard.yaml
├── .gitignore
├── CONDUCT.md
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── NOTICE
├── R/
│   ├── RcppExports.R
│   ├── aaa.R
│   ├── accessors.R
│   ├── celdaGridSearch.R
│   ├── celdaProbabilityMap.R
│   ├── celdaUMAP.R
│   ├── celda_C.R
│   ├── celda_CG.R
│   ├── celda_G.R
│   ├── celda_functions.R
│   ├── celda_heatmap.R
│   ├── celdatSNE.R
│   ├── celdatosce.R
│   ├── clusterProbability.R
│   ├── data.R
│   ├── decon.R
│   ├── elbow.R
│   ├── factorizeMatrix.R
│   ├── featureModuleLookup.R
│   ├── geneSetEnrich.R
│   ├── initialize_clusters.R
│   ├── loglikelihood.R
│   ├── matrixSums.R
│   ├── misc.R
│   ├── moduleHeatmap.R
│   ├── perplexity.R
│   ├── plotHeatmap.R
│   ├── plot_decontx.R
│   ├── plot_dr.R
│   ├── recursiveSplit.R
│   ├── reorderCelda.R
│   ├── reports.R
│   ├── selectFeatures.R
│   ├── semi_pheatmap.R
│   ├── simulateCells.R
│   ├── splitModule.R
│   ├── split_clusters.R
│   └── topRank.R
├── README.md
├── _pkgdown.yml
├── data/
│   ├── celdaCGGridSearchRes.rda
│   ├── celdaCGMod.rda
│   ├── celdaCGSim.rda
│   ├── celdaCMod.rda
│   ├── celdaCSim.rda
│   ├── celdaGMod.rda
│   ├── celdaGSim.rda
│   ├── contaminationSim.rda
│   ├── sampleCells.rda
│   ├── sceCeldaC.rda
│   ├── sceCeldaCG.rda
│   ├── sceCeldaCGGridSearch.rda
│   └── sceCeldaG.rda
├── docs/
│   ├── 404.html
│   ├── CONDUCT.html
│   ├── LICENSE-text.html
│   ├── articles/
│   │   ├── articles/
│   │   │   ├── celda_pbmc3k.html
│   │   │   ├── celda_pbmc3k_files/
│   │   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   │   └── empty-anchor.js
│   │   │   │   ├── header-attrs-2.7/
│   │   │   │   │   └── header-attrs.js
│   │   │   │   ├── kePrint-0.0.1/
│   │   │   │   │   └── kePrint.js
│   │   │   │   └── lightable-0.0.1/
│   │   │   │       └── lightable.css
│   │   │   ├── decontX_pbmc4k.html
│   │   │   ├── decontX_pbmc4k_files/
│   │   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   │   └── empty-anchor.js
│   │   │   │   └── header-attrs-2.7/
│   │   │   │       └── header-attrs.js
│   │   │   ├── installation.html
│   │   │   └── installation_files/
│   │   │       ├── accessible-code-block-0.0.1/
│   │   │       │   └── empty-anchor.js
│   │   │       └── header-attrs-2.7/
│   │   │           └── header-attrs.js
│   │   ├── celda.html
│   │   ├── celda_files/
│   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   └── empty-anchor.js
│   │   │   └── header-attrs-2.7/
│   │   │       └── header-attrs.js
│   │   ├── celda_pbmc3k.html
│   │   ├── celda_pbmc3k_files/
│   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   └── empty-anchor.js
│   │   │   ├── kePrint-0.0.1/
│   │   │   │   └── kePrint.js
│   │   │   └── lightable-0.0.1/
│   │   │       └── lightable.css
│   │   ├── decontX.html
│   │   ├── decontX_files/
│   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   └── empty-anchor.js
│   │   │   └── header-attrs-2.7/
│   │   │       └── header-attrs.js
│   │   ├── decontX_pbmc4k.html
│   │   ├── decontX_pbmc4k_files/
│   │   │   └── accessible-code-block-0.0.1/
│   │   │       └── empty-anchor.js
│   │   ├── index.html
│   │   ├── installation.html
│   │   └── installation_files/
│   │       └── accessible-code-block-0.0.1/
│   │           └── empty-anchor.js
│   ├── authors.html
│   ├── bootstrap-toc.css
│   ├── bootstrap-toc.js
│   ├── docsearch.css
│   ├── docsearch.js
│   ├── index.html
│   ├── news/
│   │   └── index.html
│   ├── pkgdown.css
│   ├── pkgdown.js
│   ├── pkgdown.yml
│   ├── reference/
│   │   ├── appendCeldaList.html
│   │   ├── availableModels.html
│   │   ├── bestLogLikelihood.html
│   │   ├── celda.html
│   │   ├── celdaCGGridSearchRes.html
│   │   ├── celdaCGMod.html
│   │   ├── celdaCGSim.html
│   │   ├── celdaCMod.html
│   │   ├── celdaCSim.html
│   │   ├── celdaClusters.html
│   │   ├── celdaGMod.html
│   │   ├── celdaGSim.html
│   │   ├── celdaGridSearch.html
│   │   ├── celdaHeatmap.html
│   │   ├── celdaModel.html
│   │   ├── celdaModules.html
│   │   ├── celdaPerplexity-celdaList-method.html
│   │   ├── celdaPerplexity.html
│   │   ├── celdaProbabilityMap.html
│   │   ├── celdaTsne.html
│   │   ├── celdaUmap.html
│   │   ├── celda_C.html
│   │   ├── celda_CG.html
│   │   ├── celda_G.html
│   │   ├── celdatosce.html
│   │   ├── clusterProbability.html
│   │   ├── compareCountMatrix.html
│   │   ├── contaminationSim.html
│   │   ├── countChecksum-celdaList-method.html
│   │   ├── countChecksum.html
│   │   ├── decontX.html
│   │   ├── decontXcounts.html
│   │   ├── distinctColors.html
│   │   ├── eigenMatMultInt.html
│   │   ├── eigenMatMultNumeric.html
│   │   ├── factorizeMatrix.html
│   │   ├── fastNormProp.html
│   │   ├── fastNormPropLog.html
│   │   ├── fastNormPropSqrt.html
│   │   ├── featureModuleLookup.html
│   │   ├── featureModuleTable.html
│   │   ├── geneSetEnrich.html
│   │   ├── index.html
│   │   ├── logLikelihood.html
│   │   ├── logLikelihoodHistory.html
│   │   ├── matrixNames.html
│   │   ├── moduleHeatmap.html
│   │   ├── nonzero.html
│   │   ├── normalizeCounts.html
│   │   ├── params.html
│   │   ├── perplexity.html
│   │   ├── plotCeldaViolin.html
│   │   ├── plotDecontXContamination.html
│   │   ├── plotDecontXMarkerExpression.html
│   │   ├── plotDecontXMarkerPercentage.html
│   │   ├── plotDimReduceCluster.html
│   │   ├── plotDimReduceFeature.html
│   │   ├── plotDimReduceGrid.html
│   │   ├── plotDimReduceModule.html
│   │   ├── plotGridSearchPerplexity.html
│   │   ├── plotHeatmap.html
│   │   ├── plotRPC.html
│   │   ├── recodeClusterY.html
│   │   ├── recodeClusterZ.html
│   │   ├── recursiveSplitCell.html
│   │   ├── recursiveSplitModule.html
│   │   ├── reorderCelda.html
│   │   ├── reportceldaCG.html
│   │   ├── resList.html
│   │   ├── resamplePerplexity.html
│   │   ├── retrieveFeatureIndex.html
│   │   ├── runParams.html
│   │   ├── sampleCells.html
│   │   ├── sampleLabel.html
│   │   ├── sceCeldaC.html
│   │   ├── sceCeldaCG.html
│   │   ├── sceCeldaCGGridSearch.html
│   │   ├── sceCeldaG.html
│   │   ├── selectBestModel.html
│   │   ├── selectFeatures.html
│   │   ├── semiPheatmap.html
│   │   ├── simulateCells.html
│   │   ├── simulateContamination.html
│   │   ├── splitModule.html
│   │   ├── subsetCeldaList.html
│   │   └── topRank.html
│   └── sitemap.xml
├── inst/
│   └── rmarkdown/
│       ├── CeldaCG_PlotResults.Rmd
│       └── CeldaCG_Run.Rmd
├── man/
│   ├── appendCeldaList.Rd
│   ├── availableModels.Rd
│   ├── bestLogLikelihood.Rd
│   ├── celda.Rd
│   ├── celdaCGGridSearchRes.Rd
│   ├── celdaCGMod.Rd
│   ├── celdaCGSim.Rd
│   ├── celdaCMod.Rd
│   ├── celdaCSim.Rd
│   ├── celdaClusters.Rd
│   ├── celdaGMod.Rd
│   ├── celdaGSim.Rd
│   ├── celdaGridSearch.Rd
│   ├── celdaHeatmap.Rd
│   ├── celdaModel.Rd
│   ├── celdaModules.Rd
│   ├── celdaPerplexity-celdaList-method.Rd
│   ├── celdaPerplexity.Rd
│   ├── celdaProbabilityMap.Rd
│   ├── celdaTsne.Rd
│   ├── celdaUmap.Rd
│   ├── celda_C.Rd
│   ├── celda_CG.Rd
│   ├── celda_G.Rd
│   ├── celdatosce.Rd
│   ├── clusterProbability.Rd
│   ├── compareCountMatrix.Rd
│   ├── contaminationSim.Rd
│   ├── countChecksum-celdaList-method.Rd
│   ├── countChecksum.Rd
│   ├── decontX.Rd
│   ├── decontXcounts.Rd
│   ├── distinctColors.Rd
│   ├── eigenMatMultInt.Rd
│   ├── eigenMatMultNumeric.Rd
│   ├── factorizeMatrix.Rd
│   ├── fastNormProp.Rd
│   ├── fastNormPropLog.Rd
│   ├── fastNormPropSqrt.Rd
│   ├── featureModuleLookup.Rd
│   ├── featureModuleTable.Rd
│   ├── geneSetEnrich.Rd
│   ├── logLikelihood.Rd
│   ├── logLikelihoodHistory.Rd
│   ├── matrixNames.Rd
│   ├── moduleHeatmap.Rd
│   ├── nonzero.Rd
│   ├── normalizeCounts.Rd
│   ├── params.Rd
│   ├── perplexity.Rd
│   ├── plotCeldaViolin.Rd
│   ├── plotDecontXContamination.Rd
│   ├── plotDecontXMarkerExpression.Rd
│   ├── plotDecontXMarkerPercentage.Rd
│   ├── plotDimReduceCluster.Rd
│   ├── plotDimReduceFeature.Rd
│   ├── plotDimReduceGrid.Rd
│   ├── plotDimReduceModule.Rd
│   ├── plotGridSearchPerplexity.Rd
│   ├── plotHeatmap.Rd
│   ├── plotRPC.Rd
│   ├── recodeClusterY.Rd
│   ├── recodeClusterZ.Rd
│   ├── recursiveSplitCell.Rd
│   ├── recursiveSplitModule.Rd
│   ├── reorderCelda.Rd
│   ├── reportceldaCG.Rd
│   ├── resList.Rd
│   ├── resamplePerplexity.Rd
│   ├── retrieveFeatureIndex.Rd
│   ├── runParams.Rd
│   ├── sampleCells.Rd
│   ├── sampleLabel.Rd
│   ├── sceCeldaC.Rd
│   ├── sceCeldaCG.Rd
│   ├── sceCeldaCGGridSearch.Rd
│   ├── sceCeldaG.Rd
│   ├── selectBestModel.Rd
│   ├── selectFeatures.Rd
│   ├── semiPheatmap.Rd
│   ├── simulateCells.Rd
│   ├── simulateContamination.Rd
│   ├── splitModule.Rd
│   ├── subsetCeldaList.Rd
│   └── topRank.Rd
├── src/
│   ├── DecontX.cpp
│   ├── Makevars
│   ├── Makevars.win
│   ├── RcppExports.cpp
│   ├── cG_calcGibbsProbY.cpp
│   ├── eigenMatMultInt.cpp
│   ├── matrixNorm.cpp
│   ├── matrixSums.c
│   ├── matrixSumsSparse.cpp
│   └── perplexity.c
├── tests/
│   ├── testthat/
│   │   ├── test-celda-functions.R
│   │   ├── test-celda_C.R
│   │   ├── test-celda_CG.R
│   │   ├── test-celda_G.R
│   │   ├── test-decon.R
│   │   ├── test-intialize_cluster.R
│   │   ├── test-matrixSums.R
│   │   └── test-with_seed.R
│   └── testthat.R
└── vignettes/
    ├── articles/
    │   ├── celda_pbmc3k.Rmd
    │   ├── decontX_pbmc4k.Rmd
    │   └── installation.Rmd
    ├── celda.Rmd
    └── decontX.Rmd

================================================
FILE CONTENTS
================================================

================================================
FILE: .Rbuildignore
================================================
^renv$
^renv\.lock$
^.*\.Rproj$
^\.Rproj\.user$
^CONDUCT\.md$
.travis.yml
NOTICE
_pkgdown.yml
^doc$
^Meta$
renv*

^_pkgdown\.yml$
^docs$
^pkgdown$
^\.github$


================================================
FILE: .github/.gitignore
================================================
*.html


================================================
FILE: .github/workflows/BioC-check.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  push:
    branches: [devel, master]
  pull_request:
    branches: [devel, master]

name: BioC-check

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: macOS-latest,   r: 'release'}

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes

    steps:
      - uses: actions/checkout@v2

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          http-user-agent: ${{ matrix.config.http-user-agent }}
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: |
            any::rcmdcheck
            url::https://cran.r-project.org/src/contrib/Archive/dbplyr/dbplyr_2.3.4.tar.gz

      - name: Install XQuartz on macOS
        if: runner.os == 'macOS'
        run: brew install xquartz --cask

      - name: Install fftw3 on macOS
        if: runner.os == 'macOS'
        run: brew install fftw
          
      - name: Run BiocCheck
        run: |
          BiocManager::install("BiocCheck")
          library(BiocCheck)
          BiocCheck::BiocCheck(".",
              `quit-with-status` = TRUE,
              `no-check-R-ver` = TRUE,
              `no-check-bioc-help` = TRUE
          )
        shell: Rscript {0}
        


================================================
FILE: .github/workflows/check-standard.yaml
================================================
# Workflow derived from https://github.com/r-lib/actions/tree/master/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  push:
    branches: [devel, master]
  pull_request:
    branches: [devel, master]

name: R-CMD-check

jobs:
  R-CMD-check:
    runs-on: ${{ matrix.config.os }}

    name: ${{ matrix.config.os }} (${{ matrix.config.r }})

    strategy:
      fail-fast: false
      matrix:
        config:
          - {os: macOS-latest,   r: 'release'}
          - {os: windows-latest, r: 'release'}
          - {os: ubuntu-latest,   r: 'release'}

    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
      R_KEEP_PKG_SOURCE: yes

    steps:
      - uses: actions/checkout@v2 

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: ${{ matrix.config.r }}
          http-user-agent: ${{ matrix.config.http-user-agent }}
          use-public-rspm: false

      - name: Install XQuartz on macOS
        if: runner.os == 'macOS'
        run: brew install xquartz --cask

      - name: Install fftw3 on macOS
        if: runner.os == 'macOS'
        run: brew install fftw

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: |
            any::rcmdcheck
            any::tinytex
            url::https://cran.r-project.org/src/contrib/Archive/dbplyr/dbplyr_2.3.4.tar.gz

      - uses: r-lib/actions/setup-tinytex@v2
      - uses: r-lib/actions/check-r-package@v2

      - name: Show testthat output
        if: always()
        run: find check -name 'testthat.Rout*' -exec cat '{}' \; || true
        shell: bash

      - name: Upload check results
        if: failure()
        uses: actions/upload-artifact@main
        with:
          name: ${{ runner.os }}-r${{ matrix.config.r }}-results
          path: check


================================================
FILE: .gitignore
================================================
# History files
.Rhistory
.Rapp.history

# Session Data files
.RData
# Example code in package build process
*-Ex.R
# Output files from R CMD build
/*.tar.gz
# Output files from R CMD check
/*.Rcheck/
# RStudio files
.Rproj.user/
# produced vignettes
vignettes/*.html
vignettes/*.pdf
vignettes/*.log
inst/rmarkdown/*.html
inst/rmarkdown/*.rds
inst/rmarkdown/*.csv
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth
# knitr and R markdown default cache directories
/*_cache/
/cache/
# Temporary files created by R markdown
*.utf8.md
*.knit.md
.Rproj.user
celda.Rproj
.DS_Store
src/*.o
src/*.dll
src/*.so
src-i386/*
src-x64/*
etc/*
# Celda log files with default prefix
Celda_chain.*log.txt
inst/doc
doc
Meta
.Rprofile
renv/
renv.lock


================================================
FILE: CONDUCT.md
================================================
# Contributor Code of Conduct

As contributors and maintainers of this project, we pledge to respect all people who 
contribute through reporting issues, posting feature requests, updating documentation,
submitting pull requests or patches, and other activities.

We are committed to making participation in this project a harassment-free experience for
everyone, regardless of level of experience, gender, gender identity and expression,
sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.

Examples of unacceptable behavior by participants include the use of sexual language or
imagery, derogatory comments or personal attacks, trolling, public or private harassment,
insults, or other unprofessional conduct.

Project maintainers have the right and responsibility to remove, edit, or reject comments,
commits, code, wiki edits, issues, and other contributions that are not aligned to this 
Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed 
from the project team.

Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by 
opening an issue or contacting one or more of the project maintainers.

This Code of Conduct is adapted from the Contributor Covenant 
(http://contributor-covenant.org), version 1.0.0, available at 
http://contributor-covenant.org/version/1/0/0/


================================================
FILE: DESCRIPTION
================================================
Package: celda
Title: CEllular Latent Dirichlet Allocation
Version: 1.18.2
Authors@R: c(person("Joshua", "Campbell", email = "camp@bu.edu",
    role = c("aut", "cre")),
    person("Shiyi", "Yang", email="syyang@bu.edu", role = c("aut")),
    person("Zhe", "Wang", email="zhe@bu.edu", role = c("aut")),
    person("Sean", "Corbett", email = "scorbett@bu.edu", role = c("aut")),
    person("Yusuke", "Koga", email="ykoga07@bu.edu", role = c("aut")))
Description: Celda is a suite of Bayesian hierarchical models for
    clustering single-cell RNA-sequencing (scRNA-seq) data. It is able to
    perform "bi-clustering" and simultaneously cluster genes into gene modules
    and cells into cell subpopulations. It also contains DecontX, a novel
    Bayesian method to computationally estimate and remove RNA contamination in
    individual cells without empty droplet information. A variety of scRNA-seq
    data visualization functions is also included.
Depends: R (>= 4.0), SingleCellExperiment, Matrix
VignetteBuilder: knitr
Imports: plyr, foreach, ggplot2, RColorBrewer, grid, scales, gtable,
        grDevices, graphics, matrixStats, doParallel, digest, methods,
        reshape2, S4Vectors, data.table,
        Rcpp, RcppEigen, uwot, enrichR, SummarizedExperiment,
        MCMCprecision, ggrepel, Rtsne, withr,
        scater (>= 1.14.4), scran, dbscan,
        DelayedArray, stringr, ComplexHeatmap, gridExtra,
        circlize
Suggests: testthat, knitr, roxygen2, rmarkdown, biomaRt, covr,
        BiocManager, BiocStyle, TENxPBMCData, singleCellTK, M3DExampleData
LinkingTo: Rcpp, RcppEigen
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.3.1
BugReports: https://github.com/campbio/celda/issues
biocViews: SingleCell, GeneExpression, Clustering, Sequencing, Bayesian, ImmunoOncology, DataImport
NeedsCompilation: yes


================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2018 Joshua D Campbell

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: NAMESPACE
================================================
# Generated by roxygen2: do not edit by hand

export("celdaClusters<-")
export("celdaModules<-")
export("decontXcounts<-")
export("sampleLabel<-")
export(appendCeldaList)
export(availableModels)
export(bestLogLikelihood)
export(celda)
export(celdaClusters)
export(celdaGridSearch)
export(celdaHeatmap)
export(celdaModel)
export(celdaModules)
export(celdaPerplexity)
export(celdaProbabilityMap)
export(celdaTsne)
export(celdaUmap)
export(celda_C)
export(celda_CG)
export(celda_G)
export(celdatosce)
export(clusterProbability)
export(compareCountMatrix)
export(countChecksum)
export(decontX)
export(decontXcounts)
export(distinctColors)
export(factorizeMatrix)
export(featureModuleLookup)
export(featureModuleTable)
export(geneSetEnrich)
export(logLikelihood)
export(logLikelihoodHistory)
export(matrixNames)
export(moduleHeatmap)
export(normalizeCounts)
export(params)
export(perplexity)
export(plotCeldaViolin)
export(plotDecontXContamination)
export(plotDecontXMarkerExpression)
export(plotDecontXMarkerPercentage)
export(plotDimReduceCluster)
export(plotDimReduceFeature)
export(plotDimReduceGrid)
export(plotDimReduceModule)
export(plotGridSearchPerplexity)
export(plotHeatmap)
export(plotRPC)
export(recodeClusterY)
export(recodeClusterZ)
export(recursiveSplitCell)
export(recursiveSplitModule)
export(reorderCelda)
export(reportCeldaCGPlotResults)
export(reportCeldaCGRun)
export(resList)
export(resamplePerplexity)
export(retrieveFeatureIndex)
export(runParams)
export(sampleLabel)
export(selectBestModel)
export(selectFeatures)
export(simulateCells)
export(simulateContamination)
export(splitModule)
export(subsetCeldaList)
export(topRank)
exportMethods("celdaClusters<-")
exportMethods("celdaModules<-")
exportMethods("decontXcounts<-")
exportMethods("sampleLabel<-")
exportMethods(bestLogLikelihood)
exportMethods(celdaClusters)
exportMethods(celdaGridSearch)
exportMethods(celdaHeatmap)
exportMethods(celdaModel)
exportMethods(celdaModules)
exportMethods(celdaPerplexity)
exportMethods(celdaProbabilityMap)
exportMethods(celdaTsne)
exportMethods(celdaUmap)
exportMethods(celda_C)
exportMethods(celda_CG)
exportMethods(celda_G)
exportMethods(celdatosce)
exportMethods(clusterProbability)
exportMethods(compareCountMatrix)
exportMethods(countChecksum)
exportMethods(decontX)
exportMethods(decontXcounts)
exportMethods(factorizeMatrix)
exportMethods(featureModuleLookup)
exportMethods(geneSetEnrich)
exportMethods(logLikelihood)
exportMethods(logLikelihoodHistory)
exportMethods(matrixNames)
exportMethods(moduleHeatmap)
exportMethods(params)
exportMethods(perplexity)
exportMethods(plotCeldaViolin)
exportMethods(plotDimReduceCluster)
exportMethods(plotDimReduceFeature)
exportMethods(plotDimReduceGrid)
exportMethods(plotDimReduceModule)
exportMethods(plotGridSearchPerplexity)
exportMethods(plotRPC)
exportMethods(recursiveSplitCell)
exportMethods(recursiveSplitModule)
exportMethods(reorderCelda)
exportMethods(resList)
exportMethods(resamplePerplexity)
exportMethods(runParams)
exportMethods(sampleLabel)
exportMethods(selectBestModel)
exportMethods(selectFeatures)
exportMethods(splitModule)
exportMethods(subsetCeldaList)
import(Rcpp)
import(RcppEigen)
import(foreach)
import(grDevices)
import(graphics)
import(grid)
import(uwot)
importClassesFrom(Matrix,dgCMatrix)
importClassesFrom(SingleCellExperiment,SingleCellExperiment)
importFrom(MCMCprecision,fit_dirichlet)
importFrom(Matrix,colSums)
importFrom(Matrix,rowSums)
importFrom(Matrix,t)
importFrom(RColorBrewer,brewer.pal)
importFrom(Rtsne,Rtsne)
importFrom(data.table,as.data.table)
importFrom(digest,digest)
importFrom(doParallel,registerDoParallel)
importFrom(enrichR,enrichr)
importFrom(enrichR,listEnrichrDbs)
importFrom(ggrepel,geom_text_repel)
importFrom(grDevices,colorRampPalette)
importFrom(grDevices,colors)
importFrom(grDevices,hsv)
importFrom(grDevices,rgb2hsv)
importFrom(grid,grid.pretty)
importFrom(gtable,gtable)
importFrom(gtable,gtable_add_grob)
importFrom(gtable,gtable_height)
importFrom(gtable,gtable_width)
importFrom(matrixStats,logSumExp)
importFrom(methods,.hasSlot)
importFrom(methods,is)
importFrom(methods,new)
importFrom(plyr,mapvalues)
importFrom(reshape2,melt)
importFrom(scales,brewer_pal)
importFrom(scales,dscale)
importFrom(scales,hue_pal)
importFrom(withr,with_seed)
importMethodsFrom(Matrix,"%*%")
useDynLib(celda,"_colSumByGroup")
useDynLib(celda,"_colSumByGroupChange")
useDynLib(celda,"_colSumByGroupChange_numeric")
useDynLib(celda,"_colSumByGroup_numeric")
useDynLib(celda,"_perplexityG")
useDynLib(celda,"_rowSumByGroup")
useDynLib(celda,"_rowSumByGroupChange")
useDynLib(celda,"_rowSumByGroupChange_numeric")
useDynLib(celda,"_rowSumByGroup_numeric")


================================================
FILE: NEWS.md
================================================
# celda v1.18.2 (2024-04-02)
* Updated Makevars files to new CRAN standards
* Fixed unit test causing error

# celda v1.18.1 (2023-11-05)
* Update to match Bioconductor release version
* Removed multipanelfigure as a dependency

# celda v1.14.2 (2023-01-19)
* Update to match Bioconductor release version

# celda v1.13.0 (2022-10-20)
* Bug fixes related to cluster labels stored as factors and plotting
* Updated sparse matrix conversion to work with Matrix v1.4-2

# celda v1.12.0 (2022-04-30)
* Update to match Bioconductor 3.15 release version

# celda v1.11.1 (2022-03-31)
* Fixes to reports
* Use smoothing splines for perplexity and RPC plots

# celda v1.11.0 (2022-03-31)
* Improvements to decontX vignette
* Added ability to subsample to speed up perplexity calculations
* Added ability to use batch parameter with the raw matrix in decontX

# celda v1.10.0 (2021-12-28)
* Update to match Bioconductor release version

# celda v1.9.3 (2021-10-04)
* Fixed bug in checking background matrix with decontX
* Switched to using Github Actions for Continuous Integration
* Fixed plotting bugs in celda results reports
* Speed up final step in decontX when creating final decontaminated matrix

# celda v1.9.2 (2021-07-19)
* Added a `NEWS.md` file to track changes to the package.
* Added new tutorials and documentation generated with pkgdown.
* Removed warnings in plotRPC functions.
* Added use of "displayName" to several functions that show feature names. 
* Minor bug fix when the input matrix was sparse and contained non-integer values.
* Several improvements to plotting functions. 

# celda v1.7.7 (2021-04-12):
* Added handling for sparse matrices

# celda v1.7.6 (2021-04-04):
* Added functions for creating HTML reports
* Fixed bug in decontX plotting

# celda v1.7.4 (2021-03-09):
* Enable input of raw/droplet matrix into decontX to estimate ambient RNA

# celda v1.1.6 (2019-07-16):
* Add multiclass decision tree

# celda v1.1.4 (2019-05-28):
* Add Alternate headings support for plotDimReduceFeature

# celda v1.1.3 (2019-05-14):
* Add multiclass decision tree (MCDT) cell cluster annotation

# celda v1.1.2 (2019-05-14):
* Fix a bug in celdaHeatmap

# celda v1.0.1 (2019-05-09):
* Default seed setting to maintain reproducibility

# celda v0.99.34 (2019-04-23):
* Minor changes to the vignettes

# celda v0.99.23 (2019-04-10):
* Remove pheatmap import

# celda v0.99.22 (2019-04-09):
* Package celda, for bi-clustering of single-cell 'omics data.

# celda v0.99.8 (2019-03-11):
* Second submission to Bioconductor

# celda v0.99.0 (2018-05-15):
* First submission to Bioconductor


================================================
FILE: NOTICE
================================================
The celda package includes other open source software components, including 
functions adapted from other R libraries. The use of these components is annotated
throughout the codebase.

The following is a list of these components; their corresponding licenses are listed below.

- gtools
- pheatmap





gtools, pheatmap
-----------------
                    GNU GENERAL PUBLIC LICENSE
                       Version 2, June 1991

 Copyright (C) 1989, 1991 Free Software Foundation, Inc., <http://fsf.org/>
 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 Everyone is permitted to copy and distribute verbatim copies
 of this license document, but changing it is not allowed.

                            Preamble

  The licenses for most software are designed to take away your
freedom to share and change it.  By contrast, the GNU General Public
License is intended to guarantee your freedom to share and change free
software--to make sure the software is free for all its users.  This
General Public License applies to most of the Free Software
Foundation's software and to any other program whose authors commit to
using it.  (Some other Free Software Foundation software is covered by
the GNU Lesser General Public License instead.)  You can apply it to
your programs, too.

  When we speak of free software, we are referring to freedom, not
price.  Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
this service if you wish), that you receive source code or can get it
if you want it, that you can change the software or use pieces of it
in new free programs; and that you know you can do these things.

  To protect your rights, we need to make restrictions that forbid
anyone to deny you these rights or to ask you to surrender the rights.
These restrictions translate to certain responsibilities for you if you
distribute copies of the software, or if you modify it.

  For example, if you distribute copies of such a program, whether
gratis or for a fee, you must give the recipients all the rights that
you have.  You must make sure that they, too, receive or can get the
source code.  And you must show them these terms so they know their
rights.

  We protect your rights with two steps: (1) copyright the software, and
(2) offer you this license which gives you legal permission to copy,
distribute and/or modify the software.

  Also, for each author's protection and ours, we want to make certain
that everyone understands that there is no warranty for this free
software.  If the software is modified by someone else and passed on, we
want its recipients to know that what they have is not the original, so
that any problems introduced by others will not reflect on the original
authors' reputations.

  Finally, any free program is threatened constantly by software
patents.  We wish to avoid the danger that redistributors of a free
program will individually obtain patent licenses, in effect making the
program proprietary.  To prevent this, we have made it clear that any
patent must be licensed for everyone's free use or not licensed at all.

  The precise terms and conditions for copying, distribution and
modification follow.

                    GNU GENERAL PUBLIC LICENSE
   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION

  0. This License applies to any program or other work which contains
a notice placed by the copyright holder saying it may be distributed
under the terms of this General Public License.  The "Program", below,
refers to any such program or work, and a "work based on the Program"
means either the Program or any derivative work under copyright law:
that is to say, a work containing the Program or a portion of it,
either verbatim or with modifications and/or translated into another
language.  (Hereinafter, translation is included without limitation in
the term "modification".)  Each licensee is addressed as "you".

Activities other than copying, distribution and modification are not
covered by this License; they are outside its scope.  The act of
running the Program is not restricted, and the output from the Program
is covered only if its contents constitute a work based on the
Program (independent of having been made by running the Program).
Whether that is true depends on what the Program does.

  1. You may copy and distribute verbatim copies of the Program's
source code as you receive it, in any medium, provided that you
conspicuously and appropriately publish on each copy an appropriate
copyright notice and disclaimer of warranty; keep intact all the
notices that refer to this License and to the absence of any warranty;
and give any other recipients of the Program a copy of this License
along with the Program.

You may charge a fee for the physical act of transferring a copy, and
you may at your option offer warranty protection in exchange for a fee.

  2. You may modify your copy or copies of the Program or any portion
of it, thus forming a work based on the Program, and copy and
distribute such modifications or work under the terms of Section 1
above, provided that you also meet all of these conditions:

    a) You must cause the modified files to carry prominent notices
    stating that you changed the files and the date of any change.

    b) You must cause any work that you distribute or publish, that in
    whole or in part contains or is derived from the Program or any
    part thereof, to be licensed as a whole at no charge to all third
    parties under the terms of this License.

    c) If the modified program normally reads commands interactively
    when run, you must cause it, when started running for such
    interactive use in the most ordinary way, to print or display an
    announcement including an appropriate copyright notice and a
    notice that there is no warranty (or else, saying that you provide
    a warranty) and that users may redistribute the program under
    these conditions, and telling the user how to view a copy of this
    License.  (Exception: if the Program itself is interactive but
    does not normally print such an announcement, your work based on
    the Program is not required to print an announcement.)

These requirements apply to the modified work as a whole.  If
identifiable sections of that work are not derived from the Program,
and can be reasonably considered independent and separate works in
themselves, then this License, and its terms, do not apply to those
sections when you distribute them as separate works.  But when you
distribute the same sections as part of a whole which is a work based
on the Program, the distribution of the whole must be on the terms of
this License, whose permissions for other licensees extend to the
entire whole, and thus to each and every part regardless of who wrote it.

Thus, it is not the intent of this section to claim rights or contest
your rights to work written entirely by you; rather, the intent is to
exercise the right to control the distribution of derivative or
collective works based on the Program.

In addition, mere aggregation of another work not based on the Program
with the Program (or with a work based on the Program) on a volume of
a storage or distribution medium does not bring the other work under
the scope of this License.

  3. You may copy and distribute the Program (or a work based on it,
under Section 2) in object code or executable form under the terms of
Sections 1 and 2 above provided that you also do one of the following:

    a) Accompany it with the complete corresponding machine-readable
    source code, which must be distributed under the terms of Sections
    1 and 2 above on a medium customarily used for software interchange; or,

    b) Accompany it with a written offer, valid for at least three
    years, to give any third party, for a charge no more than your
    cost of physically performing source distribution, a complete
    machine-readable copy of the corresponding source code, to be
    distributed under the terms of Sections 1 and 2 above on a medium
    customarily used for software interchange; or,

    c) Accompany it with the information you received as to the offer
    to distribute corresponding source code.  (This alternative is
    allowed only for noncommercial distribution and only if you
    received the program in object code or executable form with such
    an offer, in accord with Subsection b above.)

The source code for a work means the preferred form of the work for
making modifications to it.  For an executable work, complete source
code means all the source code for all modules it contains, plus any
associated interface definition files, plus the scripts used to
control compilation and installation of the executable.  However, as a
special exception, the source code distributed need not include
anything that is normally distributed (in either source or binary
form) with the major components (compiler, kernel, and so on) of the
operating system on which the executable runs, unless that component
itself accompanies the executable.

If distribution of executable or object code is made by offering
access to copy from a designated place, then offering equivalent
access to copy the source code from the same place counts as
distribution of the source code, even though third parties are not
compelled to copy the source along with the object code.

  4. You may not copy, modify, sublicense, or distribute the Program
except as expressly provided under this License.  Any attempt
otherwise to copy, modify, sublicense or distribute the Program is
void, and will automatically terminate your rights under this License.
However, parties who have received copies, or rights, from you under
this License will not have their licenses terminated so long as such
parties remain in full compliance.

  5. You are not required to accept this License, since you have not
signed it.  However, nothing else grants you permission to modify or
distribute the Program or its derivative works.  These actions are
prohibited by law if you do not accept this License.  Therefore, by
modifying or distributing the Program (or any work based on the
Program), you indicate your acceptance of this License to do so, and
all its terms and conditions for copying, distributing or modifying
the Program or works based on it.

  6. Each time you redistribute the Program (or any work based on the
Program), the recipient automatically receives a license from the
original licensor to copy, distribute or modify the Program subject to
these terms and conditions.  You may not impose any further
restrictions on the recipients' exercise of the rights granted herein.
You are not responsible for enforcing compliance by third parties to
this License.

  7. If, as a consequence of a court judgment or allegation of patent
infringement or for any other reason (not limited to patent issues),
conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License.  If you cannot
distribute so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you
may not distribute the Program at all.  For example, if a patent
license would not permit royalty-free redistribution of the Program by
all those who receive copies directly or indirectly through you, then
the only way you could satisfy both it and this License would be to
refrain entirely from distribution of the Program.

If any portion of this section is held invalid or unenforceable under
any particular circumstance, the balance of the section is intended to
apply and the section as a whole is intended to apply in other
circumstances.

It is not the purpose of this section to induce you to infringe any
patents or other property right claims or to contest validity of any
such claims; this section has the sole purpose of protecting the
integrity of the free software distribution system, which is
implemented by public license practices.  Many people have made
generous contributions to the wide range of software distributed
through that system in reliance on consistent application of that
system; it is up to the author/donor to decide if he or she is willing
to distribute software through any other system and a licensee cannot
impose that choice.

This section is intended to make thoroughly clear what is believed to
be a consequence of the rest of this License.

  8. If the distribution and/or use of the Program is restricted in
certain countries either by patents or by copyrighted interfaces, the
original copyright holder who places the Program under this License
may add an explicit geographical distribution limitation excluding
those countries, so that distribution is permitted only in or among
countries not thus excluded.  In such case, this License incorporates
the limitation as if written in the body of this License.

  9. The Free Software Foundation may publish revised and/or new versions
of the General Public License from time to time.  Such new versions will
be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

Each version is given a distinguishing version number.  If the Program
specifies a version number of this License which applies to it and "any
later version", you have the option of following the terms and conditions
either of that version or of any later version published by the Free
Software Foundation.  If the Program does not specify a version number of
this License, you may choose any version ever published by the Free Software
Foundation.

  10. If you wish to incorporate parts of the Program into other free
programs whose distribution conditions are different, write to the author
to ask for permission.  For software which is copyrighted by the Free
Software Foundation, write to the Free Software Foundation; we sometimes
make exceptions for this.  Our decision will be guided by the two goals
of preserving the free status of all derivatives of our free software and
of promoting the sharing and reuse of software generally.

                            NO WARRANTY

  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
REPAIR OR CORRECTION.

  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
POSSIBILITY OF SUCH DAMAGES.

                     END OF TERMS AND CONDITIONS

            How to Apply These Terms to Your New Programs

  If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

  To do so, attach the following notices to the program.  It is safest
to attach them to the start of each source file to most effectively
convey the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    {description}
    Copyright (C) {year}  {fullname}

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

Also add information on how to contact you by electronic and paper mail.

If the program is interactive, make it output a short notice like this
when it starts in an interactive mode:

    Gnomovision version 69, Copyright (C) year name of author
    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it
    under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate
parts of the General Public License.  Of course, the commands you use may
be called something other than `show w' and `show c'; they could even be
mouse-clicks or menu items--whatever suits your program.

You should also get your employer (if you work as a programmer) or your
school, if any, to sign a "copyright disclaimer" for the program, if
necessary.  Here is a sample; alter the names:

  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
  `Gnomovision' (which makes passes at compilers) written by James Hacker.

  {signature of Ty Coon}, 1 April 1989
  Ty Coon, President of Vice

This General Public License does not permit incorporating your program into
proprietary programs.  If your program is a subroutine library, you may
consider it more useful to permit linking proprietary applications with the
library.  If this is what you want to do, use the GNU Lesser General
Public License instead of this License.


================================================
FILE: R/RcppExports.R
================================================
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

# Rcpp-generated wrapper: forwards its arguments, unchanged and in the order
# listed, to the compiled routine `_celda_decontXEM` and returns that
# routine's result.
# NOTE(review): argument types and the computation itself are defined in the
# C++ source, which is not visible from this file.
decontXEM <- function(counts, counts_colsums, theta, estimate_eta, eta, phi, z, estimate_delta, delta, pseudocount) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_decontXEM', PACKAGE = 'celda', counts, counts_colsums, theta, estimate_eta, eta, phi, z, estimate_delta, delta, pseudocount)
}

# Rcpp-generated wrapper: passes its six arguments straight through to the
# compiled routine `_celda_decontXLogLik` and returns that routine's result.
# NOTE(review): presumably a log-likelihood, judging by the name only --
# confirm against the C++ source.
decontXLogLik <- function(counts, theta, eta, phi, z, pseudocount) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_decontXLogLik', PACKAGE = 'celda', counts, theta, eta, phi, z, pseudocount)
}

# Rcpp-generated wrapper: passes `counts`, `theta`, `z` and `pseudocount`
# straight through to the compiled routine `_celda_decontXInitialize` and
# returns that routine's result.
# NOTE(review): what is initialised is defined in the C++ source, not here.
decontXInitialize <- function(counts, theta, z, pseudocount) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_decontXInitialize', PACKAGE = 'celda', counts, theta, z, pseudocount)
}

# Rcpp-generated wrapper: passes its six arguments straight through to the
# compiled routine `_celda_calculateNativeMatrix` and returns that routine's
# result.
# NOTE(review): the shape and meaning of the returned matrix are defined in
# the C++ source -- confirm there before relying on them.
calculateNativeMatrix <- function(counts, theta, eta, phi, z, pseudocount) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_calculateNativeMatrix', PACKAGE = 'celda', counts, theta, eta, phi, z, pseudocount)
}

# Rcpp-generated wrapper: forwards its arguments, unchanged and in order, to
# the compiled routine `_celda_cG_calcGibbsProbY_Simple` and returns that
# routine's result.
# Note that the argument order here (counts first, index eighth) differs from
# the other cG_CalcGibbsProbY* wrappers in this file, which take `index`
# first.
cG_calcGibbsProbY_Simple <- function(counts, nGbyTS, nTSbyC, nbyTS, nbyG, y, L, index, gamma, beta, delta) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_cG_calcGibbsProbY_Simple', PACKAGE = 'celda', counts, nGbyTS, nTSbyC, nbyTS, nbyG, y, L, index, gamma, beta, delta)
}

# Rcpp-generated wrapper: forwards its arguments, unchanged and in order, to
# the compiled routine `_celda_cG_CalcGibbsProbY_ori` and returns that
# routine's result. Shares its argument list with cG_CalcGibbsProbY and
# cG_CalcGibbsProbY_fastRow.
# NOTE(review): "_ori" presumably marks the original implementation --
# confirm against the C++ source.
cG_CalcGibbsProbY_ori <- function(index, counts, nTSbyC, nbyTS, nGbyTS, nbyG, y, L, nG, lg_beta, lg_gamma, lg_delta, delta) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_cG_CalcGibbsProbY_ori', PACKAGE = 'celda', index, counts, nTSbyC, nbyTS, nGbyTS, nbyG, y, L, nG, lg_beta, lg_gamma, lg_delta, delta)
}

# Rcpp-generated wrapper: forwards its arguments, unchanged and in order, to
# the compiled routine `_celda_cG_CalcGibbsProbY_fastRow` and returns that
# routine's result. Shares its argument list with cG_CalcGibbsProbY and
# cG_CalcGibbsProbY_ori.
# NOTE(review): how this variant differs from the others is defined in the
# C++ source, not here.
cG_CalcGibbsProbY_fastRow <- function(index, counts, nTSbyC, nbyTS, nGbyTS, nbyG, y, L, nG, lg_beta, lg_gamma, lg_delta, delta) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_cG_CalcGibbsProbY_fastRow', PACKAGE = 'celda', index, counts, nTSbyC, nbyTS, nGbyTS, nbyG, y, L, nG, lg_beta, lg_gamma, lg_delta, delta)
}

# Rcpp-generated wrapper: forwards its arguments, unchanged and in order, to
# the compiled routine `_celda_cG_CalcGibbsProbY` and returns that routine's
# result. Shares its argument list with the `_ori` and `_fastRow` variants.
# NOTE(review): argument types and the computation itself are defined in the
# C++ source, which is not visible from this file.
cG_CalcGibbsProbY <- function(index, counts, nTSbyC, nbyTS, nGbyTS, nbyG, y, L, nG, lg_beta, lg_gamma, lg_delta, delta) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_cG_CalcGibbsProbY', PACKAGE = 'celda', index, counts, nTSbyC, nbyTS, nGbyTS, nbyG, y, L, nG, lg_beta, lg_gamma, lg_delta, delta)
}

#' Fast matrix multiplication for double x int
#'
#' @param A a double matrix
#' @param B an integer matrix
#' @return An integer matrix representing the product of A and B
eigenMatMultInt <- function(A, B) {
    # Rcpp-generated wrapper: A and B are handed unchanged to the compiled
    # routine `_celda_eigenMatMultInt` in the celda shared library.
    # NOTE(review): the documented return type (integer) for a double x int
    # product should be confirmed against the C++ source.
    .Call('_celda_eigenMatMultInt', PACKAGE = 'celda', A, B)
}

#' Fast matrix multiplication for double x double
#'
#' @param A a double matrix
#' @param B an integer matrix
#' @return An integer matrix representing the product of A and B
eigenMatMultNumeric <- function(A, B) {
    # Rcpp-generated wrapper: A and B are handed unchanged to the compiled
    # routine `_celda_eigenMatMultNumeric` in the celda shared library.
    # NOTE(review): the title says "double x double" while the @param/@return
    # text above still says "integer" (it matches eigenMatMultInt word for
    # word) -- looks like a copy-paste; fix in the C++ roxygen source after
    # confirming the real types.
    .Call('_celda_eigenMatMultNumeric', PACKAGE = 'celda', A, B)
}

#' Fast normalization for numeric matrix
#'
#' @param R_counts An integer matrix
#' @param R_alpha A double value to be added to the matrix as a pseudocount
#' @return A numeric matrix where the columns have been normalized to proportions
fastNormProp <- function(R_counts, R_alpha) {
    # Rcpp-generated wrapper: both arguments are handed unchanged to the
    # compiled routine `_celda_fastNormProp` in the celda shared library.
    .Call('_celda_fastNormProp', PACKAGE = 'celda', R_counts, R_alpha)
}

#' Fast normalization for numeric matrix
#'
#' @param R_counts An integer matrix
#' @param R_alpha A double value to be added to the matrix as a pseudocount
#' @return A numeric matrix where the columns have been normalized to proportions
fastNormPropLog <- function(R_counts, R_alpha) {
    # Rcpp-generated wrapper: both arguments are handed unchanged to the
    # compiled routine `_celda_fastNormPropLog` in the celda shared library.
    # NOTE(review): the roxygen text above is identical to fastNormProp; the
    # name suggests an additional log transform -- confirm against the C++
    # source and update the roxygen there.
    .Call('_celda_fastNormPropLog', PACKAGE = 'celda', R_counts, R_alpha)
}

#' Fast normalization for numeric matrix
#'
#' @param R_counts An integer matrix
#' @param R_alpha A double value to be added to the matrix as a pseudocount
#' @return A numeric matrix where the columns have been normalized to proportions
fastNormPropSqrt <- function(R_counts, R_alpha) {
    # Rcpp-generated wrapper: both arguments are handed unchanged to the
    # compiled routine `_celda_fastNormPropSqrt` in the celda shared library.
    # NOTE(review): the roxygen text above is identical to fastNormProp; the
    # name suggests an additional square-root transform -- confirm against
    # the C++ source and update the roxygen there.
    .Call('_celda_fastNormPropSqrt', PACKAGE = 'celda', R_counts, R_alpha)
}

#' Get row and column indices of nonzero elements in the matrix
#'
#' @param R_counts A matrix
#' @return An integer matrix where each row is a row, column indices pair
nonzero <- function(R_counts) {
    # Rcpp-generated wrapper: R_counts is handed unchanged to the compiled
    # routine `_celda_nonzero` in the celda shared library.
    .Call('_celda_nonzero', PACKAGE = 'celda', R_counts)
}

# Rcpp-generated wrapper: passes `counts`, `group` and `K` straight through
# to the compiled routine `_celda_colSumByGroupSparse` and returns that
# routine's result.
# NOTE(review): presumably `counts` is a sparse matrix and `K` the number of
# groups -- neither is visible here; confirm against the C++ source.
colSumByGroupSparse <- function(counts, group, K) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_colSumByGroupSparse', PACKAGE = 'celda', counts, group, K)
}

# Rcpp-generated wrapper: passes `counts`, `group` and `L` straight through
# to the compiled routine `_celda_rowSumByGroupSparse` and returns that
# routine's result.
# NOTE(review): presumably `counts` is a sparse matrix and `L` the number of
# groups -- neither is visible here; confirm against the C++ source.
rowSumByGroupSparse <- function(counts, group, L) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_rowSumByGroupSparse', PACKAGE = 'celda', counts, group, L)
}

# Rcpp-generated wrapper: passes `counts`, `px`, `group`, `pgroup` and `K`
# straight through to the compiled routine `_celda_colSumByGroupChangeSparse`
# and returns that routine's result.
# NOTE(review): `px` / `pgroup` look like a previous result and previous
# grouping used for an incremental update -- an inference from the names
# only; confirm against the C++ source.
colSumByGroupChangeSparse <- function(counts, px, group, pgroup, K) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_colSumByGroupChangeSparse', PACKAGE = 'celda', counts, px, group, pgroup, K)
}

# Rcpp-generated wrapper: passes `counts`, `px`, `group`, `pgroup` and `L`
# straight through to the compiled routine `_celda_rowSumByGroupChangeSparse`
# and returns that routine's result.
# NOTE(review): `px` / `pgroup` look like a previous result and previous
# grouping used for an incremental update -- an inference from the names
# only; confirm against the C++ source.
rowSumByGroupChangeSparse <- function(counts, px, group, pgroup, L) {
    # PACKAGE = 'celda' limits the symbol lookup to the celda shared library.
    .Call('_celda_rowSumByGroupChangeSparse', PACKAGE = 'celda', counts, px, group, pgroup, L)
}



================================================
FILE: R/aaa.R
================================================
# Base S4 class for celda model objects; the concrete model classes extend it.
setClass(
    Class = "celdaModel",
    slots = c(
        # K, L, model priors, checksum
        params = "list",
        names = "list",
        completeLogLik = "numeric",
        finalLogLik = "numeric",
        # z and or y
        clusters = "list"
    )
)

# celdaModel subclass that adds one slot: the sample labels, held as a factor.
setClass(
    Class = "celda_C",
    contains = "celdaModel",
    slots = c(sampleLabel = "factor")
)

# celdaModel subclass that adds no slots of its own.
setClass(
    Class = "celda_G",
    contains = "celdaModel"
)

# Inherits from both celda_C and celda_G (and so, through them, from
# celdaModel); adds no slots of its own.
setClass(
    Class = "celda_CG",
    contains = c("celda_C", "celda_G")
)

# Container class whose slots are read by the runParams() and resList()
# accessors.
setClass(
    Class = "celdaList",
    slots = c(
        runParams = "data.frame",
        resList = "list",
        countChecksum = "character",
        perplexity = "matrix",
        celdaGridSearchParameters = "list"
    )
)


================================================
FILE: R/accessors.R
================================================
#' @title Get or set the cell cluster labels from a celda
#'  \linkS4class{SingleCellExperiment} object or celda model
#'  object.
#' @description Return or set the cell cluster labels determined
#'  by \link{celda_C} or \link{celda_CG} models.
#' @param x Can be one of
#'  \itemize{
#'  \item A \linkS4class{SingleCellExperiment} object returned by
#'  \link{celda_C}, or \link{celda_CG}, with the matrix
#'  located in the \code{useAssay} assay slot. The
#'  \link{altExp} slot with name \code{altExpName} will
#'  be used. Rows represent features and columns represent cells.
#'  \item Celda model object.}
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param value Vector of cell cluster labels for replacement. It is
#'  converted to a factor if it is not one already. Works
#'  only if \code{x} is a \linkS4class{SingleCellExperiment} object.
#' @return One of
#' \itemize{
#'  \item If \code{x} is a \linkS4class{SingleCellExperiment} object, the
#'  cell cluster labels for each cell in \code{x}, as stored in the
#'  \code{celda_cell_cluster} column of the \link{altExp} column data
#'  (labels assigned with \code{celdaClusters<-} are stored as a factor).
#'  \item List if \code{x} is a celda model object. Contains cell cluster
#'  labels (for celda_C and celda_CG
#'  Models) and/or feature module labels (for celda_G and celda_CG Models).}
#' @export
setGeneric(
    name = "celdaClusters",
    def = function(x, altExpName = "featureSubset") {
        standardGeneric("celdaClusters")
    }
)


#' @rdname celdaClusters
#' @examples
#' data(sceCeldaCG)
#' celdaClusters(sceCeldaCG)
#' @export
setMethod(
    f = "celdaClusters",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x, altExpName = "featureSubset") {
        # The labels are a column of the colData of the requested altExp.
        subsetSce <- SingleCellExperiment::altExp(x, altExpName)
        cellData <- SummarizedExperiment::colData(subsetSce)
        cellData$celda_cell_cluster
    }
)


#' @rdname celdaClusters
#' @examples
#' data(celdaCGMod)
#' celdaClusters(celdaCGMod)
#' @export
setMethod(
    f = "celdaClusters",
    signature = signature(x = "celdaModel"),
    definition = function(x) {
        # A model object carries its labels in the `clusters` slot.
        x@clusters
    }
)


#' @rdname celdaClusters
#' @export
setGeneric(
    name = "celdaClusters<-",
    def = function(x, altExpName = "featureSubset", value) {
        standardGeneric("celdaClusters<-")
    }
)


#' @rdname celdaClusters
#' @export
setMethod(
    f = "celdaClusters<-",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x, altExpName = "featureSubset", value) {
        subsetSce <- SingleCellExperiment::altExp(x, altExpName)

        # Labels are stored as a factor; tell the user when coercion happens.
        if (is.factor(value)) {
            clusterFactor <- value
        } else {
            message("Cluster labels are converted to factors.")
            clusterFactor <- as.factor(value)
        }

        # Write the labels into the altExp, then put the altExp back into x.
        SummarizedExperiment::colData(subsetSce)$celda_cell_cluster <-
            clusterFactor
        SingleCellExperiment::altExp(x, altExpName) <- subsetSce
        x
    }
)


#' @title Get or set the feature module labels from a celda
#'  \linkS4class{SingleCellExperiment} object.
#' @description Return or set the feature module cluster labels determined
#'  by \link{celda_G} or \link{celda_CG} models.
#' @param sce A \linkS4class{SingleCellExperiment} object returned by
#'  \link{celda_G}, or \link{celda_CG}, with the matrix
#'  located in the \code{useAssay} assay slot.
#'  Rows represent features and columns represent cells.
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param value Vector of feature module labels for replacement. It is
#'  converted to a factor if it is not one already.
#' @return The feature module labels for each feature in \code{sce}, as
#'  stored in the \code{celda_feature_module} column of the \link{altExp}
#'  row data (labels assigned with \code{celdaModules<-} are stored as a
#'  factor).
#' @export
setGeneric(
    name = "celdaModules",
    def = function(sce, altExpName = "featureSubset") {
        standardGeneric("celdaModules")
    }
)


#' @rdname celdaModules
#' @examples
#' data(sceCeldaCG)
#' celdaModules(sceCeldaCG)
#' @export
setMethod(
    f = "celdaModules",
    signature = signature(sce = "SingleCellExperiment"),
    definition = function(sce, altExpName = "featureSubset") {
        # The labels are a column of the rowData of the requested altExp.
        subsetSce <- SingleCellExperiment::altExp(sce, altExpName)
        featureData <- SummarizedExperiment::rowData(subsetSce)
        featureData$celda_feature_module
    }
)


#' @rdname celdaModules
#' @export
setGeneric(
    name = "celdaModules<-",
    def = function(sce, altExpName = "featureSubset", value) {
        standardGeneric("celdaModules<-")
    }
)


#' @rdname celdaModules
#' @export
setMethod(
    f = "celdaModules<-",
    signature = signature(sce = "SingleCellExperiment"),
    definition = function(sce, altExpName = "featureSubset", value) {
        subsetSce <- SingleCellExperiment::altExp(sce, altExpName)

        # Labels are stored as a factor; tell the user when coercion happens.
        if (is.factor(value)) {
            moduleFactor <- value
        } else {
            message("Module labels are converted to factors.")
            moduleFactor <- as.factor(value)
        }

        # Write the labels into the altExp, then put the altExp back into sce.
        SummarizedExperiment::rowData(subsetSce)$celda_feature_module <-
            moduleFactor
        SingleCellExperiment::altExp(sce, altExpName) <- subsetSce
        sce
    }
)


#' @title Get or set sample labels from a celda
#'  \linkS4class{SingleCellExperiment} object
#' @description Return or set the sample labels for the cells in \code{x}.
#' @param x Can be one of
#'  \itemize{
#'  \item A \linkS4class{SingleCellExperiment} object returned by
#'  \link{celda_C}, \link{celda_G}, or \link{celda_CG}, with the matrix
#'  located in the \code{useAssay} assay slot.
#'  Rows represent features and columns represent cells.
#'  \item A celda model object.}
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param value Vector of sample labels for replacement. It is converted to
#'  a factor if it is not one already. Works
#'  only if \code{x} is a \linkS4class{SingleCellExperiment} object.
#' @return The sample labels provided at model
#'  creation, or those automatically generated by celda. For a celda model
#'  object this is the factor held in its \code{sampleLabel} slot; labels
#'  assigned with \code{sampleLabel<-} are likewise stored as a factor.
#' @export
setGeneric(
    name = "sampleLabel",
    def = function(x, altExpName = "featureSubset") {
        standardGeneric("sampleLabel")
    }
)


#' @rdname sampleLabel
#' @examples
#' data(sceCeldaCG)
#' sampleLabel(sceCeldaCG)
#' @export
setMethod(
    f = "sampleLabel",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x, altExpName = "featureSubset") {
        # The labels are a column of the colData of the requested altExp.
        subsetSce <- SingleCellExperiment::altExp(x, altExpName)
        cellData <- SummarizedExperiment::colData(subsetSce)
        cellData$celda_sample_label
    }
)


#' @rdname sampleLabel
#' @export
setGeneric(
    name = "sampleLabel<-",
    def = function(x, altExpName = "featureSubset", value) {
        standardGeneric("sampleLabel<-")
    }
)
#' @rdname sampleLabel
#' @export
setMethod(
    f = "sampleLabel<-",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x, altExpName = "featureSubset", value) {
        subsetSce <- SingleCellExperiment::altExp(x, altExpName)

        # Labels are stored as a factor; tell the user when coercion happens.
        if (is.factor(value)) {
            sampleFactor <- value
        } else {
            message("Sample labels are converted to factors.")
            sampleFactor <- as.factor(value)
        }

        # Write the labels into the altExp, then put the altExp back into x.
        SummarizedExperiment::colData(subsetSce)$celda_sample_label <-
            sampleFactor
        SingleCellExperiment::altExp(x, altExpName) <- subsetSce
        x
    }
)


#' @rdname sampleLabel
#' @examples
#' data(celdaCGMod)
#' sampleLabel(celdaCGMod)
#' @export
setMethod(
    f = "sampleLabel",
    signature = signature(x = "celdaModel"),
    definition = function(x) {
        # Reads the `sampleLabel` slot directly from the model object.
        return(x@sampleLabel)
    }
)


#' @title Get parameter values provided for celdaModel creation
#' @description Retrieves the K/L, model priors (e.g. alpha, beta),
#'  and count matrix checksum parameters that were supplied when the given
#'  celdaModel was created.
#' @param celdaMod celdaModel. Options available in
#'  \code{celda::availableModels}.
#' @return List. Contains the model-specific parameters for the provided celda
#'  model object depending on its class.
#' @export
setGeneric(
    name = "params",
    def = function(celdaMod) {
        standardGeneric("params")
    }
)


#' @rdname params
#' @examples
#' data(celdaCGMod)
#' params(celdaCGMod)
#' @export
setMethod(
    f = "params",
    signature = signature(celdaMod = "celdaModel"),
    definition = function(celdaMod) {
        # The parameters are kept in the `params` slot of the model.
        return(celdaMod@params)
    }
)


#' @title Get feature, cell and sample names from a celdaModel
#' @description Retrieves the row, column, and sample names used to generate
#'  a celdaModel.
#' @param celdaMod celdaModel. Options available in
#'  \code{celda::availableModels}.
#' @return List. Contains row, column, and sample character vectors
#'  corresponding to the values provided when the celdaModel was generated.
#' @export
setGeneric(
    name = "matrixNames",
    def = function(celdaMod) {
        standardGeneric("matrixNames")
    }
)


#' @rdname matrixNames
#' @examples
#' data(celdaCGMod)
#' matrixNames(celdaCGMod)
#' @export
setMethod(
    f = "matrixNames",
    signature = signature(celdaMod = "celdaModel"),
    definition = function(celdaMod) {
        # The names are kept in the `names` slot of the model.
        return(celdaMod@names)
    }
)


#' @title Get run parameters from a celda model
#'  \code{SingleCellExperiment} or \code{celdaList} object
#' @description Returns details on the clustering parameters and model
#'  priors recorded in the celdaList object when it was created.
#' @param x An object of class \linkS4class{SingleCellExperiment} or class
#'  \code{celdaList}.
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @return Data Frame. Contains details on the various K/L parameters, chain
#'  parameters, seed, and final log-likelihoods derived for each model in the
#'  provided celdaList.
#' @export
setGeneric(
    name = "runParams",
    def = function(x, altExpName = "featureSubset") {
        standardGeneric("runParams")
    }
)


#' @rdname runParams
#' @examples
#' data(sceCeldaCGGridSearch)
#' runParams(sceCeldaCGGridSearch)
#' @export
setMethod(
    f = "runParams",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x, altExpName = "featureSubset") {
        subsetSce <- SingleCellExperiment::altExp(x, altExpName)
        # The grid-search result sits in the altExp metadata under the key
        # "celda_grid_search"; its `runParams` slot is what gets returned.
        gridSearch <- subsetSce@metadata$celda_grid_search
        gridSearch@runParams
    }
)


#' @examples
#' data(celdaCGGridSearchRes)
#' runParams(celdaCGGridSearchRes)
#' @rdname runParams
#' @export
setMethod(
    f = "runParams",
    signature = signature(x = "celdaList"),
    definition = function(x) {
        # The value is read directly from the 'runParams' slot.
        methods::slot(x, "runParams")
    }
)


#' @title Get final celdaModels from a celda model \code{SCE} or celdaList
#'  object
#' @description Returns all celda models generated during a
#'  \link{celdaGridSearch} run.
#' @param x An object of class \linkS4class{SingleCellExperiment} or
#'  \code{celdaList}.
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @return List. Contains one celdaModel object for each of the parameters
#'  specified in \code{runParams(x)}.
#' @export
setGeneric(
    name = "resList",
    def = function(x, altExpName = "featureSubset") {
        # Dispatch to the method registered for the class of 'x'.
        standardGeneric("resList")
    }
)


#' @examples
#' data(sceCeldaCGGridSearch)
#' celdaCGGridModels <- resList(sceCeldaCGGridSearch)
#' @rdname resList
#' @export
setMethod(
    f = "resList",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x, altExpName = "featureSubset") {
        # The grid search object is kept in the metadata of the alternative
        # experiment under the name "celda_grid_search".
        subsetSce <- SingleCellExperiment::altExp(x, altExpName)
        gridSearch <- subsetSce@metadata$celda_grid_search
        gridSearch@resList
    }
)


#' @examples
#' data(celdaCGGridSearchRes)
#' celdaCGGridModels <- resList(celdaCGGridSearchRes)
#' @rdname resList
#' @export
setMethod(
    f = "resList",
    signature = signature(x = "celdaList"),
    definition = function(x) {
        # The value is read directly from the 'resList' slot.
        methods::slot(x, "resList")
    }
)


#' @title Get celda model from a celda
#'  \link[SingleCellExperiment]{SingleCellExperiment} object
#' @description Return the celda model for \code{sce} returned by
#'  \link{celda_C}, \link{celda_G} or \link{celda_CG}.
#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object
#'  returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @return Character. The celda model. Can be one of "celda_C", "celda_G", or
#'  "celda_CG".
#' @examples
#' data(sceCeldaCG)
#' celdaModel(sceCeldaCG)
#' @export
setGeneric(
    name = "celdaModel",
    def = function(sce, altExpName = "featureSubset") {
        # Dispatch to the method registered for the class of 'sce'.
        standardGeneric("celdaModel")
    }
)


#' @rdname celdaModel
#' @export
setMethod(
    f = "celdaModel",
    signature = signature(sce = "SingleCellExperiment"),
    definition = function(sce, altExpName = "featureSubset") {

        # The requested alternative experiment has to be present already.
        availableAltExps <- SingleCellExperiment::altExpNames(sce)
        if (!(altExpName %in% availableAltExps)) {
            stop(altExpName, " not in 'altExpNames(sce)'. Run ",
                "selectFeatures(sce) first!")
        }

        subsetSce <- SingleCellExperiment::altExp(sce, altExpName)
        validModels <- c("celda_C", "celda_G", "celda_CG")

        # Any error raised while reading or validating the model name is
        # preceded by a hint and then re-raised unchanged.
        hintThenRethrow <- function(e) {
            message("S4Vectors::metadata(altExp(sce,",
                " altExpName))$celda_parameters$model must",
                " exist! Try running celda model (celda_C, celda_CG, or",
                " celda_G) first.")
            stop(e)
        }

        # The value of the tryCatch() call is the value of the method.
        tryCatch({
            modelName <-
                S4Vectors::metadata(subsetSce)$celda_parameters$model
            if (!(modelName %in% validModels)) {
                stop("S4Vectors::metadata(altExp(sce,",
                    " altExpName))$celda_parameters$model must be",
                    " one of 'celda_C', 'celda_G', or 'celda_CG'")
            }
            modelName
        }, error = hintThenRethrow)
    }
)


#' @title Get perplexity for every model in a celdaList
#' @description Returns perplexity for each model in a celdaList as calculated
#'  by `perplexity()`.
#' @param celdaList An object of class celdaList.
#' @return The contents of the \code{perplexity} slot of \code{celdaList},
#'  i.e. the perplexity results stored for the models in the provided celda list.
#' @examples
#' data(celdaCGGridSearchRes)
#' celdaCGGridModelPerplexities <- celdaPerplexity(celdaCGGridSearchRes)
#' @export
setGeneric(
    name = "celdaPerplexity",
    def = function(celdaList) {
        # Dispatch to the method registered for the class of 'celdaList'.
        standardGeneric("celdaPerplexity")
    }
)


#' @title Get perplexity for every model in a celdaList
#' @description Returns perplexity for each model in a celdaList as calculated
#'  by `perplexity()`.
#' @param celdaList An object of class celdaList.
#' @return The contents of the \code{perplexity} slot of \code{celdaList},
#'  i.e. the perplexity results stored for the models in the provided celda list.
#' @examples
#' data(celdaCGGridSearchRes)
#' celdaCGGridModelPerplexities <- celdaPerplexity(celdaCGGridSearchRes)
#' @export
setMethod(
    f = "celdaPerplexity",
    signature = c(celdaList = "celdaList"),
    definition = function(celdaList) {
        # The value is read directly from the 'perplexity' slot.
        methods::slot(celdaList, "perplexity")
    }
)


#' @title Get the MD5 hash of the count matrix from the celdaList
#' @description Returns the MD5 hash of the count matrix used to generate the
#'  celdaList.
#' @param celdaList An object of class celdaList.
#' @return A character string of length 32 containing the MD5 digest of
#'  the count matrix.
#' @examples
#' data(celdaCGGridSearchRes)
#' countChecksum <- countChecksum(celdaCGGridSearchRes)
#' @export
setGeneric(
    name = "countChecksum",
    def = function(celdaList) {
        # Dispatch to the method registered for the class of 'celdaList'.
        standardGeneric("countChecksum")
    }
)


#' @title Get the MD5 hash of the count matrix from the celdaList
#' @description Returns the MD5 hash of the count matrix used to generate the
#'  celdaList.
#' @param celdaList An object of class celdaList.
#' @return A character string of length 32 containing the MD5 digest of
#'  the count matrix.
#' @examples
#' data(celdaCGGridSearchRes)
#' countChecksum <- countChecksum(celdaCGGridSearchRes)
#' @export
setMethod(
    f = "countChecksum",
    signature = c(celdaList = "celdaList"),
    definition = function(celdaList) {
        # The value is read directly from the 'countChecksum' slot.
        methods::slot(celdaList, "countChecksum")
    }
)


================================================
FILE: R/celdaGridSearch.R
================================================
#' @title Run Celda in parallel with multiple parameters
#' @description Run Celda with different combinations of parameters and
#'  multiple chains in parallel. The variable \link{availableModels} contains
#'  the potential models that can be utilized. Different parameters to be tested
#'  should be stored in a list and passed to the argument \code{paramsTest}.
#'  Fixed parameters to be used in all models, such as \code{sampleLabel}, can
#'  be passed as a list to the argument \code{paramsFixed}. When
#'  \code{verbose = TRUE}, output from each chain will be sent to a log file
#'  but not be displayed in \code{stdout}.
#' @param x A numeric \link{matrix} of counts or a
#'  \linkS4class{SingleCellExperiment}
#'  with the matrix located in the assay slot under \code{useAssay}.
#'  Rows represent features and columns represent cells.
#' @param useAssay A string specifying the name of the
#'  \link{assay} slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param model Celda model. Options available in \link{availableModels}.
#' @param paramsTest List. A list denoting the combinations of parameters to
#'  run in a celda model. For example,
#'  \code{list(K = seq(5, 10), L = seq(15, 20))}
#'  will run all combinations of K from 5 to 10 and L from 15 to 20 in model
#'  \link{celda_CG}.
#' @param paramsFixed List. A list denoting additional parameters to use in
#'  each celda model. Default NULL.
#' @param maxIter Integer. Maximum number of iterations of sampling to
#'  perform. Default 200.
#' @param nchains Integer. Number of random cluster initializations. Default 3.
#' @param cores Integer. The number of cores to use for parallel estimation of
#'  chains. Default 1.
#' @param bestOnly Logical. Whether to return only the chain with the highest
#'  log likelihood per combination of parameters or return all chains. Default
#'  TRUE.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#'  a default value of 12345 is used. Seed values
#'  \code{seq(seed, (seed + nchains - 1))} will be supplied to each chain in
#'  \code{nchains}. If NULL, no calls to
#'  \link[withr]{with_seed} are made.
#' @param perplexity Logical. Whether to calculate perplexity for each model.
#'  If FALSE, then perplexity can be calculated later with
#'  \link{resamplePerplexity}. Default TRUE.
#' @param verbose Logical. Whether to print log messages during celda chain
#'  execution. Default TRUE.
#' @param logfilePrefix Character. Prefix for log files from worker threads
#'  and main process. Default "Celda".
#' @return A \linkS4class{SingleCellExperiment} object. Function
#'  parameter settings and celda model results are stored in the
#'  \link{metadata} \code{"celda_grid_search"} slot.
#' @seealso \link{celda_G} for feature clustering, \link{celda_C} for
#'  clustering of cells, and \link{celda_CG} for simultaneous clustering of
#'  features and cells. \link{subsetCeldaList} can subset the \code{celdaList}
#'  object. \link{selectBestModel} can get the best model for each combination
#'  of parameters.
#' @import foreach
#' @importFrom doParallel registerDoParallel
#' @importFrom methods is
#' @examples
#' \dontrun{
#' data(celdaCGSim)
#' ## Run various combinations of parameters with 'celdaGridSearch'
#' celdaCGGridSearchRes <- celdaGridSearch(celdaCGSim$counts,
#'   model = "celda_CG",
#'   paramsTest = list(K = seq(4, 6), L = seq(9, 11)),
#'   paramsFixed = list(sampleLabel = celdaCGSim$sampleLabel),
#'   bestOnly = TRUE,
#'   nchains = 1,
#'   cores = 1)
#' }
#' @export
setGeneric(
    name = "celdaGridSearch",
    def = function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        model,
        paramsTest,
        paramsFixed = NULL,
        maxIter = 200,
        nchains = 3,
        cores = 1,
        bestOnly = TRUE,
        seed = 12345,
        perplexity = TRUE,
        verbose = TRUE,
        logfilePrefix = "Celda") {
        # Dispatch to the method registered for the class of 'x'.
        standardGeneric("celdaGridSearch")
    }
)


#' @rdname celdaGridSearch
#' @export
setMethod(
    f = "celdaGridSearch",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        model,
        paramsTest,
        paramsFixed = NULL,
        maxIter = 200,
        nchains = 3,
        cores = 1,
        bestOnly = TRUE,
        seed = 12345,
        perplexity = TRUE,
        verbose = TRUE,
        logfilePrefix = "Celda") {

        # Guard: the alternative experiment must exist in 'x'.
        if (!(altExpName %in% SingleCellExperiment::altExpNames(x))) {
            stop(altExpName, " not in 'altExpNames(x)'. Run ",
                "selectFeatures(x) first!")
        }
        subsetSce <- SingleCellExperiment::altExp(x, altExpName)

        # Guard: the requested assay must exist in the alternative experiment.
        if (!(useAssay %in% SummarizedExperiment::assayNames(subsetSce))) {
            stop(useAssay, " not in assayNames(altExp(x, altExpName))")
        }
        countMatrix <- SummarizedExperiment::assay(subsetSce, i = useAssay)

        # Run the grid search with the internal (dot-prefixed) model function.
        gridSearchRes <- .celdaGridSearch(
            counts = countMatrix,
            model = paste0(".", model),
            paramsTest = paramsTest,
            paramsFixed = paramsFixed,
            maxIter = maxIter,
            nchains = nchains,
            cores = cores,
            bestOnly = bestOnly,
            seed = seed,
            perplexity = perplexity,
            verbose = verbose,
            logfilePrefix = logfilePrefix
        )

        # Store the results and the call settings in the metadata of the
        # alternative experiment, then put it back into 'x'.
        subsetSce <- .createSCEceldaGridSearch(
            celdaList = gridSearchRes,
            sce = subsetSce,
            xClass = "SingleCellExperiment",
            useAssay = useAssay,
            model = model,
            paramsTest = paramsTest,
            paramsFixed = paramsFixed,
            maxIter = maxIter,
            seed = seed,
            nchains = nchains,
            cores = cores,
            bestOnly = bestOnly,
            perplexity = perplexity,
            verbose = verbose,
            logfilePrefix = logfilePrefix
        )
        SingleCellExperiment::altExp(x, altExpName) <- subsetSce
        x
    }
)


#' @rdname celdaGridSearch
#' @export
setMethod(
    f = "celdaGridSearch",
    signature = signature(x = "matrix"),
    definition = function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        model,
        paramsTest,
        paramsFixed = NULL,
        maxIter = 200,
        nchains = 3,
        cores = 1,
        bestOnly = TRUE,
        seed = 12345,
        perplexity = TRUE,
        verbose = TRUE,
        logfilePrefix = "Celda") {

        # Wrap the matrix in an SCE whose single assay is named 'useAssay',
        # and register a copy of it as the alternative experiment.
        assayList <- structure(list(x), names = useAssay)
        sce <- SingleCellExperiment::SingleCellExperiment(assays = assayList)
        SingleCellExperiment::altExp(sce, altExpName) <- sce

        # Run the grid search with the internal (dot-prefixed) model function.
        gridSearchRes <- .celdaGridSearch(
            counts = x,
            model = paste0(".", model),
            paramsTest = paramsTest,
            paramsFixed = paramsFixed,
            maxIter = maxIter,
            nchains = nchains,
            cores = cores,
            bestOnly = bestOnly,
            seed = seed,
            perplexity = perplexity,
            verbose = verbose,
            logfilePrefix = logfilePrefix
        )

        # Store the results and the call settings in the metadata of the
        # alternative experiment, then put it back into the new SCE.
        subsetSce <- .createSCEceldaGridSearch(
            celdaList = gridSearchRes,
            sce = SingleCellExperiment::altExp(sce, altExpName),
            xClass = "matrix",
            useAssay = useAssay,
            model = model,
            paramsTest = paramsTest,
            paramsFixed = paramsFixed,
            maxIter = maxIter,
            seed = seed,
            nchains = nchains,
            cores = cores,
            bestOnly = bestOnly,
            perplexity = perplexity,
            verbose = verbose,
            logfilePrefix = logfilePrefix
        )
        SingleCellExperiment::altExp(sce, altExpName) <- subsetSce
        sce
    }
)


# Internal driver for celdaGridSearch().
#
# Validates 'paramsTest' / 'paramsFixed' against the formals of 'model',
# expands every combination of 'paramsTest' with 'nchains' chains, runs each
# chain on a cluster created by parallel::makeCluster() via foreach, and
# collects the fitted models in a "celdaList" object.
#
# counts        Count matrix; checked with .validateCounts() and converted
#               with .processCounts() before the chains are run.
# model         Character. Name of the internal model function, i.e. the
#               public model name prefixed with "." (the prefix is stripped
#               with substring(model, 2) in messages).
# paramsTest    Named list of parameter values to combine.
# paramsFixed   Named list of parameters passed unchanged to every chain,
#               or NULL.
# maxIter       Passed to every chain as 'maxIter'.
# nchains       Number of chains per parameter combination.
# cores         Number of workers passed to parallel::makeCluster().
# bestOnly      If TRUE, the result is reduced with selectBestModel().
# seed          Base seed; chain k uses seed + k - 1. If NULL, no seeding.
# perplexity    If TRUE, resamplePerplexity() is applied to the result.
# verbose       Passed to .logMessages() and to every chain.
# logfilePrefix Prefix of the per-chain log file names.
#
# Returns the "celdaList" object (after the optional selectBestModel() and
# resamplePerplexity() steps).
.celdaGridSearch <- function(counts,
                            model,
                            paramsTest,
                            paramsFixed,
                            maxIter,
                            nchains,
                            cores,
                            bestOnly,
                            seed,
                            perplexity,
                            verbose,
                            logfilePrefix) {

  ## Check parameters
  .validateCounts(counts)

  modelParams <- as.list(formals(model))
  if (!all(names(paramsTest) %in% names(modelParams))) {
    badParams <- setdiff(names(paramsTest), names(modelParams))
    stop(
      "The following elements in 'paramsTest' are not arguments of '",
      substring(model, 2),
      "': ",
      paste(badParams, collapse = ",")
    )
  }

  if (!is.null(paramsFixed) &&
    !all(names(paramsFixed) %in% names(modelParams))) {
    badParams <- setdiff(names(paramsFixed), names(modelParams))
    stop(
      "The following elements in 'paramsFixed' are not arguments",
      " of '",
      substring(model, 2),
      "': ",
      paste(badParams, collapse = ",")
    )
  }

  # Formals without a default value are required, except 'counts', which is
  # always supplied by this function.
  modelParamsRequired <- setdiff(
    names(modelParams[modelParams == ""]),
    "counts"
  )

  if (!all(modelParamsRequired %in% c(
    names(paramsTest),
    names(paramsFixed)
  ))) {
    missing.params <- setdiff(
      modelParamsRequired,
      c(names(paramsTest), names(paramsFixed))
    )
    stop(
      "The following arguments are not in 'paramsTest' or 'paramsFixed'",
      " but are required for '",
      substring(model, 2),
      "': ",
      paste(missing.params, collapse = ",")
    )
  }

  if (any(c("z.init", "y.init", "sampleLabel") %in% names(paramsTest))) {
    stop(
      "Setting parameters such as 'z.init', 'y.init', and 'sampleLabel'",
      " in 'paramsTest' is not currently supported."
    )
  }

  if (any(c("nchains") %in% names(paramsTest))) {
    warning(
      "Parameter 'nchains' should not be used within the paramsTest",
      " list"
    )
    paramsTest[["nchains"]] <- NULL
  }

  # Pre-generate a set of random seeds to be used for each chain
  if (is.null(seed)) {
    allSeeds <- NULL
  } else {
    allSeeds <- seq(seed, (seed + nchains - 1))
  }

  # Set up parameter combinations for each individual chain.
  # 'chain' is the first factor, so it varies fastest across rows.
  runParams <- base::expand.grid(c(
    chain = list(seq_len(nchains)),
    paramsTest
  ))
  runParams <- cbind(index = seq_len(nrow(runParams)), runParams)

  if (is.null(allSeeds)) {
    runParams <- cbind(runParams,
      seed = rep("NULL", nrow(runParams)))
  } else {
    runParams <- cbind(runParams,
      seed = rep(allSeeds, nrow(runParams) / nchains))
  }

  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = NULL,
    append = FALSE,
    verbose = verbose
  )

  .logMessages("Starting celdaGridSearch with",
    substring(model, 2),
    logfile = NULL,
    append = TRUE,
    verbose = verbose
  )

  .logMessages("Number of cores:",
    cores,
    logfile = NULL,
    append = TRUE,
    verbose = verbose
  )

  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = NULL,
    append = TRUE,
    verbose = verbose
  )

  startTime <- Sys.time()

  # An MD5 checksum of the count matrix. Passed to models so
  # later on, we can check on celda_* model objects which
  # count matrix was used.
  counts <- .processCounts(counts)
  countChecksum <- .createCountChecksum(counts)

  ## Use DoParallel to loop through each combination of parameters
  cl <- parallel::makeCluster(cores)
  # Make sure the worker processes are shut down even when a chain fails and
  # the error propagates out of this function. The flag prevents a second
  # stopCluster() call after the normal shutdown below.
  clusterStopped <- FALSE
  on.exit(if (!clusterStopped) parallel::stopCluster(cl), add = TRUE)
  doParallel::registerDoParallel(cl)
  i <- NULL # Setting visible binding for R CMD CHECK
  resList <- foreach(
    i = seq_len(nrow(runParams)),
    .export = model,
    .combine = c,
    .multicombine = TRUE
  ) %dopar% {

    ## Seed of this chain: rows cycle through the chains, so row i belongs
    ## to chain ((i - 1) %% nchains) + 1. NULL when seeding is disabled.
    if (is.null(seed)) {
      chainSeed <- NULL
    } else {
      chainSeed <- allSeeds[ifelse(i %% nchains == 0,
          nchains, i %% nchains)]
    }

    ## Set up chain parameter list
    current.run <- c(runParams[i, ])
    chainParams <- list()
    for (j in names(paramsTest)) {
      chainParams[[j]] <- current.run[[j]]
    }
    chainParams$counts <- counts
    chainParams$maxIter <- maxIter
    # NOTE(review): 'nchain' presumably reaches the model's 'nchains' formal
    # through partial argument matching -- confirm against the model formals.
    chainParams$nchain <- 1
    chainParams$countChecksum <- countChecksum
    chainParams$verbose <- verbose
    # The seed suffix uses the seed actually applied to this chain.
    # 'chainParams' never carries a 'seed' element, so reading it from there
    # always produced "NULL".
    chainParams$logfile <- paste0(
      logfilePrefix,
      "_",
      paste(paste(
        colnames(runParams), runParams[i, ],
        sep = "-"
      ), collapse = "_"),
      "_Seed-",
      ifelse(is.null(chainSeed), "NULL", chainSeed),
      "_log.txt"
    )

    ## Run model
    if (is.null(chainSeed)) {
      res <- do.call(model, c(chainParams, paramsFixed))
    } else {
      res <- with_seed(chainSeed,
          do.call(model, c(chainParams, paramsFixed)))
    }
    return(list(res))
  }
  parallel::stopCluster(cl)
  clusterStopped <- TRUE

  logliks <- vapply(resList, function(mod) {
    bestLogLikelihood(mod)
  }, double(1))
  runParams <- cbind(runParams, logLikelihood = logliks)

  celdaRes <- methods::new(
    "celdaList",
    runParams = runParams,
    resList = resList,
    countChecksum = countChecksum
  )

  if (isTRUE(bestOnly)) {
    celdaRes <- selectBestModel(celdaRes, asList = TRUE)
  }

  if (isTRUE(perplexity)) {
    .logMessages(
      date(),
      ".. Calculating perplexity",
      append = TRUE,
      verbose = verbose,
      logfile = NULL
    )
    celdaRes <- resamplePerplexity(counts, celdaRes, seed = seed)
  }

  endTime <- Sys.time()
  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = NULL,
    append = TRUE,
    verbose = verbose
  )
  .logMessages("Completed celdaGridSearch. Total time:",
    format(difftime(endTime, startTime)),
    logfile = NULL,
    append = TRUE,
    verbose = verbose
  )
  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = NULL,
    append = TRUE,
    verbose = verbose
  )

  return(celdaRes)
}


#' @title Subset celda model from SCE object returned from
#'  \code{celdaGridSearch}
#' @description Select a subset of models from a
#'  \linkS4class{SingleCellExperiment} object generated by
#'  \link{celdaGridSearch} that match the criteria in the argument
#'  \code{params}.
#' @param x Can be one of
#' \itemize{
#'  \item A \linkS4class{SingleCellExperiment} object returned from
#'  \code{celdaGridSearch}, \code{recursiveSplitModule},
#'  or \code{recursiveSplitCell}. Must contain a list named
#'  \code{"celda_grid_search"} in \code{metadata(x)}.
#'  \item celdaList object.}
#' @param params List. List of parameters used to subset the matching celda
#'  models in list \code{"celda_grid_search"} in \code{metadata(x)}.
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @return One of
#' \itemize{
#'  \item A new \linkS4class{SingleCellExperiment} object containing
#'  all models matching the
#'  provided criteria in \code{params}. If only one celda model result in the
#'  \code{"celda_grid_search"} slot in \code{metadata(x)} matches
#'  the given criteria, a new \linkS4class{SingleCellExperiment} object
#'  with the matching model stored in the
#'  \link{metadata}
#'  \code{"celda_parameters"} slot will be returned. Otherwise, a new
#'  \linkS4class{SingleCellExperiment} object with the subset models stored
#'  in the \link{metadata}
#'  \code{"celda_grid_search"} slot will be returned.
#'  \item A new \code{celdaList} object containing all models matching the
#'  provided criteria in \code{params}. If only one item in the
#'  \code{celdaList} matches the given criteria, the matching model will be
#'  returned directly instead of a \code{celdaList} object.}
#' @seealso \link{celdaGridSearch} can run Celda with multiple parameters and
#'  chains in parallel. \link{selectBestModel} can get the best model for each
#'  combination of parameters.
#' @export
setGeneric(
    name = "subsetCeldaList",
    def = function(x, params, altExpName = "featureSubset") {
        # Dispatch to the method registered for the class of 'x'.
        standardGeneric("subsetCeldaList")
    }
)


#' @rdname subsetCeldaList
#' @examples
#' data(sceCeldaCGGridSearch)
#' sceK5L10 <- subsetCeldaList(sceCeldaCGGridSearch,
#'     params = list(K = 5, L = 10))
#' @export
setMethod(
    f = "subsetCeldaList",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x, params, altExpName = "featureSubset") {

        allRunParams <- runParams(x, altExpName = altExpName)

        ## Every criterion must name a column of the run parameter table
        badParams <- setdiff(names(params), colnames(allRunParams))
        if (length(badParams) > 0) {
            stop("The following elements in 'params' are not columns in",
                " runParams(x, altExpName = altExpName) ",
                paste(badParams, collapse = ",")
            )
        }

        ## Narrow the table one criterion at a time; fail as soon as no run
        ## is left
        newRunParams <- allRunParams
        for (paramName in names(params)) {
            keep <- newRunParams[, paramName] %in% params[[paramName]]
            newRunParams <- newRunParams[keep, , drop = FALSE]

            if (nrow(newRunParams) == 0) {
                stop("No runs matched the criteria given in 'params'. Check",
                    " 'runParams(x, altExpName = altExpName)' for complete",
                    " list of parameters used",
                    " to generate 'x'.")
            }
        }

        ## Positions of the remaining runs in the original table
        ix <- match(newRunParams$index, allRunParams$index)
        subsetSce <- SingleCellExperiment::altExp(x, altExpName)

        if (length(ix) != 1) {
            ## Several runs left: keep a reduced grid search object
            gridSearch <- subsetSce@metadata$celda_grid_search
            gridSearch@runParams <- as.data.frame(newRunParams)
            gridSearch@resList <- gridSearch@resList[ix]
            subsetSce@metadata$celda_grid_search <- gridSearch
        } else {
            ## Exactly one run left: convert it with .subsetCeldaListSCE()
            subsetSce <- .subsetCeldaListSCE(subsetSce, ix)
        }

        SingleCellExperiment::altExp(x, altExpName) <- subsetSce
        x
    }
)


#' @rdname subsetCeldaList
#' @examples
#' data(celdaCGGridSearchRes)
#' resK5L10 <- subsetCeldaList(celdaCGGridSearchRes,
#'     params = list(K = 5, L = 10))
#' @export
setMethod(
    f = "subsetCeldaList",
    signature = signature(x = "celdaList"),
    definition = function(x, params) {

        allRunParams <- runParams(x)

        ## Every criterion must name a column of the run parameter table
        badParams <- setdiff(names(params), colnames(allRunParams))
        if (length(badParams) > 0) {
            stop("The following elements in 'params' are not columns in",
                " runParams (x) ",
                paste(badParams, collapse = ",")
            )
        }

        ## Narrow the table one criterion at a time; fail as soon as no run
        ## is left
        newRunParams <- allRunParams
        for (paramName in names(params)) {
            keep <- newRunParams[, paramName] %in% params[[paramName]]
            newRunParams <- newRunParams[keep, , drop = FALSE]

            if (nrow(newRunParams) == 0) {
                stop("No runs matched the criteria given in 'params'. Check",
                    " 'runParams(x)' for complete list of parameters used",
                    " to generate 'x'.")
            }
        }

        ## Positions of the remaining runs in the original table
        ix <- match(newRunParams$index, allRunParams$index)

        ## Exactly one run left: hand back the model itself
        if (length(ix) == 1) {
            return(resList(x)[[ix]])
        }

        ## Otherwise return a reduced celdaList
        x@resList <- resList(x)[ix]
        x@runParams <- as.data.frame(newRunParams)
        x
    }
)


#' @title Select best chain within each combination of parameters
#' @description Select the chain with the best log likelihood for each
#'  combination of tested parameters from a \code{SCE} object generated by
#'  \link{celdaGridSearch} or from a \code{celdaList} object.
#' @param x Can be one of
#' \itemize{
#'  \item A \linkS4class{SingleCellExperiment} object returned from
#'  \code{celdaGridSearch}, \code{recursiveSplitModule},
#'  or \code{recursiveSplitCell}. Must contain a list named
#'  \code{"celda_grid_search"} in \code{metadata(x)}.
#'  \item celdaList object.}
#' @param asList \code{TRUE} or \code{FALSE}. Whether to return the
#'  best model as a
#'  \code{celdaList} object or not. If \code{FALSE}, return the best model as a
#'  corresponding celda model object.
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @return One of
#' \itemize{
#'  \item A new \linkS4class{SingleCellExperiment} object containing
#'  one model with the best log-likelihood for each set of parameters in
#'  \code{metadata(x)}. If there is only one set of parameters,
#'  a new \linkS4class{SingleCellExperiment} object
#'  with the matching model stored in the
#'  \link{metadata}
#'  \code{"celda_parameters"} slot will be returned. Otherwise, a new
#'  \linkS4class{SingleCellExperiment} object with the subset models stored
#'  in the \link{metadata}
#'  \code{"celda_grid_search"} slot will be returned.
#'  \item A new \code{celdaList} object containing one model with the best
#'  log-likelihood for each set of parameters. If only one set of parameters
#'  is in the \code{celdaList}, the best model will be returned directly
#'  instead of a \code{celdaList} object.}
#' @seealso \link{celdaGridSearch} \link{subsetCeldaList}
#' @export
setGeneric(
    name = "selectBestModel",
    def = function(x, asList = FALSE, altExpName = "featureSubset") {
        # Dispatch to the method registered for the class of 'x'.
        standardGeneric("selectBestModel")
    }
)


#' @rdname selectBestModel
#' @examples
#' data(sceCeldaCGGridSearch)
#' ## Returns same result as running celdaGridSearch with "bestOnly = TRUE"
#' sce <- selectBestModel(sceCeldaCGGridSearch)
#' @importFrom data.table as.data.table
#' @export
setMethod(
    f = "selectBestModel",
    signature = signature(x = "SingleCellExperiment"),
    definition = function(x, asList = FALSE, altExpName = "featureSubset") {

        # Dummy bindings for the symbols used inside the data.table call
        # below (avoids R CMD check notes).
        logLikelihood <- NULL
        .SD <- NULL

        subsetSce <- SingleCellExperiment::altExp(x, altExpName)
        allRunParams <-
            S4Vectors::metadata(subsetSce)$celda_grid_search@runParams

        # Grouping columns: every column that is not bookkeeping.
        group <- setdiff(colnames(allRunParams),
            c("index", "chain", "logLikelihood", "mean_perplexity", "seed"))

        # Within each group keep the row with the largest log-likelihood,
        # then restore the original column order.
        dt <- data.table::as.data.table(allRunParams)
        newRunParams <- as.data.frame(dt[, .SD[which.max(logLikelihood)],
            by = group])
        newRunParams <- newRunParams[, colnames(allRunParams)]

        ix <- match(newRunParams$index, allRunParams$index)
        singleModel <- nrow(newRunParams) == 1 & !asList

        if (singleModel) {
            # One model and no list requested: convert it with
            # .subsetCeldaListSCE()
            subsetSce <- .subsetCeldaListSCE(subsetSce, ix)
        } else {
            gridSearch <- subsetSce@metadata$celda_grid_search
            gridSearch@runParams <- as.data.frame(newRunParams)
            gridSearch@resList <- gridSearch@resList[ix]
            subsetSce@metadata$celda_grid_search <- gridSearch
        }

        SingleCellExperiment::altExp(x, altExpName) <- subsetSce
        x
    }
)


#' @rdname selectBestModel
#' @examples
#' data(celdaCGGridSearchRes)
#' ## Returns same result as running celdaGridSearch with "bestOnly = TRUE"
#' cgsBest <- selectBestModel(celdaCGGridSearchRes)
#' @importFrom data.table as.data.table
#' @export
setMethod(
    f = "selectBestModel",
    signature = signature(x = "celdaList"),
    definition = function(x, asList = FALSE) {

        # Dummy bindings for the symbols used inside the data.table call
        # below (avoids R CMD check notes).
        logLikelihood <- NULL
        .SD <- NULL

        allRunParams <- runParams(x)

        # Grouping columns: every column that is not bookkeeping.
        group <- setdiff(colnames(allRunParams),
            c("index", "chain", "logLikelihood", "mean_perplexity", "seed"))

        # Within each group keep the row with the largest log-likelihood,
        # then restore the original column order.
        dt <- data.table::as.data.table(allRunParams)
        newRunParams <- as.data.frame(dt[, .SD[which.max(logLikelihood)],
            by = group])
        newRunParams <- newRunParams[, colnames(allRunParams)]

        ix <- match(newRunParams$index, allRunParams$index)
        singleModel <- nrow(newRunParams) == 1 & !asList

        # One model and no list requested: hand back the model itself.
        if (singleModel) {
            return(resList(x)[[ix]])
        }

        x@resList <- resList(x)[ix]
        x@runParams <- as.data.frame(newRunParams)
        x
    }
)


# Stores a celdaList and the settings of the celdaGridSearch() call that
# produced it in metadata(sce)[["celda_grid_search"]] and returns 'sce'.
.createSCEceldaGridSearch <- function(celdaList,
    sce,
    xClass,
    useAssay,
    model,
    paramsTest,
    paramsFixed,
    maxIter,
    seed,
    nchains,
    cores,
    bestOnly,
    perplexity,
    verbose,
    logfilePrefix) {

    # Record the call settings on the celdaList itself ...
    searchSettings <- list(xClass = xClass,
        useAssay = useAssay,
        model = model,
        paramsTest = paramsTest,
        paramsFixed = paramsFixed,
        maxIter = maxIter,
        seed = seed,
        nchains = nchains,
        cores = cores,
        bestOnly = bestOnly,
        perplexity = perplexity,
        verbose = verbose,
        logfilePrefix = logfilePrefix)
    celdaList@celdaGridSearchParameters <- searchSettings

    # ... and attach the annotated object to the metadata of 'sce'.
    S4Vectors::metadata(sce)[["celda_grid_search"]] <- celdaList
    sce
}


# Converts the model at position 'ix' of the grid search results stored in
# 'x' into a single-model SCE, using the .createSCEcelda* helper that matches
# the model type recorded in the grid search parameters.
#
# NOTE: `$` partial-matches list names while `[[` does not, so the two
# accessors are used exactly as before for every element.
.subsetCeldaListSCE <- function(x, ix) {
    gridSearch <- x@metadata$celda_grid_search
    gsParams <- gridSearch@celdaGridSearchParameters
    modelName <- gsParams$model

    if (modelName == "celda_C") {
        result <- .createSCEceldaC(
            celdaCMod = gridSearch@resList[[ix]],
            sce = x,
            xClass = gsParams$xClass,
            useAssay = gsParams$useAssay,
            algorithm = gsParams$algorithm,
            stopIter = gsParams$stopIter,
            maxIter = gsParams$maxIter,
            splitOnIter = gsParams$splitOnIter,
            splitOnLast = gsParams$splitOnLast,
            nchains = gsParams$nchains,
            zInitialize = gsParams[["zInitialize"]],
            zInit = gsParams[["zInit"]],
            logfile = gsParams$logfile,
            verbose = gsParams$verbose)
        return(result)
    }

    if (modelName == "celda_G") {
        result <- .createSCEceldaG(
            celdaGMod = gridSearch@resList[[ix]],
            sce = x,
            xClass = gsParams$xClass,
            useAssay = gsParams$useAssay,
            stopIter = gsParams$stopIter,
            maxIter = gsParams$maxIter,
            splitOnIter = gsParams$splitOnIter,
            splitOnLast = gsParams$splitOnLast,
            nchains = gsParams$nchains,
            yInitialize = gsParams[["yInitialize"]],
            yInit = gsParams[["yInit"]],
            logfile = gsParams$logfile,
            verbose = gsParams$verbose)
        return(result)
    }

    if (modelName == "celda_CG") {
        result <- .createSCEceldaCG(
            celdaCGMod = gridSearch@resList[[ix]],
            sce = x,
            xClass = gsParams$xClass,
            useAssay = gsParams$useAssay,
            algorithm = gsParams$algorithm,
            stopIter = gsParams$stopIter,
            maxIter = gsParams$maxIter,
            splitOnIter = gsParams$splitOnIter,
            splitOnLast = gsParams$splitOnLast,
            nchains = gsParams$nchains,
            zInitialize = gsParams[["zInitialize"]],
            yInitialize = gsParams[["yInitialize"]],
            zInit = gsParams[["zInit"]],
            yInit = gsParams[["yInit"]],
            logfile = gsParams$logfile,
            verbose = gsParams$verbose)
        return(result)
    }

    # Any other model name is rejected.
    stop("S4Vectors::metadata(altExp(x, altExpName))$celda_grid_search@",
        "celdaGridSearchParameters$model must be",
        " one of 'celda_C', 'celda_G', or 'celda_CG'")
}


================================================
FILE: R/celdaProbabilityMap.R
================================================
#' @title Probability map for a celda model
#' @description Renders probability and relative expression heatmaps to
#'  visualize the relationship between features and cell populations (or cell
#'  populations and samples).
#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object
#'  returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
#' @param useAssay A string specifying which \link{assay}
#'  slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param level Character. One of "cellPopulation" or "sample".
#'  "cellPopulation" will display the absolute probabilities and relative
#'  normalized expression of each module in each cell population.
#'  \strong{\code{level = "cellPopulation"} only works for celda_CG \code{sce}
#'  objects}. "sample" will display the absolute probabilities and relative
#'  normalized abundance of each cell population in each sample. Default
#'  "cellPopulation".
#' @param ncols The number of colors (>1) to be in the color palette of
#'  the absolute probability heatmap.
#' @param col2 Passed to \code{col} argument of \link[ComplexHeatmap]{Heatmap}.
#'  Set color boundaries and colors for the relative expression heatmap.
#' @param title1 Passed to \code{column_title} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Figure title for the absolute probability
#'  heatmap.
#' @param title2 Passed to \code{column_title} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Figure title for the relative expression
#'  heatmap.
#' @param showColumnNames Passed to \code{show_column_names} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Show column names.
#' @param showRowNames Passed to \code{show_row_names} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Show row names.
#' @param rowNamesgp Passed to \code{row_names_gp} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Set row name font.
#' @param colNamesgp Passed to \code{column_names_gp} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Set column name font.
#' @param clusterRows Passed to \code{cluster_rows} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Cluster rows.
#' @param clusterColumns Passed to \code{cluster_columns} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Cluster columns.
#' @param showHeatmapLegend Passed to \code{show_heatmap_legend} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Show heatmap legend.
#' @param heatmapLegendParam Passed to \code{heatmap_legend_param} argument of
#'  \link[ComplexHeatmap]{Heatmap}. Heatmap legend parameters.
#' @param ... Additional parameters passed to \link[ComplexHeatmap]{Heatmap}.
#' @seealso \link{celda_C} for clustering cells. \link{celda_CG} for
#'  clustering features and cells
#' @return A \link[ComplexHeatmap]{HeatmapList} object containing 2
#'  \link[ComplexHeatmap]{Heatmap-class} objects
#' @export
setGeneric("celdaProbabilityMap",
    function(sce,
        useAssay = "counts",
        altExpName = "featureSubset",
        level = c("cellPopulation", "sample"),
        ncols = 100,
        col2 = circlize::colorRamp2(c(-2, 0, 2),
            c("#1E90FF", "#FFFFFF", "#CD2626")),
        title1 = "Absolute probability",
        title2 = "Relative expression",
        showColumnNames = TRUE,
        showRowNames = TRUE,
        rowNamesgp = grid::gpar(fontsize = 8),
        colNamesgp = grid::gpar(fontsize = 12),
        clusterRows = FALSE,
        clusterColumns = FALSE,
        showHeatmapLegend = TRUE,
        heatmapLegendParam = list(title = NULL,
            legend_height = grid::unit(6, "cm")),
        ...) {

        # Generic declaration only: the defaults above define the public
        # interface, and the work is done by the method selected for the
        # supplied arguments.
        standardGeneric("celdaProbabilityMap")
    })


#' @rdname celdaProbabilityMap
#' @importFrom RColorBrewer brewer.pal
#' @importFrom grDevices colorRampPalette
#' @examples
#' data(sceCeldaCG)
#' celdaProbabilityMap(sceCeldaCG)
#' @export
setMethod("celdaProbabilityMap", signature(sce = "SingleCellExperiment"),
    function(sce,
        useAssay = "counts",
        altExpName = "featureSubset",
        level = c("cellPopulation", "sample"),
        ncols = 100,
        col2 = circlize::colorRamp2(c(-2, 0, 2),
            c("#1E90FF", "#FFFFFF", "#CD2626")),
        title1 = "Absolute probability",
        title2 = "Relative expression",
        showColumnNames = TRUE,
        showRowNames = TRUE,
        rowNamesgp = grid::gpar(fontsize = 8),
        colNamesgp = grid::gpar(fontsize = 12),
        clusterRows = FALSE,
        clusterColumns = FALSE,
        showHeatmapLegend = TRUE,
        heatmapLegendParam = list(title = NULL,
            legend_height = grid::unit(6, "cm")),
        ...) {

        level <- match.arg(level)
        modelType <- celdaModel(sce, altExpName = altExpName)

        if (modelType == "celda_C") {
            # celda_C objects are always drawn at the sample level; tell the
            # user when the requested level is being overridden.
            if (level == "cellPopulation") {
                warning("'level' has been set to 'sample'")
            }
            return(.celdaProbabilityMapC(sce = sce,
                useAssay = useAssay,
                altExpName = altExpName,
                level = "sample",
                ncols = ncols,
                col2 = col2,
                title1 = title1,
                title2 = title2,
                showColumnNames = showColumnNames,
                showRowNames = showRowNames,
                rowNamesgp = rowNamesgp,
                colNamesgp = colNamesgp,
                clusterRows = clusterRows,
                clusterColumns = clusterColumns,
                showHeatmapLegend = showHeatmapLegend,
                heatmapLegendParam = heatmapLegendParam,
                ...))
        }

        if (modelType == "celda_CG") {
            # celda_CG objects honour the requested level
            return(.celdaProbabilityMapCG(sce = sce,
                useAssay = useAssay,
                altExpName = altExpName,
                level = level,
                ncols = ncols,
                col2 = col2,
                title1 = title1,
                title2 = title2,
                showColumnNames = showColumnNames,
                showRowNames = showRowNames,
                rowNamesgp = rowNamesgp,
                colNamesgp = colNamesgp,
                clusterRows = clusterRows,
                clusterColumns = clusterColumns,
                showHeatmapLegend = showHeatmapLegend,
                heatmapLegendParam = heatmapLegendParam,
                ...))
        }

        # Every other model type is rejected
        stop("S4Vectors::metadata(altExp(sce,",
            " altExpName))$celda_parameters$model must be",
            " one of 'celda_C', or 'celda_CG'!")
    }
)


# Sample-level probability map for a celda_C object.
#
# Draws an "absolute probability" heatmap of the sample proportions returned
# by factorizeMatrix(), restricted to the cell clusters that contain at least
# one cell. When that matrix has more than one column, a second heatmap of the
# same matrix after normalizeCounts() (proportion, sqrt, base::scale) is
# appended.
#
# @param sce Object handed to factorizeMatrix(); its `altExpName` alternative
#  experiment supplies the cluster labels and K.
# @param useAssay,altExpName Forwarded to factorizeMatrix().
# @param level,ncols Accepted for signature parity with
#  .celdaProbabilityMapCG; neither is read in this function (the palette
#  below always has 100 colors).
# @param col2 Color mapping of the second heatmap.
# @param title1,title2 Column titles of the first / second heatmap.
# @param showColumnNames,showRowNames,rowNamesgp,colNamesgp,clusterRows,
#  clusterColumns,showHeatmapLegend,heatmapLegendParam,... Passed to
#  ComplexHeatmap::Heatmap for both heatmaps.
# @return `g1 + g2` when there is more than one column in the proportion
#  matrix, otherwise the single heatmap `g1`.
.celdaProbabilityMapC <- function(sce,
    useAssay,
    altExpName,
    level,
    ncols,
    col2,
    title1,
    title2,
    showColumnNames,
    showRowNames,
    rowNamesgp,
    colNamesgp,
    clusterRows,
    clusterColumns,
    showHeatmapLegend,
    heatmapLegendParam,
    ...) {

    altExp <- SingleCellExperiment::altExp(sce, altExpName)

    # Indices (out of K) of the cell clusters that have at least one cell
    zInclude <- which(tabulate(SummarizedExperiment::colData(
        altExp)$celda_cell_cluster,
        S4Vectors::metadata(altExp)$celda_parameters$K) > 0)

    # Forward 'altExpName' so the factorization is computed on the same
    # alternative experiment the cluster labels above were read from; without
    # it factorizeMatrix() would fall back to its own default.
    factorized <- factorizeMatrix(x = sce, useAssay = useAssay,
        altExpName = altExpName,
        type = "proportion")

    samp <- factorized$proportions$sample[zInclude, , drop = FALSE]
    col1 <- grDevices::colorRampPalette(c("white",
        "blue",
        "midnightblue",
        "springgreen4",
        "yellowgreen",
        "yellow",
        "orange",
        "red"))(100)
    # One break per color, evenly spaced over [0, 1]
    breaks <- seq(0, 1, length.out = length(col1))

    g1 <- ComplexHeatmap::Heatmap(matrix = samp,
        col = circlize::colorRamp2(breaks, col1),
        column_title = title1,
        show_column_names = showColumnNames,
        show_row_names = showRowNames,
        row_names_gp = rowNamesgp,
        column_names_gp = colNamesgp,
        cluster_rows = clusterRows,
        cluster_columns = clusterColumns,
        show_heatmap_legend = showHeatmapLegend,
        heatmap_legend_param = heatmapLegendParam,
        ...)

    # The second (relative) heatmap is only drawn when there is more than one
    # column to compare.
    if (ncol(samp) > 1) {
        sampNorm <- normalizeCounts(samp,
            normalize = "proportion",
            transformationFun = sqrt,
            scaleFun = base::scale)

        g2 <- ComplexHeatmap::Heatmap(matrix = sampNorm,
            col = col2,
            column_title = title2,
            show_column_names = showColumnNames,
            show_row_names = showRowNames,
            row_names_gp = rowNamesgp,
            column_names_gp = colNamesgp,
            cluster_rows = clusterRows,
            cluster_columns = clusterColumns,
            show_heatmap_legend = showHeatmapLegend,
            heatmap_legend_param = heatmapLegendParam,
            ...)
        return(g1 + g2)
    } else {
        return(g1)
    }
}


# Probability map for a celda_CG object.
#
# With `level == "cellPopulation"` two heatmaps are drawn from the
# cell-population proportions returned by factorizeMatrix(), restricted to
# the feature modules and cell clusters that are non-empty: the proportions
# themselves and the same matrix after normalizeCounts().
# For any other `level` the sample proportions are drawn and, when there is
# more than one column, a second heatmap of the normalized sample counts is
# appended.
#
# @param sce Object handed to factorizeMatrix(); its `altExpName` alternative
#  experiment supplies the cluster / module labels and K / L.
# @param useAssay,altExpName Forwarded to factorizeMatrix().
# @param level "cellPopulation" selects the module-by-population view; any
#  other value selects the sample view.
# @param ncols Total number of colors of the absolute-probability palette in
#  the "cellPopulation" view (the sample view always uses 100 colors).
# @param col2 Color mapping of the second heatmap.
# @param title1,title2 Column titles of the first / second heatmap.
# @param showColumnNames,showRowNames,rowNamesgp,colNamesgp,clusterRows,
#  clusterColumns,showHeatmapLegend,heatmapLegendParam,... Passed to
#  ComplexHeatmap::Heatmap for every heatmap.
# @return `g1 + g2`, or `g1` alone in the sample view when the sample
#  proportion matrix has a single column.
.celdaProbabilityMapCG <- function(sce,
    useAssay,
    altExpName,
    level,
    ncols,
    col2,
    title1,
    title2,
    showColumnNames,
    showRowNames,
    rowNamesgp,
    colNamesgp,
    clusterRows,
    clusterColumns,
    showHeatmapLegend,
    heatmapLegendParam,
    ...) {

    altExp <- SingleCellExperiment::altExp(sce, altExpName)

    factorized <- factorizeMatrix(x = sce, useAssay = useAssay,
        altExpName = altExpName,
        type = c("counts", "proportion"))
    # Indices of the cell clusters (out of K) and feature modules (out of L)
    # that have at least one member
    zInclude <- which(tabulate(SummarizedExperiment::colData(
        altExp)$celda_cell_cluster,
        S4Vectors::metadata(altExp)$celda_parameters$K) > 0)
    yInclude <- which(tabulate(SummarizedExperiment::rowData(
        altExp)$celda_feature_module,
        S4Vectors::metadata(altExp)$celda_parameters$L) > 0)

    if (level == "cellPopulation") {
        pop <- factorized$proportions$cellPopulation[yInclude,
            zInclude,
            drop = FALSE]
        popNorm <- normalizeCounts(pop,
            normalize = "proportion",
            transformationFun = sqrt,
            scaleFun = base::scale)

        # Split the palette at the 90th percentile of the proportions
        # (rounded to two digits, times 100): that many colors come from the
        # white-to-blue ramp, the remaining `ncols - percentile9` from the
        # midnightblue-to-red ramp.
        percentile9 <- round(stats::quantile(pop, .9), digits = 2) * 100
        cols11 <- grDevices::colorRampPalette(c("white",
            RColorBrewer::brewer.pal(n = 9, name = "Blues")))(percentile9)
        cols12 <- grDevices::colorRampPalette(c("midnightblue",
            c("springgreen4", "Yellowgreen", "Yellow", "Orange",
                "Red")))(ncols - percentile9)
        col1 <- c(cols11, cols12)
        breaks <- seq(0, 1, length.out = length(col1))

        g1 <- ComplexHeatmap::Heatmap(matrix = pop,
            col = circlize::colorRamp2(breaks, col1),
            column_title = title1,
            show_column_names = showColumnNames,
            show_row_names = showRowNames,
            row_names_gp = rowNamesgp,
            column_names_gp = colNamesgp,
            cluster_rows = clusterRows,
            cluster_columns = clusterColumns,
            show_heatmap_legend = showHeatmapLegend,
            heatmap_legend_param = heatmapLegendParam,
            ...)
        g2 <- ComplexHeatmap::Heatmap(matrix = popNorm,
            col = col2,
            column_title = title2,
            show_column_names = showColumnNames,
            show_row_names = showRowNames,
            row_names_gp = rowNamesgp,
            column_names_gp = colNamesgp,
            cluster_rows = clusterRows,
            cluster_columns = clusterColumns,
            show_heatmap_legend = showHeatmapLegend,
            heatmap_legend_param = heatmapLegendParam,
            ...)
        return(g1 + g2)
    } else {
        samp <- factorized$proportions$sample
        col1 <- grDevices::colorRampPalette(c(
            "white",
            "blue",
            "#08306B",
            "#006D2C",
            "yellowgreen",
            "yellow",
            "orange",
            "red"
        ))(100)
        breaks <- seq(0, 1, length.out = length(col1))

        g1 <- ComplexHeatmap::Heatmap(matrix = samp,
            col = circlize::colorRamp2(breaks, col1),
            column_title = title1,
            show_column_names = showColumnNames,
            show_row_names = showRowNames,
            row_names_gp = rowNamesgp,
            column_names_gp = colNamesgp,
            cluster_rows = clusterRows,
            cluster_columns = clusterColumns,
            show_heatmap_legend = showHeatmapLegend,
            heatmap_legend_param = heatmapLegendParam,
            ...)

        if (ncol(samp) > 1) {
            sampNorm <- normalizeCounts(factorized$counts$sample,
                normalize = "proportion",
                transformationFun = sqrt,
                scaleFun = base::scale)
            g2 <- ComplexHeatmap::Heatmap(matrix = sampNorm,
                col = col2,
                column_title = title2,
                show_column_names = showColumnNames,
                show_row_names = showRowNames,
                row_names_gp = rowNamesgp,
                column_names_gp = colNamesgp,
                cluster_rows = clusterRows,
                cluster_columns = clusterColumns,
                show_heatmap_legend = showHeatmapLegend,
                heatmap_legend_param = heatmapLegendParam,
                ...)
            return(g1 + g2)
        } else {
            # 'g2' is never built for a single column, so only the absolute
            # probability heatmap can be returned here.
            return(g1)
        }
    }
}


================================================
FILE: R/celdaUMAP.R
================================================
#' @title Uniform Manifold Approximation and Projection (UMAP) dimension
#'  reduction for celda \code{sce} object
#' @description Embeds cells in two dimensions using \link[uwot]{umap} based on
#'  a celda model. For celda_C \code{sce} objects, PCA on the normalized counts
#'  is used to reduce the number of features before applying UMAP. For celda_CG
#'  \code{sce} object, UMAP is run on module probabilities to reduce the number
#'  of features instead of using PCA. Module probabilities are square-root
#'  transformed before applying UMAP.
#' @param sce A \link[SingleCellExperiment]{SingleCellExperiment} object
#'  returned by \link{celda_C}, \link{celda_G}, or \link{celda_CG}.
#' @param useAssay A string specifying which \link{assay}
#'  slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param maxCells Integer. Maximum number of cells to plot. Cells will be
#'  randomly subsampled if \code{ncol(sce) > maxCells}. Larger numbers of cells
#'  require more memory. If NULL, no subsampling will be performed.
#'  Default NULL.
#' @param minClusterSize Integer. Do not subsample cell clusters below this
#'  threshold. Default 100.
#' @param modules Integer vector. Determines which feature modules to use for
#'  UMAP. If NULL, all modules will be used. Default NULL.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#'  a default value of 12345 is used. If NULL, no calls to
#'  \link[withr]{with_seed} are made.
#' @param nNeighbors The size of local neighborhood used for
#'  manifold approximation. Larger values result in more global
#'  views of the manifold, while smaller values result in more
#'  local data being preserved. Default 30.
#'  See \link[uwot]{umap} for more information.
#' @param minDist The effective minimum distance between embedded points.
#'  Smaller values will result in a more clustered/clumped
#'  embedding where nearby points on the manifold are drawn
#'  closer together, while larger values will result in a more
#'  even dispersal of points. Default 0.75.
#'  See \link[uwot]{umap} for more information.
#' @param spread The effective scale of embedded points. In combination with
#'  \code{min_dist}, this determines how clustered/clumped the
#'   embedded points are. Default 1. See \link[uwot]{umap} for more information.
#' @param pca Logical. Whether to perform
#'  dimensionality reduction with PCA before UMAP. Only works for celda_C
#'  \code{sce} objects.
#' @param initialDims Integer. Number of dimensions from PCA to use as
#'  input in UMAP. Default 50. Only works for celda_C \code{sce} objects.
#' @param normalize Character. Passed to \link{normalizeCounts} in
#'  normalization step. Divides counts by the library sizes for each
#'  cell. One of 'proportion', 'cpm', 'median', or 'mean'. 'proportion' uses
#'  the total counts for each cell as the library size. 'cpm' divides the
#'  library size of each cell by one million to produce counts per million.
#'  'median' divides the library size of each cell by the median library size
#'  across all cells. 'mean' divides the library size of each cell by the mean
#'  library size across all cells.
#' @param scaleFactor Numeric. Sets the scale factor for cell-level
#'  normalization. This scale factor is multiplied to each cell after the
#'  library size of each cell had been adjusted in \code{normalize}. Default
#'  \code{NULL} which means no scale factor is applied.
#' @param transformationFun Function. Applies a transformation such as 'sqrt',
#'  'log', 'log2', 'log10', or 'log1p'. If \code{NULL}, no transformation will
#'  be applied. Occurs after applying normalization and scale factor. Default
#'  \code{NULL}.
#' @param cores Number of threads to use. Default 1.
#' @param ... Additional parameters to pass to \link[uwot]{umap}.
#' @return \code{sce} with UMAP coordinates
#'  (columns "celda_UMAP1" & "celda_UMAP2") added to
#'  \code{\link{reducedDim}(sce, "celda_UMAP")}.
#' @export
setGeneric("celdaUmap",
    function(sce,
        useAssay = "counts",
        altExpName = "featureSubset",
        maxCells = NULL,
        minClusterSize = 100,
        modules = NULL,
        seed = 12345,
        nNeighbors = 30,
        minDist = 0.75,
        spread = 1,
        pca = TRUE,
        initialDims = 50,
        normalize = "proportion",
        scaleFactor = NULL,
        transformationFun = sqrt,
        cores = 1,
        ...) {

        # Generic declaration only: the defaults above define the public
        # interface, and the work is done by the method selected for the
        # supplied arguments.
        standardGeneric("celdaUmap")
    })


#' @rdname celdaUmap
#' @examples
#' data(sceCeldaCG)
#' umapRes <- celdaUmap(sceCeldaCG)
#' @export
setMethod("celdaUmap", signature(sce = "SingleCellExperiment"),
    function(sce,
        useAssay = "counts",
        altExpName = "featureSubset",
        maxCells = NULL,
        minClusterSize = 100,
        modules = NULL,
        seed = 12345,
        nNeighbors = 30,
        minDist = 0.75,
        spread = 1,
        pca = TRUE,
        initialDims = 50,
        normalize = "proportion",
        scaleFactor = NULL,
        transformationFun = sqrt,
        cores = 1,
        ...) {

        # Without a seed the embedding is computed with whatever RNG state
        # is currently active.
        if (is.null(seed)) {
            sce <- .celdaUmap(sce = sce,
                useAssay = useAssay,
                altExpName = altExpName,
                maxCells = maxCells,
                minClusterSize = minClusterSize,
                modules = modules,
                seed = seed,
                nNeighbors = nNeighbors,
                minDist = minDist,
                spread = spread,
                pca = pca,
                initialDims = initialDims,
                normalize = normalize,
                scaleFactor = scaleFactor,
                transformationFun = transformationFun,
                cores = cores,
                ...)
            return(sce)
        }

        # Otherwise run the same call under with_seed() and keep its value
        sce <- with_seed(seed,
            .celdaUmap(sce = sce,
                useAssay = useAssay,
                altExpName = altExpName,
                maxCells = maxCells,
                minClusterSize = minClusterSize,
                modules = modules,
                seed = seed,
                nNeighbors = nNeighbors,
                minDist = minDist,
                spread = spread,
                pca = pca,
                initialDims = initialDims,
                normalize = normalize,
                scaleFactor = scaleFactor,
                transformationFun = transformationFun,
                cores = cores,
                ...))
        return(sce)
    })


# Compute UMAP coordinates for the `altExpName` alternative experiment of
# `sce` with the helper matching its celda model type, and store them as the
# "celda_UMAP" reduced dimension of that alternative experiment.
#
# @return `sce` with the updated alternative experiment written back.
.celdaUmap <- function(sce,
    useAssay,
    altExpName,
    maxCells,
    minClusterSize,
    modules,
    seed,
    nNeighbors,
    minDist,
    spread,
    pca,
    initialDims,
    cores,
    normalize,
    scaleFactor,
    transformationFun,
    ...) {

    modelType <- celdaModel(sce, altExpName = altExpName)
    featureSubset <- SingleCellExperiment::altExp(sce, altExpName)

    # Only the celda_C helper receives the PCA settings; the celda_CG and
    # celda_G helpers receive 'modules' and 'seed' instead.
    if (modelType == "celda_C") {
        umapCoords <- .celdaUmapC(sce = featureSubset,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            nNeighbors = nNeighbors,
            minDist = minDist,
            spread = spread,
            pca = pca,
            initialDims = initialDims,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun,
            cores = cores,
            ...)
    } else if (modelType == "celda_CG") {
        umapCoords <- .celdaUmapCG(sce = featureSubset,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            modules = modules,
            seed = seed,
            nNeighbors = nNeighbors,
            minDist = minDist,
            spread = spread,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun,
            cores = cores,
            ...)
    } else if (modelType == "celda_G") {
        umapCoords <- .celdaUmapG(sce = featureSubset,
            useAssay = useAssay,
            maxCells = maxCells,
            minClusterSize = minClusterSize,
            modules = modules,
            seed = seed,
            nNeighbors = nNeighbors,
            minDist = minDist,
            spread = spread,
            normalize = normalize,
            scaleFactor = scaleFactor,
            transformationFun = transformationFun,
            cores = cores,
            ...)
    } else {
        stop("S4Vectors::metadata(altExp(sce, altExpName))$",
            "celda_parameters$model must be",
            " one of 'celda_C', 'celda_G', or 'celda_CG'")
    }

    # Attach the coordinates to the alternative experiment, then put the
    # alternative experiment back into the parent object.
    SingleCellExperiment::reducedDim(featureSubset, "celda_UMAP") <-
        umapCoords
    SingleCellExperiment::altExp(sce, altExpName) <- featureSubset
    return(sce)
}


# UMAP coordinates for a celda_C object.
#
# Prepares the counts with .prepareCountsForDimReductionCeldaC(), embeds the
# resulting `norm` matrix with .calculateUmap() (PCA settings included) and
# returns a two-column matrix with one row per column of `sce`.
#
# @return Matrix with columns "celda_UMAP1" / "celda_UMAP2" and the column
#  names of `sce` as row names. Rows not listed in the prepared `cellIx`
#  stay NA.
.celdaUmapC <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    nNeighbors,
    minDist,
    spread,
    pca,
    initialDims,
    normalize,
    scaleFactor,
    transformationFun,
    cores,
    ...) {

    prepared <- .prepareCountsForDimReductionCeldaC(sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    embedding <- .calculateUmap(prepared$norm,
        nNeighbors = nNeighbors,
        minDist = minDist,
        spread = spread,
        pca = pca,
        initialDims = initialDims,
        cores = cores,
        ...)

    # Start from an all-NA matrix covering every cell and fill in only the
    # rows the embedding was computed for.
    coords <- matrix(NA, nrow = ncol(sce), ncol = 2)
    coords[prepared$cellIx, ] <- embedding
    dimnames(coords) <- list(colnames(sce),
        c("celda_UMAP1", "celda_UMAP2"))
    return(coords)
}


# UMAP coordinates for a celda_CG object.
#
# Prepares the counts with .prepareCountsForDimReductionCeldaCG(), embeds the
# resulting `norm` matrix with .calculateUmap() and returns a two-column
# matrix with one row per column of `sce`. `seed` is accepted but not read
# here.
#
# @return Matrix with columns "celda_UMAP1" / "celda_UMAP2" and the column
#  names of `sce` as row names. Rows not listed in the prepared `cellIx`
#  stay NA.
.celdaUmapCG <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    modules,
    seed,
    nNeighbors,
    minDist,
    spread,
    normalize,
    scaleFactor,
    transformationFun,
    cores,
    ...) {

    prepared <- .prepareCountsForDimReductionCeldaCG(sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        modules = modules,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    embedding <- .calculateUmap(prepared$norm,
        nNeighbors = nNeighbors,
        minDist = minDist,
        spread = spread,
        cores = cores,
        ...)

    # Start from an all-NA matrix covering every cell and fill in only the
    # rows the embedding was computed for.
    coords <- matrix(NA, nrow = ncol(sce), ncol = 2)
    coords[prepared$cellIx, ] <- embedding
    dimnames(coords) <- list(colnames(sce),
        c("celda_UMAP1", "celda_UMAP2"))
    return(coords)
}


# UMAP coordinates for a celda_G object.
#
# Prepares the counts with .prepareCountsForDimReductionCeldaG(), embeds the
# resulting `norm` matrix with .calculateUmap() and returns a two-column
# matrix with one row per column of `sce`. `seed` is accepted but not read
# here.
#
# @return Matrix with columns "celda_UMAP1" / "celda_UMAP2" and the column
#  names of `sce` as row names. Rows not listed in the prepared `cellIx`
#  stay NA.
.celdaUmapG <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    modules,
    seed,
    nNeighbors,
    minDist,
    spread,
    normalize,
    scaleFactor,
    transformationFun,
    cores,
    ...) {

    prepared <- .prepareCountsForDimReductionCeldaG(sce = sce,
        useAssay = useAssay,
        maxCells = maxCells,
        minClusterSize = minClusterSize,
        modules = modules,
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun)

    embedding <- .calculateUmap(prepared$norm,
        nNeighbors = nNeighbors,
        minDist = minDist,
        spread = spread,
        cores = cores,
        ...)

    # Start from an all-NA matrix covering every cell and fill in only the
    # rows the embedding was computed for.
    coords <- matrix(NA, nrow = ncol(sce), ncol = 2)
    coords[prepared$cellIx, ] <- embedding
    dimnames(coords) <- list(colnames(sce),
        c("celda_UMAP1", "celda_UMAP2"))
    return(coords)
}


# Run the UMAP algorithm for dimensionality reduction
# @param norm Normalized count matrix.
# @param nNeighbors The size of local neighborhood used for
#   manifold approximation. Larger values result in more global
#   views of the manifold, while smaller values result in more
#   local data being preserved. Default 30.
#    See `?uwot::umap` for more information.
# @param minDist The effective minimum distance between embedded points.
#    Smaller values will result in a more clustered/clumped
#    embedding where nearby points on the manifold are drawn
#    closer together, while larger values will result in a more
#    even dispersal of points. Default 0.75.
#    See `?uwot::umap` for more information.
# @param spread The effective scale of embedded points. In combination with
#    'min_dist', this determines how clustered/clumped the
#    embedded points are. Default 1.
#    See `?uwot::umap` for more information.
# @param pca Logical. Whether to perform
# dimensionality reduction with PCA before UMAP.
# @param initialDims Integer. Number of dimensions from PCA to use as
# input in UMAP. Default 50.
# @param cores Number of threads to use. Default 1.
# @param ... Other parameters to pass to `uwot::umap`.
#' @import uwot
.calculateUmap <- function(norm,
    nNeighbors = 30,
    minDist = 0.75,
    spread = 1,
    pca = FALSE,
    initialDims = 50,
    cores = 1,
    ...) {

    # uwot::umap() receives the number of PCA dimensions to reduce to, or
    # NULL when PCA was not requested (anything other than a single TRUE).
    pcaDims <- if (isTRUE(pca)) initialDims else NULL

    # 'cores' only sets 'n_threads'; 'n_sgd_threads' is fixed at 1.
    embedding <- uwot::umap(norm,
        n_neighbors = nNeighbors,
        min_dist = minDist,
        spread = spread,
        n_threads = cores,
        n_sgd_threads = 1,
        pca = pcaDims,
        ...)
    return(embedding)
}


================================================
FILE: R/celda_C.R
================================================
#' @title Cell clustering with Celda
#' @description Clusters the columns of a count matrix containing single-cell
#'  data into K subpopulations. The
#'  \code{useAssay} \link{assay} slot in
#'  \code{altExpName} \link{altExp} slot will be used if
#'  it exists. Otherwise, the \code{useAssay}
#'  \link{assay} slot in \code{x} will be used if
#'  \code{x} is a \linkS4class{SingleCellExperiment} object.
#' @param x A \linkS4class{SingleCellExperiment}
#'  with the matrix located in the assay slot under \code{useAssay}.
#'  Rows represent features and columns represent cells. Alternatively,
#'  any matrix-like object that can be coerced to a sparse matrix of class
#'  "dgCMatrix" can be directly used as input. The matrix will automatically be
#'  converted to a \linkS4class{SingleCellExperiment} object.
#' @param useAssay A string specifying the name of the
#'  \link{assay} slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param sampleLabel Vector or factor. Denotes the sample label for each cell
#'  (column) in the count matrix.
#' @param K Integer. Number of cell populations.
#' @param alpha Numeric. Concentration parameter for Theta. Adds a pseudocount
#'  to each cell population in each sample. Default 1.
#' @param beta Numeric. Concentration parameter for Phi. Adds a pseudocount to
#'  each feature in each cell population. Default 1.
#' @param algorithm String. Algorithm to use for clustering cell subpopulations.
#'  One of 'EM' or 'Gibbs'. The EM algorithm is faster, especially for larger
#'  numbers of cells. However, more chains may be required to ensure a good
#'  solution is found. If 'EM' is selected, then 'stopIter' will be
#'  automatically set to 1. Default 'EM'.
#' @param stopIter Integer. Number of iterations without improvement in the
#'  log likelihood to stop inference. Default 10.
#' @param maxIter Integer. Maximum number of iterations of Gibbs sampling or
#'  EM to perform. Default 200.
#' @param splitOnIter Integer. On every `splitOnIter` iteration, a heuristic
#'  will be applied to determine if a cell population should be reassigned and
#'  another cell population should be split into two clusters. To disable
#'  splitting, set to -1. Default 10.
#' @param splitOnLast Logical. After `stopIter` iterations have been
#'  performed without improvement, a heuristic will be applied to determine if
#'  a cell population should be reassigned and another cell population should be
#'  split into two clusters. If a split occurs, then `stopIter` will be reset.
#'  Default TRUE.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#'  a default value of 12345 is used. If NULL, no calls to
#'  \link[withr]{with_seed} are made.
#' @param nchains Integer. Number of random cluster initializations. Default 3.
#' @param zInitialize Character. One of 'random', 'split', or 'predefined'.
#'  With 'random', cells are randomly assigned to a population. With 'split',
#'  cells will be split into sqrt(K) populations and then each population will
#'  be subsequently split into another sqrt(K) populations. With 'predefined',
#'  values in `zInit` will be used to initialize `z`. Default 'split'.
#' @param zInit Integer vector. Sets initial starting values of z. 'zInit'
#'  is only used when `zInitialize = 'predefined'`. Default NULL.
#' @param countChecksum Character. An MD5 checksum for the `counts` matrix.
#'  Default NULL.
#' @param logfile Character. Messages will be redirected to a file named
#'  `logfile`. If NULL, messages will be printed to stdout.  Default NULL.
#' @param verbose Logical. Whether to print log messages. Default TRUE.
#' @return A \link[SingleCellExperiment]{SingleCellExperiment} object. Function
#'  parameter settings are stored in the \link{metadata}
#'  \code{"celda_parameters"} slot.
#'  Columns \code{celda_sample_label} and \code{celda_cell_cluster} in
#'  \link{colData} contain sample labels and celda cell
#'  population clusters.
#' @seealso \link{celda_G} for feature clustering and \link{celda_CG} for
#'  simultaneous clustering of features and cells. \link{celdaGridSearch} can
#'  be used to run multiple values of K and multiple chains in parallel.
#' @examples
#' data(celdaCSim)
#' sce <- celda_C(celdaCSim$counts,
#'     K = celdaCSim$K,
#'     sampleLabel = celdaCSim$sampleLabel,
#'     nchains = 1)
#' @import Rcpp RcppEigen
#' @importFrom withr with_seed
#' @export
setGeneric("celda_C",
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        sampleLabel = NULL,
        K,
        alpha = 1,
        beta = 1,
        algorithm = c("EM", "Gibbs"),
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        zInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        zInit = NULL,
        logfile = NULL,
        verbose = TRUE) {
    # Generic declaration only: the defaults above define the public
    # interface ('K' has no default), and the work is done by the method
    # selected for the supplied arguments.
    standardGeneric("celda_C")})


#' @rdname celda_C
#' @export
setMethod("celda_C",
    signature(x = "SingleCellExperiment"),
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        sampleLabel = NULL,
        K,
        alpha = 1,
        beta = 1,
        algorithm = c("EM", "Gibbs"),
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        zInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        zInit = NULL,
        logfile = NULL,
        verbose = TRUE) {

        # Recorded in the output metadata so downstream code knows what kind
        # of object the user originally supplied.
        inputClass <- "SingleCellExperiment"

        # The model is fitted on the alternative experiment named by
        # 'altExpName'; it has to be present in 'x' already.
        if (!(altExpName %in% SingleCellExperiment::altExpNames(x))) {
            stop(altExpName, " not in 'altExpNames(x)'. Run ",
                "selectFeatures(x) first!")
        }
        featureSce <- SingleCellExperiment::altExp(x, altExpName)

        # The requested assay must exist inside that alternative experiment.
        if (!(useAssay %in% SummarizedExperiment::assayNames(featureSce))) {
            stop(useAssay, " not in assayNames(altExp(x, altExpName))")
        }
        countMatrix <- SummarizedExperiment::assay(featureSce, i = useAssay)

        # Fit the model; the helper returns the alternative experiment with
        # the celda_C results attached.
        fittedSce <- .celdaCWithSeed(counts = countMatrix,
            xClass = inputClass,
            useAssay = useAssay,
            sce = featureSce,
            sampleLabel = sampleLabel,
            K = K,
            alpha = alpha,
            beta = beta,
            algorithm = match.arg(algorithm),
            stopIter = stopIter,
            maxIter = maxIter,
            splitOnIter = splitOnIter,
            splitOnLast = splitOnLast,
            seed = seed,
            nchains = nchains,
            zInitialize = match.arg(zInitialize),
            countChecksum = countChecksum,
            zInit = zInit,
            logfile = logfile,
            verbose = verbose)

        # Store the updated alternative experiment back under the same name.
        SingleCellExperiment::altExp(x, altExpName) <- fittedSce
        return(x)
    }
)


#' @rdname celda_C
#' @export
setMethod("celda_C",
    signature(x = "ANY"),
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        sampleLabel = NULL,
        K,
        alpha = 1,
        beta = 1,
        algorithm = c("EM", "Gibbs"),
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        zInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        zInit = NULL,
        logfile = NULL,
        verbose = TRUE) {

        # Coerce whatever matrix-like object was supplied to a sparse matrix.
        sparseCounts <- methods::as(x, "CsparseMatrix")

        # Wrap the matrix in a SingleCellExperiment whose only assay is named
        # 'useAssay', then nest a copy of it as the alternative experiment
        # 'altExpName' so the result has the same layout as the
        # SingleCellExperiment method's output.
        assayList <- list()
        assayList[[useAssay]] <- sparseCounts
        wrapperSce <- SingleCellExperiment::SingleCellExperiment(
            assays = assayList)
        SingleCellExperiment::altExp(wrapperSce, altExpName) <- wrapperSce
        inputClass <- "matrix"

        # Fit the model on the nested experiment.
        fittedSce <- .celdaCWithSeed(counts = sparseCounts,
            xClass = inputClass,
            useAssay = useAssay,
            sce = SingleCellExperiment::altExp(wrapperSce, altExpName),
            sampleLabel = sampleLabel,
            K = K,
            alpha = alpha,
            beta = beta,
            algorithm = match.arg(algorithm),
            stopIter = stopIter,
            maxIter = maxIter,
            splitOnIter = splitOnIter,
            splitOnLast = splitOnLast,
            seed = seed,
            nchains = nchains,
            zInitialize = match.arg(zInitialize),
            countChecksum = countChecksum,
            zInit = zInit,
            logfile = logfile,
            verbose = verbose)

        # Replace the placeholder alternative experiment with the fitted one.
        SingleCellExperiment::altExp(wrapperSce, altExpName) <- fittedSce
        return(wrapperSce)
    }
)


# Validate the counts, fit the celda_C model (optionally under a fixed random
# seed) and attach the results to `sce`.
#
# @param counts Count matrix passed to `.validateCounts()` and `.celda_C()`.
# @param xClass Character. Class label of the user's original input; stored
#  in the output metadata.
# @param useAssay Character. Name of the assay the counts came from; stored
#  in the output metadata.
# @param sce SingleCellExperiment object that receives the results.
# @param seed Integer or NULL. When non-NULL the fit is wrapped in
#  `withr::with_seed(seed, ...)`; when NULL the fit is run without it.
# @param ... All remaining arguments are forwarded unchanged to `.celda_C()`
#  and/or `.createSCEceldaC()`.
# @return The object returned by `.createSCEceldaC()`.
.celdaCWithSeed <- function(counts,
    xClass,
    useAssay,
    sce,
    sampleLabel,
    K,
    alpha,
    beta,
    algorithm,
    stopIter,
    maxIter,
    splitOnIter,
    splitOnLast,
    seed,
    nchains,
    zInitialize,
    countChecksum,
    zInit,
    logfile,
    verbose) {

    .validateCounts(counts)

    # Single definition of the model fit so the seeded and unseeded paths
    # cannot drift apart.
    fitModel <- function() {
        .celda_C(counts = counts,
            sampleLabel = sampleLabel,
            K = K,
            alpha = alpha,
            beta = beta,
            algorithm = algorithm,
            stopIter = stopIter,
            maxIter = maxIter,
            splitOnIter = splitOnIter,
            splitOnLast = splitOnLast,
            nchains = nchains,
            zInitialize = zInitialize,
            countChecksum = countChecksum,
            zInit = zInit,
            logfile = logfile,
            verbose = verbose,
            reorder = TRUE)
    }

    if (is.null(seed)) {
        celdaCMod <- fitModel()
    } else {
        # with_seed() returns the value of the evaluated code.
        celdaCMod <- with_seed(seed, fitModel())
    }

    # .createSCEceldaC() reads the seed from `celdaCMod@params$seed`, but
    # .celda_C() does not store one there, so record it here. Without this the
    # "seed" entry of the celda_parameters metadata is always NULL. Assigning
    # NULL (seed = NULL) leaves the params list unchanged.
    celdaCMod@params$seed <- seed

    sce <- .createSCEceldaC(celdaCMod = celdaCMod,
        sce = sce,
        xClass = xClass,
        useAssay = useAssay,
        algorithm = algorithm,
        stopIter = stopIter,
        maxIter = maxIter,
        splitOnIter = splitOnIter,
        splitOnLast = splitOnLast,
        nchains = nchains,
        zInitialize = zInitialize,
        zInit = zInit,
        logfile = logfile,
        verbose = verbose)
    return(sce)
}


# celda_C main function
#
# Runs `nchains` chains of the celda_C cell-clustering model on `counts` and
# returns the chain with the highest final log-likelihood as a "celda_C"
# object.
#
# @param counts Count matrix (features in rows, cells in columns); passed
#  through `.processCounts()` before use.
# @param sampleLabel Sample label for each cell, or NULL; normalised by
#  `.processSampleLabels()`.
# @param K Number of cell populations.
# @param alpha,beta Concentration parameters for Theta and Phi.
# @param algorithm "EM" or "Gibbs". With "EM", `stopIter` is forced to 1.
# @param stopIter Number of iterations without improvement in the
#  log-likelihood before a chain stops.
# @param maxIter Maximum number of iterations per chain.
# @param splitOnIter Attempt a cluster split every `splitOnIter` iterations
#  (no periodic split when not positive).
# @param splitOnLast Logical. Attempt a split once `stopIter` iterations
#  without improvement have been reached.
# @param nchains Number of chains (independent initialisations).
# @param zInitialize "split", "random" or "predefined".
# @param countChecksum Checksum of `counts`; computed when NULL.
# @param zInit Initial labels; required when `zInitialize = "predefined"`.
# @param logfile,verbose Passed to `.logMessages()`.
# @param reorder Logical. When TRUE the result is passed through
#  `.reorderCeldaC()`.
# @return An object of class "celda_C".
.celda_C <- function(counts,
    sampleLabel = NULL,
    K,
    alpha = 1,
    beta = 1,
    algorithm = c("EM", "Gibbs"),
    stopIter = 10,
    maxIter = 200,
    splitOnIter = 10,
    splitOnLast = TRUE,
    nchains = 3,
    zInitialize = c("split", "random", "predefined"),
    countChecksum = NULL,
    zInit = NULL,
    logfile = NULL,
    verbose = TRUE,
    reorder = TRUE) {

    # The first message uses append = FALSE, so an existing logfile is
    # started afresh.
    .logMessages(paste(rep("-", 50), collapse = ""),
        logfile = logfile,
        append = FALSE,
        verbose = verbose)

    .logMessages("Starting Celda_C: Clustering cells.",
        logfile = logfile,
        append = TRUE,
        verbose = verbose)

    .logMessages(paste(rep("-", 50), collapse = ""),
        logfile = logfile,
        append = TRUE,
        verbose = verbose)

    startTime <- Sys.time()

    ## Error checking and variable processing
    counts <- .processCounts(counts)
    if (is.null(countChecksum)) {
        countChecksum <- .createCountChecksum(counts)
    }

    sampleLabel <- .processSampleLabels(sampleLabel, ncol(counts))
    s <- as.integer(sampleLabel)

    algorithm <- match.arg(algorithm)
    if (algorithm == "EM") {
        stopIter <- 1
    }

    # Name of the per-iteration update function; invoked through do.call().
    algorithmFun <- ifelse(algorithm == "Gibbs",
        ".cCCalcGibbsProbZ",
        ".cCCalcEMProbZ"
    )
    zInitialize <- match.arg(zInitialize)

    allChains <- seq(nchains)

    bestResult <- NULL
    for (i in allChains) {
        ## Initialize cluster labels
        .logMessages(date(),
            ".. Initializing 'z' in chain",
            i,
            "with",
            paste0("'", zInitialize, "' "),
            logfile = logfile,
            append = TRUE,
            verbose = verbose
        )

        if (zInitialize == "predefined") {
            if (is.null(zInit)) {
                stop("'zInit' needs to be specified when",
                    " zInitialize == 'predefined'.")
            }

            z <- .initializeCluster(K,
                ncol(counts),
                initial = zInit,
                fixed = NULL
            )
        } else if (zInitialize == "split") {
            z <- .initializeSplitZ(counts,
                K = K,
                alpha = alpha,
                beta = beta
            )
        } else {
            z <- .initializeCluster(K,
                ncol(counts),
                initial = NULL,
                fixed = NULL
            )
        }

        zBest <- z

        ## Calculate counts one time up front
        p <- .cCDecomposeCounts(counts, s, z, K)
        nS <- p$nS
        nG <- p$nG
        nM <- p$nM
        mCPByS <- p$mCPByS
        nGByCP <- p$nGByCP
        nCP <- p$nCP
        nByC <- p$nByC

        ll <- .cCCalcLL(
            mCPByS = mCPByS,
            nGByCP = nGByCP,
            s = s,
            K = K,
            nS = nS,
            nG = nG,
            alpha = alpha,
            beta = beta
        )

        ## Start from the log-likelihood of the initial labels so that
        ## 'llBest' always belongs to this chain, even if the loop below
        ## never runs (e.g. maxIter < 1). The first iteration overwrites it.
        llBest <- ll

        iter <- 1L
        numIterWithoutImprovement <- 0L
        doCellSplit <- TRUE
        while (iter <= maxIter & numIterWithoutImprovement <= stopIter) {
            nextZ <- do.call(algorithmFun, list(
                counts = counts,
                mCPByS = mCPByS,
                nGByCP = nGByCP,
                nByC = nByC,
                nCP = nCP,
                z = z,
                s = s,
                K = K,
                nG = nG,
                nM = nM,
                alpha = alpha,
                beta = beta
            ))

            mCPByS <- nextZ$mCPByS
            nGByCP <- nextZ$nGByCP
            nCP <- nextZ$nCP
            z <- nextZ$z

            ## Perform split on i-th iteration of no improvement in log
            ## likelihood
            tempLl <- .cCCalcLL(
                mCPByS = mCPByS,
                nGByCP = nGByCP,
                s = s,
                K = K,
                nS = nS,
                nG = nG,
                alpha = alpha,
                beta = beta
            )

            ## A split is attempted (only for K > 2 and not on the final
            ## iteration) either when the chain is about to stop without
            ## having reached a new best log-likelihood and 'splitOnLast' is
            ## TRUE, or on every 'splitOnIter'-th iteration while the previous
            ## split attempt changed the labels.
            if (K > 2 & iter != maxIter &
                ((((numIterWithoutImprovement == stopIter &
                    !all(tempLl >= ll))) & isTRUE(splitOnLast)) |
                    (splitOnIter > 0 & iter %% splitOnIter == 0 &
                        isTRUE(doCellSplit)))) {
                .logMessages(date(),
                    " .... Determining if any cell clusters should be split.",
                    logfile = logfile,
                    append = TRUE,
                    sep = "",
                    verbose = verbose
                )

                res <- .cCSplitZ(
                    counts,
                    mCPByS,
                    nGByCP,
                    nCP,
                    s,
                    z,
                    K,
                    nS,
                    nG,
                    alpha,
                    beta,
                    zProb = t(as.matrix(nextZ$probs)),
                    maxClustersToTry = K,
                    minCell = 3
                )

                .logMessages(res$message,
                    logfile = logfile,
                    append = TRUE,
                    verbose = verbose
                )

                # Reset convergence counter if a split occurred
                if (!isTRUE(all.equal(z, res$z))) {
                    numIterWithoutImprovement <- 0L
                    doCellSplit <- TRUE
                } else {
                    doCellSplit <- FALSE
                }

                ## Re-calculate variables
                z <- res$z
                mCPByS <- res$mCPByS
                nGByCP <- res$nGByCP
                nCP <- res$nCP
            }

            ## Calculate complete likelihood
            tempLl <- .cCCalcLL(
                mCPByS = mCPByS,
                nGByCP = nGByCP,
                s = s,
                K = K,
                nS = nS,
                nG = nG,
                alpha = alpha,
                beta = beta
            )

            ## Keep the labels whenever the log-likelihood beats every value
            ## seen so far in this chain (the first iteration always counts).
            if ((all(tempLl > ll)) | iter == 1) {
                zBest <- z
                llBest <- tempLl
                numIterWithoutImprovement <- 1L
            } else {
                numIterWithoutImprovement <- numIterWithoutImprovement + 1L
            }

            ll <- c(ll, tempLl)

            .logMessages(date(),
                ".... Completed iteration:",
                iter,
                "| logLik:",
                tempLl,
                logfile = logfile,
                append = TRUE,
                verbose = verbose
            )
            iter <- iter + 1
        }

        names <- list(
            row = rownames(counts),
            column = colnames(counts),
            sample = levels(sampleLabel)
        )

        result <- list(
            z = zBest,
            completeLogLik = ll,
            finalLogLik = llBest,
            K = K,
            sampleLabel = sampleLabel,
            alpha = alpha,
            beta = beta,
            countChecksum = countChecksum,
            names = names
        )

        ## Retain the chain with the highest final log-likelihood
        if (is.null(bestResult) ||
            result$finalLogLik > bestResult$finalLogLik) {
            bestResult <- result
        }

        .logMessages(date(),
            ".. Finished chain",
            i,
            logfile = logfile,
            append = TRUE,
            verbose = verbose
        )
    }

    bestResult <- methods::new("celda_C",
        clusters = list(z = bestResult$z),
        params = list(
            K = as.integer(bestResult$K),
            alpha = bestResult$alpha,
            beta = bestResult$beta,
            countChecksum = bestResult$countChecksum
        ),
        sampleLabel = bestResult$sampleLabel,
        completeLogLik = bestResult$completeLogLik,
        finalLogLik = bestResult$finalLogLik,
        names = bestResult$names
    )

    if (isTRUE(reorder)) {
        bestResult <- .reorderCeldaC(counts = counts, res = bestResult)
    }

    endTime <- Sys.time()
    .logMessages(paste(rep("-", 50), collapse = ""),
        logfile = logfile,
        append = TRUE,
        verbose = verbose
    )

    .logMessages("Completed Celda_C. Total time:",
        format(difftime(endTime, startTime)),
        logfile = logfile,
        append = TRUE,
        verbose = verbose
    )

    .logMessages(paste(rep("-", 50), collapse = ""),
        logfile = logfile,
        append = TRUE,
        verbose = verbose
    )

    return(bestResult)
}


# Gibbs sampling for the celda_C Model
#
# Performs one sweep over all cells in random order. For every cell the
# unnormalised log-probability of each of the K populations is computed and,
# when `doSample` is TRUE, a new label is drawn with `.sampleLl()` and the
# count summaries are updated in place.
#
# @param counts Feature-by-cell count matrix (indexed as counts[, i]).
# @param mCPByS Population-by-sample matrix of cell counts.
# @param nGByCP Feature-by-population matrix of counts.
# @param nByC Total counts per cell.
# @param nCP Total counts per population.
# @param z Current population label of each cell.
# @param s Sample index of each cell.
# @param K Number of populations.
# @param nG Number of features.
# @param nM Number of cells.
# @param alpha,beta Pseudocounts added to mCPByS and nGByCP respectively.
# @param doSample Logical. When FALSE the labels are left unchanged and only
#  `probs` is filled in.
# @return List with the updated `mCPByS`, `nGByCP`, `nCP`, `z`, and the
#  K-by-nM matrix `probs` of log-probabilities.
.cCCalcGibbsProbZ <- function(counts,
                              mCPByS,
                              nGByCP,
                              nByC,
                              nCP,
                              z,
                              s,
                              K,
                              nG,
                              nM,
                              alpha,
                              beta,
                              doSample = TRUE) {

  ## Set variables up front outside of loop
  probs <- matrix(NA, ncol = nM, nrow = K)

  ## Visit the cells in a random order
  ix <- sample(seq(nM))
  for (i in ix) {
    ## Subtract cell counts from current population assignment
    # nGByCP1 <- nGByCP
    # nGByCP1[, z[i]] <- nGByCP[, z[i]] - counts[, i]
    # nGByCP1 <- .colSums(lgamma(nGByCP1 + beta), nrow(nGByCP), ncol(nGByCP))

    # nCP1 <- nCP
    # nCP1[z[i]] <- nCP1[z[i]] - nByC[i]
    # nCP1 <- lgamma(nCP1 + (nG * beta))

    ## Add cell counts to all other populations
    # nGByCP2 <- nGByCP
    # otherIx <- seq(K)[-z[i]]
    # nGByCP2[, otherIx] <- nGByCP2[, otherIx] + counts[, i]
    # nGByCP2 <- .colSums(lgamma(nGByCP2 + beta), nrow(nGByCP), ncol(nGByCP))

    # nCP2 <- nCP
    # nCP2[otherIx] <- nCP2[otherIx] + nByC[i]
    # nCP2 <- lgamma(nCP2 + (nG * beta))


    ## Remove the current cell from its population's per-sample cell count
    mCPByS[z[i], s[i]] <- mCPByS[z[i], s[i]] - 1L

    ## Calculate probabilities for each state
    ## when considering a specific cluster for this cell,
    ##   no need to calculate cells in other clusters
    for (j in seq_len(K)) {
      # otherIx <- seq(K)[-j]
      if (j != z[i]) { # when j is not current population assignment
        ## Theta simplified
        ## Phi terms: population j with this cell added minus population j
        ## as it currently is
        probs[j, i] <- log(mCPByS[j, s[i]] + alpha) +
          # if adding this cell -- Phi Numerator
          sum(lgamma(nGByCP[, j] + counts[, i] + beta)) -
          # if adding this cell -- Phi Denominator
          lgamma(nCP[j] + nByC[i] + nG * beta) -
          # if without this cell -- Phi Numerator
          sum(lgamma(nGByCP[, j] + beta)) +
          # if without this cell -- Phi Denominator
          lgamma(nCP[j] + nG * beta)
        # sum(nGByCP1[otherIx]) + ## Phi Numerator (other cells)
        # nGByCP2[j] - ## Phi Numerator (current cell)
        # sum(nCP1[otherIx]) - ## Phi Denominator (other cells)
        # nCP2[j] - ## Phi Denominator (current cell)
      } else { # when j is current population assignment
        ## Theta simplified
        ## Phi terms: population j as it currently is (cell included) minus
        ## population j with this cell's counts removed
        probs[j, i] <- log(mCPByS[j, s[i]] + alpha) +
          sum(lgamma(nGByCP[, j] + beta)) -
          lgamma(nCP[j] + nG * beta) -
          sum(lgamma(nGByCP[, j] - counts[, i] + beta)) +
          lgamma(nCP[j] - nByC[i] + nG * beta)
      }
    }

    ## Sample next state and add back counts
    prevZ <- z[i]
    if (isTRUE(doSample)) {
      # NOTE(review): .sampleLl is defined elsewhere; presumably it draws a
      # label from the log-probabilities -- confirm.
      z[i] <- .sampleLl(probs[, i])
    }

    ## Move the cell's counts only if its label actually changed
    if (prevZ != z[i]) {
      nGByCP[, prevZ] <- nGByCP[, prevZ] - counts[, i]
      nGByCP[, z[i]] <- nGByCP[, z[i]] + counts[, i]

      nCP[prevZ] <- nCP[prevZ] - nByC[i]
      nCP[z[i]] <- nCP[z[i]] + nByC[i]
    }
    ## Add the cell back to the per-sample cell count of its (new) population
    mCPByS[z[i], s[i]] <- mCPByS[z[i], s[i]] + 1L
  }

  return(list(
    mCPByS = mCPByS,
    nGByCP = nGByCP,
    nCP = nCP,
    z = z,
    probs = probs
  ))
}


# EM-style update for the celda_C model: scores every cell against every
# population and, when `doSample` is TRUE, moves each cell to its
# highest-scoring population and refreshes the count summaries.
# Returns a list with `mCPByS`, `nGByCP`, `nCP`, `z` and the score matrix
# `probs`. The arguments `nByC`, `nG` and `nM` are accepted for signature
# compatibility with `.cCCalcGibbsProbZ` but are not used here.
.cCCalcEMProbZ <- function(counts,
                           mCPByS,
                           nGByCP,
                           nByC,
                           nCP,
                           z,
                           s,
                           K,
                           nG,
                           nM,
                           alpha,
                           beta,
                           doSample = TRUE) {

  ## Current estimates of the population-by-sample and
  ## feature-by-population proportions
  # NOTE(review): fastNormPropLog is defined elsewhere; presumably it returns
  # log proportions after adding the pseudocount -- confirm.
  sampleTerm <- fastNormPropLog(mCPByS, alpha)
  featureTerm <- fastNormPropLog(nGByCP, beta)

  ## Score of each population (rows) for each cell (columns)
  probs <- .countsTimesProbs(counts, featureTerm) + sampleTerm[, s]

  if (isTRUE(doSample)) {
    ## Assign every cell to its best-scoring population
    newZ <- apply(probs, 2, which.max)

    ## Update the count summaries from the old to the new labels
    updated <- .cCReDecomposeCounts(counts, s, newZ, z, nGByCP, K)
    z <- newZ
    mCPByS <- updated$mCPByS
    nGByCP <- updated$nGByCP
    nCP <- updated$nCP
  }

  out <- list(
    mCPByS = mCPByS,
    nGByCP = nGByCP,
    nCP = nCP,
    z = z,
    probs = probs
  )
  return(out)
}


# Log-likelihood of the celda_C model.
#
# The result is the sum of two terms with the same form: one for "Theta"
# (population-by-sample cell counts `mCPByS` with pseudocount `alpha`) and one
# for "Phi" (feature-by-population counts `nGByCP` with pseudocount `beta`).
# The arguments `s` and `z` are part of the signature but are not used.
.cCCalcLL <- function(mCPByS,
                      nGByCP,
                      s,
                      z,
                      K,
                      nS,
                      nG,
                      alpha,
                      beta) {

  ## Shared form of both components: `tab` is a count table with `nRow` rows
  ## and `nCol` columns, `conc` the pseudocount added to every entry.
  componentLl <- function(tab, nRow, nCol, conc) {
    nCol * lgamma(nRow * conc) +
      sum(lgamma(tab + conc)) +
      (-nCol * nRow * lgamma(conc)) +
      (-sum(lgamma(colSums(tab + conc))))
  }

  ## "Theta" component: K populations (rows) by nS samples (columns)
  thetaPart <- componentLl(mCPByS, nRow = K, nCol = nS, conc = alpha)

  ## "Phi" component: nG features (rows) by K populations (columns)
  phiPart <- componentLl(nGByCP, nRow = nG, nCol = K, conc = beta)

  return(thetaPart + phiPart)
}


# Builds, from the raw counts matrix and the current labels, the summary
# tables used by the celda_C log-likelihood and update functions.
# @param counts Integer matrix. Rows represent features and columns represent
# cells.
# @param s Integer vector. Contains the sample label for each cell (column) in
# the count matrix.
# @param z Numeric vector. Denotes cell population labels.
# @param K Integer. Number of cell populations.
# @return List with `mCPByS` (cells per population and sample), `nGByCP`
# (counts per feature and population), `nCP` (counts per population), `nByC`
# (counts per cell), `nS` (number of samples), `nG` (number of features) and
# `nM` (number of cells).
#' @importFrom Matrix colSums
.cCDecomposeCounts <- function(counts, s, z, K) {
  ## Dimensions
  numSamples <- length(unique(s))
  numFeatures <- nrow(counts)
  numCells <- ncol(counts)

  ## Cells per population (rows, all K levels kept) and sample (columns)
  zBySample <- table(factor(z, levels = seq(K)), s)
  cellTable <- matrix(as.integer(zBySample), ncol = numSamples)

  ## Counts per feature and population, and their per-population totals
  featureTable <- .colSumByGroup(counts, group = z, K = K)
  popTotals <- .colSums(featureTable, nrow(featureTable), ncol(featureTable))

  ## Total counts per cell
  cellTotals <- colSums(counts)

  return(list(
    mCPByS = cellTable,
    nGByCP = featureTable,
    nCP = popTotals,
    nByC = cellTotals,
    nS = numSamples,
    nG = numFeatures,
    nM = numCells
  ))
}

#' @importFrom Matrix colSums
.cCReDecomposeCounts <- function(counts, s, z, previousZ, nGByCP, K) {
  ## Update the feature-by-population counts for the change in labels from
  ## 'previousZ' to 'z', then derive the per-population totals from them
  featureTable <- .colSumByGroupChange(counts, nGByCP, z, previousZ, K)
  popTotals <- colSums(featureTable)

  ## Rebuild the population-by-sample cell counts from the new labels,
  ## keeping all K population levels
  numSamples <- length(unique(s))
  zBySample <- table(factor(z, levels = seq(K)), s)
  cellTable <- matrix(as.integer(zBySample), ncol = numSamples)

  return(list(
    mCPByS = cellTable,
    nGByCP = featureTable,
    nCP = popTotals
  ))
}


# Prepares a normalized, cell-by-feature matrix for dimensionality reduction,
# optionally down-sampling the cells to at most `maxCells`.
#
# @param sce SingleCellExperiment-like object holding the counts in assay
#  `useAssay`, the cell clusters in `colData(sce)$celda_cell_cluster` and the
#  number of clusters in `metadata(sce)$celda_parameters$K`.
# @param useAssay Name of the assay to use.
# @param maxCells Maximum number of cells to keep, or NULL to keep all cells.
# @param minClusterSize Down-sampling does not take a cluster below this many
#  cells.
# @param normalize,scaleFactor,transformationFun Passed to
#  `normalizeCounts()`.
# @return List with `norm` (the transposed result of `normalizeCounts()` for
#  the retained cells) and `cellIx` (column indices of the retained cells).
.prepareCountsForDimReductionCeldaC <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    normalize,
    scaleFactor,
    transformationFun) {

    counts <- SummarizedExperiment::assay(sce, i = useAssay)
    counts <- .processCounts(counts)

    K <- S4Vectors::metadata(sce)$celda_parameters$K

    ## Checking if maxCells and minClusterSize will work
    if (!is.null(maxCells)) {
        if ((maxCells < ncol(counts)) &&
                (maxCells / minClusterSize < K)) {

            stop("Cannot distribute ",
                maxCells,
                " cells among ",
                K,
                " clusters while maintaining a minumum of ",
                minClusterSize,
                " cells per cluster. Try increasing 'maxCells' or decreasing",
                " 'minClusterSize'.")
        }
    } else {
        maxCells <- ncol(counts)
    }

    ## Select a subset of cells to sample if greater than 'maxCells'
    totalCellsToRemove <- ncol(counts) - maxCells
    zInclude <- rep(TRUE, ncol(counts))

    if (totalCellsToRemove > 0) {
        z <- SummarizedExperiment::colData(sce)$celda_cell_cluster
        zTa <- tabulate(z, K)

        ## Number of cells that can be sampled from each cluster without
        ## going below the minimum threshold
        clusterCellsToSample <- zTa - minClusterSize
        clusterCellsToSample[clusterCellsToSample < 0] <- 0

        ## Number of cells to sample after excluding smaller clusters
        ## Rounding can cause number to be off by a few, so ceiling is
        ## used with a second round of subtraction
        clusterNToSample <- ceiling((clusterCellsToSample /
                sum(clusterCellsToSample)) * totalCellsToRemove)
        diff <- sum(clusterNToSample) - totalCellsToRemove
        clusterNToSample[which.max(clusterNToSample)] <-
            clusterNToSample[which.max(clusterNToSample)] - diff

        ## Perform sampling for each cluster
        for (i in which(clusterNToSample > 0)) {
            clusterIx <- which(z == i)
            ## Index through sample.int() rather than calling
            ## sample(clusterIx, n): when a cluster holds exactly one cell,
            ## sample() would treat that single index as the upper bound of
            ## 1:index and could drop a cell from a different cluster. For
            ## clusters with more than one cell this is what sample() does
            ## internally, so the random draws are unchanged.
            dropIx <- clusterIx[sample.int(length(clusterIx),
                clusterNToSample[i])]
            zInclude[dropIx] <- FALSE
        }
    }

    cellIx <- which(zInclude)
    norm <- t(normalizeCounts(counts[, cellIx],
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun))
    return(list(norm = norm, cellIx = cellIx))
}


# Copies the results of a fitted celda_C model into `sce`.
#
# The run settings and model summaries are stored as a list in
# `metadata(sce)[["celda_parameters"]]`; feature names go to `rowData`, and
# cell names, sample labels and cell cluster labels go to `colData`.
# K, alpha, beta, seed and countChecksum are read from `celdaCMod@params`
# (an entry missing there is stored as NULL); the remaining settings are
# taken from the arguments.
.createSCEceldaC <- function(celdaCMod,
    sce,
    xClass,
    useAssay,
    algorithm,
    stopIter,
    maxIter,
    splitOnIter,
    splitOnLast,
    nchains,
    zInitialize,
    zInit,
    logfile,
    verbose) {

    modelParams <- celdaCMod@params
    modelNames <- celdaCMod@names
    cellClusters <- celdaClusters(celdaCMod)$z

    # add metadata
    celdaParameters <- list(
        model = "celda_C",
        xClass = xClass,
        useAssay = useAssay,
        sampleLevels = modelNames$sample,
        K = modelParams$K,
        alpha = modelParams$alpha,
        beta = modelParams$beta,
        algorithm = algorithm,
        stopIter = stopIter,
        maxIter = maxIter,
        splitOnIter = splitOnIter,
        splitOnLast = splitOnLast,
        seed = modelParams$seed,
        nchains = nchains,
        zInitialize = zInitialize,
        countChecksum = modelParams$countChecksum,
        zInit = zInit,
        logfile = logfile,
        verbose = verbose,
        completeLogLik = celdaCMod@completeLogLik,
        finalLogLik = celdaCMod@finalLogLik,
        cellClusterLevels = sort(unique(cellClusters)))
    S4Vectors::metadata(sce)[["celda_parameters"]] <- celdaParameters

    # per-feature annotation
    SummarizedExperiment::rowData(sce)["rownames"] <- modelNames$row

    # per-cell annotation
    SummarizedExperiment::colData(sce)["colnames"] <- modelNames$column
    SummarizedExperiment::colData(sce)["celda_sample_label"] <-
        as.factor(celdaCMod@sampleLabel)
    SummarizedExperiment::colData(sce)["celda_cell_cluster"] <-
        as.factor(cellClusters)

    return(sce)
}

# countsTimesProbs: counts matrix times cell population probabilities.
# counts: feature-by-cell matrix
# phi: feature-by-probability matrix
# Returns the product of t(phi) and counts. Base integer and numeric matrices
# are dispatched to the compiled multiplication helpers; anything else (for
# example a sparse Matrix object) uses the generic %*% operator.
#' @importMethodsFrom Matrix %*%
.countsTimesProbs <- function(counts, phi) {
  isBaseMatrix <- inherits(counts, "matrix")

  if (isBaseMatrix && is.integer(counts)) {
    return(eigenMatMultInt(phi, counts))
  }
  if (isBaseMatrix && is.numeric(counts)) {
    return(eigenMatMultNumeric(phi, counts))
  }
  return(t(phi) %*% counts)
}


================================================
FILE: R/celda_CG.R
================================================
#' @title Cell and feature clustering with Celda
#' @description Clusters the rows and columns of a count matrix containing
#'  single-cell data into L modules and K subpopulations, respectively. The
#'  \code{useAssay} \link{assay} slot in
#'  \code{altExpName} \link{altExp} slot will be used if
#'  it exists. Otherwise, the \code{useAssay}
#'  \link{assay} slot in \code{x} will be used if
#'  \code{x} is a \linkS4class{SingleCellExperiment} object.
#' @param x A \linkS4class{SingleCellExperiment}
#'  with the matrix located in the assay slot under \code{useAssay}.
#'  Rows represent features and columns represent cells. Alternatively,
#'  any matrix-like object that can be coerced to a sparse matrix of class
#'  "dgCMatrix" can be directly used as input. The matrix will automatically be
#'  converted to a \linkS4class{SingleCellExperiment} object.
#' @param useAssay A string specifying the name of the
#'  \link{assay} slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param sampleLabel Vector or factor. Denotes the sample label for each cell
#'  (column) in the count matrix.
#' @param K Integer. Number of cell populations.
#' @param L Integer. Number of feature modules.
#' @param alpha Numeric. Concentration parameter for Theta. Adds a pseudocount
#'  to each cell population in each sample. Default 1.
#' @param beta Numeric. Concentration parameter for Phi. Adds a pseudocount to
#'  each feature module in each cell population. Default 1.
#' @param delta Numeric. Concentration parameter for Psi. Adds a pseudocount to
#'  each feature in each module. Default 1.
#' @param gamma Numeric. Concentration parameter for Eta. Adds a pseudocount to
#'  the number of features in each module. Default 1.
#' @param algorithm String. Algorithm to use for clustering cell subpopulations.
#'  One of 'EM' or 'Gibbs'. The EM algorithm for cell clustering is faster,
#'  especially for larger numbers of cells. However, more chains may be required
#'  to ensure a good solution is found. Default 'EM'.
#' @param stopIter Integer. Number of iterations without improvement in the log
#'  likelihood to stop inference. Default 10.
#' @param maxIter Integer. Maximum number of iterations of Gibbs sampling to
#'  perform. Default 200.
#' @param splitOnIter Integer. On every \code{splitOnIter} iteration,
#'  a heuristic
#'  will be applied to determine if a cell population or feature module should
#'  be reassigned and another cell population or feature module should be split
#'  into two clusters. To disable splitting, set to -1. Default 10.
#' @param splitOnLast Logical. After \code{stopIter} iterations have been
#'  performed without improvement, a heuristic will be applied to determine if
#'  a cell population or feature module should be reassigned and another cell
#'  population or feature module should be split into two clusters. If a split
#'  occurs, then 'stopIter' will be reset. Default TRUE.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#'  a default value of 12345 is used. If NULL, no calls to
#'  \link[withr]{with_seed} are made.
#' @param nchains Integer. Number of random cluster initializations. Default 3.
#' @param zInitialize Character. One of 'random', 'split', or 'predefined'.
#'  With 'random', cells are randomly assigned to a population. With 'split',
#'  cells will be split into sqrt(K) populations and then each population will
#'  be subsequently split into another sqrt(K) populations. With 'predefined',
#'  values in \code{zInit} will be used to initialize \code{z}. Default 'split'.
#' @param yInitialize Character. One of 'random', 'split', or 'predefined'.
#'  With 'random', features are randomly assigned to a module. With 'split',
#'  features will be split into sqrt(L) modules and then each module will be
#'  subsequently split into another sqrt(L) modules. With 'predefined', values
#'  in \code{yInit} will be used to initialize \code{y}. Default 'split'.
#' @param zInit Integer vector. Sets initial starting values of z. 'zInit'
#'  is only used when `zInitialize = 'predefined'`. Default NULL.
#' @param yInit Integer vector. Sets initial starting values of y.
#'  'yInit' is only used when `yInitialize = "predefined"`. Default NULL.
#' @param countChecksum Character. An MD5 checksum for the counts matrix.
#'  Default NULL.
#' @param logfile Character. Messages will be redirected to a file named
#'  `logfile`. If NULL, messages will be printed to stdout.  Default NULL.
#' @param verbose Logical. Whether to print log messages. Default TRUE.
#' @return A \linkS4class{SingleCellExperiment} object. Function
#'  parameter settings are stored in \link{metadata}
#'  \code{"celda_parameters"} in \link{altExp} slot.
#'  In \link{altExp} slot,
#'  columns \code{celda_sample_label} and \code{celda_cell_cluster} in
#'  \link{colData} contain sample labels and celda cell
#'  population clusters. Column \code{celda_feature_module} in
#'  \link{rowData} contains feature modules.
#' @seealso \link{celda_G} for feature clustering and \link{celda_C} for
#'  clustering cells. \link{celdaGridSearch} can be used to run multiple
#'  values of K/L and multiple chains in parallel.
#' @import Rcpp RcppEigen
#' @export
setGeneric("celda_CG",
    # S4 generic for simultaneous cell and feature clustering. The methods
    # defined in this file specialise on the class of `x` and repeat the
    # formals and defaults declared here.
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        sampleLabel = NULL,
        K,
        L,
        alpha = 1,
        beta = 1,
        delta = 1,
        gamma = 1,
        algorithm = c("EM", "Gibbs"),
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        zInitialize = c("split", "random", "predefined"),
        yInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        zInit = NULL,
        yInit = NULL,
        logfile = NULL,
        verbose = TRUE) {
    standardGeneric("celda_CG")})


#' @rdname celda_CG
#' @export
setMethod("celda_CG",
    signature(x = "SingleCellExperiment"),
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        sampleLabel = NULL,
        K,
        L,
        alpha = 1,
        beta = 1,
        delta = 1,
        gamma = 1,
        algorithm = c("EM", "Gibbs"),
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        zInitialize = c("split", "random", "predefined"),
        yInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        zInit = NULL,
        yInit = NULL,
        logfile = NULL,
        verbose = TRUE) {

        # Recorded in the output metadata so downstream code knows what kind
        # of object the user originally supplied.
        inputClass <- "SingleCellExperiment"

        # The model is fitted on the alternative experiment named by
        # 'altExpName'; it has to be present in 'x' already.
        if (!(altExpName %in% SingleCellExperiment::altExpNames(x))) {
            stop(altExpName, " not in 'altExpNames(x)'. Run ",
                "selectFeatures(x) first!")
        }
        featureSce <- SingleCellExperiment::altExp(x, altExpName)

        # The requested assay must exist inside that alternative experiment.
        if (!(useAssay %in% SummarizedExperiment::assayNames(featureSce))) {
            stop(useAssay, " not in assayNames(altExp(x, altExpName))")
        }
        countMatrix <- SummarizedExperiment::assay(featureSce, i = useAssay)

        # Fit the model; the helper returns the alternative experiment with
        # the celda_CG results attached.
        fittedSce <- .celdaCGWithSeed(counts = countMatrix,
            xClass = inputClass,
            useAssay = useAssay,
            sce = featureSce,
            sampleLabel = sampleLabel,
            K = K,
            L = L,
            alpha = alpha,
            beta = beta,
            delta = delta,
            gamma = gamma,
            algorithm = match.arg(algorithm),
            stopIter = stopIter,
            maxIter = maxIter,
            splitOnIter = splitOnIter,
            splitOnLast = splitOnLast,
            seed = seed,
            nchains = nchains,
            zInitialize = match.arg(zInitialize),
            yInitialize = match.arg(yInitialize),
            countChecksum = countChecksum,
            zInit = zInit,
            yInit = yInit,
            logfile = logfile,
            verbose = verbose)

        # Store the updated alternative experiment back under the same name.
        SingleCellExperiment::altExp(x, altExpName) <- fittedSce
        return(x)
    }
)


#' @rdname celda_CG
#' @examples
#' data(celdaCGSim)
#' sce <- celda_CG(celdaCGSim$counts,
#'     K = celdaCGSim$K,
#'     L = celdaCGSim$L,
#'     sampleLabel = celdaCGSim$sampleLabel,
#'     nchains = 1)
#' @export
setMethod("celda_CG",
    signature(x = "ANY"),
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        sampleLabel = NULL,
        K,
        L,
        alpha = 1,
        beta = 1,
        delta = 1,
        gamma = 1,
        algorithm = c("EM", "Gibbs"),
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        zInitialize = c("split", "random", "predefined"),
        yInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        zInit = NULL,
        yInit = NULL,
        logfile = NULL,
        verbose = TRUE) {

        # Coerce the matrix-like input to a sparse column-compressed matrix.
        sparseCounts <- methods::as(x, "CsparseMatrix")

        # Wrap it in a SingleCellExperiment whose alternative experiment
        # 'altExpName' is a copy of itself, mirroring the layout expected by
        # the SingleCellExperiment method.
        assayList <- list()
        assayList[[useAssay]] <- sparseCounts
        wrapped <- SingleCellExperiment::SingleCellExperiment(
            assays = assayList)
        SingleCellExperiment::altExp(wrapped, altExpName) <- wrapped

        fitted <- .celdaCGWithSeed(
            counts = sparseCounts,
            sce = SingleCellExperiment::altExp(wrapped, altExpName),
            xClass = "matrix",
            useAssay = useAssay,
            sampleLabel = sampleLabel,
            K = K, L = L,
            alpha = alpha, beta = beta, delta = delta, gamma = gamma,
            algorithm = match.arg(algorithm),
            stopIter = stopIter, maxIter = maxIter,
            splitOnIter = splitOnIter, splitOnLast = splitOnLast,
            seed = seed, nchains = nchains,
            zInitialize = match.arg(zInitialize),
            yInitialize = match.arg(yInitialize),
            zInit = zInit, yInit = yInit,
            countChecksum = countChecksum,
            logfile = logfile, verbose = verbose)

        # Replace the placeholder alternative experiment with the fitted one.
        SingleCellExperiment::altExp(wrapped, altExpName) <- fitted
        wrapped
    }
)


.celdaCGWithSeed <- function(counts,
    xClass,
    useAssay,
    sce,
    sampleLabel,
    K,
    L,
    alpha,
    beta,
    delta,
    gamma,
    algorithm,
    stopIter,
    maxIter,
    splitOnIter,
    splitOnLast,
    seed,
    nchains,
    zInitialize,
    yInitialize,
    countChecksum,
    zInit,
    yInit,
    logfile,
    verbose) {

    # Validates 'counts', fits the celda_CG model (under 'seed' when one is
    # given) and stores the fit in 'sce' via .createSCEceldaCG().

    .validateCounts(counts)

    # One definition of the model fit shared by the seeded and unseeded paths.
    fitModel <- function() {
        .celda_CG(
            counts = counts,
            sampleLabel = sampleLabel,
            K = K,
            L = L,
            alpha = alpha,
            beta = beta,
            delta = delta,
            gamma = gamma,
            algorithm = algorithm,
            stopIter = stopIter,
            maxIter = maxIter,
            splitOnIter = splitOnIter,
            splitOnLast = splitOnLast,
            nchains = nchains,
            zInitialize = zInitialize,
            yInitialize = yInitialize,
            countChecksum = countChecksum,
            zInit = zInit,
            yInit = yInit,
            logfile = logfile,
            verbose = verbose,
            reorder = TRUE
        )
    }

    if (!is.null(seed)) {
        # The assignment is evaluated inside with_seed(), in this frame.
        with_seed(seed, celdaCGMod <- fitModel())
    } else {
        celdaCGMod <- fitModel()
    }

    sce <- .createSCEceldaCG(celdaCGMod = celdaCGMod,
        sce = sce,
        xClass = xClass,
        useAssay = useAssay,
        algorithm = algorithm,
        stopIter = stopIter,
        maxIter = maxIter,
        splitOnIter = splitOnIter,
        splitOnLast = splitOnLast,
        nchains = nchains,
        zInitialize = zInitialize,
        yInitialize = yInitialize,
        zInit = zInit,
        yInit = yInit,
        logfile = logfile,
        verbose = verbose)
    return(sce)
}


# Fit the celda_CG model (simultaneous clustering of cells and genes) on a
# counts matrix and return the model built from the best chain.
#
# For each of 'nchains' chains the cell labels 'z' and gene labels 'y' are
# initialized according to 'zInitialize' / 'yInitialize' and then updated in
# alternation. A chain stops once 'maxIter' iterations have run or the
# no-improvement counter exceeds 'stopIter'. Cluster splitting heuristics are
# applied every 'splitOnIter' iterations and, if 'splitOnLast' is TRUE, when
# the chain is about to stop. The chain with the highest final log-likelihood
# is kept.
#
# @param counts Counts matrix; rows are genes, columns are cells.
# @param sampleLabel Sample label for each cell, or NULL.
# @param K Number of cell populations.
# @param L Number of gene modules.
# @param alpha,beta,delta,gamma Hyperparameters entering the Theta, Phi, Psi
#   and Eta components of the log-likelihood respectively (see .cCGCalcLL).
# @param algorithm "EM" or "Gibbs"; selects the cell-label update function.
# @param stopIter,maxIter,splitOnIter,splitOnLast Iteration controls, see
#   above.
# @param nchains Number of independent chains to run.
# @param zInitialize,yInitialize One of "split", "random", "predefined".
# @param countChecksum Checksum of 'counts'; computed when NULL.
# @param zInit,yInit Starting labels; required when the matching initializer
#   is "predefined".
# @param logfile,verbose Passed to .logMessages().
# @param reorder If TRUE the final model is passed through .reorderCeldaCG().
# @return An S4 object of class "celda_CG".
.celda_CG <- function(counts,
                      sampleLabel = NULL,
                      K,
                      L,
                      alpha = 1,
                      beta = 1,
                      delta = 1,
                      gamma = 1,
                      algorithm = c("EM", "Gibbs"),
                      stopIter = 10,
                      maxIter = 200,
                      splitOnIter = 10,
                      splitOnLast = TRUE,
                      nchains = 3,
                      zInitialize = c("split", "random", "predefined"),
                      yInitialize = c("split", "random", "predefined"),
                      countChecksum = NULL,
                      zInit = NULL,
                      yInit = NULL,
                      logfile = NULL,
                      verbose = TRUE,
                      reorder = TRUE) {
  # First log call uses append = FALSE, every later one appends.
  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = logfile,
    append = FALSE,
    verbose = verbose
  )

  .logMessages("Starting Celda_CG: Clustering cells and genes.",
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )

  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )

  startTime <- Sys.time()

  counts <- .processCounts(counts)
  if (is.null(countChecksum)) {
    countChecksum <- .createCountChecksum(counts)
  }

  sampleLabel <- .processSampleLabels(sampleLabel, ncol(counts))
  s <- as.integer(sampleLabel)

  algorithm <- match.arg(algorithm)
  # Name of the cell-label update function, invoked through do.call() below.
  algorithmFun <- ifelse(algorithm == "Gibbs",
    ".cCCalcGibbsProbZ",
    ".cCCalcEMProbZ"
  )
  zInitialize <- match.arg(zInitialize)
  yInitialize <- match.arg(yInitialize)

  allChains <- seq(nchains)

  # Pre-compute lgamma values
  lggamma <- lgamma(seq(0, nrow(counts) + L) + gamma)
  lgdelta <- c(NA, lgamma((seq(nrow(counts) + L) * delta)))

  bestResult <- NULL
  for (i in allChains) {
    ## Initialize cluster labels
    .logMessages(date(),
      ".. Initializing 'z' in chain",
      i,
      "with",
      paste0("'", zInitialize, "' "),
      logfile = logfile,
      append = TRUE,
      verbose = verbose
    )

    .logMessages(date(),
      ".. Initializing 'y' in chain",
      i,
      "with",
      paste0("'", yInitialize, "' "),
      logfile = logfile,
      append = TRUE,
      verbose = verbose
    )

    if (zInitialize == "predefined") {
      if (is.null(zInit)) {
        stop("'zInit' needs to be specified when",
          " zInitialize == 'predefined'.")
      }
      z <- .initializeCluster(K,
        ncol(counts),
        initial = zInit,
        fixed = NULL
      )
    } else if (zInitialize == "split") {
      z <- .initializeSplitZ(
        counts,
        K = K,
        alpha = alpha,
        beta = beta
      )
    } else {
      z <- .initializeCluster(K,
        ncol(counts),
        initial = NULL,
        fixed = NULL
      )
    }

    if (yInitialize == "predefined") {
      if (is.null(yInit)) {
        stop("'yInit' needs to be specified when",
          " yInitialize == 'predefined'.")
      }
      y <- .initializeCluster(L,
        nrow(counts),
        initial = yInit,
        fixed = NULL
      )
    } else if (yInitialize == "split") {
      y <- .initializeSplitY(counts,
        L,
        beta = beta,
        delta = delta,
        gamma = gamma
      )
    } else {
      y <- .initializeCluster(L,
        nrow(counts),
        initial = NULL,
        fixed = NULL
      )
    }

    zBest <- z
    yBest <- y

    ## Calculate counts one time up front
    p <- .cCGDecomposeCounts(counts, s, z, y, K, L)
    mCPByS <- p$mCPByS
    nTSByC <- p$nTSByC
    nTSByCP <- p$nTSByCP
    nCP <- p$nCP
    nByG <- p$nByG
    nByC <- p$nByC
    nByTS <- p$nByTS
    nGByTS <- p$nGByTS
    nGByCP <- p$nGByCP
    nM <- p$nM
    nG <- p$nG
    nS <- p$nS
    rm(p)

    ## Log-likelihood of the initial labels; 'll' accumulates one value per
    ## iteration after this
    ll <- .cCGCalcLL(
      K = K,
      L = L,
      mCPByS = mCPByS,
      nTSByCP = nTSByCP,
      nByG = nByG,
      nByTS = nByTS,
      nGByTS = nGByTS,
      nS = nS,
      nG = nG,
      alpha = alpha,
      beta = beta,
      delta = delta,
      gamma = gamma
    )

    iter <- 1L
    numIterWithoutImprovement <- 0L
    doCellSplit <- TRUE
    doGeneSplit <- TRUE
    while (iter <= maxIter & numIterWithoutImprovement <= stopIter) {
      ## Gibbs sampling for each gene
      lgbeta <- lgamma(seq(0, max(nCP)) + beta)
      nextY <- .cGCalcGibbsProbY(
        counts = nGByCP,
        nTSByC = nTSByCP,
        nByTS = nByTS,
        nGByTS = nGByTS,
        nByG = nByG,
        y = y,
        L = L,
        nG = nG,
        beta = beta,
        delta = delta,
        gamma = gamma,
        lgbeta = lgbeta,
        lggamma = lggamma,
        lgdelta = lgdelta
      )
      nTSByCP <- nextY$nTSByC
      nGByTS <- nextY$nGByTS
      nByTS <- nextY$nByTS
      nTSByC <- .rowSumByGroupChange(counts, nTSByC, nextY$y, y, L)
      y <- nextY$y

      ## Gibbs or EM sampling for each cell
      nextZ <- do.call(algorithmFun, list(
        counts = nTSByC,
        mCPByS = mCPByS,
        nGByCP = nTSByCP,
        nCP = nCP,
        nByC = nByC,
        z = z,
        s = s,
        K = K,
        nG = L,
        nM = nM,
        alpha = alpha,
        beta = beta
      ))
      mCPByS <- nextZ$mCPByS
      nTSByCP <- nextZ$nGByCP
      nCP <- nextZ$nCP
      nGByCP <- .colSumByGroupChange(counts, nGByCP, nextZ$z, z, K)
      z <- nextZ$z

      ## Perform split on i-th iteration defined by splitOnIter
      tempLl <- .cCGCalcLL(
        K = K,
        L = L,
        mCPByS = mCPByS,
        nTSByCP = nTSByCP,
        nByG = nByG,
        nByTS = nByTS,
        nGByTS = nGByTS,
        nS = nS,
        nG = nG,
        alpha = alpha,
        beta = beta,
        delta = delta,
        gamma = gamma
      )

      ## NOTE(review): the gene-split branch tests 'tempLl >= ll' and resets
      ## the counter to 1L, while the cell-split branch below tests
      ## 'tempLl > ll' and resets to 0L. Left as is; confirm intent.
      if (L > 2 & iter != maxIter &
        (((numIterWithoutImprovement == stopIter &
          !all(tempLl >= ll)) & isTRUE(splitOnLast)) |
          (splitOnIter > 0 & iter %% splitOnIter == 0 &
            isTRUE(doGeneSplit)))) {
        .logMessages(date(),
          " .... Determining if any gene clusters should be split.",
          logfile = logfile,
          append = TRUE,
          sep = "",
          verbose = verbose
        )
        res <- .cCGSplitY(counts,
          y,
          mCPByS,
          nGByCP,
          nTSByC,
          nTSByCP,
          nByG,
          nByTS,
          nGByTS,
          nCP,
          s,
          z,
          K,
          L,
          nS,
          nG,
          alpha,
          beta,
          delta,
          gamma,
          yProb = t(nextY$probs),
          maxClustersToTry = max(L / 2, 10),
          minCell = 3
        )
        .logMessages(res$message,
          logfile = logfile,
          append = TRUE,
          verbose = verbose
        )

        # Reset convergence counter if a split occured
        if (!isTRUE(all.equal(y, res$y))) {
          numIterWithoutImprovement <- 1L
          doGeneSplit <- TRUE
        } else {
          doGeneSplit <- FALSE
        }

        ## Re-calculate variables
        y <- res$y
        nTSByCP <- res$nTSByCP
        nByTS <- res$nByTS
        nGByTS <- res$nGByTS
        nTSByC <- .rowSumByGroup(counts, group = y, L = L)
      }

      if (K > 2 & iter != maxIter &
        (((numIterWithoutImprovement == stopIter &
          !all(tempLl > ll)) & isTRUE(splitOnLast)) |
          (splitOnIter > 0 & iter %% splitOnIter == 0 &
            isTRUE(doCellSplit)))) {
        .logMessages(date(),
          " .... Determining if any cell clusters should be split.",
          logfile = logfile,
          append = TRUE,
          sep = "",
          verbose = verbose
        )
        res <- .cCGSplitZ(counts,
          mCPByS,
          nTSByC,
          nTSByCP,
          nByG,
          nByTS,
          nGByTS,
          nCP,
          s,
          z,
          K,
          L,
          nS,
          nG,
          alpha,
          beta,
          delta,
          gamma,
          zProb = t(nextZ$probs),
          maxClustersToTry = K,
          minCell = 3
        )
        .logMessages(res$message,
          logfile = logfile,
          append = TRUE,
          verbose = verbose
        )

        # Reset convergence counter if a split occured
        if (!isTRUE(all.equal(z, res$z))) {
          numIterWithoutImprovement <- 0L
          doCellSplit <- TRUE
        } else {
          doCellSplit <- FALSE
        }

        ## Re-calculate variables
        z <- res$z
        mCPByS <- res$mCPByS
        nTSByCP <- res$nTSByCP
        nCP <- res$nCP
        nGByCP <- .colSumByGroup(counts, group = z, K = K)
      }

      ## Calculate complete likelihood
      tempLl <- .cCGCalcLL(
        K = K,
        L = L,
        mCPByS = mCPByS,
        nTSByCP = nTSByCP,
        nByG = nByG,
        nByTS = nByTS,
        nGByTS = nGByTS,
        nS = nS,
        nG = nG,
        alpha = alpha,
        beta = beta,
        delta = delta,
        gamma = gamma
      )
      ## Keep the labels whenever the log-likelihood beats every earlier
      ## value in this chain; the first iteration is always kept
      if ((all(tempLl > ll)) | iter == 1) {
        zBest <- z
        yBest <- y
        llBest <- tempLl
        numIterWithoutImprovement <- 1L
      } else {
        numIterWithoutImprovement <- numIterWithoutImprovement + 1L
      }
      ll <- c(ll, tempLl)

      .logMessages(date(),
        " .... Completed iteration: ",
        iter,
        " | logLik: ",
        tempLl,
        logfile = logfile,
        append = TRUE,
        sep = "",
        verbose = verbose
      )
      iter <- iter + 1L
    }

    names <- list(
      row = rownames(counts),
      column = colnames(counts),
      sample = levels(sampleLabel)
    )

    result <- list(
      z = zBest,
      y = yBest,
      completeLogLik = ll,
      finalLogLik = llBest,
      K = K,
      L = L,
      alpha = alpha,
      beta = beta,
      delta = delta,
      gamma = gamma,
      sampleLabel = sampleLabel,
      names = names,
      countChecksum = countChecksum
    )

    class(result) <- "celda_CG"

    ## Retain this chain only if it beats the best chain seen so far
    if (is.null(bestResult) ||
      result$finalLogLik > bestResult$finalLogLik) {
      bestResult <- result
    }

    .logMessages(date(),
      ".. Finished chain",
      i,
      logfile = logfile,
      append = TRUE,
      verbose = verbose
    )
  }

  ## Build the final model from the best chain. The loop variables zBest,
  ## yBest, ll and llBest hold the values of the LAST chain only, so the
  ## labels and log-likelihoods must be read from 'bestResult'.
  bestChain <- bestResult
  bestResult <- methods::new("celda_CG",
    clusters = list(z = bestChain$z, y = bestChain$y),
    params = list(
      K = as.integer(K),
      L = as.integer(L),
      alpha = alpha,
      beta = beta,
      delta = delta,
      gamma = gamma,
      countChecksum = countChecksum
    ),
    completeLogLik = bestChain$completeLogLik,
    finalLogLik = bestChain$finalLogLik,
    sampleLabel = sampleLabel,
    names = names
  )
  ## Peform reordering on final Z and Y assigments:
  if (isTRUE(reorder)) {
    bestResult <- .reorderCeldaCG(counts = counts, res = bestResult)
  }

  endTime <- Sys.time()
  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )
  .logMessages("Completed Celda_CG. Total time:",
    format(difftime(endTime, startTime)),
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )
  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )

  return(bestResult)
}


# Complete log-likelihood of the celda_CG model, computed as the sum of the
# Theta, Phi, Psi and Eta components.
.cCGCalcLL <- function(K,
                       L,
                       mCPByS,
                       nTSByCP,
                       nByG,
                       nByTS,
                       nGByTS,
                       nS,
                       nG,
                       alpha,
                       beta,
                       delta,
                       gamma) {
  # The 'nG' argument is not used as passed: it is recomputed from the module
  # sizes.
  nG <- sum(nGByTS)

  ## "Theta" component
  thetaLl <- nS * lgamma(K * alpha) +
    sum(lgamma(mCPByS + alpha)) -
    nS * K * lgamma(alpha) -
    sum(lgamma(colSums(mCPByS + alpha)))

  ## "Phi" component
  phiLl <- K * lgamma(L * beta) +
    sum(lgamma(nTSByCP + beta)) -
    K * L * lgamma(beta) -
    sum(lgamma(colSums(nTSByCP + beta)))

  ## "Psi" component
  psiLl <- sum(lgamma(nGByTS * delta)) +
    sum(lgamma(nByG + delta)) -
    nG * lgamma(delta) -
    sum(lgamma(nByTS + (nGByTS * delta)))

  ## "Eta" component
  etaLl <- lgamma(L * gamma) +
    sum(lgamma(nGByTS + gamma)) -
    L * lgamma(gamma) -
    lgamma(sum(nGByTS + gamma))

  thetaLl + phiLl + psiLl + etaLl
}


# Derives from the raw counts matrix the set of summary matrices and vectors
# used by the log-likelihood calculation.
# @param counts Integer matrix. Rows represent features and columns represent
# cells.
# @param s Integer vector. Sample label of every cell (column) of the count
# matrix.
# @param z Numeric vector. Cell population labels.
# @param y Numeric vector. Feature module labels.
# @param K Integer. Number of cell populations.
# @param L Integer. Number of feature modules.
# @return Named list with elements mCPByS, nTSByC, nTSByCP, nCP, nByG, nByC,
# nByTS, nGByTS, nGByCP, nM, nG and nS.
#' @importFrom Matrix colSums rowSums
.cCGDecomposeCounts <- function(counts, s, z, y, K, L) {
  sampleCount <- length(unique(s))

  # Cross-tabulation of population label (levels 1..K) against sample label
  populationBySample <- table(factor(z, levels = seq(K)), s)

  moduleByCell <- .rowSumByGroup(counts, group = y, L = L)
  featureByPopulation <- .colSumByGroup(counts, group = z, K = K)
  moduleByPopulation <- .colSumByGroup(moduleByCell, group = z, K = K)

  cellTotals <- colSums(counts)
  featureTotals <- rowSums(counts)

  list(
    mCPByS = matrix(as.integer(populationBySample), ncol = sampleCount),
    nTSByC = moduleByCell,
    nTSByCP = moduleByPopulation,
    nCP = .colSums(
      moduleByPopulation,
      nrow(moduleByPopulation),
      ncol(moduleByPopulation)
    ),
    nByG = featureTotals,
    nByC = cellTotals,
    nByTS = .rowSumByGroup(matrix(featureTotals, ncol = 1),
      group = y, L = L
    ),
    ## Add pseudogene to each module
    nGByTS = tabulate(y, L) + 1,
    nGByCP = featureByPopulation,
    nM = ncol(counts),
    nG = nrow(counts),
    nS = sampleCount
  )
}


# Build the normalized module-by-cell matrix used as input for dimensionality
# reduction of a celda_CG result stored in 'sce'.
#
# The counts are factorized with .factorizeMatrixCG(), optionally restricted
# to 'modules', and cells are down-sampled per cluster when the object holds
# more than 'maxCells' cells.
#
# @param sce Object holding the celda_CG results (metadata
#   'celda_parameters', colData 'celda_cell_cluster' / 'celda_sample_label',
#   rowData 'celda_feature_module').
# @param useAssay Name of the assay holding the counts.
# @param maxCells Maximum number of cells to keep; NULL keeps all cells.
# @param minClusterSize Number of cells per cluster that down-sampling will
#   not go below.
# @param modules Module indices to use; NULL uses all modules.
# @param normalize,scaleFactor,transformationFun Passed to normalizeCounts().
# @return List with 'norm' (transposed normalized matrix for the retained
#   cells) and 'cellIx' (indices of the retained cells).
.prepareCountsForDimReductionCeldaCG <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    modules,
    normalize,
    scaleFactor,
    transformationFun) {

    counts <- SummarizedExperiment::assay(sce, i = useAssay)
    counts <- .processCounts(counts)

    K <- S4Vectors::metadata(sce)$celda_parameters$K
    z <- as.integer(SummarizedExperiment::colData(sce)$celda_cell_cluster)
    y <- as.integer(SummarizedExperiment::rowData(sce)$celda_feature_module)
    L <- S4Vectors::metadata(sce)$celda_parameters$L
    alpha <- S4Vectors::metadata(sce)$celda_parameters$alpha
    beta <- S4Vectors::metadata(sce)$celda_parameters$beta

    delta <- S4Vectors::metadata(sce)$celda_parameters$delta
    gamma <- S4Vectors::metadata(sce)$celda_parameters$gamma
    sampleLabel <-
        SummarizedExperiment::colData(sce)$celda_sample_label
    cNames <- colnames(sce)
    rNames <- rownames(sce)
    sNames <- S4Vectors::metadata(sce)$celda_parameters$sampleLevels

    ## Checking if maxCells and minClusterSize will work
    if (!is.null(maxCells)) {
        if ((maxCells < ncol(counts)) &
                (maxCells / minClusterSize < K)) {
            stop("Cannot distribute ",
                maxCells,
                " cells among ",
                K,
                " clusters while maintaining a minumum of ",
                minClusterSize,
                " cells per cluster. Try increasing 'maxCells' or",
                " decreasing 'minClusterSize'.")
        }
    } else {
        maxCells <- ncol(counts)
    }

    fm <- .factorizeMatrixCG(
        counts = counts,
        K = K,
        z = z,
        y = y,
        L = L,
        alpha = alpha,
        beta = beta,
        delta = delta,
        gamma = gamma,
        sampleLabel = sampleLabel,
        cNames = cNames,
        rNames = rNames,
        sNames = sNames,
        type = "counts")
    nModules <- nrow(fm$counts$cell)
    modulesToUse <- seq(nModules)
    if (!is.null(modules)) {
        if (!all(modules %in% modulesToUse)) {
            ## Report the upper bound only; pasting the whole index vector
            ## into the message produced an unreadable number.
            stop("'modules' must be a vector of numbers between 1 and ",
                nModules,
                ".")
        }
        modulesToUse <- modules
    }

    ## Select a subset of cells to sample if greater than 'maxCells'
    totalCellsToRemove <- ncol(counts) - maxCells
    zInclude <- rep(TRUE, ncol(counts))

    if (totalCellsToRemove > 0) {
        zTa <- tabulate(z, K)

        ## Number of cells that can be sampled from each cluster without
        ## going below the minimum threshold
        clusterCellsToSample <- zTa - minClusterSize
        clusterCellsToSample[clusterCellsToSample < 0] <- 0

        ## Number of cells to sample after exluding smaller clusters
        ## Rounding can cause number to be off by a few, so ceiling is used
        ## with a second round of subtraction
        clusterNToSample <- ceiling((clusterCellsToSample /
                sum(clusterCellsToSample)) * totalCellsToRemove)
        diff <- sum(clusterNToSample) - totalCellsToRemove
        clusterNToSample[which.max(clusterNToSample)] <-
            clusterNToSample[which.max(clusterNToSample)] - diff

        ## Perform sampling for each cluster
        for (i in which(clusterNToSample > 0)) {
            ## Sample positions rather than values: sample(x, n) draws from
            ## 1:x when x has length one, which would pick the wrong cell
            ## for a single-cell cluster.
            clusterCells <- which(z == i)
            drop <- sample.int(length(clusterCells), clusterNToSample[i])
            zInclude[clusterCells[drop]] <- FALSE
        }
    }
    cellIx <- which(zInclude)

    norm <- t(normalizeCounts(fm$counts$cell[modulesToUse, cellIx],
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun))
    return(list(norm = norm, cellIx = cellIx))
}


.createSCEceldaCG <- function(celdaCGMod,
    sce,
    xClass,
    useAssay,
    algorithm,
    stopIter,
    maxIter,
    splitOnIter,
    splitOnLast,
    nchains,
    zInitialize,
    yInitialize,
    zInit,
    yInit,
    logfile,
    verbose) {

    # Copies a fitted celda_CG model into 'sce': run settings and model
    # parameters go to the "celda_parameters" metadata entry, cluster labels
    # and names go to rowData / colData.

    modelParams <- celdaCGMod@params
    modelNames <- celdaCGMod@names
    modelClusters <- celdaClusters(celdaCGMod)

    # Run settings together with the fitted model's parameters
    S4Vectors::metadata(sce)[["celda_parameters"]] <- list(
        model = "celda_CG",
        xClass = xClass,
        useAssay = useAssay,
        sampleLevels = modelNames$sample,
        K = modelParams$K,
        L = modelParams$L,
        alpha = modelParams$alpha,
        beta = modelParams$beta,
        delta = modelParams$delta,
        gamma = modelParams$gamma,
        algorithm = algorithm,
        stopIter = stopIter,
        maxIter = maxIter,
        splitOnIter = splitOnIter,
        splitOnLast = splitOnLast,
        seed = modelParams$seed,
        nchains = nchains,
        zInitialize = zInitialize,
        yInitialize = yInitialize,
        countChecksum = modelParams$countChecksum,
        zInit = zInit,
        yInit = yInit,
        logfile = logfile,
        verbose = verbose,
        completeLogLik = celdaCGMod@completeLogLik,
        finalLogLik = celdaCGMod@finalLogLik,
        cellClusterLevels = sort(unique(modelClusters$z)),
        featureModuleLevels = sort(unique(modelClusters$y)))

    # Per-feature and per-cell annotations, added in a fixed column order
    SummarizedExperiment::rowData(sce)["rownames"] <- modelNames$row
    SummarizedExperiment::colData(sce)["colnames"] <- modelNames$column
    SummarizedExperiment::colData(sce)["celda_sample_label"] <-
        as.factor(celdaCGMod@sampleLabel)
    SummarizedExperiment::colData(sce)["celda_cell_cluster"] <-
        as.factor(modelClusters$z)
    SummarizedExperiment::rowData(sce)["celda_feature_module"] <-
        as.factor(modelClusters$y)

    sce
}


================================================
FILE: R/celda_G.R
================================================
#' @title Feature clustering with Celda
#' @description Clusters the rows of a count matrix containing single-cell data
#'  into L modules. The
#'  \code{useAssay} \link{assay} slot in
#'  \code{altExpName} \link{altExp} slot will be used if
#'  it exists. Otherwise, the \code{useAssay}
#'  \link{assay} slot in \code{x} will be used if
#'  \code{x} is a \linkS4class{SingleCellExperiment} object.
#' @param x A \linkS4class{SingleCellExperiment}
#'  with the matrix located in the assay slot under \code{useAssay}.
#'  Rows represent features and columns represent cells. Alternatively,
#'  any matrix-like object that can be coerced to a sparse matrix of class
#'  "dgCMatrix" can be directly used as input. The matrix will automatically be
#'  converted to a \linkS4class{SingleCellExperiment} object.
#' @param useAssay A string specifying the name of the
#'  \link{assay} slot to use. Default "counts".
#' @param altExpName The name for the \link{altExp} slot
#'  to use. Default "featureSubset".
#' @param L Integer. Number of feature modules.
#' @param beta Numeric. Concentration parameter for Phi. Adds a pseudocount to
#'  each feature module in each cell. Default 1.
#' @param delta Numeric. Concentration parameter for Psi. Adds a pseudocount to
#'  each feature in each module. Default 1.
#' @param gamma Numeric. Concentration parameter for Eta. Adds a pseudocount to
#'  the number of features in each module. Default 1.
#' @param stopIter Integer. Number of iterations without improvement in the
#'  log likelihood to stop inference. Default 10.
#' @param maxIter Integer. Maximum number of iterations of Gibbs sampling to
#'  perform. Default 200.
#' @param splitOnIter Integer. On every `splitOnIter` iteration, a heuristic
#'  will be applied to determine if a feature module should be reassigned and
#'  another feature module should be split into two clusters. To disable
#'  splitting, set to -1. Default 10.
#' @param splitOnLast Logical. After `stopIter` iterations have been
#'  performed without improvement, a heuristic will be applied to determine if
#'  a feature module should be reassigned and another feature module should be
#'  split into two clusters. If a split occurs, then `stopIter` will be reset.
#'  Default TRUE.
#' @param seed Integer. Passed to \link[withr]{with_seed}. For reproducibility,
#'  a default value of 12345 is used. If NULL, no calls to
#'  \link[withr]{with_seed} are made.
#' @param nchains Integer. Number of random cluster initializations. Default 3.
#' @param yInitialize Character. One of 'random', 'split', or 'predefined'.
#'  With 'random', features are randomly assigned to modules. With 'split',
#'  features will be split into sqrt(L) modules and then each module will be
#'  subsequently split into another sqrt(L) modules. With 'predefined', values
#'  in `yInit` will be used to initialize `y`. Default 'split'.
#' @param yInit Integer vector. Sets initial starting values of y.
#'  `yInit` can only be used when `yInitialize = 'predefined'`. Default NULL.
#' @param countChecksum Character. An MD5 checksum for the `counts` matrix.
#'  Default NULL.
#' @param logfile Character. Messages will be redirected to a file named
#'  \code{logfile}. If NULL, messages will be printed to stdout. Default NULL.
#' @param verbose Logical. Whether to print log messages. Default TRUE.
#' @return A \linkS4class{SingleCellExperiment} object. Function
#'  parameter settings are stored in the \link{metadata}
#'  \code{"celda_parameters"} slot. Column \code{celda_feature_module} in
#'  \link{rowData} contains feature modules.
#' @seealso \link{celda_C} for cell clustering and \link{celda_CG} for
#'  simultaneous clustering of features and cells. \link{celdaGridSearch} can
#'  be used to run multiple values of L and multiple chains in parallel.
#' @examples
#' data(celdaGSim)
#' sce <- celda_G(celdaGSim$counts, L = celdaGSim$L, nchains = 1)
#' @export
setGeneric("celda_G",
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        L,
        beta = 1,
        delta = 1,
        gamma = 1,
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        yInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        yInit = NULL,
        logfile = NULL,
        verbose = TRUE) {
    standardGeneric("celda_G")})


#' @rdname celda_G
#' @export
setMethod("celda_G",
    signature(x = "SingleCellExperiment"),
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        L,
        beta = 1,
        delta = 1,
        gamma = 1,
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        yInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        yInit = NULL,
        logfile = NULL,
        verbose = TRUE) {

        # The model is fit on the alternative experiment named 'altExpName',
        # so it has to be present before anything else is attempted.
        if (!(altExpName %in% SingleCellExperiment::altExpNames(x))) {
            stop(altExpName, " not in 'altExpNames(x)'. Run ",
                "selectFeatures(x) first!")
        }
        featureSubset <- SingleCellExperiment::altExp(x, altExpName)

        # The requested assay must exist inside that alternative experiment.
        if (!(useAssay %in%
                SummarizedExperiment::assayNames(featureSubset))) {
            stop(useAssay, " not in assayNames(altExp(x, altExpName))")
        }
        countsMat <- SummarizedExperiment::assay(featureSubset, i = useAssay)

        # Fit the model; the annotated alternative experiment is returned.
        fitted <- .celdaGWithSeed(
            counts = countsMat,
            sce = featureSubset,
            xClass = "SingleCellExperiment",
            useAssay = useAssay,
            L = L,
            beta = beta, delta = delta, gamma = gamma,
            stopIter = stopIter, maxIter = maxIter,
            splitOnIter = splitOnIter, splitOnLast = splitOnLast,
            seed = seed, nchains = nchains,
            yInitialize = match.arg(yInitialize),
            yInit = yInit,
            countChecksum = countChecksum,
            logfile = logfile, verbose = verbose)

        # Write the annotated subset back into the input object.
        SingleCellExperiment::altExp(x, altExpName) <- fitted
        x
    }
)


#' @rdname celda_G
#' @export
setMethod("celda_G",
    signature(x = "ANY"),
    function(x,
        useAssay = "counts",
        altExpName = "featureSubset",
        L,
        beta = 1,
        delta = 1,
        gamma = 1,
        stopIter = 10,
        maxIter = 200,
        splitOnIter = 10,
        splitOnLast = TRUE,
        seed = 12345,
        nchains = 3,
        yInitialize = c("split", "random", "predefined"),
        countChecksum = NULL,
        yInit = NULL,
        logfile = NULL,
        verbose = TRUE) {

        # Coerce the matrix-like input to a sparse column-compressed matrix.
        sparseCounts <- methods::as(x, "CsparseMatrix")

        # Wrap it in a SingleCellExperiment whose alternative experiment
        # 'altExpName' is a copy of itself, mirroring the layout expected by
        # the SingleCellExperiment method.
        assayList <- list()
        assayList[[useAssay]] <- sparseCounts
        wrapped <- SingleCellExperiment::SingleCellExperiment(
            assays = assayList)
        SingleCellExperiment::altExp(wrapped, altExpName) <- wrapped

        fitted <- .celdaGWithSeed(
            counts = sparseCounts,
            sce = SingleCellExperiment::altExp(wrapped, altExpName),
            xClass = "matrix",
            useAssay = useAssay,
            L = L,
            beta = beta, delta = delta, gamma = gamma,
            stopIter = stopIter, maxIter = maxIter,
            splitOnIter = splitOnIter, splitOnLast = splitOnLast,
            seed = seed, nchains = nchains,
            yInitialize = match.arg(yInitialize),
            yInit = yInit,
            countChecksum = countChecksum,
            logfile = logfile, verbose = verbose)

        # Replace the placeholder alternative experiment with the fitted one.
        SingleCellExperiment::altExp(wrapped, altExpName) <- fitted
        wrapped
    }
)


# Validates the counts, fits the celda_G model (inside `with_seed()` when a
# seed is supplied, directly otherwise) and stores the fitted model in `sce`
# via `.createSCEceldaG()`. Returns the updated `sce`.
.celdaGWithSeed <- function(counts,
    xClass,
    useAssay,
    sce,
    L,
    beta,
    delta,
    gamma,
    stopIter,
    maxIter,
    splitOnIter,
    splitOnLast,
    seed,
    nchains,
    yInitialize,
    countChecksum,
    yInit,
    logfile,
    verbose) {

    .validateCounts(counts)

    # One definition of the model fit, shared by both seeding paths
    fitModel <- function() {
        .celda_G(counts = counts,
            L = L,
            beta = beta,
            delta = delta,
            gamma = gamma,
            stopIter = stopIter,
            maxIter = maxIter,
            splitOnIter = splitOnIter,
            splitOnLast = splitOnLast,
            nchains = nchains,
            yInitialize = yInitialize,
            countChecksum = countChecksum,
            yInit = yInit,
            logfile = logfile,
            verbose = verbose,
            reorder = TRUE)
    }

    if (!is.null(seed)) {
        with_seed(
            seed,
            celdaGMod <- fitModel()
        )
    } else {
        celdaGMod <- fitModel()
    }

    updatedSce <- .createSCEceldaG(celdaGMod = celdaGMod,
        sce = sce,
        xClass = xClass,
        useAssay = useAssay,
        stopIter = stopIter,
        maxIter = maxIter,
        splitOnIter = splitOnIter,
        splitOnLast = splitOnLast,
        nchains = nchains,
        yInitialize = yInitialize,
        yInit = yInit,
        logfile = logfile,
        verbose = verbose)
    return(updatedSce)
}

# Fits the celda_G feature-module model with a Gibbs sampler.
#
# Runs `nchains` independent chains. Each chain initializes the feature
# module labels `y`, then repeatedly resamples them with
# `.cGCalcGibbsProbY()`, optionally attempting module splits with
# `.cGSplitY()`, until either `maxIter` iterations have run or the log
# likelihood has failed to improve for `stopIter` iterations. The chain with
# the highest final log likelihood is returned.
#
# @param counts Count matrix; rows are features, columns are cells.
# @param L Number of feature modules.
# @param beta,delta,gamma Concentration parameters forwarded to the
#  likelihood and sampling helpers.
# @param stopIter Number of iterations without improvement after which a
#  chain stops.
# @param maxIter Maximum number of iterations per chain.
# @param splitOnIter A split is attempted every `splitOnIter` iterations
#  (disabled when not greater than 0).
# @param splitOnLast If TRUE, a split is attempted when the chain is about
#  to stop for lack of improvement.
# @param nchains Number of chains to run.
# @param yInitialize One of "split", "random" or "predefined".
# @param countChecksum Checksum of `counts`; computed when NULL.
# @param yInit Initial labels, required when `yInitialize` is "predefined".
# @param logfile,verbose Forwarded to `.logMessages()`.
# @param reorder If TRUE the result is passed through `.reorderCeldaG()`.
# @return A "celda_G" object built from the best chain.
#' @importFrom Matrix colSums
.celda_G <- function(counts,
                     L,
                     beta = 1,
                     delta = 1,
                     gamma = 1,
                     stopIter = 10,
                     maxIter = 200,
                     splitOnIter = 10,
                     splitOnLast = TRUE,
                     nchains = 3,
                     yInitialize = c("split", "random", "predefined"),
                     countChecksum = NULL,
                     yInit = NULL,
                     logfile = NULL,
                     verbose = TRUE,
                     reorder = TRUE) {
  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = logfile,
    append = FALSE,
    verbose = verbose
  )
  .logMessages("Starting Celda_G: Clustering genes.",
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )
  .logMessages(paste(rep("-", 50), collapse = ""),
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )
  start.time <- Sys.time()

  ## Error checking and variable processing
  counts <- .processCounts(counts)
  if (is.null(countChecksum)) {
    countChecksum <- .createCountChecksum(counts)
  }
  yInitialize <- match.arg(yInitialize)

  allChains <- seq(nchains)

  # Pre-compute lgamma values
  cs <- colSums(counts)
  lgbeta <- lgamma(seq(0, max(cs)) + beta)
  lggamma <- lgamma(seq(0, nrow(counts) + L) + gamma)
  lgdelta <- c(NA, lgamma((seq(nrow(counts) + L) * delta)))

  bestResult <- NULL
  for (i in allChains) {
    ## Initialize the feature module labels 'y': either from the supplied
    ## 'yInit', by the split-based initializer, or at random
    .logMessages(date(),
      ".. Initializing 'y' in chain",
      i,
      "with",
      paste0("'", yInitialize, "' "),
      logfile = logfile,
      append = TRUE,
      verbose = verbose
    )

    if (yInitialize == "predefined") {
      if (is.null(yInit)) {
        stop("'yInit' needs to be specified when",
          " yInitialize == 'predefined'.")
      }
      y <- .initializeCluster(L,
        nrow(counts),
        initial = yInit,
        fixed = NULL
      )
    } else if (yInitialize == "split") {
      y <- .initializeSplitY(counts,
        L,
        beta = beta,
        delta = delta,
        gamma = gamma
      )
    } else {
      y <- .initializeCluster(L,
        nrow(counts),
        initial = NULL,
        fixed = NULL
      )
    }
    yBest <- y

    ## Calculate counts one time up front
    p <- .cGDecomposeCounts(counts = counts, y = y, L = L)
    nTSByC <- p$nTSByC
    nByG <- p$nByG
    nByTS <- p$nByTS
    nGByTS <- p$nGByTS
    nM <- p$nM
    nG <- p$nG
    rm(p)

    ## Calculate initial log likelihood
    ll <- .cGCalcLL(
      nTSByC = nTSByC,
      nByTS = nByTS,
      nByG = nByG,
      nGByTS = nGByTS,
      nM = nM,
      nG = nG,
      L = L,
      beta = beta,
      delta = delta,
      gamma = gamma
    )
    ## Keeps 'llBest' defined even if the sampling loop below never runs;
    ## it is always overwritten on the first iteration otherwise
    llBest <- ll

    iter <- 1L
    numIterWithoutImprovement <- 0L
    doGeneSplit <- TRUE
    while (iter <= maxIter & numIterWithoutImprovement <= stopIter) {
      nextY <- .cGCalcGibbsProbY(
        counts = counts,
        nTSByC = nTSByC,
        nByTS = nByTS,
        nGByTS = nGByTS,
        nByG = nByG,
        y = y,
        nG = nG,
        L = L,
        beta = beta,
        delta = delta,
        gamma = gamma,
        lgbeta = lgbeta,
        lggamma = lggamma,
        lgdelta = lgdelta
      )
      nTSByC <- nextY$nTSByC
      nGByTS <- nextY$nGByTS
      nByTS <- nextY$nByTS
      y <- nextY$y

      ## Perform split on i-th iteration of no improvement in log
      ## likelihood
      tempLl <- .cGCalcLL(
        nTSByC = nTSByC,
        nByTS = nByTS,
        nByG = nByG,
        nGByTS = nGByTS,
        nM = nM,
        nG = nG,
        L = L,
        beta = beta,
        delta = delta,
        gamma = gamma
      )
      if (L > 2 & iter != maxIter &
        ((((numIterWithoutImprovement == stopIter &
          !all(tempLl >= ll))) & isTRUE(splitOnLast)) |
          (splitOnIter > 0 & iter %% splitOnIter == 0 &
            isTRUE(doGeneSplit)))) {
        .logMessages(date(),
          " .... Determining if any gene clusters should be split.",
          logfile = logfile,
          append = TRUE,
          sep = "",
          verbose = verbose
        )
        res <- .cGSplitY(counts,
          y,
          nTSByC,
          nByTS,
          nByG,
          nGByTS,
          nM,
          nG,
          L,
          beta,
          delta,
          gamma,
          yProb = t(nextY$probs),
          minFeature = 3,
          maxClustersToTry = max(L / 2, 10)
        )
        .logMessages(res$message,
          logfile = logfile,
          append = TRUE,
          verbose = verbose
        )

        # Reset convergence counter if a split occurred
        if (!isTRUE(all.equal(y, res$y))) {
          numIterWithoutImprovement <- 1L
          doGeneSplit <- TRUE
        } else {
          doGeneSplit <- FALSE
        }

        ## Re-calculate variables
        y <- res$y
        nTSByC <- res$nTSByC
        nByTS <- res$nByTS
        nGByTS <- res$nGByTS
      }

      ## Calculate complete likelihood
      tempLl <- .cGCalcLL(
        nTSByC = nTSByC,
        nByTS = nByTS,
        nByG = nByG,
        nGByTS = nGByTS,
        nM = nM,
        nG = nG,
        L = L,
        beta = beta,
        delta = delta,
        gamma = gamma
      )
      ## Keep the labels whenever the likelihood beats every value seen so
      ## far in this chain (the first iteration is always kept)
      if ((all(tempLl > ll)) | iter == 1) {
        yBest <- y
        llBest <- tempLl
        numIterWithoutImprovement <- 1L
      } else {
        numIterWithoutImprovement <- numIterWithoutImprovement + 1L
      }
      ll <- c(ll, tempLl)

      .logMessages(date(),
        ".... Completed iteration:",
        iter,
        "| logLik:",
        tempLl,
        logfile = logfile,
        append = TRUE,
        verbose = verbose
      )
      iter <- iter + 1
    }

    names <- list(row = rownames(counts), column = colnames(counts))

    result <- list(
      y = yBest,
      completeLogLik = ll,
      finalLogLik = llBest,
      L = L,
      beta = beta,
      delta = delta,
      gamma = gamma,
      countChecksum = countChecksum,
      names = names
    )

    if (is.null(bestResult) ||
      result$finalLogLik > bestResult$finalLogLik) {
      bestResult <- result
    }

    .logMessages(date(),
      ".. Finished chain",
      i,
      logfile = logfile,
      append = TRUE,
      verbose = verbose
    )
  }

  ## Build the model object from the best chain. The per-chain variables
  ## (yBest, ll, llBest) only hold the values of the last chain, so they
  ## must not be used here.
  bestResult <- methods::new("celda_G",
    clusters = list(y = bestResult$y),
    params = list(
      L = as.integer(bestResult$L),
      beta = bestResult$beta,
      delta = bestResult$delta,
      gamma = bestResult$gamma,
      countChecksum = bestResult$countChecksum
    ),
    completeLogLik = bestResult$completeLogLik,
    finalLogLik = bestResult$finalLogLik,
    names = bestResult$names
  )
  if (isTRUE(reorder)) {
    bestResult <- .reorderCeldaG(counts = counts, res = bestResult)
  }

  endTime <- Sys.time()
  .logMessages(paste0(rep("-", 50), collapse = ""),
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )
  .logMessages("Completed Celda_G. Total time:",
    format(difftime(endTime, start.time)),
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )
  .logMessages(paste0(rep("-", 50), collapse = ""),
    logfile = logfile,
    append = TRUE,
    verbose = verbose
  )

  return(bestResult)
}


# One Gibbs sweep over the feature module labels (Gene Clustering)
# Visits every feature once, in random order. For each feature the
# per-module values returned by cG_CalcGibbsProbY are stored in a column of
# `probs`; when `doSample` is TRUE a new label is drawn from them with
# .sampleLl and, if the label changed, the count summaries are moved from the
# old module to the new one.
# @param counts Count matrix indexed by feature (rows).
# @param nTSByC Number of counts in each Transcriptional State per Cell.
# @param nByTS Number of counts per Transcriptional State.
# @param nGByTS Number of genes in each Transcriptional State.
# @param nByG Number of counts per gene.
# @param y Current feature module labels.
# @param L Number of feature modules.
# @param nG Number of features to visit.
# @param beta,gamma Accepted for interface compatibility; not used in this
# function body.
# @param delta Forwarded to cG_CalcGibbsProbY.
# @param lgbeta,lggamma,lgdelta Pre-computed lgamma lookup vectors forwarded
# to cG_CalcGibbsProbY.
# @param doSample Logical. If TRUE, labels are resampled; otherwise only
# `probs` is filled in.
# @return List with the updated nTSByC, nGByTS, nByTS and y, and the L x nG
# matrix `probs`.
.cGCalcGibbsProbY <- function(counts,
                              nTSByC,
                              nByTS,
                              nGByTS,
                              nByG,
                              y,
                              L,
                              nG,
                              beta,
                              delta,
                              gamma,
                              lgbeta,
                              lggamma,
                              lgdelta,
                              doSample = TRUE) {

  ## One column of module scores per feature
  probs <- matrix(NA, ncol = nG, nrow = L)
  visitOrder <- sample(seq(nG))

  for (g in visitOrder) {
    probs[, g] <- cG_CalcGibbsProbY(index = g,
      counts = as.numeric(counts[g, ]),
      nTSbyC = nTSByC,
      nbyTS = nByTS,
      nGbyTS = nGByTS,
      nbyG = nByG,
      y = y,
      L = L,
      nG = nG,
      lg_beta = lgbeta,
      lg_gamma = lggamma,
      lg_delta = lgdelta,
      delta = delta
    )

    if (!isTRUE(doSample)) {
      next
    }

    ## Draw the next label for this feature
    oldLabel <- y[g]
    y[g] <- .sampleLl(probs[, g])
    newLabel <- y[g]
    if (oldLabel == newLabel) {
      next
    }

    ## Take the feature out of its previous module ...
    nTSByC[oldLabel, ] <- nTSByC[oldLabel, ] - counts[g, ]
    nGByTS[oldLabel] <- nGByTS[oldLabel] - 1L
    nByTS[oldLabel] <- nByTS[oldLabel] - nByG[g]

    ## ... and add it to the newly sampled one
    nTSByC[newLabel, ] <- nTSByC[newLabel, ] + counts[g, ]
    nGByTS[newLabel] <- nGByTS[newLabel] + 1L
    nByTS[newLabel] <- nByTS[newLabel] + nByG[g]
  }

  list(
    nTSByC = nTSByC,
    nGByTS = nGByTS,
    nByTS = nByTS,
    y = y,
    probs = probs
  )
}


# Calculate the log-likelihood used by the celda_G sampler.
# The value is the sum of three terms ("Phi", "Psi" and "Eta"), each built
# from lgamma() of the count summaries plus the matching concentration
# parameter. The `nG` argument is ignored: it is recomputed as
# sum(nGByTS) before use.
.cGCalcLL <- function(nTSByC,
                      nByTS,
                      nByG,
                      nGByTS,
                      nM,
                      nG,
                      L,
                      beta,
                      delta,
                      gamma) {
  nG <- sum(nGByTS)

  ## "Phi" component: module-by-cell counts
  phiLl <- nM * lgamma(L * beta) +
    sum(lgamma(nTSByC + beta)) -
    nM * L * lgamma(beta) -
    sum(lgamma(colSums(nTSByC + beta)))

  ## "Psi" component: feature counts within modules
  psiLl <- sum(lgamma(nGByTS * delta)) +
    sum(lgamma(nByG + delta)) -
    nG * lgamma(delta) -
    sum(lgamma(nByTS + (nGByTS * delta)))

  ## "Eta" component: number of features per module
  etaLl <- lgamma(L * gamma) +
    sum(lgamma(nGByTS + gamma)) -
    L * lgamma(gamma) -
    sum(lgamma(sum(nGByTS + gamma)))

  phiLl + psiLl + etaLl
}


# Summarizes a raw counts matrix into the count tables needed for the
# log likelihood calculation
# @param counts Integer matrix. Rows represent features and columns represent
# cells.
# @param y Numeric vector. Denotes feature module labels.
# @param L Integer. Number of feature modules.
# @return List with nTSByC, nByG, nByTS, nGByTS, nM (number of columns of
# counts) and nG (number of rows of counts).
#' @importFrom Matrix rowSums
.cGDecomposeCounts <- function(counts, y, L) {
  labelOutOfRange <- any(y > L)
  if (labelOutOfRange) {
    stop("Assigned value of feature module greater than the total number",
        " of feature modules!")
  }

  moduleByCell <- .rowSumByGroup(counts, group = y, L = L)
  featureTotals <- rowSums(counts)
  moduleTotals <- .rowSumByGroup(matrix(featureTotals, ncol = 1),
    group = y,
    L = L
  )
  ## Add pseudogene to each state
  featuresPerModule <- tabulate(y, L) + 1

  list(
    nTSByC = moduleByCell,
    nByG = featureTotals,
    nByTS = moduleTotals,
    nGByTS = featuresPerModule,
    nM = ncol(counts),
    nG = nrow(counts)
  )
}


# Refreshes the module-level count tables after the feature module labels
# changed from `previousY` to `y`. Returns a list with nTSByC, nByTS and
# nGByTS.
.cGReDecomposeCounts <- function(counts, y, previousY, nTSByC, nByG, L) {
  list(
    ## Module-by-cell counts, updated from the previous labelling
    nTSByC = .rowSumByGroupChange(counts, nTSByC, y, previousY, L),
    ## Total counts per module under the new labels
    nByTS = .rowSumByGroup(matrix(nByG, ncol = 1), group = y, L = L),
    ## Features per module, plus one for each module
    nGByTS = tabulate(y, L) + 1
  )
}


# Builds the normalized module-by-cell matrix used as input for
# dimensionality reduction of a celda_G result.
#
# Reads the counts, the feature module labels and the model parameters from
# `sce`, factorizes the counts with `.factorizeMatrixG()`, optionally
# subsamples cells and restricts to selected modules, and normalizes the
# result with `normalizeCounts()`.
#
# @param sce Object holding the assay, the `celda_feature_module` row data
#  column and the `celda_parameters` metadata entry.
# @param useAssay Name or index of the assay to read.
# @param maxCells Maximum number of cells to keep; when NULL or larger than
#  the number of cells, all cells are used, otherwise `maxCells` cells are
#  sampled at random.
# @param minClusterSize Not used in this function body.
# @param modules Optional vector of module indices to keep. Must be a subset
#  of 1..(number of modules).
# @param normalize,scaleFactor,transformationFun Forwarded to
#  `normalizeCounts()`.
# @return List with `norm` (transposed normalized matrix) and `cellIx`
#  (indices of the cells used).
.prepareCountsForDimReductionCeldaG <- function(sce,
    useAssay,
    maxCells,
    minClusterSize,
    modules,
    normalize,
    scaleFactor,
    transformationFun) {

    counts <- SummarizedExperiment::assay(sce, i = useAssay)
    counts <- .processCounts(counts)
    y <- as.integer(SummarizedExperiment::rowData(sce)$celda_feature_module)
    L <- S4Vectors::metadata(sce)$celda_parameters$L
    beta <- S4Vectors::metadata(sce)$celda_parameters$beta
    delta <- S4Vectors::metadata(sce)$celda_parameters$delta
    gamma <- S4Vectors::metadata(sce)$celda_parameters$gamma
    cNames <- colnames(sce)
    rNames <- rownames(sce)

    if (is.null(maxCells) || maxCells > ncol(counts)) {
        cellIx <- seq_len(ncol(counts))
    } else {
        cellIx <- sample(seq(ncol(counts)), maxCells)
    }

    fm <- .factorizeMatrixG(
        counts = counts,
        y = y,
        L = L,
        beta = beta,
        delta = delta,
        gamma = gamma,
        cNames = cNames,
        rNames = rNames,
        type = "counts")

    modulesToUse <- seq(nrow(fm$counts$cell))
    if (!is.null(modules)) {
        if (!all(modules %in% modulesToUse)) {
            # Report only the upper bound; passing the whole index vector
            # to stop() would paste every element into the message.
            stop(
                "'modules' must be a vector of numbers between 1 and ",
                max(modulesToUse),
                "."
            )
        }
        modulesToUse <- modules
    }

    norm <- t(normalizeCounts(fm$counts$cell[modulesToUse, cellIx],
        normalize = normalize,
        scaleFactor = scaleFactor,
        transformationFun = transformationFun))
    return(list(norm = norm, cellIx = cellIx))
}


# Copies a fitted celda_G model into `sce`: the run settings and model
# parameters go into the "celda_parameters" metadata entry, the stored row
# and column names into the "rownames"/"colnames" columns, and the feature
# module labels into the "celda_feature_module" row data column.
.createSCEceldaG <- function(celdaGMod,
    sce,
    xClass,
    useAssay,
    stopIter,
    maxIter,
    splitOnIter,
    splitOnLast,
    nchains,
    yInitialize,
    yInit,
    logfile,
    verbose) {

    modelParams <- celdaGMod@params
    modelNames <- celdaGMod@names
    moduleLabels <- celdaClusters(celdaGMod)$y

    # add metadata
    S4Vectors::metadata(sce)[["celda_parameters"]] <- list(
        model = "celda_G",
        xClass = xClass,
        useAssay = useAssay,
        L = modelParams$L,
        beta = modelParams$beta,
        delta = modelParams$delta,
        gamma = modelParams$gamma,
        stopIter = stopIter,
        maxIter = maxIter,
        splitOnIter = splitOnIter,
        splitOnLast = splitOnLast,
        seed = modelParams$seed,
        nchains = nchains,
        yInitialize = yInitialize,
        countChecksum = modelParams$countChecksum,
        yInit = yInit,
        logfile = logfile,
        verbose = verbose,
        completeLogLik = celdaGMod@completeLogLik,
        finalLogLik = celdaGMod@finalLogLik,
        featureModuleLevels = sort(unique(moduleLabels)))

    # feature- and cell-level annotations
    SummarizedExperiment::rowData(sce)["rownames"] <- modelNames$row
    SummarizedExperiment::colData(sce)["colnames"] <- modelNames$column
    SummarizedExperiment::rowData(sce)["celda_feature_module"] <-
        as.factor(moduleLabels)

    return(sce)
}


================================================
FILE: R/celda_functions.R
================================================
# Draws one index from a vector of log-scale weights. The weights are
# shifted by their maximum before exponentiating, normalized to sum to one,
# and used as sampling probabilities.
.sampleLl <- function(llProbs) {
  weights <- exp(llProbs - max(llProbs))
  weights <- weights / sum(weights)
  sample.int(
    length(weights),
    size = 1L,
    replace = TRUE,
    prob = weights
  )
}


# Cosine-based distance between the columns of `x`: the cosine similarity
# from .cosine() is mapped to (1 - similarity) / 2 and returned as a "dist"
# object.
.cosineDist <- function(x) {
  similarity <- .cosine(t(x))
  stats::as.dist((1 - similarity) / 2)
}


# Cosine similarity between the rows of `x`: pairwise dot products divided
# by the square root of the product of the rows' sums of squares.
.cosine <- function(x) {
  sumSquares <- rowSums(x^2)
  dotProducts <- x %*% t(x)
  dotProducts / (sqrt(sumSquares %*% t(sumSquares)))
}


# Spearman-based distance between the columns of `x`: the correlation is
# mapped to (1 - correlation) / 2 and returned as a "dist" object.
.spearmanDist <- function(x) {
  rho <- stats::cor(x, method = "spearman")
  stats::as.dist((1 - rho) / 2)
}


# Hellinger-style distance between the columns of `x`: Euclidean distance
# between the square-rooted columns, divided by sqrt(2).
.hellingerDist <- function(x) {
  rootProfiles <- t(sqrt(x))
  euclid <- stats::dist(rootProfiles, method = "euclidean")
  euclid * 1 / sqrt(2)
}


# Converts each row of a matrix of log-scale values into probabilities:
# the row maximum is subtracted before exponentiating, then every row is
# divided by its sum.
.normalizeLogProbs <- function(llProbs) {
  rowMax <- base::apply(llProbs, 1, max)
  shifted <- exp(sweep(llProbs, 1, rowMax, "-"))
  sweep(shifted, 1, rowSums(shifted), "/")
}


#' @title Normalization of count data
#' @description Performs normalization, transformation, and/or scaling of a
#'  counts matrix
#' @param counts Integer, Numeric or Sparse matrix. Rows represent features
#' and columns represent cells.
#' @param normalize Character.
#'  Divides counts by the library sizes for each cell. One of 'proportion',
#'  'cpm', 'median', or 'mean'. 'proportion' uses the total counts for each
#'  cell as the library size. 'cpm' divides the library size of each cell by
#'  one million to produce counts per million. 'median' divides the library
#'  size of each cell by the median library size across all cells. 'mean'
#'  divides the library size of each cell by the mean library size across all
#'  cells.
#' @param scaleFactor Numeric. Sets the scale factor for cell-level
#'  normalization. The normalized counts of each cell are multiplied by this
#'  factor after the library size adjustment selected in \code{normalize}.
#'  Default \code{NULL} which means no scale factor is applied.
#' @param transformationFun Function. Applies a transformation such as
#'  \link{sqrt}, \link{log}, \link{log2}, \link{log10}, or \link{log1p}.
#'  If NULL, no transformation will be applied. Occurs after normalization.
#'  Default NULL.
#' @param scaleFun Function. Scales the rows of the normalized and transformed
#'  count matrix. For example, 'scale' can be used to z-score normalize the
#'  rows. Default NULL.
#' @param pseudocountNormalize Numeric. Add a pseudocount to counts before
#'  normalization. Default 0.
#' @param pseudocountTransform Numeric. Add a pseudocount to normalized counts
#'  before applying the transformation function. Adding a pseudocount
#'  can be useful before applying a log transformation. Default  0.
#' @return Numeric Matrix. A normalized matrix.
#' @examples
#' data(celdaCGSim)
#' normalizedCounts <- normalizeCounts(celdaCGSim$counts, "proportion",
#'   pseudocountNormalize = 1)
#' @importFrom Matrix colSums
#' @export
normalizeCounts <- function(counts,
                            normalize = c("proportion", "cpm",
                              "median", "mean"),
                            scaleFactor = NULL,
                            transformationFun = NULL,
                            scaleFun = NULL,
                            pseudocountNormalize = 0,
                            pseudocountTransform = 0) {

  normalize <- match.arg(normalize)

  # Both optional hooks must be functions when supplied
  applyTransform <- !is.null(transformationFun)
  applyScaling <- !is.null(scaleFun)
  if (applyTransform && !is.function(transformationFun)) {
    stop("'transformationFun' needs to be of class 'function'")
  }
  if (applyScaling && !is.function(scaleFun)) {
    stop("'scaleFun' needs to be of class 'function'")
  }

  # Library-size normalization. Dense matrices normalized to proportions
  # go through fastNormProp(); everything else is handled by sweep().
  useFastProp <- normalize == "proportion" && inherits(counts, "matrix")
  if (useFastProp) {
    norm <- fastNormProp(counts, pseudocountNormalize)
  } else {
    counts <- counts + pseudocountNormalize
    cs <- colSums(counts)
    libSize <- switch(
      normalize,
      "proportion" = cs,
      "cpm" = cs / 1e6,
      "median" = cs / stats::median(cs),
      "mean" = cs / mean(cs)
    )
    norm <- sweep(counts, 2, libSize, "/")
  }

  if (!is.null(scaleFactor)) {
    norm <- norm * scaleFactor
  }

  if (applyTransform) {
    norm <- do.call(
      transformationFun,
      list(norm + pseudocountTransform)
    )
  }

  # Row-wise scaling; apply() returns its results column-wise, hence t()
  if (applyScaling) {
    norm <- t(base::apply(norm, 1, scaleFun))
  }

  colnames(norm) <- colnames(counts)
  rownames(norm) <- rownames(counts)
  return(norm)
}


#' @ti

Download .txt

gitextract_ojjfe_ko/

├── .Rbuildignore
├── .github/
│   ├── .gitignore
│   └── workflows/
│       ├── BioC-check.yaml
│       └── check-standard.yaml
├── .gitignore
├── CONDUCT.md
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── NOTICE
├── R/
│   ├── RcppExports.R
│   ├── aaa.R
│   ├── accessors.R
│   ├── celdaGridSearch.R
│   ├── celdaProbabilityMap.R
│   ├── celdaUMAP.R
│   ├── celda_C.R
│   ├── celda_CG.R
│   ├── celda_G.R
│   ├── celda_functions.R
│   ├── celda_heatmap.R
│   ├── celdatSNE.R
│   ├── celdatosce.R
│   ├── clusterProbability.R
│   ├── data.R
│   ├── decon.R
│   ├── elbow.R
│   ├── factorizeMatrix.R
│   ├── featureModuleLookup.R
│   ├── geneSetEnrich.R
│   ├── initialize_clusters.R
│   ├── loglikelihood.R
│   ├── matrixSums.R
│   ├── misc.R
│   ├── moduleHeatmap.R
│   ├── perplexity.R
│   ├── plotHeatmap.R
│   ├── plot_decontx.R
│   ├── plot_dr.R
│   ├── recursiveSplit.R
│   ├── reorderCelda.R
│   ├── reports.R
│   ├── selectFeatures.R
│   ├── semi_pheatmap.R
│   ├── simulateCells.R
│   ├── splitModule.R
│   ├── split_clusters.R
│   └── topRank.R
├── README.md
├── _pkgdown.yml
├── data/
│   ├── celdaCGGridSearchRes.rda
│   ├── celdaCGMod.rda
│   ├── celdaCGSim.rda
│   ├── celdaCMod.rda
│   ├── celdaCSim.rda
│   ├── celdaGMod.rda
│   ├── celdaGSim.rda
│   ├── contaminationSim.rda
│   ├── sampleCells.rda
│   ├── sceCeldaC.rda
│   ├── sceCeldaCG.rda
│   ├── sceCeldaCGGridSearch.rda
│   └── sceCeldaG.rda
├── docs/
│   ├── 404.html
│   ├── CONDUCT.html
│   ├── LICENSE-text.html
│   ├── articles/
│   │   ├── articles/
│   │   │   ├── celda_pbmc3k.html
│   │   │   ├── celda_pbmc3k_files/
│   │   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   │   └── empty-anchor.js
│   │   │   │   ├── header-attrs-2.7/
│   │   │   │   │   └── header-attrs.js
│   │   │   │   ├── kePrint-0.0.1/
│   │   │   │   │   └── kePrint.js
│   │   │   │   └── lightable-0.0.1/
│   │   │   │       └── lightable.css
│   │   │   ├── decontX_pbmc4k.html
│   │   │   ├── decontX_pbmc4k_files/
│   │   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   │   └── empty-anchor.js
│   │   │   │   └── header-attrs-2.7/
│   │   │   │       └── header-attrs.js
│   │   │   ├── installation.html
│   │   │   └── installation_files/
│   │   │       ├── accessible-code-block-0.0.1/
│   │   │       │   └── empty-anchor.js
│   │   │       └── header-attrs-2.7/
│   │   │           └── header-attrs.js
│   │   ├── celda.html
│   │   ├── celda_files/
│   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   └── empty-anchor.js
│   │   │   └── header-attrs-2.7/
│   │   │       └── header-attrs.js
│   │   ├── celda_pbmc3k.html
│   │   ├── celda_pbmc3k_files/
│   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   └── empty-anchor.js
│   │   │   ├── kePrint-0.0.1/
│   │   │   │   └── kePrint.js
│   │   │   └── lightable-0.0.1/
│   │   │       └── lightable.css
│   │   ├── decontX.html
│   │   ├── decontX_files/
│   │   │   ├── accessible-code-block-0.0.1/
│   │   │   │   └── empty-anchor.js
│   │   │   └── header-attrs-2.7/
│   │   │       └── header-attrs.js
│   │   ├── decontX_pbmc4k.html
│   │   ├── decontX_pbmc4k_files/
│   │   │   └── accessible-code-block-0.0.1/
│   │   │       └── empty-anchor.js
│   │   ├── index.html
│   │   ├── installation.html
│   │   └── installation_files/
│   │       └── accessible-code-block-0.0.1/
│   │           └── empty-anchor.js
│   ├── authors.html
│   ├── bootstrap-toc.css
│   ├── bootstrap-toc.js
│   ├── docsearch.css
│   ├── docsearch.js
│   ├── index.html
│   ├── news/
│   │   └── index.html
│   ├── pkgdown.css
│   ├── pkgdown.js
│   ├── pkgdown.yml
│   ├── reference/
│   │   ├── appendCeldaList.html
│   │   ├── availableModels.html
│   │   ├── bestLogLikelihood.html
│   │   ├── celda.html
│   │   ├── celdaCGGridSearchRes.html
│   │   ├── celdaCGMod.html
│   │   ├── celdaCGSim.html
│   │   ├── celdaCMod.html
│   │   ├── celdaCSim.html
│   │   ├── celdaClusters.html
│   │   ├── celdaGMod.html
│   │   ├── celdaGSim.html
│   │   ├── celdaGridSearch.html
│   │   ├── celdaHeatmap.html
│   │   ├── celdaModel.html
│   │   ├── celdaModules.html
│   │   ├── celdaPerplexity-celdaList-method.html
│   │   ├── celdaPerplexity.html
│   │   ├── celdaProbabilityMap.html
│   │   ├── celdaTsne.html
│   │   ├── celdaUmap.html
│   │   ├── celda_C.html
│   │   ├── celda_CG.html
│   │   ├── celda_G.html
│   │   ├── celdatosce.html
│   │   ├── clusterProbability.html
│   │   ├── compareCountMatrix.html
│   │   ├── contaminationSim.html
│   │   ├── countChecksum-celdaList-method.html
│   │   ├── countChecksum.html
│   │   ├── decontX.html
│   │   ├── decontXcounts.html
│   │   ├── distinctColors.html
│   │   ├── eigenMatMultInt.html
│   │   ├── eigenMatMultNumeric.html
│   │   ├── factorizeMatrix.html
│   │   ├── fastNormProp.html
│   │   ├── fastNormPropLog.html
│   │   ├── fastNormPropSqrt.html
│   │   ├── featureModuleLookup.html
│   │   ├── featureModuleTable.html
│   │   ├── geneSetEnrich.html
│   │   ├── index.html
│   │   ├── logLikelihood.html
│   │   ├── logLikelihoodHistory.html
│   │   ├── matrixNames.html
│   │   ├── moduleHeatmap.html
│   │   ├── nonzero.html
│   │   ├── normalizeCounts.html
│   │   ├── params.html
│   │   ├── perplexity.html
│   │   ├── plotCeldaViolin.html
│   │   ├── plotDecontXContamination.html
│   │   ├── plotDecontXMarkerExpression.html
│   │   ├── plotDecontXMarkerPercentage.html
│   │   ├── plotDimReduceCluster.html
│   │   ├── plotDimReduceFeature.html
│   │   ├── plotDimReduceGrid.html
│   │   ├── plotDimReduceModule.html
│   │   ├── plotGridSearchPerplexity.html
│   │   ├── plotHeatmap.html
│   │   ├── plotRPC.html
│   │   ├── recodeClusterY.html
│   │   ├── recodeClusterZ.html
│   │   ├── recursiveSplitCell.html
│   │   ├── recursiveSplitModule.html
│   │   ├── reorderCelda.html
│   │   ├── reportceldaCG.html
│   │   ├── resList.html
│   │   ├── resamplePerplexity.html
│   │   ├── retrieveFeatureIndex.html
│   │   ├── runParams.html
│   │   ├── sampleCells.html
│   │   ├── sampleLabel.html
│   │   ├── sceCeldaC.html
│   │   ├── sceCeldaCG.html
│   │   ├── sceCeldaCGGridSearch.html
│   │   ├── sceCeldaG.html
│   │   ├── selectBestModel.html
│   │   ├── selectFeatures.html
│   │   ├── semiPheatmap.html
│   │   ├── simulateCells.html
│   │   ├── simulateContamination.html
│   │   ├── splitModule.html
│   │   ├── subsetCeldaList.html
│   │   └── topRank.html
│   └── sitemap.xml
├── inst/
│   └── rmarkdown/
│       ├── CeldaCG_PlotResults.Rmd
│       └── CeldaCG_Run.Rmd
├── man/
│   ├── appendCeldaList.Rd
│   ├── availableModels.Rd
│   ├── bestLogLikelihood.Rd
│   ├── celda.Rd
│   ├── celdaCGGridSearchRes.Rd
│   ├── celdaCGMod.Rd
│   ├── celdaCGSim.Rd
│   ├── celdaCMod.Rd
│   ├── celdaCSim.Rd
│   ├── celdaClusters.Rd
│   ├── celdaGMod.Rd
│   ├── celdaGSim.Rd
│   ├── celdaGridSearch.Rd
│   ├── celdaHeatmap.Rd
│   ├── celdaModel.Rd
│   ├── celdaModules.Rd
│   ├── celdaPerplexity-celdaList-method.Rd
│   ├── celdaPerplexity.Rd
│   ├── celdaProbabilityMap.Rd
│   ├── celdaTsne.Rd
│   ├── celdaUmap.Rd
│   ├── celda_C.Rd
│   ├── celda_CG.Rd
│   ├── celda_G.Rd
│   ├── celdatosce.Rd
│   ├── clusterProbability.Rd
│   ├── compareCountMatrix.Rd
│   ├── contaminationSim.Rd
│   ├── countChecksum-celdaList-method.Rd
│   ├── countChecksum.Rd
│   ├── decontX.Rd
│   ├── decontXcounts.Rd
│   ├── distinctColors.Rd
│   ├── eigenMatMultInt.Rd
│   ├── eigenMatMultNumeric.Rd
│   ├── factorizeMatrix.Rd
│   ├── fastNormProp.Rd
│   ├── fastNormPropLog.Rd
│   ├── fastNormPropSqrt.Rd
│   ├── featureModuleLookup.Rd
│   ├── featureModuleTable.Rd
│   ├── geneSetEnrich.Rd
│   ├── logLikelihood.Rd
│   ├── logLikelihoodHistory.Rd
│   ├── matrixNames.Rd
│   ├── moduleHeatmap.Rd
│   ├── nonzero.Rd
│   ├── normalizeCounts.Rd
│   ├── params.Rd
│   ├── perplexity.Rd
│   ├── plotCeldaViolin.Rd
│   ├── plotDecontXContamination.Rd
│   ├── plotDecontXMarkerExpression.Rd
│   ├── plotDecontXMarkerPercentage.Rd
│   ├── plotDimReduceCluster.Rd
│   ├── plotDimReduceFeature.Rd
│   ├── plotDimReduceGrid.Rd
│   ├── plotDimReduceModule.Rd
│   ├── plotGridSearchPerplexity.Rd
│   ├── plotHeatmap.Rd
│   ├── plotRPC.Rd
│   ├── recodeClusterY.Rd
│   ├── recodeClusterZ.Rd
│   ├── recursiveSplitCell.Rd
│   ├── recursiveSplitModule.Rd
│   ├── reorderCelda.Rd
│   ├── reportceldaCG.Rd
│   ├── resList.Rd
│   ├── resamplePerplexity.Rd
│   ├── retrieveFeatureIndex.Rd
│   ├── runParams.Rd
│   ├── sampleCells.Rd
│   ├── sampleLabel.Rd
│   ├── sceCeldaC.Rd
│   ├── sceCeldaCG.Rd
│   ├── sceCeldaCGGridSearch.Rd
│   ├── sceCeldaG.Rd
│   ├── selectBestModel.Rd
│   ├── selectFeatures.Rd
│   ├── semiPheatmap.Rd
│   ├── simulateCells.Rd
│   ├── simulateContamination.Rd
│   ├── splitModule.Rd
│   ├── subsetCeldaList.Rd
│   └── topRank.Rd
├── src/
│   ├── DecontX.cpp
│   ├── Makevars
│   ├── Makevars.win
│   ├── RcppExports.cpp
│   ├── cG_calcGibbsProbY.cpp
│   ├── eigenMatMultInt.cpp
│   ├── matrixNorm.cpp
│   ├── matrixSums.c
│   ├── matrixSumsSparse.cpp
│   └── perplexity.c
├── tests/
│   ├── testthat/
│   │   ├── test-celda-functions.R
│   │   ├── test-celda_C.R
│   │   ├── test-celda_CG.R
│   │   ├── test-celda_G.R
│   │   ├── test-decon.R
│   │   ├── test-intialize_cluster.R
│   │   ├── test-matrixSums.R
│   │   └── test-with_seed.R
│   └── testthat.R
└── vignettes/
    ├── articles/
    │   ├── celda_pbmc3k.Rmd
    │   ├── decontX_pbmc4k.Rmd
    │   └── installation.Rmd
    ├── celda.Rmd
    └── decontX.Rmd

Download .txt

SYMBOL INDEX (51 symbols across 10 files)

FILE: docs/docsearch.js
  function matchedWords (line 54) | function matchedWords(hit) {
  function updateHitURL (line 73) | function updateHitURL(hit) {

FILE: docs/pkgdown.js
  function paths (line 42) | function paths(pathname) {
  function prefix_length (line 53) | function prefix_length(needle, haystack) {
  function changeTooltipMessage (line 72) | function changeTooltipMessage(element, msg) {

FILE: src/DecontX.cpp
  function decontXEM (line 7) | Rcpp::List decontXEM(const Eigen::MappedSparseMatrix<double> &counts,
  function decontXLogLik (line 159) | double decontXLogLik(const Eigen::MappedSparseMatrix<double> &counts,
  function decontXInitialize (line 221) | Rcpp::List decontXInitialize(const Eigen::MappedSparseMatrix<double> &co...
  function calculateNativeMatrix (line 283) | Eigen::SparseMatrix<double> calculateNativeMatrix(const Eigen::MappedSpa...

FILE: src/RcppExports.cpp
  function RcppExport (line 16) | RcppExport SEXP _celda_decontXEM(SEXP countsSEXP, SEXP counts_colsumsSEX...
  function RcppExport (line 36) | RcppExport SEXP _celda_decontXLogLik(SEXP countsSEXP, SEXP thetaSEXP, SE...
  function RcppExport (line 52) | RcppExport SEXP _celda_decontXInitialize(SEXP countsSEXP, SEXP thetaSEXP...
  function RcppExport (line 66) | RcppExport SEXP _celda_calculateNativeMatrix(SEXP countsSEXP, SEXP theta...
  function RcppExport (line 82) | RcppExport SEXP _celda_cG_calcGibbsProbY_Simple(SEXP countsSEXP, SEXP nG...
  function RcppExport (line 103) | RcppExport SEXP _celda_cG_CalcGibbsProbY_ori(SEXP indexSEXP, SEXP counts...
  function RcppExport (line 126) | RcppExport SEXP _celda_cG_CalcGibbsProbY_fastRow(SEXP indexSEXP, SEXP co...
  function RcppExport (line 149) | RcppExport SEXP _celda_cG_CalcGibbsProbY(SEXP indexSEXP, SEXP countsSEXP...
  function RcppExport (line 172) | RcppExport SEXP _celda_eigenMatMultInt(SEXP ASEXP, SEXP BSEXP) {
  function RcppExport (line 184) | RcppExport SEXP _celda_eigenMatMultNumeric(SEXP ASEXP, SEXP BSEXP) {
  function RcppExport (line 196) | RcppExport SEXP _celda_fastNormProp(SEXP R_countsSEXP, SEXP R_alphaSEXP) {
  function RcppExport (line 208) | RcppExport SEXP _celda_fastNormPropLog(SEXP R_countsSEXP, SEXP R_alphaSE...
  function RcppExport (line 220) | RcppExport SEXP _celda_fastNormPropSqrt(SEXP R_countsSEXP, SEXP R_alphaS...
  function RcppExport (line 232) | RcppExport SEXP _celda_nonzero(SEXP R_countsSEXP) {
  function RcppExport (line 243) | RcppExport SEXP _celda_colSumByGroupSparse(SEXP countsSEXP, SEXP groupSE...
  function RcppExport (line 256) | RcppExport SEXP _celda_rowSumByGroupSparse(SEXP countsSEXP, SEXP groupSE...
  function RcppExport (line 269) | RcppExport SEXP _celda_colSumByGroupChangeSparse(SEXP countsSEXP, SEXP p...
  function RcppExport (line 284) | RcppExport SEXP _celda_rowSumByGroupChangeSparse(SEXP countsSEXP, SEXP p...
  function RcppExport (line 339) | RcppExport void R_init_celda(DllInfo *dll) {

FILE: src/cG_calcGibbsProbY.cpp
  function NumericVector (line 8) | NumericVector cG_calcGibbsProbY_Simple(const IntegerMatrix counts,
  function NumericVector (line 53) | NumericVector cG_CalcGibbsProbY_ori(const int index,
  function NumericVector (line 125) | NumericVector cG_CalcGibbsProbY_fastRow(const int index,
  function NumericVector (line 187) | NumericVector cG_CalcGibbsProbY(const int index,

FILE: src/eigenMatMultInt.cpp
  function SEXP (line 11) | SEXP eigenMatMultInt(const Eigen::Map<Eigen::MatrixXd> A, const Eigen::M...
  function SEXP (line 23) | SEXP eigenMatMultNumeric(const Eigen::Map<Eigen::MatrixXd> A, const Eige...

FILE: src/matrixNorm.cpp
  function SEXP (line 11) | SEXP fastNormProp(NumericMatrix R_counts, double R_alpha) {
  function SEXP (line 36) | SEXP fastNormPropLog(NumericMatrix R_counts, double R_alpha) {
  function SEXP (line 61) | SEXP fastNormPropSqrt(NumericMatrix R_counts, double R_alpha) {
  function SEXP (line 86) | SEXP nonzero(NumericMatrix R_counts) {

FILE: src/matrixSums.c
  function SEXP (line 5) | SEXP _rowSumByGroup(SEXP R_x, SEXP R_group)
  function SEXP (line 50) | SEXP _colSumByGroup(SEXP R_x, SEXP R_group)
  function SEXP (line 97) | SEXP _rowSumByGroupChange(SEXP R_x, SEXP R_px, SEXP R_group, SEXP R_pgroup)
  function SEXP (line 147) | SEXP _colSumByGroupChange(SEXP R_x, SEXP R_px, SEXP R_group, SEXP R_pgroup)
  function SEXP (line 198) | SEXP _rowSumByGroup_numeric(SEXP R_x, SEXP R_group)
  function SEXP (line 240) | SEXP _colSumByGroup_numeric(SEXP R_x, SEXP R_group)
  function SEXP (line 282) | SEXP _rowSumByGroupChange_numeric(SEXP R_x, SEXP R_px, SEXP R_group, SEX...
  function SEXP (line 332) | SEXP _colSumByGroupChange_numeric(SEXP R_x, SEXP R_px, SEXP R_group, SEX...

FILE: src/matrixSumsSparse.cpp
  function colSumByGroupSparse (line 9) | Rcpp::NumericMatrix colSumByGroupSparse(
  function rowSumByGroupSparse (line 44) | Rcpp::NumericMatrix rowSumByGroupSparse(
  function colSumByGroupChangeSparse (line 85) | Rcpp::NumericMatrix colSumByGroupChangeSparse(
  function rowSumByGroupChangeSparse (line 141) | Rcpp::NumericMatrix rowSumByGroupChangeSparse(

FILE: src/perplexity.c
  function SEXP (line 5) | SEXP _perplexityG(SEXP R_x, SEXP R_phi, SEXP R_psi, SEXP R_group)

Download .json

Condensed preview — 301 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (3,858K chars).

[
  {
    "path": ".Rbuildignore",
    "chars": 158,
    "preview": "^renv$\n^renv\\.lock$\n^.*\\.Rproj$\n^\\.Rproj\\.user$\n^CONDUCT\\.md$\n.travis.yml\nNOTICE\n_pkgdown.yml\n^doc$\n^Meta$\nrenv*\n\n^_pkgd"
  },
  {
    "path": ".github/.gitignore",
    "chars": 7,
    "preview": "*.html\n"
  },
  {
    "path": ".github/workflows/BioC-check.yaml",
    "chars": 1649,
    "preview": "# Workflow derived from https://github.com/r-lib/actions/tree/master/examples\n# Need help debugging build failures? Star"
  },
  {
    "path": ".github/workflows/check-standard.yaml",
    "chars": 1884,
    "preview": "# Workflow derived from https://github.com/r-lib/actions/tree/master/examples\n# Need help debugging build failures? Star"
  },
  {
    "path": ".gitignore",
    "chars": 763,
    "preview": "# History files\n.Rhistory\n.Rapp.history\n\n# Session Data files\n.RData\n# Example code in package build process\n*-Ex.R\n# Ou"
  },
  {
    "path": "CONDUCT.md",
    "chars": 1387,
    "preview": "# Contributor Code of Conduct\n\nAs contributors and maintainers of this project, we pledge to respect all people who \ncon"
  },
  {
    "path": "DESCRIPTION",
    "chars": 1848,
    "preview": "Package: celda\nTitle: CEllular Latent Dirichlet Allocation\nVersion: 1.18.2\nAuthors@R: c(person(\"Joshua\", \"Campbell\", ema"
  },
  {
    "path": "LICENSE",
    "chars": 1074,
    "preview": "MIT License\n\nCopyright (c) 2018 Joshua D Campbell\n\nPermission is hereby granted, free of charge, to any person obtaining"
  },
  {
    "path": "NAMESPACE",
    "chars": 4665,
    "preview": "# Generated by roxygen2: do not edit by hand\n\nexport(\"celdaClusters<-\")\nexport(\"celdaModules<-\")\nexport(\"decontXcounts<-"
  },
  {
    "path": "NEWS.md",
    "chars": 2597,
    "preview": "# celda v1.18.2 (2024-04-02)\n* Updated Makevar files to new CRAN standards \n* Fixed unit test causing error\n\n# celda v1."
  },
  {
    "path": "NOTICE",
    "chars": 18385,
    "preview": "The celda package incldues other open source software components, including \nfunctions adapted from other R libraries. T"
  },
  {
    "path": "R/RcppExports.R",
    "chars": 4497,
    "preview": "# Generated by using Rcpp::compileAttributes() -> do not edit by hand\n# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD"
  },
  {
    "path": "R/aaa.R",
    "chars": 629,
    "preview": "setClass(\"celdaModel\",\n    slots = c(params = \"list\",\n        # K, L, model priors, checksum\n        names = \"list\",\n   "
  },
  {
    "path": "R/accessors.R",
    "chars": 15063,
    "preview": "#' @title Get or set the cell cluster labels from a celda\n#'  \\linkS4class{SingleCellExperiment} object or celda model\n#"
  },
  {
    "path": "R/celdaGridSearch.R",
    "chars": 26782,
    "preview": "#' @title Run Celda in parallel with multiple parameters\n#' @description Run Celda with different combinations of parame"
  },
  {
    "path": "R/celdaProbabilityMap.R",
    "chars": 13055,
    "preview": "#' @title Probability map for a celda model\n#' @description Renders probability and relative expression heatmaps to\n#'  "
  },
  {
    "path": "R/celdaUMAP.R",
    "chars": 13325,
    "preview": "#' @title Uniform Manifold Approximation and Projection (UMAP) dimension\n#'  reduction for celda \\code{sce} object\n#' @d"
  },
  {
    "path": "R/celda_C.R",
    "chars": 28528,
    "preview": "#' @title Cell clustering with Celda\n#' @description Clusters the columns of a count matrix containing single-cell\n#'  d"
  },
  {
    "path": "R/celda_CG.R",
    "chars": 32178,
    "preview": "#' @title Cell and feature clustering with Celda\n#' @description Clusters the rows and columns of a count matrix contain"
  },
  {
    "path": "R/celda_G.R",
    "chars": 24565,
    "preview": "#' @title Feature clustering with Celda\n#' @description Clusters the rows of a count matrix containing single-cell data\n"
  },
  {
    "path": "R/celda_functions.R",
    "chars": 25663,
    "preview": ".sampleLl <- function(llProbs) {\n  probsSub <- exp(llProbs - max(llProbs))\n  probsNorm <- probsSub / sum(probsSub)\n  pro"
  },
  {
    "path": "R/celda_heatmap.R",
    "chars": 4329,
    "preview": "#' @title Plot celda Heatmap\n#' @description Render a stylable heatmap of count data based on celda\n#'  clustering resul"
  },
  {
    "path": "R/celdatSNE.R",
    "chars": 10687,
    "preview": "#' @title t-Distributed Stochastic Neighbor Embedding (t-SNE) dimension\n#'  reduction for celda \\code{sce} object\n#' @de"
  },
  {
    "path": "R/celdatosce.R",
    "chars": 6899,
    "preview": "\n#' @title Convert old celda model object to \\code{SCE} object\n#' @description Convert a old celda model object (\\code{c"
  },
  {
    "path": "R/clusterProbability.R",
    "chars": 7320,
    "preview": "#' @title Get the conditional probabilities of cell in subpopulations from celda\n#'  model\n#' @description Calculate the"
  },
  {
    "path": "R/data.R",
    "chars": 4486,
    "preview": "#' @title available models\n#' @export\navailableModels <- c(\"celda_C\", \"celda_G\", \"celda_CG\")\n\n\n#' A toy count matrix for"
  },
  {
    "path": "R/decon.R",
    "chars": 45137,
    "preview": "#' @title Contamination estimation with decontX\n#'\n#' @description Identifies contamination from factors such as ambient"
  },
  {
    "path": "R/elbow.R",
    "chars": 1330,
    "preview": "# https://stackoverflow.com/questions/35194048/using-r-how-to-calculate\n#-the-distance-from-one-point-to-a-line\n# http:/"
  },
  {
    "path": "R/factorizeMatrix.R",
    "chars": 16760,
    "preview": "#' @title Generate factorized matrices showing each feature's influence on cell\n#'  / gene clustering\n#' @description Ge"
  },
  {
    "path": "R/featureModuleLookup.R",
    "chars": 2656,
    "preview": "#' @title Obtain the gene module of a gene of interest\n#' @description This function will output the corresponding featu"
  },
  {
    "path": "R/geneSetEnrich.R",
    "chars": 4734,
    "preview": "#' @title Gene set enrichment\n#' @description Identify and return significantly-enriched terms for each gene\n#'  module "
  },
  {
    "path": "R/initialize_clusters.R",
    "chars": 10424,
    "preview": ".initializeCluster <- function(N,\n                               len,\n                               z = NULL,\n         "
  },
  {
    "path": "R/loglikelihood.R",
    "chars": 9844,
    "preview": "#' @title Calculate the Log-likelihood of a celda model\n#' @description Calculate the log-likelihood for cell population"
  },
  {
    "path": "R/matrixSums.R",
    "chars": 4152,
    "preview": ".rowSumByGroup <- function(counts, group, L) {\n  if (inherits(counts, \"matrix\") & is.integer(counts)) {\n    res <- .rowS"
  },
  {
    "path": "R/misc.R",
    "chars": 2043,
    "preview": "#' @title Celda models\n#' @description List of available Celda models with correpsonding descriptions.\n#' @export\n#' @ex"
  },
  {
    "path": "R/moduleHeatmap.R",
    "chars": 20401,
    "preview": "#' @title Heatmap for featureModules\n#' @description Renders a heatmap for selected \\code{featureModule}. Cells are\n#'  "
  },
  {
    "path": "R/perplexity.R",
    "chars": 40185,
    "preview": "#' @title Calculate the perplexity of a celda model\n#' @description Perplexity is a statistical measure of how well a pr"
  },
  {
    "path": "R/plotHeatmap.R",
    "chars": 12195,
    "preview": "#' @title Plots heatmap based on Celda model\n#' @description Renders a heatmap based on a matrix of counts where rows ar"
  },
  {
    "path": "R/plot_decontx.R",
    "chars": 20889,
    "preview": "#' @title Plots contamination on UMAP coordinates\n#' @description A scatter plot of the UMAP dimensions generated by Dec"
  },
  {
    "path": "R/plot_dr.R",
    "chars": 51687,
    "preview": "#' @title Mapping the dimension reduction plot\n#' @description Creates a scatterplot given two dimensions from a data\n#'"
  },
  {
    "path": "R/recursiveSplit.R",
    "chars": 51522,
    "preview": ".singleSplitZ <- function(counts,\n                          z,\n                          s,\n                          K,"
  },
  {
    "path": "R/reorderCelda.R",
    "chars": 9721,
    "preview": "#' @title Reorder cells populations and/or features modules using\n#'  hierarchical clustering\n#' @description Apply hier"
  },
  {
    "path": "R/reports.R",
    "chars": 8471,
    "preview": "#' @title Generate an HTML report for celda_CG\n#' @name reportceldaCG\n#' @description \\code{reportCeldaCGRun} will run \\"
  },
  {
    "path": "R/selectFeatures.R",
    "chars": 3009,
    "preview": "#' @title Simple feature selection by feature counts\n#' @description A simple heuristic feature selection procedure.\n#' "
  },
  {
    "path": "R/semi_pheatmap.R",
    "chars": 55736,
    "preview": "# Adapted originally from the very excellent pheatmap package\n# (https://cran.r-project.org/web/packages/pheatmap/index."
  },
  {
    "path": "R/simulateCells.R",
    "chars": 18780,
    "preview": "#' @title Simulate count data from the celda generative models.\n#' @description This function generates a \\linkS4class{S"
  },
  {
    "path": "R/splitModule.R",
    "chars": 5096,
    "preview": "#' @title Split celda feature module\n#' @description Manually select a celda feature module to split into 2 or\n#'  more "
  },
  {
    "path": "R/split_clusters.R",
    "chars": 17468,
    "preview": "# .cCCalcLL = function(mCPByS, nGByCP, s, z, K, nS, nG, alpha, beta)\n.cCSplitZ <- function(counts,\n                     "
  },
  {
    "path": "R/topRank.R",
    "chars": 2671,
    "preview": "#' @title Identify features with the highest influence on clustering.\n#' @description topRank() can quickly identify the"
  },
  {
    "path": "README.md",
    "chars": 4141,
    "preview": "<!-- badges: start -->\n[![R-CMD-check](https://github.com/campbio/celda/workflows/R-CMD-check/badge.svg)](https://github"
  },
  {
    "path": "_pkgdown.yml",
    "chars": 3241,
    "preview": "template:\n  params:\n    bootswatch: yeti\n\nreference:\n- title: Primary celda functions\n  desc: Functions for clustering o"
  },
  {
    "path": "docs/404.html",
    "chars": 5569,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/CONDUCT.html",
    "chars": 7111,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/LICENSE-text.html",
    "chars": 6537,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/articles/articles/celda_pbmc3k.html",
    "chars": 555194,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/articles/articles/celda_pbmc3k_files/accessible-code-block-0.0.1/empty-anchor.js",
    "chars": 653,
    "preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
  },
  {
    "path": "docs/articles/articles/celda_pbmc3k_files/header-attrs-2.7/header-attrs.js",
    "chars": 507,
    "preview": "// Pandoc 2.9 adds attributes on both header and div. We remove the former (to\n// be compatible with the behavior of Pan"
  },
  {
    "path": "docs/articles/articles/celda_pbmc3k_files/kePrint-0.0.1/kePrint.js",
    "chars": 275,
    "preview": "$(document).ready(function(){\n    if (typeof $('[data-toggle=\"tooltip\"]').tooltip === 'function') {\n        $('[data-tog"
  },
  {
    "path": "docs/articles/articles/celda_pbmc3k_files/lightable-0.0.1/lightable.css",
    "chars": 5090,
    "preview": "/*!\n * lightable v0.0.1\n * Copyright 2020 Hao Zhu\n * Licensed under MIT (https://github.com/haozhu233/kableExtra/blob/ma"
  },
  {
    "path": "docs/articles/articles/decontX_pbmc4k.html",
    "chars": 47951,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/articles/articles/decontX_pbmc4k_files/accessible-code-block-0.0.1/empty-anchor.js",
    "chars": 653,
    "preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
  },
  {
    "path": "docs/articles/articles/decontX_pbmc4k_files/header-attrs-2.7/header-attrs.js",
    "chars": 507,
    "preview": "// Pandoc 2.9 adds attributes on both header and div. We remove the former (to\n// be compatible with the behavior of Pan"
  },
  {
    "path": "docs/articles/articles/installation.html",
    "chars": 10409,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/articles/articles/installation_files/accessible-code-block-0.0.1/empty-anchor.js",
    "chars": 653,
    "preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
  },
  {
    "path": "docs/articles/articles/installation_files/header-attrs-2.7/header-attrs.js",
    "chars": 507,
    "preview": "// Pandoc 2.9 adds attributes on both header and div. We remove the former (to\n// be compatible with the behavior of Pan"
  },
  {
    "path": "docs/articles/celda.html",
    "chars": 52700,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/articles/celda_files/accessible-code-block-0.0.1/empty-anchor.js",
    "chars": 653,
    "preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
  },
  {
    "path": "docs/articles/celda_files/header-attrs-2.7/header-attrs.js",
    "chars": 507,
    "preview": "// Pandoc 2.9 adds attributes on both header and div. We remove the former (to\n// be compatible with the behavior of Pan"
  },
  {
    "path": "docs/articles/celda_pbmc3k.html",
    "chars": 564249,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/articles/celda_pbmc3k_files/accessible-code-block-0.0.1/empty-anchor.js",
    "chars": 653,
    "preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
  },
  {
    "path": "docs/articles/celda_pbmc3k_files/kePrint-0.0.1/kePrint.js",
    "chars": 275,
    "preview": "$(document).ready(function(){\n    if (typeof $('[data-toggle=\"tooltip\"]').tooltip === 'function') {\n        $('[data-tog"
  },
  {
    "path": "docs/articles/celda_pbmc3k_files/lightable-0.0.1/lightable.css",
    "chars": 5090,
    "preview": "/*!\n * lightable v0.0.1\n * Copyright 2020 Hao Zhu\n * Licensed under MIT (https://github.com/haozhu233/kableExtra/blob/ma"
  },
  {
    "path": "docs/articles/decontX.html",
    "chars": 54060,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/articles/decontX_files/accessible-code-block-0.0.1/empty-anchor.js",
    "chars": 653,
    "preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
  },
  {
    "path": "docs/articles/decontX_files/header-attrs-2.7/header-attrs.js",
    "chars": 507,
    "preview": "// Pandoc 2.9 adds attributes on both header and div. We remove the former (to\n// be compatible with the behavior of Pan"
  },
  {
    "path": "docs/articles/decontX_pbmc4k.html",
    "chars": 54092,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/articles/decontX_pbmc4k_files/accessible-code-block-0.0.1/empty-anchor.js",
    "chars": 653,
    "preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
  },
  {
    "path": "docs/articles/index.html",
    "chars": 5983,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/articles/installation.html",
    "chars": 14513,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/articles/installation_files/accessible-code-block-0.0.1/empty-anchor.js",
    "chars": 653,
    "preview": "// Hide empty <a> tag within highlighted CodeBlock for screen reader accessibility (see https://github.com/jgm/pandoc/is"
  },
  {
    "path": "docs/authors.html",
    "chars": 6561,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/bootstrap-toc.css",
    "chars": 1843,
    "preview": "/*!\n * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)\n * Copyright 2015 Aidan Feldman\n * Lic"
  },
  {
    "path": "docs/bootstrap-toc.js",
    "chars": 4764,
    "preview": "/*!\n * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)\n * Copyright 2015 Aidan Feldman\n * Lic"
  },
  {
    "path": "docs/docsearch.css",
    "chars": 11758,
    "preview": "/* Docsearch -------------------------------------------------------------- */\n/*\n  Source: https://github.com/algolia/d"
  },
  {
    "path": "docs/docsearch.js",
    "chars": 2018,
    "preview": "$(function() {\n\n  // register a handler to move the focus to the search bar\n  // upon pressing shift + \"/\" (i.e. \"?\")\n  "
  },
  {
    "path": "docs/index.html",
    "chars": 16257,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\">\n<head>\n<meta http-equiv=\"Content-Type"
  },
  {
    "path": "docs/news/index.html",
    "chars": 13074,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/pkgdown.css",
    "chars": 7308,
    "preview": "/* Sticky footer */\n\n/**\n * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/\n * Details"
  },
  {
    "path": "docs/pkgdown.js",
    "chars": 3248,
    "preview": "/* http://gregfranko.com/blog/jquery-best-practices/ */\n(function($) {\n  $(function() {\n\n    $('.navbar-fixed-top').head"
  },
  {
    "path": "docs/pkgdown.yml",
    "chars": 238,
    "preview": "pandoc: 3.1.11.1\npkgdown: 2.0.7\npkgdown_sha: ~\narticles:\n  celda_pbmc3k: celda_pbmc3k.html\n  decontX_pbmc4k: decontX_pbm"
  },
  {
    "path": "docs/reference/appendCeldaList.html",
    "chars": 7509,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/availableModels.html",
    "chars": 6188,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/bestLogLikelihood.html",
    "chars": 8544,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celda.html",
    "chars": 7165,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaCGGridSearchRes.html",
    "chars": 6286,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaCGMod.html",
    "chars": 6261,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaCGSim.html",
    "chars": 6279,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaCMod.html",
    "chars": 6161,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaCSim.html",
    "chars": 6248,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaClusters.html",
    "chars": 14062,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaGMod.html",
    "chars": 6161,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaGSim.html",
    "chars": 6247,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaGridSearch.html",
    "chars": 16526,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaHeatmap.html",
    "chars": 10704,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaModel.html",
    "chars": 8146,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaModules.html",
    "chars": 9437,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaPerplexity-celdaList-method.html",
    "chars": 7516,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaPerplexity.html",
    "chars": 7332,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaProbabilityMap.html",
    "chars": 17962,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaTsne.html",
    "chars": 13170,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdaUmap.html",
    "chars": 14926,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celda_C.html",
    "chars": 19350,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celda_CG.html",
    "chars": 24521,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celda_G.html",
    "chars": 19325,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/celdatosce.html",
    "chars": 12412,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/clusterProbability.html",
    "chars": 9810,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/compareCountMatrix.html",
    "chars": 9333,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/contaminationSim.html",
    "chars": 6273,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/countChecksum-celdaList-method.html",
    "chars": 7455,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/countChecksum.html",
    "chars": 7275,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/decontX.html",
    "chars": 25536,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/decontXcounts.html",
    "chars": 8362,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/distinctColors.html",
    "chars": 8527,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/eigenMatMultInt.html",
    "chars": 6597,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/eigenMatMultNumeric.html",
    "chars": 6628,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/factorizeMatrix.html",
    "chars": 15132,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/fastNormProp.html",
    "chars": 6640,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/fastNormPropLog.html",
    "chars": 6652,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/fastNormPropSqrt.html",
    "chars": 6656,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/featureModuleLookup.html",
    "chars": 10123,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/featureModuleTable.html",
    "chars": 12574,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/geneSetEnrich.html",
    "chars": 12546,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/index.html",
    "chars": 20463,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/logLikelihood.html",
    "chars": 10070,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/logLikelihoodHistory.html",
    "chars": 9118,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/matrixNames.html",
    "chars": 19734,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/moduleHeatmap.html",
    "chars": 23343,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/nonzero.html",
    "chars": 6602,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/normalizeCounts.html",
    "chars": 10065,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/params.html",
    "chars": 9233,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/perplexity.html",
    "chars": 12222,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotCeldaViolin.html",
    "chars": 12019,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotDecontXContamination.html",
    "chars": 8252,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotDecontXMarkerExpression.html",
    "chars": 11723,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotDecontXMarkerPercentage.html",
    "chars": 12383,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotDimReduceCluster.html",
    "chars": 16478,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotDimReduceFeature.html",
    "chars": 22950,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotDimReduceGrid.html",
    "chars": 18690,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotDimReduceModule.html",
    "chars": 18672,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotGridSearchPerplexity.html",
    "chars": 10708,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotHeatmap.html",
    "chars": 17177,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/plotRPC.html",
    "chars": 10442,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/recodeClusterY.html",
    "chars": 8780,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/recodeClusterZ.html",
    "chars": 8856,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/recursiveSplitCell.html",
    "chars": 37742,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/recursiveSplitModule.html",
    "chars": 27632,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/reorderCelda.html",
    "chars": 13066,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/reportceldaCG.html",
    "chars": 18737,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/resList.html",
    "chars": 8688,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/resamplePerplexity.html",
    "chars": 13033,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/retrieveFeatureIndex.html",
    "chars": 11344,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/runParams.html",
    "chars": 11011,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/sampleCells.html",
    "chars": 6522,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/sampleLabel.html",
    "chars": 24991,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/sceCeldaC.html",
    "chars": 9143,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/sceCeldaCG.html",
    "chars": 11572,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/sceCeldaCGGridSearch.html",
    "chars": 9221,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/sceCeldaG.html",
    "chars": 10442,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/selectBestModel.html",
    "chars": 11059,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/selectFeatures.html",
    "chars": 11270,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/semiPheatmap.html",
    "chars": 23827,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/simulateCells.html",
    "chars": 12737,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/simulateContamination.html",
    "chars": 10017,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/splitModule.html",
    "chars": 9997,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/subsetCeldaList.html",
    "chars": 11591,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/reference/topRank.html",
    "chars": 8606,
    "preview": "<!DOCTYPE html>\n<!-- Generated by pkgdown: do not edit by hand --><html lang=\"en\"><head><meta http-equiv=\"Content-Type\" "
  },
  {
    "path": "docs/sitemap.xml",
    "chars": 6365,
    "preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n  <url>\n    <loc>/40"
  },
  {
    "path": "inst/rmarkdown/CeldaCG_PlotResults.Rmd",
    "chars": 11374,
    "preview": "---\ntitle: \"Celda_CG Results\"\ndate: \"`r format(Sys.time(), '%B %d, %Y')`\"\nparams:\n  sce: sce\n  altExpName: altExpName\n  "
  },
  {
    "path": "inst/rmarkdown/CeldaCG_Run.Rmd",
    "chars": 9866,
    "preview": "---\ntitle: \"Celda_CG Run\"\ndate: \"`r format(Sys.time(), '%B %d, %Y')`\"\noutput: \n html_document:\n     toc: true\n     toc_f"
  },
  {
    "path": "man/appendCeldaList.Rd",
    "chars": 657,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/misc.R\n\\name{appendCeldaList}\n\\alias{appen"
  },
  {
    "path": "man/availableModels.Rd",
    "chars": 310,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{availableModel"
  },
  {
    "path": "man/bestLogLikelihood.Rd",
    "chars": 1042,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/loglikelihood.R\n\\name{bestLogLikelihood}\n\\"
  },
  {
    "path": "man/celda.Rd",
    "chars": 267,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/misc.R\n\\name{celda}\n\\alias{celda}\n\\title{C"
  },
  {
    "path": "man/celdaCGGridSearchRes.Rd",
    "chars": 365,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{celdaCGGridSea"
  },
  {
    "path": "man/celdaCGMod.Rd",
    "chars": 333,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{celdaCGMod}\n\\a"
  },
  {
    "path": "man/celdaCGSim.Rd",
    "chars": 367,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{celdaCGSim}\n\\a"
  },
  {
    "path": "man/celdaCMod.Rd",
    "chars": 281,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{celdaCMod}\n\\al"
  },
  {
    "path": "man/celdaCSim.Rd",
    "chars": 351,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{celdaCSim}\n\\al"
  },
  {
    "path": "man/celdaClusters.Rd",
    "chars": 1882,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/accessors.R\n\\name{celdaClusters}\n\\alias{ce"
  },
  {
    "path": "man/celdaGMod.Rd",
    "chars": 281,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{celdaGMod}\n\\al"
  },
  {
    "path": "man/celdaGSim.Rd",
    "chars": 350,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/data.R\n\\docType{data}\n\\name{celdaGSim}\n\\al"
  },
  {
    "path": "man/celdaGridSearch.Rd",
    "chars": 4451,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/celdaGridSearch.R\n\\name{celdaGridSearch}\n\\"
  },
  {
    "path": "man/celdaHeatmap.Rd",
    "chars": 1636,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/celda_heatmap.R\n\\name{celdaHeatmap}\n\\alias"
  },
  {
    "path": "man/celdaModel.Rd",
    "chars": 904,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/accessors.R\n\\name{celdaModel}\n\\alias{celda"
  },
  {
    "path": "man/celdaModules.Rd",
    "chars": 1369,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/accessors.R\n\\name{celdaModules}\n\\alias{cel"
  },
  {
    "path": "man/celdaPerplexity-celdaList-method.Rd",
    "chars": 689,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/accessors.R\n\\name{celdaPerplexity,celdaLis"
  },
  {
    "path": "man/celdaPerplexity.Rd",
    "chars": 633,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/accessors.R\n\\name{celdaPerplexity}\n\\alias{"
  },
  {
    "path": "man/celdaProbabilityMap.Rd",
    "chars": 4244,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/celdaProbabilityMap.R\n\\name{celdaProbabili"
  },
  {
    "path": "man/celdaTsne.Rd",
    "chars": 3861,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/celdatSNE.R\n\\name{celdaTsne}\n\\alias{celdaT"
  },
  {
    "path": "man/celdaUmap.Rd",
    "chars": 4785,
    "preview": "% Generated by roxygen2: do not edit by hand\n% Please edit documentation in R/celdaUMAP.R\n\\name{celdaUmap}\n\\alias{celdaU"
  }
]

// ... and 101 more files (download for full content)

About this extraction

This page contains the full source code of the campbio/celda GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 301 files (3.4 MB), approximately 915.5k tokens, and a symbol index with 51 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo