Repository: bioFAM/MOFA2 Branch: master Commit: 207dcb69c267 Files: 143 Total size: 721.6 KB Directory structure: gitextract_72sduoul/ ├── .Rbuildignore ├── .gitattributes ├── .gitignore ├── .gitmodules ├── DESCRIPTION ├── Dockerfile ├── LICENSE ├── NAMESPACE ├── R/ │ ├── AllClasses.R │ ├── AllGenerics.R │ ├── QC.R │ ├── basilisk.R │ ├── calculate_variance_explained.R │ ├── cluster_samples.R │ ├── compare_models.R │ ├── contribution_scores.R │ ├── correlate_covariates.R │ ├── create_mofa.R │ ├── dimensionality_reduction.R │ ├── enrichment.R │ ├── get_methods.R │ ├── imports.R │ ├── impute.R │ ├── load_model.R │ ├── make_example_data.R │ ├── mefisto.R │ ├── plot_data.R │ ├── plot_factors.R │ ├── plot_weights.R │ ├── predict.R │ ├── prepare_mofa.R │ ├── run_mofa.R │ ├── set_methods.R │ ├── subset.R │ └── utils.R ├── README.md ├── configure ├── configure.win ├── inst/ │ ├── CITATION │ ├── extdata/ │ │ └── test_data.RData │ └── scripts/ │ ├── template_script.R │ ├── template_script.py │ ├── template_script_dataframe.py │ └── template_script_matrix.py ├── man/ │ ├── .Rapp.history │ ├── MOFA.Rd │ ├── add_mofa_factors_to_seurat.Rd │ ├── calculate_contribution_scores.Rd │ ├── calculate_variance_explained.Rd │ ├── calculate_variance_explained_per_sample.Rd │ ├── cluster_samples.Rd │ ├── compare_elbo.Rd │ ├── compare_factors.Rd │ ├── correlate_factors_with_covariates.Rd │ ├── covariates_names.Rd │ ├── create_mofa.Rd │ ├── create_mofa_from_MultiAssayExperiment.Rd │ ├── create_mofa_from_Seurat.Rd │ ├── create_mofa_from_SingleCellExperiment.Rd │ ├── create_mofa_from_df.Rd │ ├── create_mofa_from_matrix.Rd │ ├── factors_names.Rd │ ├── features_metadata.Rd │ ├── features_names.Rd │ ├── get_covariates.Rd │ ├── get_data.Rd │ ├── get_default_data_options.Rd │ ├── get_default_mefisto_options.Rd │ ├── get_default_model_options.Rd │ ├── get_default_stochastic_options.Rd │ ├── get_default_training_options.Rd │ ├── get_dimensions.Rd │ ├── get_elbo.Rd │ ├── get_expectations.Rd │ ├── get_factors.Rd │ ├── get_group_kernel.Rd │ ├── get_imputed_data.Rd │ ├── get_interpolated_factors.Rd │ ├── get_lengthscales.Rd │ ├── get_scales.Rd │ ├── get_variance_explained.Rd │ ├── get_weights.Rd │ ├── groups_names.Rd │ ├── impute.Rd │ ├── interpolate_factors.Rd │ ├── load_model.Rd │ ├── make_example_data.Rd │ ├── pipe.Rd │ ├── plot_alignment.Rd │ ├── plot_ascii_data.Rd │ ├── plot_data_heatmap.Rd │ ├── plot_data_overview.Rd │ ├── plot_data_scatter.Rd │ ├── plot_data_vs_cov.Rd │ ├── plot_dimred.Rd │ ├── plot_enrichment.Rd │ ├── plot_enrichment_detailed.Rd │ ├── plot_enrichment_heatmap.Rd │ ├── plot_factor.Rd │ ├── plot_factor_cor.Rd │ ├── plot_factors.Rd │ ├── plot_factors_vs_cov.Rd │ ├── plot_group_kernel.Rd │ ├── plot_interpolation_vs_covariate.Rd │ ├── plot_sharedness.Rd │ ├── plot_smoothness.Rd │ ├── plot_top_weights.Rd │ ├── plot_variance_explained.Rd │ ├── plot_variance_explained_by_covariates.Rd │ ├── plot_variance_explained_per_feature.Rd │ ├── plot_weights.Rd │ ├── plot_weights_heatmap.Rd │ ├── plot_weights_scatter.Rd │ ├── predict.Rd │ ├── prepare_mofa.Rd │ ├── run_enrichment.Rd │ ├── run_mofa.Rd │ ├── run_tsne.Rd │ ├── run_umap.Rd │ ├── samples_metadata.Rd │ ├── samples_names.Rd │ ├── select_model.Rd │ ├── set_covariates.Rd │ ├── subset_factors.Rd │ ├── subset_features.Rd │ ├── subset_groups.Rd │ ├── subset_samples.Rd │ ├── subset_views.Rd │ ├── summarise_factors.Rd │ └── views_names.Rd ├── setup.py ├── tests/ │ ├── testthat/ │ │ ├── barcodes.tsv │ │ ├── genes.tsv │ │ ├── matrix.csv │ │ ├── matrix.mtx │ │ ├── test_create_model.R │ │ ├── test_load_model.R │ │ ├── test_plot.R │ │ └── test_prepare_model.R │ └── testthat.R └── vignettes/ ├── MEFISTO_temporal.Rmd ├── downstream_analysis.Rmd └── getting_started_R.Rmd ================================================ FILE CONTENTS ================================================ ================================================ FILE: .Rbuildignore ================================================ ^.*\.Rproj$ ^\.Rproj\.user$ mofapy2 Dockerfile setup.py ================================================ FILE: .gitattributes ================================================ *.sh text eol=lf ================================================ FILE: .gitignore ================================================ # Resilio Sync .sync # MAC *Icon* .DS_Store # Rstudio projects *.Rproj .Rhistory *_site/ # Pycharm .idea # HTML # *.html # Models outputs *.hdf5 # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.pyc *.egg-info/ .installed.cfg *.egg .Rproj.user *.Rcheck .Rproj.user .Rhistory .RData *.ipynb_checkpoints *_cache/ *_files/ *.tar.gz ================================================ FILE: .gitmodules ================================================ [submodule "mofapy2"] path = mofapy2 url = git@github.com:bioFAM/mofapy2 ================================================ FILE: DESCRIPTION ================================================ Package: MOFA2 Type: Package Title: Multi-Omics Factor Analysis v2 Version: 1.21.3 Maintainer: Ricard Argelaguet Authors@R: c(person("Ricard", "Argelaguet", role = c("aut", "cre"), email = "ricard.argelaguet@gmail.com", comment = c(ORCID = "http://orcid.org/0000-0003-3199-3722")), person("Damien", "Arnol", role = "aut", email = "damien.arnol@gmail.com", comment = c(ORCID = "http://orcid.org/0000-0003-2462-534X")), person("Danila", "Bredikhin", role = "aut", email = "danila.bredikhin@embl.de", comment = c(ORCID = "https://orcid.org/0000-0001-8089-6983")), person("Britta", "Velten", role = "aut", email = "britta.velten@gmail.com", comment = c(ORCID = "http://orcid.org/0000-0002-8397-3515")) ) Date: 2023-01-12 License: file LICENSE Description: The MOFA2 package contains a collection of tools for training and analysing multi-omic factor analysis (MOFA). MOFA is a probabilistic factor model that aims to identify principal axes of variation from data sets that can comprise multiple omic layers and/or groups of samples. Additional time or space information on the samples can be incorporated using the MEFISTO framework, which is part of MOFA2. Downstream analysis functions to inspect molecular features underlying each factor, visualisation, imputation etc are available. Encoding: UTF-8 Depends: R (>= 4.0) Imports: rhdf5, dplyr, tidyr, reshape2, pheatmap, ggplot2, methods, RColorBrewer, cowplot, ggrepel, reticulate, HDF5Array, grDevices, stats, magrittr, forcats, utils, corrplot, DelayedArray, Rtsne, uwot, basilisk, stringi Suggests: knitr, testthat, Seurat, SeuratObject, ggpubr, foreach, psych, MultiAssayExperiment, SummarizedExperiment, SingleCellExperiment, ggrastr, mvtnorm, GGally, rmarkdown, data.table, tidyverse, BiocStyle, Matrix, markdown biocViews: DimensionReduction, Bayesian, Visualization URL: https://biofam.github.io/MOFA2/index.html BugReports: https://github.com/bioFAM/MOFA2 VignetteBuilder: knitr LazyData: false StagedInstall: no NeedsCompilation: yes RoxygenNote: 7.3.3 SystemRequirements: Python (>=3), numpy, pandas, h5py, scipy, argparse, sklearn, mofapy2 ================================================ FILE: Dockerfile ================================================ FROM r-base:4.0.2 WORKDIR /mofa2 ADD . /mofa2 RUN apt-get update && apt-get install -f && apt-get install -y python3 python3-setuptools python3-dev python3-pip RUN apt-get install -y libcurl4-openssl-dev RUN apt-get install -y libcairo2-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev libxt-dev libharfbuzz-dev libfribidi-dev # Install mofapy2 RUN python3 -m pip install 'https://github.com/bioFAM/mofapy2/tarball/master' # Install bioconductor dependencies RUN R --vanilla -e "\ if (!requireNamespace('BiocManager', quietly = TRUE)) install.packages('BiocManager', repos = 'https://cran.r-project.org'); \ sapply(c('rhdf5', 'dplyr', 'tidyr', 'reshape2', 'pheatmap', 'corrplot', \ 'ggplot2', 'ggbeeswarm', 'scales', 'GGally', 'doParallel', 'RColorBrewer', \ 'cowplot', 'ggrepel', 'foreach', 'reticulate', 'HDF5Array', 'DelayedArray', \ 'ggpubr', 'forcats', 'Rtsne', 'uwot', \ 'systemfonts', 'ragg', 'Cairo', 'ggrastr', 'basilisk', 'mvtnorm'), \ BiocManager::install)" RUN R CMD INSTALL --build . CMD [] ================================================ FILE: LICENSE ================================================ GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. This version of the GNU Lesser General Public License incorporates the terms and conditions of version 3 of the GNU General Public License, supplemented by the additional permissions listed below. 0. Additional Definitions. As used herein, "this License" refers to version 3 of the GNU Lesser General Public License, and the "GNU GPL" refers to version 3 of the GNU General Public License. "The Library" refers to a covered work governed by this License, other than an Application or a Combined Work as defined below. An "Application" is any work that makes use of an interface provided by the Library, but which is not otherwise based on the Library. Defining a subclass of a class defined by the Library is deemed a mode of using an interface provided by the Library. A "Combined Work" is a work produced by combining or linking an Application with the Library. The particular version of the Library with which the Combined Work was made is also called the "Linked Version". The "Minimal Corresponding Source" for a Combined Work means the Corresponding Source for the Combined Work, excluding any source code for portions of the Combined Work that, considered in isolation, are based on the Application, and not on the Linked Version. The "Corresponding Application Code" for a Combined Work means the object code and/or source code for the Application, including any data and utility programs needed for reproducing the Combined Work from the Application, but excluding the System Libraries of the Combined Work. 1. Exception to Section 3 of the GNU GPL. You may convey a covered work under sections 3 and 4 of this License without being bound by section 3 of the GNU GPL. 2. Conveying Modified Versions. If you modify a copy of the Library, and, in your modifications, a facility refers to a function or data to be supplied by an Application that uses the facility (other than as an argument passed when the facility is invoked), then you may convey a copy of the modified version: a) under this License, provided that you make a good faith effort to ensure that, in the event an Application does not supply the function or data, the facility still operates, and performs whatever part of its purpose remains meaningful, or b) under the GNU GPL, with none of the additional permissions of this License applicable to that copy. 3. Object Code Incorporating Material from Library Header Files. The object code form of an Application may incorporate material from a header file that is part of the Library. You may convey such object code under terms of your choice, provided that, if the incorporated material is not limited to numerical parameters, data structure layouts and accessors, or small macros, inline functions and templates (ten or fewer lines in length), you do both of the following: a) Give prominent notice with each copy of the object code that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the object code with a copy of the GNU GPL and this license document. 4. Combined Works. You may convey a Combined Work under terms of your choice that, taken together, effectively do not restrict modification of the portions of the Library contained in the Combined Work and reverse engineering for debugging such modifications, if you also do each of the following: a) Give prominent notice with each copy of the Combined Work that the Library is used in it and that the Library and its use are covered by this License. b) Accompany the Combined Work with a copy of the GNU GPL and this license document. c) For a Combined Work that displays copyright notices during execution, include the copyright notice for the Library among these notices, as well as a reference directing the user to the copies of the GNU GPL and this license document. d) Do one of the following: 0) Convey the Minimal Corresponding Source under the terms of this License, and the Corresponding Application Code in a form suitable for, and under terms that permit, the user to recombine or relink the Application with a modified version of the Linked Version to produce a modified Combined Work, in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source. 1) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (a) uses at run time a copy of the Library already present on the user's computer system, and (b) will operate properly with a modified version of the Library that is interface-compatible with the Linked Version. e) Provide Installation Information, but only if you would otherwise be required to provide such information under section 6 of the GNU GPL, and only to the extent that such information is necessary to install and execute a modified version of the Combined Work produced by recombining or relinking the Application with a modified version of the Linked Version. (If you use option 4d0, the Installation Information must accompany the Minimal Corresponding Source and Corresponding Application Code. If you use option 4d1, you must provide the Installation Information in the manner specified by section 6 of the GNU GPL for conveying Corresponding Source.) 5. Combined Libraries. You may place library facilities that are a work based on the Library side by side in a single library together with other library facilities that are not Applications and are not covered by this License, and convey such a combined library under terms of your choice, if you do both of the following: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities, conveyed under the terms of this License. b) Give prominent notice with the combined library that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 6. Revised Versions of the GNU Lesser General Public License. The Free Software Foundation may publish revised and/or new versions of the GNU Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. ================================================ FILE: NAMESPACE ================================================ # Generated by roxygen2: do not edit by hand export("%>%") export("covariates_names<-") export("factors_names<-") export("features_metadata<-") export("features_names<-") export("groups_names<-") export("samples_metadata<-") export("samples_names<-") export("views_names<-") export(add_mofa_factors_to_seurat) export(calculate_contribution_scores) export(calculate_variance_explained) export(calculate_variance_explained_per_sample) export(cluster_samples) export(compare_elbo) export(compare_factors) export(correlate_factors_with_covariates) export(covariates_names) export(create_mofa) export(create_mofa_from_MultiAssayExperiment) export(create_mofa_from_Seurat) export(create_mofa_from_SingleCellExperiment) export(create_mofa_from_df) export(create_mofa_from_matrix) export(factors_names) export(features_metadata) export(features_names) export(get_covariates) export(get_data) export(get_default_data_options) export(get_default_mefisto_options) export(get_default_model_options) export(get_default_stochastic_options) export(get_default_training_options) export(get_dimensions) export(get_elbo) export(get_expectations) export(get_factors) export(get_group_kernel) export(get_imputed_data) export(get_interpolated_factors) export(get_lengthscales) export(get_scales) export(get_variance_explained) export(get_weights) export(groups_names) export(impute) export(interpolate_factors) export(load_model) export(make_example_data) export(plot_alignment) export(plot_ascii_data) export(plot_data_heatmap) export(plot_data_overview) export(plot_data_scatter) export(plot_data_vs_cov) export(plot_dimred) export(plot_enrichment) export(plot_enrichment_detailed) export(plot_enrichment_heatmap) export(plot_factor) export(plot_factor_cor) export(plot_factors) export(plot_factors_vs_cov) export(plot_group_kernel) export(plot_interpolation_vs_covariate) export(plot_sharedness) export(plot_smoothness) export(plot_top_weights) export(plot_variance_explained) export(plot_variance_explained_by_covariates) export(plot_variance_explained_per_feature) export(plot_weights) export(plot_weights_heatmap) export(plot_weights_scatter) export(predict) export(prepare_mofa) export(run_enrichment) export(run_mofa) export(run_tsne) export(run_umap) export(samples_metadata) export(samples_names) export(select_model) export(set_covariates) export(subset_factors) export(subset_features) export(subset_groups) export(subset_samples) export(subset_views) export(summarise_factors) export(views_names) exportClasses(MOFA) exportMethods("covariates_names<-") exportMethods("factors_names<-") exportMethods("features_metadata<-") exportMethods("features_names<-") exportMethods("groups_names<-") exportMethods("samples_metadata<-") exportMethods("samples_names<-") exportMethods("views_names<-") exportMethods(covariates_names) exportMethods(factors_names) exportMethods(features_metadata) exportMethods(features_names) exportMethods(groups_names) exportMethods(samples_metadata) exportMethods(samples_names) exportMethods(views_names) import(basilisk) import(cowplot) import(dplyr) import(ggplot2) import(grDevices) import(methods) import(pheatmap) import(reshape2) import(reticulate) import(tidyr) importFrom(DelayedArray,DelayedArray) importFrom(HDF5Array,HDF5ArraySeed) importFrom(RColorBrewer,brewer.pal) importFrom(Rtsne,Rtsne) importFrom(basilisk,BasiliskEnvironment) importFrom(corrplot,corrplot) importFrom(cowplot,plot_grid) importFrom(dplyr,bind_rows) importFrom(dplyr,desc) importFrom(dplyr,filter) importFrom(dplyr,group_by) importFrom(dplyr,left_join) importFrom(dplyr,mutate) importFrom(dplyr,summarise) importFrom(dplyr,top_n) importFrom(forcats,fct_na_value_to_level) importFrom(ggrepel,geom_text_repel) importFrom(grDevices,colorRampPalette) importFrom(magrittr,"%>%") importFrom(magrittr,set_colnames) importFrom(pheatmap,pheatmap) importFrom(reshape2,melt) importFrom(rhdf5,h5ls) importFrom(rhdf5,h5read) importFrom(stats,as.formula) importFrom(stats,complete.cases) importFrom(stats,cor) importFrom(stats,dist) importFrom(stats,kmeans) importFrom(stats,median) importFrom(stats,p.adjust) importFrom(stats,p.adjust.methods) importFrom(stats,pnorm) importFrom(stats,pt) importFrom(stats,quantile) importFrom(stats,rbinom) importFrom(stats,rnorm) importFrom(stats,rpois) importFrom(stats,sd) importFrom(stats,var) importFrom(stats,wilcox.test) importFrom(stringi,stri_enc_mark) importFrom(tidyr,gather) importFrom(tidyr,spread) importFrom(utils,as.relistable) importFrom(utils,head) importFrom(utils,modifyList) importFrom(utils,relist) importFrom(utils,tail) importFrom(uwot,umap) ================================================ FILE: R/AllClasses.R ================================================ ########################################################## ## Define a general class to store a MOFA trained model ## ########################################################## #' @title Class to store a mofa model #' @description #' The \code{MOFA} is an S4 class used to store all relevant data to analyse a MOFA model #' @slot data The input data #' @slot intercepts Feature intercepts #' @slot samples_metadata Samples metadata #' @slot features_metadata Features metadata. #' @slot imputed_data The imputed data. #' @slot expectations expected values of the factors and the loadings. #' @slot dim_red non-linear dimensionality reduction manifolds. #' @slot training_stats model training statistics. #' @slot data_options Data processing options. #' @slot training_options Model training options. #' @slot stochastic_options Stochastic variational inference options. #' @slot model_options Model options. #' @slot mefisto_options Options for the use of MEFISO #' @slot dimensions Dimensionalities of the model: #' M for the number of views, #' G for the number of groups, #' N for the number of samples (per group), #' C for the number of covariates per sample, #' D for the number of features (per view), #' K for the number of factors. #' @slot on_disk Logical indicating whether data is loaded from disk. #' @slot cache Cache. #' @slot status Auxiliary variable indicating whether the model has been trained. #' @slot covariates optional slot to store sample covariate for training in MEFISTO #' @slot covariates_warped optional slot to store warped sample covariate for training in MEFISTO #' @slot interpolated_Z optional slot to store interpolated factor values (used only with MEFISTO) #' @name MOFA #' @rdname MOFA #' @aliases MOFA-class #' @exportClass MOFA setClassUnion("listOrNULL",members = c("list","NULL")) setClass("MOFA", slots=c( data = "list", covariates = "listOrNULL", covariates_warped = "listOrNULL", intercepts = "list", imputed_data = "list", interpolated_Z = "list", samples_metadata = "list", features_metadata = "list", expectations = "list", training_stats = "list", data_options = "list", model_options = "list", training_options = "list", stochastic_options = "list", mefisto_options = "list", dimensions = "list", on_disk = "logical", dim_red = "list", cache = "list", status = "character" ) ) # Printing method setMethod("show", "MOFA", function(object) { if (!.hasSlot(object, "dimensions") || length(object@dimensions) == 0) stop("Error: dimensions not defined") if (!.hasSlot(object, "status") || length(object@status) == 0) stop("Error: status not defined") if (object@status == "trained") { nfactors <- object@dimensions[["K"]] if(!.hasSlot(object, "covariates") || is.null(object@covariates)) { cat(sprintf("Trained MOFA with the following characteristics: \n Number of views: %d \n Views names: %s \n Number of features (per view): %s \n Number of groups: %d \n Groups names: %s \n Number of samples (per group): %s \n Number of factors: %d \n", object@dimensions[["M"]], paste(views_names(object), collapse=" "), paste(as.character(object@dimensions[["D"]]), collapse=" "), object@dimensions[["G"]], paste(groups_names(object), collapse=" "), paste(as.character(object@dimensions[["N"]]), collapse=" "), nfactors)) } else { cat(sprintf("Trained MEFISTO with the following characteristics: \n Number of views: %d \n Views names: %s \n Number of features (per view): %s \n Number of groups: %d \n Groups names: %s \n Number of samples (per group): %s \n Number of covariates per sample: %d \n Number of factors: %d \n", object@dimensions[["M"]], paste(views_names(object), collapse=" "), paste(as.character(object@dimensions[["D"]]), collapse=" "), object@dimensions[["G"]], paste(groups_names(object), collapse=" "), paste(as.character(object@dimensions[["N"]]), collapse=" "), object@dimensions[["C"]], nfactors)) } } else { if(!.hasSlot(object, "covariates") || is.null(object@covariates)) { cat(sprintf("Untrained MOFA model with the following characteristics: \n Number of views: %d \n Views names: %s \n Number of features (per view): %s \n Number of groups: %d \n Groups names: %s \n Number of samples (per group): %s \n ", object@dimensions[["M"]], paste(views_names(object), collapse=" "), paste(as.character(object@dimensions[["D"]]), collapse=" "), object@dimensions[["G"]], paste(groups_names(object), collapse=" "), paste(as.character(object@dimensions[["N"]]), collapse=" "))) } else { cat(sprintf("Untrained MEFISTO model with the following characteristics: \n Number of views: %d \n Views names: %s \n Number of features (per view): %s \n Number of groups: %d \n Groups names: %s \n Number of samples (per group): %s \n Number of covariates per sample: %d \n ", object@dimensions[["M"]], paste(views_names(object), collapse=" "), paste(as.character(object@dimensions[["D"]]), collapse=" "), object@dimensions[["G"]], paste(groups_names(object), collapse=" "), paste(as.character(object@dimensions[["N"]]), collapse=" "), object@dimensions[["C"]])) } } cat("\n") }) ================================================ FILE: R/AllGenerics.R ================================================ ################## ## Factor Names ## ################## #' @title factors_names: set and retrieve factor names #' @name factors_names #' @rdname factors_names #' @export setGeneric("factors_names", function(object) { standardGeneric("factors_names") }) #' @name factors_names #' @rdname factors_names #' @aliases factors_names<- #' @export setGeneric("factors_names<-", function(object, value) { standardGeneric("factors_names<-") }) ##################### ## Covariate Names ## ##################### #' @title covariates_names: set and retrieve covariate names #' @name covariates_names #' @rdname covariates_names #' @export setGeneric("covariates_names", function(object) { standardGeneric("covariates_names") }) #' @name covariates_names #' @rdname covariates_names #' @aliases covariates_names<- #' @export setGeneric("covariates_names<-", function(object, value) { standardGeneric("covariates_names<-") }) ################## ## Sample Names ## ################## #' @title samples_names: set and retrieve sample names #' @name samples_names #' @rdname samples_names #' @export setGeneric("samples_names", function(object) { standardGeneric("samples_names") }) #' @name samples_names #' @rdname samples_names #' @aliases samples_names<- #' @export setGeneric("samples_names<-", function(object, value) { standardGeneric("samples_names<-") }) ##################### ## Sample Metadata ## ##################### #' @title samples_metadata: retrieve sample metadata #' @name samples_metadata #' @rdname samples_metadata #' @export setGeneric("samples_metadata", function(object) { standardGeneric("samples_metadata") }) #' @name samples_metadata #' @rdname samples_metadata #' @aliases samples_metadata<- #' @export setGeneric("samples_metadata<-", function(object, value) { standardGeneric("samples_metadata<-") }) ################### ## Feature Names ## ################### #' @title features_names: set and retrieve feature names #' @name features_names #' @rdname features_names #' @export setGeneric("features_names", function(object) { standardGeneric("features_names") }) #' @name features_names #' @rdname features_names #' @aliases features_names<- #' @export setGeneric("features_names<-", function(object, value) { standardGeneric("features_names<-") }) ###################### ## Feature Metadata ## ###################### #' @title features_metadata: set and retrieve feature metadata #' @name features_metadata #' @rdname features_metadata #' @export setGeneric("features_metadata", function(object) { standardGeneric("features_metadata") }) #' @name features_metadata #' @rdname features_metadata #' @aliases features_metadata<- #' @export setGeneric("features_metadata<-", function(object, value) { standardGeneric("features_metadata<-") }) ################ ## View Names ## ################ #' @title views_names: set and retrieve view names #' @name views_names #' @rdname views_names #' @export setGeneric("views_names", function(object) { standardGeneric("views_names") }) #' @name views_names #' @rdname views_names #' @aliases views_names<- #' @export setGeneric("views_names<-", function(object, value) { standardGeneric("views_names<-") }) ################ ## group Names ## ################ #' @title groups_names: set and retrieve group names #' @name groups_names #' @rdname groups_names #' @export setGeneric("groups_names", function(object) { standardGeneric("groups_names") }) #' @name groups_names #' @rdname groups_names #' @aliases groups_names<- #' @export setGeneric("groups_names<-", function(object, value) { standardGeneric("groups_names<-") }) ================================================ FILE: R/QC.R ================================================ #' @importFrom stringi stri_enc_mark .quality_control <- function(object, verbose = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Check views names if (verbose == TRUE) message("Checking views names...") stopifnot(!is.null(views_names(object))) stopifnot(!duplicated(views_names(object))) if (any(grepl("/", views_names(object)))) { stop("Some of the views names contain `/` symbol, which is not supported. This can be fixed e.g. with: views_names(object) <- gsub(\"/\", \"-\", views_names(object))") } # Check groups names if (verbose == TRUE) message("Checking groups names...") if (any(grepl("/", groups_names(object)))) { stop("Some of the groups names contain `/` symbol, which is not supported. This can be fixed e.g. with: groups_names(object) <- gsub(\"/\", \"-\", groups_names(object))") } stopifnot(!is.null(groups_names(object))) stopifnot(!duplicated(groups_names(object))) # Check samples names if (verbose == TRUE) message("Checking samples names...") stopifnot(!is.null(samples_names(object))) stopifnot(!duplicated(unlist(samples_names(object)))) enc <- stringi::stri_enc_mark(unlist(samples_names(object))) if (any(enc!="ASCII")) { tmp <- unname(unlist(samples_names(object))[enc!="ASCII"]) stop(sprintf("non-ascii characters detected in the following samples names, please rename them and run again create_mofa():\n- %s ", paste(tmp, collapse="\n- "))) print() } # Check features names if (verbose == TRUE) message("Checking features names...") stopifnot(!is.null(features_names(object))) stopifnot(!duplicated(unlist(features_names(object)))) enc <- stringi::stri_enc_mark(unlist(features_names(object))) if (any(enc!="ASCII")) { tmp <- unname(unlist(features_names(object))[enc!="ASCII"]) stop(sprintf("non-ascii characters detected in the following features names, please rename them and run again create_mofa():\n- %s ", paste(tmp, collapse="\n- "))) print() } # Check dimensionalities in the input data if (verbose == TRUE) message("Checking dimensions...") N <- object@dimensions$N D <- object@dimensions$D for (i in views_names(object)) { for (j in groups_names(object)) { stopifnot(ncol(object@data[[i]][[j]]) == N[[j]]) stopifnot(nrow(object@data[[i]][[j]]) == D[[i]]) stopifnot(length(colnames(object@data[[i]][[j]])) == N[[j]]) stopifnot(length(rownames(object@data[[i]][[j]])) == D[[i]]) } } # Check that there are no features with complete missing values (across all groups) if (object@status == "untrained" || object@data_options[["loaded"]]) { if (verbose == TRUE) message("Checking there are no features with complete missing values...") for (i in views_names(object)) { if (!(is(object@data[[i]][[1]], "dgCMatrix") || is(object@data[[i]][[1]], "dgTMatrix"))) { tmp <- as.data.frame(sapply(object@data[[i]], function(x) rowMeans(is.na(x)), simplify = TRUE)) if (any(unlist(apply(tmp, 1, function(x) mean(x==1)))==1)) warning("You have features which do not contain a single observation in any group, consider removing them...") } } } # check dimensionalities of sample_covariates if (verbose == TRUE) message("Checking sample covariates...") if(.hasSlot(object, "covariates") && !is.null(object@covariates)){ stopifnot(ncol(object@covariates) == sum(object@dimensions$N)) stopifnot(nrow(object@covariates) == object@dimensions$C) stopifnot(all(unlist(samples_names(object)) == colnames(object@covariates))) } # Sanity checks that are exclusive for an untrained model if (object@status == "untrained") { # Check features names if (verbose == TRUE) message("Checking features names...") tmp <- lapply(object@data, function(x) unique(lapply(x,rownames))) for (x in tmp) stopifnot(length(x)==1) for (x in tmp) if (any(duplicated(x[[1]]))) stop("There are duplicated features names within the same view. Please rename") all_names <- unname(unlist(tmp)) duplicated_names <- unique(all_names[duplicated(all_names)]) if (length(duplicated_names)>0) warning("There are duplicated features names across different views. We will add the suffix *_view* only for those features Example: if you have both TP53 in mRNA and mutation data it will be renamed to TP53_mRNA, TP53_mutation") for (i in names(object@data)) { for (j in names(object@data[[i]])) { tmp <- which(rownames(object@data[[i]][[j]]) %in% duplicated_names) if (length(tmp)>0) { rownames(object@data[[i]][[j]])[tmp] <- paste(rownames(object@data[[i]][[j]])[tmp], i, sep="_") } } } # Sanity checks that are exclusive for a trained model } else if (object@status == "trained") { # Check expectations if (verbose == TRUE) message("Checking expectations...") stopifnot(all(c("W", "Z") %in% names(object@expectations))) # if(.hasSlot(object, "covariates") && !is.null(object@covariates)) stopifnot("Sigma" %in% names(object@expectations)) stopifnot(all(sapply(object@expectations$W, is.matrix))) stopifnot(all(sapply(object@expectations$Z, is.matrix))) # Check for intercept factors if (object@data_options[["loaded"]]) { if (verbose == TRUE) message("Checking for intercept factors...") if (!is.null(object@data)) { factors <- do.call("rbind",get_factors(object)) r <- suppressWarnings( t(do.call('rbind', lapply(object@data, function(x) abs(cor(colMeans(do.call("cbind",x),na.rm=TRUE),factors, use="pairwise.complete.obs")) ))) ) intercept_factors <- which(rowSums(r>0.75)>0) if (length(intercept_factors)) { warning(sprintf("Factor(s) %s are strongly correlated with the average expression of features for at least one of your omics. Such factors appear when there are differences in the total 'levels' between your samples, *sometimes* because of poor normalisation in the preprocessing steps.\n",paste(intercept_factors,collapse=", "))) } } } # Check for correlated factors if (verbose == TRUE) message("Checking for highly correlated factors...") Z <- do.call("rbind",get_factors(object)) op <- options(warn=-1) # suppress warnings noise <- matrix(rnorm(n=length(Z), mean=0, sd=1e-10), nrow(Z), ncol(Z)) tmp <- cor(Z+noise); diag(tmp) <- NA options(op) # activate warnings again if (max(tmp,na.rm=TRUE)>0.5) { warning("The model contains highly correlated factors (see `plot_factor_cor(MOFAobject)`). \nWe recommend that you train the model with less factors and that you let it train for a longer time.\n") } } return(object) } ================================================ FILE: R/basilisk.R ================================================ # .mofapy2_dependencies <- c( # "h5py==3.1.0", # "pandas==1.2.1", # "scikit-learn==0.24.1", # "dtw-python==1.1.10" # ) .mofapy2_dependencies <- c( "python=3.12.12", "numpy=1.26.4", "scipy=1.12.0", "pandas=2.2.1", "h5py=3.10.0", "scikit-learn=1.4.0", "dtw-python=1.3.1" ) .mofapy2_version <- "0.7.3" #' @importFrom basilisk BasiliskEnvironment mofa_env <- BasiliskEnvironment("mofa_env", pkgname="MOFA2", packages=.mofapy2_dependencies, pip = paste0("mofapy2==",.mofapy2_version)) ================================================ FILE: R/calculate_variance_explained.R ================================================ #' @title Calculate variance explained by the model #' @description This function takes a trained MOFA model as input and calculates the proportion of variance explained #' (i.e. the coefficient of determinations (R^2)) by the MOFA factors across the different views. #' @name calculate_variance_explained #' @param object a \code{\link{MOFA}} object. #' @param views character vector with the view names, or numeric vector with view indexes. Default is 'all' #' @param groups character vector with the group names, or numeric vector with group indexes. Default is 'all' #' @param factors character vector with the factor names, or numeric vector with the factor indexes. Default is 'all' #' @return a list with matrices with the amount of variation explained per factor and view. #' @importFrom utils relist as.relistable #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Calculate variance explained (R2) #' r2 <- calculate_variance_explained(model) #' #' # Plot variance explained values (view as x-axis, and factor as y-axis) #' plot_variance_explained(model, x="view", y="factor") #' #' # Plot variance explained values (view as x-axis, and group as y-axis) #' plot_variance_explained(model, x="view", y="group") #' #' # Plot variance explained values for factors 1 to 3 #' plot_variance_explained(model, x="view", y="group", factors=1:3) #' #' # Scale R2 values #' plot_variance_explained(model, max_r2 = 0.25) calculate_variance_explained <- function(object, views = "all", groups = "all", factors = "all") { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (any(object@model_options$likelihoods!="gaussian")) stop("Not possible to recompute the variance explained estimates when using non-gaussian likelihoods.") if (any(object@model_options$likelihoods!="gaussian")) if (isFALSE(object@data_options$loaded)) stop("Data is not loaded, cannot compute variance explained.") # Define factors, views and groups views <- .check_and_get_views(object, views) groups <- .check_and_get_groups(object, groups) factors <- .check_and_get_factors(object, factors) K <- length(factors) # Collect relevant expectations W <- get_weights(object, views=views, factors=factors) Z <- get_factors(object, groups=groups, factors=factors) Y <- lapply(get_data(object, add_intercept = FALSE)[views], function(view) view[groups]) Y <- lapply(Y, function(x) lapply(x,t)) # Replace masked values on Z by 0 (so that they do not contribute to predictions) for (g in groups) { Z[[g]][is.na(Z[[g]])] <- 0 } # Calculate coefficient of determination per group and view r2_m <- tryCatch({ lapply(groups, function(g) sapply(views, function(m) { a <- sum((as.matrix(Y[[m]][[g]]) - tcrossprod(Z[[g]], W[[m]]))**2, na.rm = TRUE) b <- sum(Y[[m]][[g]]**2, na.rm = TRUE) return(1 - a/b) }) )}, error = function(err) { stop(paste0("Calculating explained variance doesn't work with the current version of DelayedArray.\n", " Do not sort factors if you're trying to load the model (sort_factors = FALSE),\n", " or load the full dataset into memory (on_disk = FALSE).")) return(err) }) r2_m <- .name_views_and_groups(r2_m, groups, views) # Lower bound is zero r2_m = lapply(r2_m, function(x){ x[x < 0] = 0 return(x) }) # Calculate coefficient of determination per group, factor and view r2_mk <- lapply(groups, function(g) { tmp <- sapply(views, function(m) { sapply(factors, function(k) { a <- sum((as.matrix(Y[[m]][[g]]) - tcrossprod(Z[[g]][,k], W[[m]][,k]))**2, na.rm = TRUE) b <- sum(Y[[m]][[g]]**2, na.rm = TRUE) return(1 - a/b) }) }) tmp <- matrix(tmp, ncol = length(views), nrow = length(factors)) colnames(tmp) <- views rownames(tmp) <- factors return(tmp) }) names(r2_mk) <- groups # Lower bound is 0 r2_mk = lapply(r2_mk, function(x){ x[x < 0] = 0 return(x) }) # Transform from fraction to percentage r2_mk = utils::relist(unlist(utils::as.relistable(r2_mk)) * 100 ) r2_m = utils::relist(unlist(utils::as.relistable(r2_m)) * 100 ) # Store results r2_list <- list(r2_total = r2_m, r2_per_factor = r2_mk) return(r2_list) } #' @title Calculate variance explained by the MOFA factors for each sample #' @description This function takes a trained MOFA model as input and calculates, **for each sample** the proportion of variance explained #' (i.e. the coefficient of determinations (R^2)) by the MOFA factors across the different views. #' @name calculate_variance_explained_per_sample #' @param object a \code{\link{MOFA}} object. #' @param views character vector with the view names, or numeric vector with view indexes. Default is 'all' #' @param groups character vector with the group names, or numeric vector with group indexes. Default is 'all' #' @param factors character vector with the factor names, or numeric vector with the factor indexes. Default is 'all' #' @return a list with matrices with the amount of variation explained per sample and view. #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Calculate variance explained (R2) #' r2 <- calculate_variance_explained_per_sample(model) #' calculate_variance_explained_per_sample <- function(object, views = "all", groups = "all", factors = "all") { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (any(object@model_options$likelihoods!="gaussian")) stop("Not possible to recompute the variance explained estimates when using non-gaussian likelihoods.") if (any(object@model_options$likelihoods!="gaussian")) if (isFALSE(object@data_options$loaded)) stop("Data is not loaded, cannot compute variance explained.") # Define factors, views and groups views <- .check_and_get_views(object, views) groups <- .check_and_get_groups(object, groups) factors <- .check_and_get_factors(object, factors) # Collect relevant expectations W <- get_weights(object, views=views, factors=factors) Z <- get_factors(object, groups=groups, factors=factors) Y <- lapply(get_data(object, add_intercept = FALSE)[views], function(view) view[groups]) Y <- lapply(Y, function(x) lapply(x,t)) # Replace masked values on Z by 0 (so that they do not contribute to predictions) for (g in groups) { Z[[g]][is.na(Z[[g]])] <- 0 } # samples <- unlist(samples_names(object)[groups]) samples <- samples_names(object)[groups] # Calculate coefficient of determination per sample and view r2 <- lapply(groups, function(g) { tmp <- sapply(views, function(m) { a <- rowSums((Y[[m]][[g]] - tcrossprod(Z[[g]],W[[m]]))**2, na.rm=TRUE) b <- rowSums(Y[[m]][[g]]**2, na.rm = TRUE) return(100*(1-a/b)) }) tmp <- matrix(tmp, ncol = length(views), nrow = length(samples[[g]])) tmp[tmp<0] <- 0 colnames(tmp) <- views rownames(tmp) <- samples[[g]] return(tmp) }); names(r2) <- groups return(r2) } #' @title Plot variance explained by the model #' @description plots the variance explained by the MOFA factors across different views and groups, as specified by the user. #' Consider using cowplot::plot_grid(plotlist = ...) to combine the multiple plots that this function generates. #' @name plot_variance_explained #' @param object a \code{\link{MOFA}} object #' @param x character specifying the dimension for the x-axis ("view", "factor", or "group"). #' @param y character specifying the dimension for the y-axis ("view", "factor", or "group"). #' @param split_by character specifying the dimension to be faceted ("view", "factor", or "group"). #' @param factors character vector with a factor name(s), or numeric vector with the index(es) of the factor(s). Default is "all". #' @param plot_total logical value to indicate if to plot the total variance explained (for the variable in the x-axis) #' @param min_r2 minimum variance explained for the color scheme (default is 0). #' @param max_r2 maximum variance explained for the color scheme. #' @param legend logical indicating whether to add a legend to the plot (default is TRUE). #' @param use_cache logical indicating whether to use cache (default is TRUE) #' @param ... extra arguments to be passed to \code{\link{calculate_variance_explained}} #' @import ggplot2 #' @importFrom cowplot plot_grid #' @importFrom stats as.formula #' @importFrom reshape2 melt #' @return A list of \code{\link{ggplot}} objects (if \code{plot_total} is TRUE) or a single \code{\link{ggplot}} object #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Calculate variance explained (R2) #' r2 <- calculate_variance_explained(model) #' #' # Plot variance explained values (view as x-axis, and factor as y-axis) #' plot_variance_explained(model, x="view", y="factor") #' #' # Plot variance explained values (view as x-axis, and group as y-axis) #' plot_variance_explained(model, x="view", y="group") #' #' # Plot variance explained values for factors 1 to 3 #' plot_variance_explained(model, x="view", y="group", factors=1:3) #' #' # Scale R2 values #' plot_variance_explained(model, max_r2=0.25) plot_variance_explained <- function(object, x = "view", y = "factor", split_by = NA, plot_total = FALSE, factors = "all", min_r2 = 0, max_r2 = NULL, legend = TRUE, use_cache = TRUE, ...) { # Sanity checks if (length(unique(c(x, y, split_by))) != 3) { stop(paste0("Please ensure x, y, and split_by arguments are different.\n", " Possible values are `view`, `group`, and `factor`.")) } # Automatically fill split_by in if (is.na(split_by)) split_by <- setdiff(c("view", "factor", "group"), c(x, y, split_by)) # Calculate variance explained if ((use_cache) & .hasSlot(object, "cache") && ("variance_explained" %in% names(object@cache))) { r2_list <- object@cache$variance_explained } else { r2_list <- calculate_variance_explained(object, factors = factors, ...) } r2_mk <- r2_list$r2_per_factor # convert matrix to long data frame for ggplot2 r2_mk_df <- melt( lapply(r2_mk, function(x) melt(as.matrix(x), varnames = c("factor", "view")) ), id.vars=c("factor", "view", "value") ) colnames(r2_mk_df)[ncol(r2_mk_df)] <- "group" # Subset factors for plotting if ((length(factors) == 1) && (factors[1] == "all")) { factors <- factors_names(object) } else { if (is.numeric(factors)) { factors <- factors_names(object)[factors] } else { stopifnot(all(factors %in% factors_names(object))) } r2_mk_df <- r2_mk_df[r2_mk_df$factor %in% factors,] } r2_mk_df$factor <- factor(r2_mk_df$factor, levels = factors) r2_mk_df$group <- factor(r2_mk_df$group, levels = groups_names(object)) r2_mk_df$view <- factor(r2_mk_df$view, levels = views_names(object)) # Detect whether to split by group or by view groups <- names(r2_list$r2_total) views <- colnames(r2_list$r2_per_factor[[1]]) # Set R2 limits if (!is.null(min_r2)) r2_mk_df$value[r2_mk_df$valuemax_r2] <- max_r2 } else { max_r2 = max(r2_mk_df$value) } # Grid plot with the variance explained per factor and view/group p1 <- ggplot(r2_mk_df, aes(x=.data[[x]], y=.data[[y]])) + geom_tile(aes(fill=.data$value), color="black") + facet_wrap(as.formula(sprintf('~%s',split_by)), nrow=1) + labs(x="", y="", title="") + scale_fill_gradientn(colors=c("gray97","darkblue"), guide="colorbar", limits=c(min_r2,max_r2)) + guides(fill=guide_colorbar("Var. (%)")) + theme( axis.text.x = element_text(size=rel(1.0), color="black"), axis.text.y = element_text(size=rel(1.1), color="black"), axis.line = element_blank(), axis.ticks = element_blank(), panel.background = element_blank(), strip.background = element_blank(), strip.text = element_text(size=rel(1.0)) ) if (isFALSE(legend)) p1 <- p1 + theme(legend.position = "none") # remove facet title if (length(unique(r2_mk_df[,split_by]))==1) p1 <- p1 + theme(strip.text = element_blank()) # Add total variance explained bar plots if (plot_total) { r2_m_df <- melt(lapply(r2_list$r2_total, function(x) lapply(x, function(z) z)), varnames=c("view", "group"), value.name="R2") colnames(r2_m_df)[(ncol(r2_m_df)-1):ncol(r2_m_df)] <- c("view", "group") r2_m_df$group <- factor(r2_m_df$group, levels = MOFA2::groups_names(object)) r2_m_df$view <- factor(r2_m_df$view, levels = views_names(object)) # Barplots for total variance explained min_lim_bplt <- min(0, r2_m_df$R2) max_lim_bplt <- max(r2_m_df$R2) # Barplot with variance explained per view/group (across all factors) p2 <- ggplot(r2_m_df, aes(x=.data[[x]], y=.data$R2)) + # ggtitle(sprintf("%s\nTotal variance explained per %s", i, x)) + geom_bar(stat="identity", fill="deepskyblue4", color="black", width=0.9) + facet_wrap(as.formula(sprintf('~%s',split_by)), nrow=1) + xlab("") + ylab("Variance explained (%)") + scale_y_continuous(limits=c(min_lim_bplt, max_lim_bplt), expand=c(0.005, 0.005)) + theme( axis.ticks.x = element_blank(), axis.text.x = element_text(color="black"), axis.text.y = element_text(color="black"), axis.title.y = element_text(color="black"), axis.line = element_line(color="black"), panel.background = element_blank(), strip.background = element_blank(), strip.text = element_text() ) # remove facet title if (length(unique(r2_m_df[,split_by]))==1) p2 <- p2 + theme(strip.text = element_blank()) # Bind plots plot_list <- list(p1,p2) } else { plot_list <- p1 } return(plot_list) } #' @title Plot variance explained by the model for a set of features #' #' @description Returns a tile plot with a group on the X axis and a feature along the Y axis #' #' @name plot_variance_explained_per_feature #' @param object a \code{\link{MOFA}} object. #' @param view a view name or index. #' @param features a vector with indices or names for features from the respective view, #' or number of top features to be fetched by their loadings across specified factors. #' "all" to plot all features. #' @param split_by_factor logical indicating whether to split R2 per factor or plot R2 jointly #' @param group_features_by column name of features metadata to group features by #' @param groups a vector with indices or names for sample groups (default is all) #' @param factors a vector with indices or names for factors (default is all) #' @param min_r2 minimum variance explained for the color scheme (default is 0). #' @param max_r2 maximum variance explained for the color scheme. #' @param legend logical indicating whether to add a legend to the plot (default is TRUE). #' @param return_data logical indicating whether to return the data frame to plot instead of plotting #' @param ... extra arguments to be passed to \code{\link{calculate_variance_explained}} #' @return ggplot object #' @import ggplot2 #' @importFrom cowplot plot_grid #' @importFrom stats as.formula #' @importFrom reshape2 melt #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_variance_explained_per_feature(model, view = 1) plot_variance_explained_per_feature <- function(object, view, features = 10, split_by_factor = FALSE, group_features_by = NULL, groups = "all", factors = "all", min_r2 = 0, max_r2 = NULL, legend = TRUE, return_data = FALSE, ...) { # Check that one view is requested view <- .check_and_get_views(object, view) if (length(view) != 1) stop("Please choose a single view to plot features from") # Fetch loadings, factors, and data if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Fetch relevant features) if (is.numeric(features) && (length(features) == 1)) { features <- as.integer(features) features <- .get_top_features_by_loading(object, view = view, factors = factors, nfeatures = features) } else if (is.character(features)) { if (features[1]=="all") features <- 1:object@dimensions$D[[view]] } features <- .check_and_get_features_from_view(object, view = view, features) # Collect relevant expectations groups <- .check_and_get_groups(object, groups) factors <- .check_and_get_factors(object, factors) # 1. Loadings: choose a view, one or multiple factors, and subset chosen features W <- get_weights(object, views = view, factors = factors) W <- lapply(W, function(W_m) W_m[rownames(W_m) %in% features,,drop=FALSE]) # 2. Factor values: choose one or multiple groups and factors Z <- get_factors(object, groups = groups, factors = factors) # 3. Data: Choose a view, one or multiple groups, and subset chosen features # Y <- lapply(get_expectations(object, "Y")[view], function(Y_m) lapply(Y_m[groups], t)) Y <- lapply(get_data(object, add_intercept = FALSE)[view], function(Y_m) lapply(Y_m[groups], t)) Y <- lapply(Y, function(Y_m) lapply(Y_m, function(Y_mg) Y_mg[,colnames(Y_mg) %in% features,drop=FALSE])) # Replace masked values on Z by 0 (so that they do not contribute to predictions) for (g in groups) { Z[[g]][is.na(Z[[g]])] <- 0 } m <- view # Use shorter notation when calculating R2 if (split_by_factor) { # Calculate coefficient of determination per group, factor and feature r2_gdk <- lapply(groups, function(g) { r2_g <- sapply(features, function(d) { sapply(factors, function(k) { a <- sum((as.matrix(Y[[m]][[g]][,d,drop=FALSE]) - tcrossprod(Z[[g]][,k,drop=FALSE], W[[m]][d,k,drop=FALSE]))**2, na.rm = TRUE) b <- sum(Y[[m]][[g]][,d,drop=FALSE]**2, na.rm = TRUE) return(1 - a/b) }) }) r2_g <- matrix(r2_g, ncol = length(features), nrow = length(factors)) colnames(r2_g) <- features rownames(r2_g) <- factors # Lower bound is zero r2_g[r2_g < 0] <- 0 r2_g }) names(r2_gdk) <- groups # Convert matrix to long data frame for ggplot2 r2_gdk_df <- do.call(rbind, r2_gdk) r2_gdk_df <- data.frame(r2_gdk_df, "group" = rep(groups, lapply(r2_gdk, nrow)), "factor" = rownames(r2_gdk_df)) r2_gdk_df <- melt(r2_gdk_df, id.vars = c("group", "factor")) colnames(r2_gdk_df) <- c("group", "factor", "feature", "value") r2_gdk_df$group <- factor(r2_gdk_df$group, levels = unique(r2_gdk_df$group)) r2_df <- r2_gdk_df } else { # Calculate coefficient of determination per group and feature r2_gd <- lapply(groups, function(g) { r2_g <- lapply(features, function(d) { a <- sum((as.matrix(Y[[m]][[g]][,d,drop=FALSE]) - tcrossprod(Z[[g]], W[[m]][d,,drop=FALSE]))**2, na.rm = TRUE) b <- sum(Y[[m]][[g]][,d,drop=FALSE]**2, na.rm = TRUE) return(1 - a/b) }) names(r2_g) <- features # Lower bound is zero r2_g[r2_g < 0] <- 0 r2_g }) names(r2_gd) <- groups # Convert matrix to long data frame for ggplot2 tmp <- as.matrix(data.frame(lapply(r2_gd, unlist))) colnames(tmp) <- groups r2_gd_df <- melt(tmp) colnames(r2_gd_df) <- c("feature", "group", "value") r2_gd_df$group <- factor(r2_gd_df$group, levels = unique(r2_gd_df$group)) r2_df <- r2_gd_df } # Transform from fraction to percentage r2_df$value <- 100*r2_df$value # Calculate minimum R2 to display if (!is.null(min_r2)) { r2_df$value[r2_df$valuemax_r2] <- max_r2 } else { max_r2 <- max(r2_df$value) } # Group features if (!is.null(group_features_by)) { features_indices <- match(r2_df$feature, features_metadata(object)$feature) features_grouped <- features_metadata(object)[,group_features_by,drop=FALSE][features_indices,,drop=FALSE] # If features grouped using multiple variables, concatenate them if (length(group_features_by) > 1) { features_grouped <- apply(features_grouped, 1, function(row) paste0(row, collapse="_")) } else { features_grouped <- features_grouped[,group_features_by,drop=TRUE] } r2_df["feature_group"] <- features_grouped } if (return_data) return(r2_df) if (split_by_factor) { r2_df$factor <- factor(r2_df$factor, levels = factors_names(object)) } # Grid plot with the variance explained per feature in every group p <- ggplot(r2_df, aes(x = .data$group, y = .data$feature)) + geom_tile(aes(fill = .data$value), color = "black") + guides(fill = guide_colorbar("R2 (%)")) + labs(x = "", y = "", title = "") + scale_fill_gradientn(colors=c("gray97","darkblue"), guide="colorbar", limits=c(min_r2, max_r2)) + theme_classic() + theme( axis.text = element_text(size = 12), axis.line = element_blank(), axis.ticks = element_blank(), strip.text = element_text(size = 12), ) if (!is.null(group_features_by) && split_by_factor) { p <- p + facet_grid(feature_group ~ factor, scales = "free_y") } else if (split_by_factor) { p <- p + facet_wrap(~factor, nrow = 1) } else if (!is.null(group_features_by)) { p <- p + facet_wrap(~feature_group, ncol = 1, scales = "free") } if (!legend) p <- p + theme(legend.position = "none") return(p) } ================================================ FILE: R/cluster_samples.R ================================================ ########################################################## ## Functions to cluster samples based on latent factors ## ########################################################## #' @title K-means clustering on samples based on latent factors #' @name cluster_samples #' @description MOFA factors are continuous in nature but they can be used to predict discrete clusters of samples. \cr #' The clustering can be performed in a single factor, which is equivalent to setting a manual threshold. #' More interestingly, it can be done using multiple factors, where multiple sources of variation are aggregated. \cr #' Importantly, this type of clustering is not weighted and does not take into account the different importance of the latent factors. #' @param object a trained \code{\link{MOFA}} object. #' @param k number of clusters (integer). #' @param factors character vector with the factor name(s), or numeric vector with the index of the factor(s) to use. #' Default is 'all' #' @param ... extra arguments passed to \code{\link{kmeans}} #' @details In some cases, due to model technicalities, samples can have missing values in the latent factor space. #' In such a case, these samples are currently ignored in the clustering procedure. #' @return output from \code{\link{kmeans}} function #' @importFrom stats kmeans #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Cluster samples in the factor space using factors 1 to 3 and K=2 clusters #' clusters <- cluster_samples(model, k=2, factors=1:3) cluster_samples <- function(object, k, factors = "all", ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Define factors factors <- .check_and_get_factors(object, factors) # Collect relevant data Z <- get_factors(object, factors=factors) if (is(Z, "list")) Z <- do.call(rbind, Z) N <- nrow(Z) # For now remove sample with missing values on factors # (TO-DO) incorporate a clustering function that is able to cope with missing values haveAllZ <- apply(Z, 1, function(x) all(!is.na(x))) if(!all(haveAllZ)) warning(paste("Removing", sum(!haveAllZ), "samples with missing values on at least one factor")) Z <- Z[haveAllZ,] # Perform k-means clustering kmeans.out <- kmeans(Z, centers=k, ...) return(kmeans.out) } ================================================ FILE: R/compare_models.R ================================================ ################################################ ## Functions to compare different MOFA models ## ################################################ #' @title Plot the correlation of factors between different models #' @name compare_factors #' @description Different \code{\link{MOFA}} objects are compared in terms of correlation between their factors. #' @param models a list with \code{\link{MOFA}} objects. #' @param ... extra arguments passed to pheatmap #' @details If assessing model robustness across trials, the output should look like a block diagonal matrix, #' suggesting that all factors are robustly detected in all model instances. #' @return Plots a heatmap of the Pearson correlation between latent factors across all input models. #' @importFrom stats cor #' @importFrom pheatmap pheatmap #' @importFrom grDevices colorRampPalette #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model1 <- load_model(file) #' model2 <- load_model(file) #' #' # Compare factors between models #' compare_factors(list(model1,model2)) compare_factors <- function(models, ...) { # Sanity checks if(!is.list(models)) stop("'models' has to be a list") if (!all(sapply(models, function (l) is(l, "MOFA")))) stop("Each element of the the list 'models' has to be an instance of MOFA") # Give generic names if no names present if(is.null(names(models))) names(models) <- paste("model", seq_len(length(models)), sep="") # Get latent factors LFs <- lapply(seq_along(models), function(i){ do.call(rbind, get_factors(models[[i]])) }) # Sanity checks if (is.null(Reduce(intersect,lapply(LFs, rownames)))) stop("No common samples in all models for comparison") # Align samples between models samples_names <- Reduce(intersect, lapply(LFs, rownames)) LFs <- lapply(LFs, function(z) { z[samples_names,,drop=FALSE] }) # Rename factors for (i in seq_along(LFs)) colnames(LFs[[i]]) <- paste(names(models)[i], colnames(LFs[[i]]), sep="_") # calculate correlation between factors across models corLFs <- cor(Reduce(cbind, LFs), use="complete.obs") corLFs[is.na(corLFs)] <- 0 corLFs <- abs(corLFs) # Plot heatmap breaksList <- seq(0,1, by=0.01) colors <- colorRampPalette(c("white",RColorBrewer::brewer.pal(9,name="YlOrRd")))(length(breaksList)) pheatmap(corLFs, color = colors, breaks = breaksList, ...) } #' @title Compare different trained \code{\link{MOFA}} objects in terms of the final value of the ELBO statistics and number of inferred factors #' @name compare_elbo #' @description Different objects of \code{\link{MOFA}} are compared in terms of the final value of the ELBO statistics. #' For model selection the model with the highest ELBO value is selected. #' @param models a list containing \code{\link{MOFA}} objects. #' @param log logical indicating whether to plot the log of the ELBO. #' @param return_data logical indicating whether to return a data.frame with the ELBO values per model #' @return A \code{\link{ggplot}} object or the underlying data.frame if return_data is TRUE #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model1 <- load_model(file) #' model2 <- load_model(file) #' #' # Compare ELBO between models #' \dontrun{compare_elbo(list(model1,model2))} compare_elbo <- function(models, log = FALSE, return_data = FALSE) { # Sanity checks if(!is.list(models)) stop("'models' has to be a list") if (!all(sapply(models, function (l) is(l, "MOFA")))) stop("Each element of the the list 'models' has to be an instance of MOFA") # Give generic names if no names present if (is.null(names(models))) names(models) <- paste0("model_", seq_along(models)) # Get ELBO values elbo_vals <- sapply(models, get_elbo) # Generate plot df <- data.frame( ELBO = elbo_vals, model = names(models) ) # take the log if (log) { message("Plotting the log2 of the negative of the ELBO (the higher the better)") df$ELBO <- log2(-df$ELBO) } if (all(df$ELBO<0)) { df$ELBO <- abs(df$ELBO) message("Plotting the absolute value of the ELBO for every model (the smaller the better)") } else { message("Plotting the ELBO for every model (the higher the better)") } # return data if (return_data) return(df) gg <- ggplot(df, aes(x=.data$model, y=.data$ELBO)) + geom_bar(stat="identity", color="black", fill="grey70") + labs(x="", y="Evidence Lower Bound (ELBO)") + theme_classic() return(gg) } #' @title Select a model from a list of trained \code{\link{MOFA}} objects based on the best ELBO value #' @name select_model #' @description Different objects of \code{\link{MOFA}} are compared in terms of the final value of the ELBO statistics #' and the model with the highest ELBO value is selected. #' @param models a list containing \code{\link{MOFA}} objects. #' @param plot boolean indicating whether to show a plot of the ELBO for each model instance #' @return A \code{\link{MOFA}} object #' @export select_model <- function(models, plot = FALSE) { # Sanity checks if(!is.list(models)) stop("'models' has to be a list") if (!all(sapply(models, function (l) is(l, "MOFA")))) stop("Each element of the the list 'models' has to be an instance of MOFA") elbo_vals <- sapply(models, get_elbo) if(plot) compare_elbo(models) models[[which.max(elbo_vals)]] } ================================================ FILE: R/contribution_scores.R ================================================ #' @title Calculate contribution scores for each view in each sample #' @description This function calculates, *for each sample* how much each view contributes to its location in the latent manifold, what we call \emph{contribution scores} #' @name calculate_contribution_scores #' @param object a trained \code{\link{MOFA}} object. #' @param views character vector with the view names, or numeric vector with view indexes. Default is 'all' #' @param groups character vector with the group names, or numeric vector with group indexes. Default is 'all' #' @param factors character vector with the factor names, or numeric vector with the factor indexes. Default is 'all' #' @param scale logical indicating whether to scale the sample-wise variance explained values by the total amount of variance explained per view. #' This effectively normalises each view by its total variance explained. It is important when different amounts of variance is explained for each view (check with \code{plot_variance_explained(..., plot_total=TRUE)}) #' @details Contribution scores are calculated in three steps: #' \itemize{ #' \item{\strong{Step 1}: calculate variance explained for each cell i and each view m (\eqn{R_{im}}), using all factors} #' \item{\strong{Step 2} (optional): scale values by the total variance explained for each view} #' \item{\strong{Step 3}: calculate contribution score (\eqn{C_{im}}) for cell i and view m as: \deqn{C_{im} = \frac{R2_{im}}{\sum_{m} R2_{im}} } } #' } #' Note that contribution scores can be calculated using any number of data modalities, but it is easier to interpret when you specify two. \cr #' Please note that this functionality is still experimental, contact the authors if you have questions. #' @return adds the contribution scores to the metadata slot (\code{samples_metadata(MOFAobject)}) and to the \code{MOFAobject@cache} slot #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' model <- calculate_contribution_scores(model) #' calculate_contribution_scores <- function(object, views = "all", groups = "all", factors = "all", scale = TRUE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (any(object@model_options$likelihoods!="gaussian")) stop("Not possible to compute contribution scores when using non-gaussian likelihoods.") # Define factors, views and groups views <- .check_and_get_views(object, views) if (length(views)<2) stop("contribution scores only make sense when having at least 2 views") groups <- .check_and_get_groups(object, groups) factors <- .check_and_get_factors(object, factors) if (length(factors)<2) stop("contribution scores only make sense when having at least 2 factors") # fetch variance explained values r2.per.sample <- calculate_variance_explained_per_sample(object, factors=factors, views = views, groups = groups) # scale the variance explained values to the total amount of variance explained per view if (scale) { r2.per.view <- get_variance_explained(object, factors=factors, views = views, groups = groups)[["r2_total"]] r2.per.sample <- lapply(1:length(groups), function(g) sweep(r2.per.sample[[g]], 2, r2.per.view[[g]],"/")) } # concatenate groups r2.per.sample <- do.call("rbind",r2.per.sample) # Calculate the fraction of (relative) variance explained for each view in each cell -> the contribution score contribution_scores <- r2.per.sample / rowSums(r2.per.sample) # Add contribution scores to the sample metadata for (i in colnames(contribution_scores)) { object <- .add_column_to_metadata(object, contribution_scores[,i], paste0(i,"_contribution")) } # Add contribution scores to the cache object@cache[["contribution_scores"]] <- contribution_scores return(object) } get_contribution_scores <- function(object, groups = "all", views = "all", factors = "all", as.data.frame = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get factors and groups groups <- .check_and_get_groups(object, groups) views <- .check_and_get_views(object, views) factors <- .check_and_get_factors(object, factors) # Fetch if (.hasSlot(object, "cache") && ("contribution_scores" %in% names(object@cache))) { scores_list <- object@cache[["contribution_scores"]] } else { scores_list <- calculate_contribution_scores(object, factors = factors, views = views, groups = groups) } # Convert to data.frame format if (as.data.frame) { scores <- reshape2::melt( do.call("rbind",scores_list) ) colnames(scores) <- c("sample", "view", "value") } else { scores <- scores_list } return(scores) } plot_contribution_scores <- function(object, samples = "all", group_by = NULL, return_data = FALSE, ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # (TO-DO) get samples # get contribution scores scores <- get_contribution_scores(object, as.data.frame = TRUE, ...) # TO-DO: CHECK THAT GROUP IS A CHARACTER/FACTOR # individual samples if (is.null(group_by)) { to.plot <- scores if (return_data) return(to.plot) p <- ggplot(to.plot, aes(x=.data$view, y=.data$value)) + geom_bar(aes(fill=view), stat="identity", color="black") + facet_wrap(~sample) + labs(x="", y="Contribution score") + theme_classic() + theme( axis.text.x = element_blank(), axis.ticks.x = element_blank(), legend.position = "top", legend.title = element_blank() ) return(p) # group samples } else { to.plot <- merge(scores, object@samples_metadata[,c("sample",group_by)], by="sample") if (return_data) return(to.plot) p <- ggplot(to.plot, aes(x=.data$view, y=.data$value)) + geom_boxplot(aes(fill=view)) + facet_wrap(as.formula(paste("~", group_by))) + labs(x="", y="Contribution score") + theme_classic() + theme( axis.text.x = element_blank(), axis.ticks.x = element_blank(), legend.position = "top", legend.title = element_blank() ) return(p) } } ================================================ FILE: R/correlate_covariates.R ================================================ #' @title Plot correlation of factors with external covariates #' @name correlate_factors_with_covariates #' @description Function to correlate factor values with external covariates. #' @param object a trained \code{\link{MOFA}} object. #' @param covariates #' \itemize{ #' \item{\strong{data.frame}: a data.frame where the samples are stored in the rows and the covariates are stored in the columns. #' Use row names for sample names and column names for covariate names. Columns values must be numeric. } #' \item{\strong{character vector}: character vector with names of columns that are present in the sample metadata (\code{samples_metadata(model)}} #' } #' @param factors character vector with the factor name(s), or numeric vector with the index of the factor(s) to use. Default is 'all'. #' @param groups character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups. #' @param abs logical indicating whether to take the absolute value of the correlation coefficient (default is \code{TRUE}). #' @param plot character indicating whether to plot Pearson correlation coefficients (\code{plot="r"}) or log10 adjusted p-values (\code{plot="log_pval"}). #' @param return_data logical indicating whether to return the correlation results instead of plotting #' @param transpose logical indicating whether to transpose the plot #' @param alpha p-value threshold #' @param ... extra arguments passed to \code{\link[corrplot]{corrplot}} (if \code{plot=="r"}) or \code{\link[pheatmap]{pheatmap}} (if \code{plot=="log_pval"}). #' @importFrom pheatmap pheatmap #' @importFrom corrplot corrplot #' @return A \code{\link[corrplot]{corrplot}} (if \code{plot=="r"}) or \code{\link[pheatmap]{pheatmap}} (if \code{plot=="log_pval"}) or the underlying data.frame if return_data is TRUE #' @export correlate_factors_with_covariates <- function(object, covariates, factors = "all", groups = "all", abs = FALSE, plot = c("log_pval","r"), alpha = 0.05, return_data = FALSE, transpose = FALSE, ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") groups <- .check_and_get_groups(object,groups) # Get covariates metadata <- samples_metadata(object) metadata <- metadata[metadata$group%in%groups,] if (is.character(covariates)) { stopifnot(all(covariates %in% colnames(metadata))) covariates <- metadata[,covariates,drop=FALSE] } else if (is.data.frame(covariates)) { samples <- metadata$sample if (is.null(rownames(covariates))) stop("The 'covariates' data.frame does not have samples names") stopifnot(all(rownames(covariates) %in% samples)) covariates <- metadata[match(rownames(covariates), metadata$sample),] } else { stop("covariates argument not recognised. Please read the documentation: ?correlate_factors_with_covariates") } # convert character columns to factors cols <- which(sapply(covariates, is.character)) if (length(cols>=1)) { covariates[cols] <- lapply(covariates[cols], as.factor) } # convert all columns to numeric cols <- which(!sapply(covariates,class)%in%c("numeric","integer")) if (length(cols>=1)) { cols.factor <- which(sapply(covariates,class)=="factor") covariates[cols] <- lapply(covariates[cols], as.numeric) warning("There are non-numeric values in the covariates data.frame, converting to numeric...") covariates[cols] <- lapply(covariates[cols], as.numeric) } stopifnot(all(sapply(covariates,class)%in%c("numeric","integer"))) # Get factors factors <- .check_and_get_factors(object, factors) Z <- get_factors(object, factors = factors, groups = groups, as.data.frame=FALSE) Z <- do.call(rbind, Z) # correlation cor <- psych::corr.test(Z, covariates, method = "pearson", adjust = "BH") # plot plot <- match.arg(plot) if (plot=="r") { stat <- cor$r if (abs) stat <- abs(stat) if (transpose) stat <- t(stat) if (return_data) return(stat) corrplot(stat, tl.col = "black", title="Pearson correlation coefficient", ...) } else if (plot=="log_pval") { stat <- cor$p stat[stat>alpha] <- 1.0 if (all(stat==1.0)) stop("All p-values are 1.0, nothing to plot") stat <- -log10(stat) stat[is.infinite(stat)] <- 1000 if (transpose) stat <- t(stat) if (return_data) return(stat) col <- colorRampPalette(c("lightgrey", "red"))(n=100) pheatmap::pheatmap(stat, main="log10 adjusted p-values", cluster_rows = FALSE, color=col, ...) } else { stop("'plot' argument not recognised. Please read the documentation: ?correlate_factors_with_covariates") } } #' @title Summarise factor values using external groups #' @name summarise_factors #' @description Function to summarise factor values using a discrete grouping of samples. #' @param object a trained \code{\link{MOFA}} object. #' @param df a data.frame with the columns "sample" and "level", where level is a factor with discrete group assignments for each sample. #' @param factors character vector with the factor name(s), or numeric vector with the index of the factor(s) to use. Default is 'all'. #' @param groups character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups. #' @param abs logical indicating whether to take the absolute value of the factors (default is \code{FALSE}). #' @param return_data logical indicating whether to return the fa instead of plotting #' @import ggplot2 #' @importFrom dplyr group_by summarise mutate #' @importFrom stats median #' @importFrom magrittr %>% #' @return A \code{\link{ggplot}} object or a \code{data.frame} if return_data is TRUE #' @export summarise_factors <- function(object, df, factors = "all", groups = "all", abs = FALSE, return_data = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(is.data.frame(df)) stopifnot((c("sample","level")%in%colnames(df))) stopifnot(df$sample %in% unlist(samples_names(object))) stopifnot(length(df$level)>1) df$level <- as.factor(df$level) # Get factors factors <- .check_and_get_factors(object, factors) groups <- .check_and_get_groups(object, groups) factors_df <- get_factors(object, factors = factors, groups = groups, as.data.frame=TRUE) %>% group_by(factor) %>% mutate(value=value/max(abs(value),na.rm=TRUE)) # Scale factor values # Merge data.frames to.plot <- merge(factors_df, df, by="sample") %>% group_by(level,factor,group) %>% summarise(value=median(value,na.rm=TRUE)) if (abs) { to.plot$value <- abs(to.plot$value) } # Plot if (length(unique(factors_df$group))>1) { to.plot$group <- factor(to.plot$group, levels=groups) p <- ggplot(to.plot, aes(x=.data$group, y=.data$level, fill=.data$value)) + facet_wrap(~factor) } else { p <- ggplot(to.plot, aes(x=.data$factor, y=.data$level, fill=.data$value)) } p <- p + geom_tile() + theme_classic() + labs(x="", y="", fill="Score") + theme( axis.text.x = element_text(color="black", angle=30, hjust=1), axis.text.y = element_text(color="black") ) if (abs) { p <- p + scale_fill_gradient2(low = "white", high = "red") } else { # center the color scheme at 0 p <- p + scale_fill_distiller(type = "div", limit = max(abs(to.plot$value),na.rm=TRUE)*c(-1,1)) } # Return data or plot if (return_data) { return(to.plot) } else { return(p) } } ================================================ FILE: R/create_mofa.R ================================================ #' @title create a MOFA object #' @name create_mofa #' @description Method to create a \code{\link{MOFA}} object. Depending on the input data format, this method calls one of the following functions: #' \itemize{ #' \item{\strong{long data.frame}: \code{\link{create_mofa_from_df}}} #' \item{\strong{List of matrices}: \code{\link{create_mofa_from_matrix}}} #' \item{\strong{MultiAssayExperiment}: \code{\link{create_mofa_from_MultiAssayExperiment}}} #' \item{\strong{Seurat}: \code{\link{create_mofa_from_Seurat}}} #' \item{\strong{SingleCellExperiment}: \code{\link{create_mofa_from_SingleCellExperiment}}} #' } #' Please read the documentation of the corresponding function for more details on your specific data format. #' @param data one of the formats above #' @param groups group information, only relevant when using the multi-group framework. #' @param extract_metadata logical indicating whether to incorporate the sample metadata from the input object into the MOFA object ( #' not relevant when the input is a list of matrices). Default is \code{TRUE}. #' @param ... further arguments that can be passed to the function depending on the input data format. #' See the documentation of above functions for details. #' @return Returns an untrained \code{\link{MOFA}} object #' @export #' @examples #' # Using an existing simulated data with two groups and two views #' file <- system.file("extdata", "test_data.RData", package = "MOFA2") #' #' # Load data (in long data.frame format) #' load(file) #' MOFAmodel <- create_mofa(dt) create_mofa <- function(data, groups = NULL, extract_metadata = TRUE, ...) { # Creating MOFA object from a Seurat object if (is(data, "Seurat")) { message("Creating MOFA object from a Seurat object...") object <- create_mofa_from_Seurat(data, groups, extract_metadata = extract_metadata, ...) # Creating MOFA object from a SingleCellExperiment object } else if (is(data, "SingleCellExperiment")) { message("Creating MOFA object from a SingleCellExperiment object...") object <- create_mofa_from_SingleCellExperiment(data, groups, extract_metadata = extract_metadata, ...) # Creating MOFA object from a data.frame object } else if (is(data, "data.frame")) { message("Creating MOFA object from a data.frame...") object <- create_mofa_from_df(data, extract_metadata = extract_metadata) # Creating MOFA object from a (sparse) matrix object } else if (is(data, "list") && (length(data) > 0) && (all(sapply(data, function(x) is(x, "matrix"))) || all(sapply(data, function(x) is(x, "dgCMatrix"))) || all(sapply(data, function(x) is(x, "dgTMatrix"))))) { message("Creating MOFA object from a list of matrices (features as rows, sample as columns)...\n") object <- create_mofa_from_matrix(data, groups) # Creating MOFA object from MultiAssayExperiment object } else if(is(data, "MultiAssayExperiment")){ object <- create_mofa_from_MultiAssayExperiment(data, groups, extract_metadata = extract_metadata, ...) } else { stop("Error: input data has to be provided as a list of matrices, a data frame or a Seurat object. Please read the documentation for more details.") } return(object) } #' @title create a MOFA object from a MultiAssayExperiment object #' @name create_mofa_from_MultiAssayExperiment #' @description Method to create a \code{\link{MOFA}} object from a MultiAssayExperiment object #' @param mae a MultiAssayExperiment object #' @param groups a string specifying column name of the colData to use it as a group variable. #' Alternatively, a character vector with group assignment for every sample. #' Default is \code{NULL} (no group structure). #' @param extract_metadata logical indicating whether to incorporate the metadata from the MultiAssayExperiment object into the MOFA object #' @return Returns an untrained \code{\link{MOFA}} object #' @export create_mofa_from_MultiAssayExperiment <- function(mae, groups = NULL, extract_metadata = FALSE) { # Sanity check if(!requireNamespace("MultiAssayExperiment", quietly = TRUE)){ stop("Package \"MultiAssayExperiment\" is required but is not installed.", call. = FALSE) } else { # Re-arrange data for training in MOFA to matrices, fill in NAs data_list <- lapply(names(mae), function(m) { # Extract general sample names primary <- unique(MultiAssayExperiment::sampleMap(mae)[,"primary"]) # Extract view subdata <- as.matrix(MultiAssayExperiment::assays(mae)[[m]]) # Rename view-specific sample IDs with the general sample names stopifnot(colnames(subdata)==MultiAssayExperiment::sampleMap(mae)[MultiAssayExperiment::sampleMap(mae)[,"assay"]==m,"colname"]) colnames(subdata) <- MultiAssayExperiment::sampleMap(mae)[MultiAssayExperiment::sampleMap(mae)[,"assay"]==m,"primary"] # Fill subdata with NAs subdata_filled <- .subset_augment(subdata, primary) return(subdata_filled) }) # Define groups if (is(groups, 'character') && (length(groups) == 1)) { if (!(groups %in% colnames(MultiAssayExperiment::colData(mae)))) stop(paste0(groups, " is not found in the colData of the MultiAssayExperiment.\n", "If you want to use groups information from MultiAssayExperiment,\n", "please ensure to provide a column name that exists. The columns of colData are:\n", paste0(colnames(MultiAssayExperiment::colData(mae)), collapse = ", "))) groups <- MultiAssayExperiment::colData(mae)[,groups] } # If no groups provided, treat all samples as coming from one group if (is.null(groups)) { # message("No groups provided as argument, we assume that all samples belong to the same group.\n") groups <- rep("group1", length(unique(MultiAssayExperiment::sampleMap(mae)[,"primary"]))) } # Initialise MOFA object object <- new("MOFA") object@status <- "untrained" object@data <- .split_data_into_groups(data_list, groups) # groups_nms <- unique(as.character(groups)) groups_nms <- names(object@data[[1]]) # Set dimensionalities object@dimensions[["M"]] <- length(data_list) object@dimensions[["G"]] <- length(groups_nms) object@dimensions[["D"]] <- sapply(data_list, nrow) object@dimensions[["N"]] <- sapply(groups_nms, function(x) sum(groups == x)) object@dimensions[["K"]] <- 0 # Set view names views_names(object) <- names(mae) # Set samples group names groups_names(object) <- groups_nms # Extract metadata if (extract_metadata) { if (ncol(MultiAssayExperiment::colData(mae)) > 0) { object@samples_metadata <- data.frame(MultiAssayExperiment::colData(mae)) } } # Create sample metadata object <- .create_samples_metadata(object) # Create features metadata object <- .create_features_metadata(object) # Rename duplicated features object <- .rename_duplicated_features(object) # Do quality control object <- .quality_control(object) return(object) } } #' @title create a MOFA object from a data.frame object #' @name create_mofa_from_df #' @description Method to create a \code{\link{MOFA}} object from a data.frame object #' @param df \code{data.frame} object with at most 5 columns: \code{sample}, \code{group}, \code{feature}, \code{view}, \code{value}. #' The \code{group} column (optional) indicates the group of each sample when using the multi-group framework. #' The \code{view} column (optional) indicates the view of each feature when having multi-view data. #' @param extract_metadata logical indicating whether to incorporate the extra columns as sample metadata into the MOFA object #' @return Returns an untrained \code{\link{MOFA}} object #' @export #' @examples #' # Using an existing simulated data with two groups and two views #' file <- system.file("extdata", "test_data.RData", package = "MOFA2") #' #' # Load data (in long data.frame format) #' load(file) #' MOFAmodel <- create_mofa_from_df(dt) create_mofa_from_df <- function(df, extract_metadata = TRUE) { # Quality controls df <- as.data.frame(df) if (!"group" %in% colnames(df)) { # message('No "group" column found in the data.frame, we will assume a common group for all samples') df$group <- "single_group" } if (!"view" %in% colnames(df)) { # message('No "view" column found in the data.frame, we will assume a common view for all features') df$view <- "single_view" } stopifnot(all(c("sample","feature","value") %in% colnames(df))) # stopifnot(all(colnames(df) %in% (c("sample","feature","value","group","view")))) stopifnot(all(is.numeric(df$value))) # Convert 'sample' and 'feature' columns to factors if (!is.factor(df$sample)) df$sample <- as.factor(df$sample) if (!is.factor(df$feature)) df$feature <- as.factor(df$feature) # Convert 'group' columns to factors if (!"group" %in% colnames(df)) { df$group <- factor("group1") } else { df$group <- factor(df$group) } # Convert 'view' columns to factors if (!"view" %in% colnames(df)) { df$view <- factor("view1") } else { df$view <- factor(df$view) } data_matrix <- list() for (m in levels(df$view)) { data_matrix[[m]] <- list() features <- as.character( unique( df[df$view==m,"feature",drop=TRUE] ) ) for (g in levels(df$group)) { samples <- as.character( unique( df[df$group==g,"sample",drop=TRUE] ) ) Y <- df[df$view==m & df$group==g,] Y$sample <- factor(Y$sample, levels=samples) Y$feature <- factor(Y$feature, levels=features) if (nrow(Y)==0) { data_matrix[[m]][[g]] <- matrix(as.numeric(NA), ncol=length(samples), nrow=length(features)) rownames(data_matrix[[m]][[g]]) <- features colnames(data_matrix[[m]][[g]]) <- samples } else { data_matrix[[m]][[g]] <- .df_to_matrix( reshape2::dcast(Y, feature~sample, value.var="value", fill=NA, drop=FALSE) ) } } } # Create MOFA object object <- new("MOFA") object@status <- "untrained" object@data <- data_matrix # Set dimensionalities object@dimensions[["M"]] <- length(levels(df$view)) object@dimensions[["D"]] <- sapply(levels(df$view), function(m) length(unique(df[df$view==m,]$feature))) object@dimensions[["G"]] <- length(levels(df$group)) object@dimensions[["N"]] <- sapply(levels(df$group), function(g) length(unique(df[df$group==g,]$sample))) object@dimensions[["K"]] <- 0 # Set view names views_names(object) <- levels(df$view) # Set group names groups_names(object) <- levels(df$group) # save other sample-level columns to samples metadata (e.g. covariates) if(extract_metadata && !all(colnames(df) %in% (c("sample","feature","value","group","view")))) { cols2keep <- df %>% group_by(sample) %>% select(-c("view", "feature", "value", "group", "value")) %>% summarise(across(!starts_with("sample"), function(x) length(unique(x)), .names = "{col}")) cols2keep <- colnames(cols2keep)[apply(cols2keep, 2, function(x) all(x == 1))] if (length(cols2keep) > 0){ df_meta <- df[, c("sample",cols2keep)] %>% distinct() object@samples_metadata <- df_meta %>% select(-sample) rownames(object@samples_metadata) <- df_meta$sample } } # Create sample metadata object <- .create_samples_metadata(object) # Create features metadata object <- .create_features_metadata(object) # Rename duplicated features object <- .rename_duplicated_features(object) # Do quality control object <- .quality_control(object) return(object) } #' @title create a MOFA object from a SingleCellExperiment object #' @name create_mofa_from_SingleCellExperiment #' @description Method to create a \code{\link{MOFA}} object from a SingleCellExperiment object #' @param sce SingleCellExperiment object #' @param groups a string specifying column name of the colData to use it as a group variable. #' Alternatively, a character vector with group assignment for every sample. #' Default is \code{NULL} (no group structure). #' @param assay assay to use, default is \code{logcounts}. #' @param extract_metadata logical indicating whether to incorporate the metadata from the SingleCellExperiment object into the MOFA object #' @return Returns an untrained \code{\link{MOFA}} object #' @export create_mofa_from_SingleCellExperiment <- function(sce, groups = NULL, assay = "logcounts", extract_metadata = FALSE) { # Check is SingleCellExperiment is installed if (!requireNamespace("SingleCellExperiment", quietly = TRUE)) { stop("Package \"SingleCellExperiment\" is required but is not installed.", call. = FALSE) } else if(!requireNamespace("SummarizedExperiment", quietly = TRUE)){ stop("Package \"SummarizedExperiment\" is required but is not installed.", call. = FALSE) } else { stopifnot(assay%in%names(SummarizedExperiment::assays(sce))) # Define groups of cells if (is.null(groups)) { # message("No groups provided as argument... we assume that all samples are coming from the same group.\n") groups <- rep("group1", dim(sce)[2]) } else { if (is(groups,'character')) { if (length(groups) == 1) { stopifnot(groups %in% colnames(colData(sce))) groups <- colData(sce)[,groups] } else { stopifnot(length(groups) == ncol(sce)) } } else { stop("groups wrongly specified. Please see the documentation and the examples") } } # Extract data matrices data_matrices <- list( .split_sce_into_groups(sce, groups, assay) ) names(data_matrices) <- assay # Create MOFA object object <- new("MOFA") object@status <- "untrained" object@data <- data_matrices # Define dimensions object@dimensions[["M"]] <- length(assay) object@dimensions[["D"]] <- vapply(data_matrices, function(m) nrow(m[[1]]), 1L) object@dimensions[["G"]] <- length(data_matrices[[1]]) object@dimensions[["N"]] <- vapply(data_matrices[[1]], function(g) ncol(g), 1L) object@dimensions[["K"]] <- 0 # Set views & groups names groups_names(object) <- as.character(names(data_matrices[[1]])) views_names(object) <- assay # Set metadata if (extract_metadata) { object@samples_metadata <- as.data.frame(colData(sce)) # object@features_metadata <- as.data.frame(rowData(sce)) } # Create sample metadata object <- .create_samples_metadata(object) # Create features metadata object <- .create_features_metadata(object) # Rename duplicated features object <- .rename_duplicated_features(object) # Do quality control object <- .quality_control(object) return(object) } } #' @title create a MOFA object from a Seurat object #' @name create_mofa_from_Seurat #' @description Method to create a \code{\link{MOFA}} object from a Seurat object #' @param seurat Seurat object #' @param groups a string specifying column name of the samples metadata to use it as a group variable. #' Alternatively, a character vector with group assignment for every sample. #' Default is \code{NULL} (no group structure). #' @param assays assays to use, default is \code{NULL}, it fetched all assays available #' @param layer layer to be used (default is data). #' @param features a list with vectors, which are used to subset features, with names corresponding to assays; a vector can be provided when only one assay is used #' @param extract_metadata logical indicating whether to incorporate the metadata from the Seurat object into the MOFA object #' @return Returns an untrained \code{\link{MOFA}} object #' @export create_mofa_from_Seurat <- function(seurat, groups = NULL, assays = NULL, layer = "data", features = NULL, extract_metadata = FALSE) { # Check is Seurat is installed if (!requireNamespace("Seurat", quietly = TRUE)) { stop("Package \"Seurat\" is required but is not installed.", call. = FALSE) } else { # Check Seurat version if (SeuratObject::Version(seurat)$major != 5) stop("Please install Seurat v5") # Define assays if (is.null(assays)) { assays <- SeuratObject::Assays(seurat) message(paste0("No assays specified, using all assays by default: ", paste(assays,collapse=" "))) } else { stopifnot(assays%in%Seurat::Assays(seurat)) } # Define groups of cells if (is(groups, 'character') && (length(groups) == 1)) { if (!(groups %in% colnames(seurat@meta.data))) stop(paste0(groups, " is not found in the Seurat@meta.data.\n", "please ensure to provide a column name that exists. The columns of meta data are:\n", paste0(colnames(seurat@meta.data), sep = ", "))) groups <- seurat@meta.data[,groups] } # If features to subset are provided, # make sure they are a list with respective views (assays) names. # A vector is accepted if there's one assay to be used if (is(features, "list")) { if (!is.null(features) && !all(names(features) %in% assays)) { stop("Please make sure all the names of the features list correspond to views (assays) names being used for the model") } } else { # By default select highly variable features if present in the Seurat object if (is.null(features)) { message("No features specified, using variable features from the Seurat object...") features <- lapply(assays, function(i) seurat@assays[[i]]@var.features) names(features) <- assays if (any(sapply(features,length)==0)) stop("No list of features provided and variable features not detected in the Seurat object") } else if (all(is(features, "character"))) { features <- list(features) names(features) <- assays } else { stop("Features not recognised. Please either provide a list of features (per assay) or calculate variable features in the Seurat object") } } # If no groups provided, treat all samples as coming from one group if (is.null(groups)) { # message("No groups provided as argument... we assume that all samples are coming from the same group.\n") groups <- rep("group1", dim(seurat)[2]) } # Extract data matrices data_matrices <- lapply(assays, function(i) .split_seurat_into_groups(seurat, groups = groups, assay = i, layer = layer, features = features[[i]])) names(data_matrices) <- assays # Create MOFA object object <- new("MOFA") object@status <- "untrained" object@data <- data_matrices # Define dimensions object@dimensions[["M"]] <- length(assays) object@dimensions[["D"]] <- vapply(data_matrices, function(m) nrow(m[[1]]), 1L) object@dimensions[["G"]] <- length(data_matrices[[1]]) object@dimensions[["N"]] <- vapply(data_matrices[[1]], function(g) ncol(g), 1L) object@dimensions[["K"]] <- 0 # Set views & groups names groups_names(object) <- as.character(names(data_matrices[[1]])) views_names(object) <- assays # Set metadata if (extract_metadata) { object@samples_metadata <- seurat@meta.data # object@features_metadata <- do.call(rbind, lapply(assays, function(a) seurat@assays[[a]]@meta.features)) } # Create sample metadata object <- .create_samples_metadata(object) # Create features metadata object <- .create_features_metadata(object) # Rename duplicated features object <- .rename_duplicated_features(object) # Do quality control object <- .quality_control(object) return(object) } } #' @title create a MOFA object from a a list of matrices #' @name create_mofa_from_matrix #' @description Method to create a \code{\link{MOFA}} object from a list of matrices #' @param data A list of matrices, where each entry corresponds to one view. #' Samples are stored in columns and features in rows. #' Missing values must be filled in prior to creating the MOFA object (see for example the CLL tutorial) #' @param groups A character vector with group assignment for every sample. Default is \code{NULL}, no group structure. #' @return Returns an untrained \code{\link{MOFA}} object #' @export #' @examples #' m <- make_example_data() #' create_mofa_from_matrix(m$data) create_mofa_from_matrix <- function(data, groups = NULL) { # Quality control: check that the matrices are all numeric stopifnot(all(sapply(data, function(g) all(is.numeric(g)))) || all(sapply(data, function(x) class(x) %in% c("dgTMatrix", "dgCMatrix")))) # Quality control: check that all matrices have the same samples tmp <- lapply(data, function(m) colnames(m)) if(length(unique(sapply(tmp,length)))>1) stop("Views have different number of samples (columns)... please make sure that all views contain the same samples in the same order (see documentation)") if (length(unique(tmp))>1) stop("Views have different sample names (columns)... please make sure that all views contain the same samples in the same order (see documentation)") # Make a dgCMatrix out of dgTMatrix if (all(sapply(data, function(x) is(x, "dgTMatrix")))) { data <- lapply(data, function(m) as(m, "dgCMatrix")) } # Set groups names if (is.null(groups)) { # message("No groups provided as argument... we assume that all samples are coming from the same group.\n") groups <- rep("group1", ncol(data[[1]])) } # Set views names if (is.null(names(data))) { default_views <- paste0("view_", seq_len(length(data))) message(paste0("View names are not specified in the data, using default: ", paste(default_views, collapse=", "), "\n")) names(data) <- default_views } views_names <- as.character(names(data)) # Initialise MOFA object object <- new("MOFA") object@status <- "untrained" object@data <- .split_data_into_groups(data, groups) # groups_names <- as.character(unique(groups)) groups_names <- names(object@data[[1]]) # Set dimensionalities object@dimensions[["M"]] <- length(data) object@dimensions[["G"]] <- length(groups_names) object@dimensions[["D"]] <- sapply(data, nrow) object@dimensions[["N"]] <- sapply(groups_names, function(x) sum(groups == x)) object@dimensions[["K"]] <- 0 # Set features names for (m in seq_len(length(data))) { if (is.null(rownames(data[[m]]))) { warning(sprintf("Feature names are not specified for view %d, using default: feature1_v%d, feature2_v%d...", m, m, m)) for (g in seq_len(length(object@data[[m]]))) { rownames(object@data[[m]][[g]]) <- paste0("feature_", seq_len(nrow(object@data[[m]][[g]])), "_v", m) } } } # Set samples names for (g in seq_len(object@dimensions[["G"]])) { if (is.null(colnames(object@data[[1]][[g]]))) { warning(sprintf("Sample names for group %d are not specified, using default: sample1_g%d, sample2_g%d,...", g, g, g)) for (m in seq_len(object@dimensions[["M"]])) { colnames(object@data[[m]][[g]]) <- paste0("sample_", seq_len(ncol(object@data[[m]][[g]])), "_g", g) } } } # Set view names views_names(object) <- views_names # Set samples group names groups_names(object) <- groups_names # Create sample metadata object <- .create_samples_metadata(object) # Create features metadata object <- .create_features_metadata(object) # Rename duplicated features object <- .rename_duplicated_features(object) # Do quality control object <- .quality_control(object) return(object) } # (Hidden) function to split a list of matrices into a nested list of matrices .split_data_into_groups <- function(data, groups) { group_indices <- split(seq_along(groups), factor(groups, exclude = character(0))) # factor call avoids dropping NA lapply(data, function(x) { lapply(group_indices, function(idx) { x[, idx, drop = FALSE] }) }) } # (Hidden) function to split data in Seurat object into a list of matrices .split_seurat_into_groups <- function(seurat, groups, assay = "RNA", layer = "data", features = NULL) { data <- SeuratObject::GetAssayData(object = seurat, assay = assay, layer = layer) if(is.null(data) | any(dim(data) == 0)){ stop(paste("No data present in the layer",layer, "of the assay",assay ,"in the Seurat object.")) } if (!is.null(features)) data <- data[features, , drop=FALSE] .split_data_into_groups(list(data), groups)[[1]] } # (Hidden) function to split data in a SingleCellExperiment object into a list of matrices .split_sce_into_groups <- function(sce, groups, assay) { if(!requireNamespace("SummarizedExperiment", quietly = TRUE)){ stop("Package \"SummarizedExperiment\" is required but is not installed.", call. = FALSE) } else { data <- SummarizedExperiment::assay(sce, i = assay) .split_data_into_groups(list(data), groups)[[1]] } } # (Hidden) function to fill NAs for missing samples .subset_augment<-function(mat, samp) { samp <- unique(samp) mat <- t(mat) aug_mat<-matrix(NA, ncol=ncol(mat), nrow=length(samp)) aug_mat<-mat[match(samp,rownames(mat)),,drop=FALSE] rownames(aug_mat)<-samp colnames(aug_mat)<-colnames(mat) return(t(aug_mat)) } .df_to_matrix <- function(x) { m <- as.matrix(x[,-1]) rownames(m) <- x[[1]] if (ncol(m) == 1) colnames(m) <- colnames(x)[2:ncol(x)] m } .create_samples_metadata <- function(object) { # TO-DO: CHECK SAMPLE AND GROUP COLUMN IN PROVIDED METADATA foo <- lapply(object@data[[1]], colnames) tmp <- data.frame( sample = unname(unlist(foo)), group = unlist(lapply(names(foo), function(x) rep(x, length(foo[[x]])) )), stringsAsFactors = FALSE ) if (.hasSlot(object, "samples_metadata") && (length(object@samples_metadata) > 0)) { object@samples_metadata <- cbind(tmp, object@samples_metadata[match(tmp$sample, rownames(object@samples_metadata)),, drop = FALSE]) } else { object@samples_metadata <- tmp } return(object) } .create_features_metadata <- function(object) { tmp <- data.frame( feature = unname(unlist(lapply(object@data, function(x) rownames(x[[1]])))), view = unlist(lapply(seq_len(object@dimensions$M), function(x) rep(views_names(object)[[x]], object@dimensions$D[[x]]) )), stringsAsFactors = FALSE ) if (.hasSlot(object, "features_metadata") && (length(object@features_metadata) > 0)) { object@features_metadata <- cbind(tmp, object@features_metadata[match(tmp$feature, rownames(object@features_metadata)),]) } else { object@features_metadata <- tmp } return(object) } .rename_duplicated_features <- function(object) { feature_names <- unname(unlist(lapply(object@data, function(x) rownames(x[[1]])))) duplicated_names <- unique(feature_names[duplicated(feature_names)]) if (length(duplicated_names)>0) warning("There are duplicated features names across different views. We will add the suffix *_view* only for those features Example: if you have both TP53 in mRNA and mutation data it will be renamed to TP53_mRNA, TP53_mutation") # Rename data matrices for (m in names(object@data)) { for (g in names(object@data[[m]])) { tmp <- which(rownames(object@data[[m]][[g]]) %in% duplicated_names) if (length(tmp)>0) { rownames(object@data[[m]][[g]])[tmp] <- paste(rownames(object@data[[m]][[g]])[tmp], m, sep="_") } } } # Rename features metadata tmp <- object@features_metadata[["feature"]] %in% duplicated_names object@features_metadata[tmp,"feature"] <- paste(object@features_metadata[tmp,"feature"], object@features_metadata[tmp,"view"], sep="_") return(object) } ================================================ FILE: R/dimensionality_reduction.R ================================================ ################################################################## ## Functions to do dimensionality reduction on the MOFA factors ## ################################################################## #' @title Run t-SNE on the MOFA factors #' @name run_tsne #' @param object a trained \code{\link{MOFA}} object. #' @param factors character vector with the factor names, or numeric vector with the indices of the factors to use, or "all" to use all factors (default). #' @param groups character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use all groups (default). #' @param ... arguments passed to \code{\link{Rtsne}} #' @details This function calls \code{\link[Rtsne]{Rtsne}} to calculate a TSNE representation from the MOFA factors. #' Subsequently, you can plot the TSNE representation with \code{\link{plot_dimred}} or fetch the coordinates using \code{plot_dimred(..., method="TSNE", return_data=TRUE)}. #' Remember to use set.seed before the function call to get reproducible results. #' @return Returns a \code{\link{MOFA}} object with the \code{MOFAobject@dim_red} slot filled with the t-SNE output #' @importFrom Rtsne Rtsne #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Run #' \dontrun{ model <- run_tsne(model, perplexity = 15) } #' #' # Plot #' \dontrun{ model <- plot_dimred(model, method="TSNE") } #' #' # Fetch data #' \dontrun{ tsne.df <- plot_dimred(model, method="TSNE", return_data=TRUE) } #' run_tsne <- function(object, factors = "all", groups = "all", ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get factor values Z <- get_factors(object, factors=factors, groups=groups) # Concatenate groups Z <- do.call(rbind, Z) # Replace missing values by zero Z[is.na(Z)] <- 0 # Run t-SNE tsne_embedding <- Rtsne(Z, check_duplicates = FALSE, pca = FALSE, ...) # Add sample names and enumerate latent dimensions (e.g. TSNE1 and TSNE2) object@dim_red$TSNE <- data.frame(rownames(Z), tsne_embedding$Y) colnames(object@dim_red$TSNE) <- c("sample", paste0("TSNE", 1:ncol(tsne_embedding$Y))) return(object) } #' @title Run UMAP on the MOFA factors #' @name run_umap #' @param object a trained \code{\link{MOFA}} object. #' @param factors character vector with the factor names, or numeric vector with the indices of the factors to use, or "all" to use all factors (default). #' @param groups character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use all groups (default). #' @param n_neighbors number of neighbouring points used in local approximations of manifold structure. Larger values will result in more global structure being preserved at the loss of detailed local structure. In general this parameter should often be in the range 5 to 50. #' @param min_dist This controls how tightly the embedding is allowed compress points together. Larger values ensure embedded points are more evenly distributed, while smaller values allow the algorithm to optimise more accurately with regard to local structure. Sensible values are in the range 0.01 to 0.5 #' @param metric choice of metric used to measure distance in the input space #' @param ... arguments passed to \code{\link[uwot]{umap}} #' @details This function calls \code{\link[uwot]{umap}} to calculate a UMAP representation from the MOFA factors #' For details on the hyperparameters of UMAP see the documentation of \code{\link[uwot]{umap}}. #' Subsequently, you can plot the UMAP representation with \code{\link{plot_dimred}} or fetch the coordinates using \code{plot_dimred(..., method="UMAP", return_data=TRUE)}. #' Remember to use set.seed before the function call to get reproducible results. #' @return Returns a \code{\link{MOFA}} object with the \code{MOFAobject@dim_red} slot filled with the UMAP output #' @importFrom uwot umap #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Change hyperparameters passed to umap #' \dontrun{ model <- run_umap(model, min_dist = 0.01, n_neighbors = 10) } #' # Plot #' \dontrun{ model <- plot_dimred(model, method="UMAP") } #' #' # Fetch data #' \dontrun{ umap.df <- plot_dimred(model, method="UMAP", return_data=TRUE) } #' run_umap <- function(object, factors = "all", groups = "all", n_neighbors = 30, min_dist = 0.3, metric = "cosine", ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get factor values Z <- get_factors(object, factors = factors, groups = groups) # Concatenate groups Z <- do.call(rbind, Z) # Replace missing values by zero Z[is.na(Z)] <- 0 # Run UMAP umap_embedding <- umap(Z, n_neighbors=n_neighbors, min_dist=min_dist, metric=metric, ...) # Add sample names and enumerate latent dimensions (e.g. UMAP1 and UMAP2) object@dim_red$UMAP <- data.frame(rownames(Z), umap_embedding) colnames(object@dim_red$UMAP) <- c("sample", paste0("UMAP", 1:ncol(umap_embedding))) return(object) } #' @title Plot dimensionality reduction based on MOFA factors #' @name plot_dimred #' @param object a trained \code{\link{MOFA}} object. #' @param method string indicating which method has been used for non-linear dimensionality reduction (either 'umap' or 'tsne') #' @param groups character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups. #' @param show_missing logical indicating whether to include samples for which \code{shape_by} or \code{color_by} is missing #' @param color_by specifies groups or values used to color the samples. This can be either: #' (1) a character giving the name of a feature present in the training data. #' (2) a character giving the same of a column present in the sample metadata. #' (3) a vector of the same length as the number of samples specifying discrete groups or continuous numeric values. #' @param shape_by specifies groups or values used to shape the samples. This can be either: #' (1) a character giving the name of a feature present in the training data, #' (2) a character giving the same of a column present in the sample metadata. #' (3) a vector of the same length as the number of samples specifying discrete groups. #' @param color_name name for color legend. #' @param shape_name name for shape legend. #' @param label logical indicating whether to label the medians of the clusters. Only if color_by is specified #' @param dot_size numeric indicating dot size. #' @param stroke numeric indicating the stroke size (the black border around the dots, default is NULL, inferred automatically). #' @param alpha_missing numeric indicating dot transparency of missing data. #' @param legend logical indicating whether to add legend. #' @param return_data logical indicating whether to return the long data frame to plot instead of plotting #' @param rasterize logical indicating whether to rasterize plot using \code{\link[ggrastr]{geom_point_rast}} #' @param ... extra arguments passed to \code{\link{run_umap}} or \code{\link{run_tsne}}. #' @details This function plots dimensionality reduction projections that are stored in the \code{dim_red} slot. #' Typically this contains UMAP or t-SNE projections computed using \code{\link{run_tsne}} or \code{\link{run_umap}}, respectively. #' @return Returns a \code{ggplot2} object or a long data.frame (if return_data is TRUE) #' @import ggplot2 #' @importFrom dplyr filter #' @importFrom stats complete.cases #' @importFrom tidyr spread gather #' @importFrom magrittr %>% set_colnames #' @importFrom ggrepel geom_text_repel #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Run UMAP #' model <- run_umap(model) #' #' # Plot UMAP #' plot_dimred(model, method = "UMAP") #' #' # Plot UMAP, colour by Factor 1 values #' plot_dimred(model, method = "UMAP", color_by = "Factor1") #' #' # Plot UMAP, colour by the values of a specific feature #' plot_dimred(model, method = "UMAP", color_by = "feature_0_view_0") #' plot_dimred <- function(object, method = c("UMAP", "TSNE"), groups = "all", show_missing = TRUE, color_by = NULL, shape_by = NULL, color_name = NULL, shape_name = NULL, label = FALSE, dot_size = 1.5, stroke = NULL, alpha_missing = 1, legend = TRUE, rasterize = FALSE, return_data = FALSE, ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # If UMAP or TSNE is requested but were not computed, compute the requested embedding if ((method %in% c("UMAP", "TSNE")) && (!.hasSlot(object, "dim_red") || !(method %in% names(object@dim_red)))) { message(paste0(method, " embedding was not computed. Running run_", tolower(method), "()...")) if (method == "UMAP") { object <- run_umap(object, ...) } else if (method == "TSNE") { object <- run_tsne(object, ...) } } # make sure the slot for the requested method exists method <- match.arg(method, names(object@dim_red)) # Plotting multiple features if (length(color_by)>1) { .args <- as.list(match.call()[-1]) plist <- lapply(color_by, function(i) { .args[["color_by"]] <- i do.call(plot_dimred, .args) }) p <- cowplot::plot_grid(plotlist=plist) return(p) } # Remember color_name and shape_name if not provided if (!is.null(color_by) && (length(color_by) == 1) && is.null(color_name)) color_name <- color_by if (!is.null(shape_by) && (length(shape_by) == 1) && is.null(shape_name)) shape_name <- shape_by # Fetch latent manifold Z <- object@dim_red[[method]] latent_dimensions_names <- colnames(Z)[-1] Z <- gather(Z, -sample, key="latent_dimension", value="value") # Subset groups groups <- .check_and_get_groups(object, groups) Z <- Z[Z$sample%in%unlist(samples_names(object)[groups]),] # Set color and shape color_by <- .set_colorby(object, color_by) shape_by <- .set_shapeby(object, shape_by) # Merge factor values with color and shape information df <- merge(Z, color_by, by="sample") df <- merge(df, shape_by, by="sample") df$shape_by <- as.character(df$shape_by) # Remove missing values if(!show_missing) df <- filter(df, !is.na(color_by) & !is.na(shape_by)) df$observed <- as.factor(!is.na(df$color_by)) # spread over latent dimensions df <- spread(df, key="latent_dimension", value="value") df <- set_colnames(df, c(colnames(df)[seq_len(4)], "x", "y")) # Return data if requested instead of plotting if (return_data) return(df) # Set stroke if (is.null(stroke)) if (length(unique(df$sample))<1000) { stroke <- 0.5 } else { stroke <- 0 } # Generate plot p <- ggplot(df, aes(x = .data$x, y = .data$y)) + labs(x = latent_dimensions_names[1], y = latent_dimensions_names[2]) + theme_classic() + theme( axis.text = element_blank(), axis.title = element_blank(), axis.line = element_line(color = "black", linewidth = 0.5), axis.ticks = element_blank() ) # Add dots if (rasterize) { message("for rasterizing the plot we use ggrastr::geom_point_rast()") p <- p + ggrastr::geom_point_rast(aes(fill = .data$color_by, shape = .data$shape_by, alpha = .data$observed), size = dot_size, stroke = stroke) } else { p <- p + geom_point(aes(fill = .data$color_by, shape = .data$shape_by, alpha = .data$observed), size = dot_size, stroke = stroke) } # Add legend for alpha if (length(unique(df$observed))>1) { p <- p + scale_alpha_manual(values = c("TRUE"=1.0, "FALSE"=alpha_missing)) } else { p <- p + scale_alpha_manual(values = 1.0) } p <- p + guides(alpha="none") # Label clusters if (label && length(unique(df$color_by)) > 1 && length(unique(df$color_by))<50) { groups <- unique(df$color_by) labels.loc <- lapply( X = groups, FUN = function(i) { data.use <- df[df[,"color_by"] == i, , drop = FALSE] data.medians <- as.data.frame(x = t(x = apply(X = data.use[, c("x","y"), drop = FALSE], MARGIN = 2, FUN = median, na.rm = TRUE))) data.medians[, "color_by"] <- i return(data.medians) } ) %>% do.call("rbind",.) p <- p + geom_text_repel(aes(label=.data$color_by), data=labels.loc) } # Add legend p <- .add_legend(p, df, legend, color_name, shape_name) return(p) } ================================================ FILE: R/enrichment.R ================================================ ########################################################## ## Functions to perform Feature Set Enrichment Analysis ## ########################################################## #' @title Run feature set Enrichment Analysis #' @name run_enrichment #' @description Method to perform feature set enrichment analysis. Here we use a slightly modified version of the \code{\link[PCGSE]{pcgse}} function. #' @param object a \code{\link{MOFA}} object. #' @param view a character with the view name, or a numeric vector with the index of the view to use. #' @param feature.sets data structure that holds feature set membership information. #' Must be a binary membership matrix (rows are feature sets and columns are features). See details below for some pre-built gene set matrices. #' @param factors character vector with the factor names, or numeric vector with the index of the factors for which to perform the enrichment. #' @param set.statistic the set statistic computed from the feature statistics. Must be one of the following: "mean.diff" (default) or "rank.sum". #' @param statistical.test the statistical test used to compute the significance of the feature set statistics under a competitive null hypothesis. #' Must be one of the following: "parametric" (default), "cor.adj.parametric", "permutation". #' @param sign use only "positive" or "negative" weights. Default is "all". #' @param min.size Minimum size of a feature set (default is 10). #' @param nperm number of permutations. Only relevant if statistical.test is set to "permutation". Default is 1000 #' @param p.adj.method Method to adjust p-values factor-wise for multiple testing. Can be any method in p.adjust.methods(). Default uses Benjamini-Hochberg procedure. #' @param alpha FDR threshold to generate lists of significant pathways. Default is 0.1 #' @param verbose boolean indicating whether to print messages on progress #' @details #' The aim of this function is to relate each factor to pre-defined biological pathways by performing a gene set enrichment analysis on the feature weights. \cr #' This function is particularly useful when a factor is difficult to characterise based only on the genes with the highest weight. \cr #' We provide a few pre-built gene set matrices in the MOFAdata package. See \code{https://github.com/bioFAM/MOFAdata} for details. \cr #' The function we implemented is based on the \code{\link[PCGSE]{pcgse}} function with some modifications. #' Please read this paper https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4543476 for details on the math. #' @return a list with five elements: #' \item{\strong{pval}:}{ matrices with nominal p-values. } #' \item{\strong{pval.adj}:}{ matrices with FDR-adjusted p-values. } #' \item{\strong{feature.statistics}:}{ matrices with the local (feature-wise) statistics. } #' \item{\strong{set.statistics}:}{ matrices with the global (gene set-wise) statistics. } #' \item{\strong{sigPathways}}{ list with significant pathways per factor. } #' @importFrom stats p.adjust var p.adjust.methods #' @export run_enrichment <- function(object, view, feature.sets, factors = "all", set.statistic = c("mean.diff", "rank.sum"), statistical.test = c("parametric", "cor.adj.parametric", "permutation"), sign = c("all","positive","negative"), min.size = 10, nperm = 1000, p.adj.method = "BH", alpha = 0.1, verbose = TRUE) { # Quality control if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (!(is(feature.sets, "matrix") & all(feature.sets %in% c(0,1)))) stop("feature.sets has to be a list or a binary matrix.") # Define views view <- .check_and_get_views(object, view) # Define factors factors <- .check_and_get_factors(object, factors) # Parse inputs sign <- match.arg(sign) set.statistic <- match.arg(set.statistic) statistical.test <- match.arg(statistical.test) # Collect observed data data <- get_data(object, views = view, as.data.frame = FALSE)[[1]] if(is(data, "list")) data <- Reduce(cbind, data) # concatenate groups data <- t(data) # Collect relevant expectations W <- get_weights(object, views=view, factors=factors, scale = TRUE)[[1]] Z <- get_factors(object, factors=factors) if(is(Z, "list")) Z <- Reduce(rbind, Z) stopifnot(rownames(data) == rownames(Z)) # Remove features with no variance # if (statistical.test %in% c("cor.adj.parametric")) { idx <- apply(data,2, function(x) var(x,na.rm=TRUE))==0 if (sum(idx)>=1) { warning(sprintf("%d features were removed because they had no variance in the data.\n",sum(idx))) data <- data[,!idx] W <- W[!idx,] } # Check if some features do not intersect between the feature sets and the observed data and remove them features <- intersect(colnames(data),colnames(feature.sets)) if(length(features)== 0) stop("Feature names in feature.sets do not match feature names in model.") if(verbose) { message(sprintf("Intersecting features names in the model and the gene set annotation results in a total of %d features.",length(features))) } data <- data[,features] W <- W[features,,drop=FALSE] feature.sets <- feature.sets[,features] # Filter feature sets with small number of features feature.sets <- feature.sets[rowSums(feature.sets)>=min.size,] # Subset weights by sign if (sign=="positive") { W[W<0] <- 0 # W[W<0] <- NA } else if (sign=="negative") { W[W>0] <- 0 # W[W>0] <- NA W <- abs(W) } # Print options if(verbose) { message("\nRunning feature set Enrichment Analysis with the following options...\n", sprintf("View: %s \n", view), sprintf("Number of feature sets: %d \n", nrow(feature.sets)), sprintf("Set statistic: %s \n", set.statistic), sprintf("Statistical test: %s \n", statistical.test) ) if (sign%in%c("positive","negative")) message(sprintf("Subsetting weights with %s sign",sign)) if (statistical.test=="permutation") { message(sprintf("Number of permutations: %d", nperm)) } message("\n") } if (nperm<100) warning("A large number of permutations (at least 1000) is required for the permutation approach!\n") # Non-parametric permutation test if (statistical.test == "permutation") { null_dist_tmp <- lapply(seq_len(nperm), function(i) { print(sprintf("Running permutation %d/%d...",i,nperm)) perm <- sample(ncol(data)) # Permute rows of the weight matrix to obtain a null distribution W_null <- W[perm,] rownames(W_null) <- rownames(W) colnames(W_null) <- colnames(W) # Permute columns of the data matrix correspondingly (only matters for cor.adjusted test) data_null <- data[,perm] rownames(data_null) <- rownames(data) # Compute null (or background) statistic s.background <- .pcgse( data = data_null, prcomp.output = list(rotation=W_null, x=Z), pc.indexes = seq_along(factors), feature.sets = feature.sets, set.statistic = set.statistic, set.test = "parametric")$statistic return(abs(s.background)) }) null_dist <- do.call("rbind", null_dist_tmp) colnames(null_dist) <- factors # Compute foreground statistics results <- .pcgse( data = data, prcomp.output = list(rotation=W, x=Z), pc.indexes = seq_along(factors), feature.sets = feature.sets, set.statistic = set.statistic, set.test = "parametric") s.foreground <- results$statistic # Calculate p-values based on fraction true statistic per factor and feature set is larger than permuted xx <- array(unlist(null_dist_tmp), dim = c(nrow(null_dist_tmp[[1]]), ncol(null_dist_tmp[[1]]), length(null_dist_tmp))) ll <- lapply(seq_len(nperm), function(i) xx[,,i] > abs(s.foreground)) results$p.values <- Reduce("+",ll)/nperm # Parametric test } else { results <- .pcgse( data = data, prcomp.output = list(rotation=W, x=Z), pc.indexes = seq_along(factors), feature.sets = feature.sets, set.statistic = set.statistic, set.test = statistical.test ) } # Parse results pathways <- rownames(feature.sets) colnames(results$p.values) <- colnames(results$statistics) <- colnames(results$feature.statistics) <- factors rownames(results$p.values) <- rownames(results$statistics) <- pathways rownames(results$feature.statistics) <- colnames(data) # adjust for multiple testing if(!p.adj.method %in% p.adjust.methods) stop("p.adj.method needs to be an element of p.adjust.methods") adj.p.values <- apply(results$p.values, 2,function(lfw) p.adjust(lfw, method = p.adj.method)) # If we specify a direction, we are only interested in overrepresented pathways in the selected direction if (sign%in%c("positive","negative")) { results$p.values[results$statistics<0] <- 1.0 adj.p.values[results$statistics<0] <- 1.0 results$statistics[results$statistics<0] <- 0 } # If we specify a direction, we are only interested in overrepresented pathways in the selected direction if (sign%in%c("positive","negative")) { results$p.values[results$statistics<0] <- 1.0 adj.p.values[results$statistics<0] <- 1.0 results$statistics[results$statistics<0] <- 0 } # obtain list of significant pathways sigPathways <- lapply(factors, function(j) rownames(adj.p.values)[adj.p.values[,j] <= alpha]) # prepare output output <- list( feature.sets = feature.sets, pval = results$p.values, pval.adj = adj.p.values, feature.statistics = results$feature.statistics, set.statistics = results$statistics, sigPathways = sigPathways ) return(output) } ######################## ## Plotting functions ## ######################## #' @title Plot output of gene set Enrichment Analysis #' @name plot_enrichment #' @description Method to plot the results of the gene set Enrichment Analysis #' @param enrichment.results output of \link{run_enrichment} function #' @param factor a string with the factor name or an integer with the factor index #' @param alpha p.value threshold to filter out gene sets #' @param max.pathways maximum number of enriched pathways to display #' @param text_size text size #' @param dot_size dot size #' @details it requires \code{\link{run_enrichment}} to be run beforehand. #' @return a \code{ggplot2} object #' @import ggplot2 #' @importFrom utils head #' @export plot_enrichment <- function(enrichment.results, factor, alpha = 0.1, max.pathways = 25, text_size = 1.0, dot_size = 5.0) { # Sanity checks stopifnot(is.numeric(alpha)) stopifnot(length(factor)==1) if (is.numeric(factor)) factor <- colnames(enrichment.results$pval.adj)[factor] if(!factor %in% colnames(enrichment.results$pval)) stop(paste0("No gene set enrichment calculated for factor ", factor)) # get p-values p.values <- enrichment.results$pval.adj # Get data tmp <- data.frame( pvalues = p.values[,factor, drop=TRUE], pathway = rownames(p.values) ) # Filter out pathways tmp <- tmp[tmp$pvalue<=alpha,,drop=FALSE] if (nrow(tmp)==0) stop("No significant pathways at the specified alpha threshold") # If there are too many pathways enriched, just keep the 'max_pathways' more significant if (nrow(tmp)>max.pathways) tmp <- head(tmp[order(tmp$pvalue),],n=max.pathways) # Convert pvalues to log scale tmp$logp <- -log10(tmp$pvalue+1e-100) #order according to significance tmp$pathway <- factor(tmp$pathway <- rownames(tmp), levels = tmp$pathway[order(tmp$pvalue, decreasing = TRUE)]) tmp$start <- 0 p <- ggplot(tmp, aes(x=.data$pathway, y=.data$logp)) + geom_point(size=dot_size) + geom_hline(yintercept=-log10(alpha), linetype="longdash") + scale_color_manual(values=c("black","red")) + geom_segment(aes(xend=.data$pathway, yend=.data$start)) + ylab("-log pvalue") + coord_flip() + theme( axis.text.y = element_text(size=rel(text_size), hjust=1, color='black'), axis.text.x = element_text(size=rel(1.2), vjust=0.5, color='black'), axis.title.y=element_blank(), legend.position='none', panel.background = element_blank() ) return(p) } #' @title Heatmap of Feature Set Enrichment Analysis results #' @name plot_enrichment_heatmap #' @description This method generates a heatmap with the adjusted p.values that #' result from the the feature set enrichment analysis. Rows are feature sets and columns are factors. #' @param enrichment.results output of \link{run_enrichment} function #' @param alpha FDR threshold to filter out unsignificant feature sets which are #' not represented in the heatmap. Default is 0.10. #' @param cap cap p-values below this threshold #' @param log_scale logical indicating whether to plot the -log of the p.values. #' @param ... extra arguments to be passed to the \link{pheatmap} function #' @return produces a heatmap #' @importFrom pheatmap pheatmap #' @importFrom grDevices colorRampPalette #' @export plot_enrichment_heatmap <- function(enrichment.results, alpha = 0.1, cap = 1e-50, log_scale = TRUE, ...) { # get p-values p.values <- enrichment.results$pval.adj # remove factors that are full of NAs p.values <- p.values[,colMeans(is.na(p.values))<1] # cap p-values p.values[p.valuesmax.pathways) baz <- head(baz[order(baz$pvalue),],n=max.pathways) } # order pathways according to significance baz$pathway <- factor(baz$pathway, levels = baz$pathway[order(baz$pvalue, decreasing = TRUE)]) # Merge foobar <- merge(foo, bar, by="feature") tmp <- merge(foobar, baz, by="pathway") # Select the top N features with the largest feature.statistic (per pathway) tmp_filt <- top_n(group_by(tmp, pathway), n=max.genes, abs(feature.statistic)) # Add number of features and p-value per pathway pathways <- unique(tmp_filt$pathway) # Add Ngenes and p-values to the pathway name df <- data.frame(pathway=pathways, nfeatures=rowSums(feature.sets,na.rm=TRUE)[pathways]) df <- merge(df, baz, by="pathway") df$pathway_long_name <- sprintf("%s\n (Ngenes = %d) \n (p-val = %0.2g)",df$pathway, df$nfeatures, df$pvalue) tmp <- merge(tmp, df[,c("pathway","pathway_long_name")], by="pathway") tmp_filt <- merge(tmp_filt, df[,c("pathway","pathway_long_name")], by="pathway") # sort pathways by p-value order_pathways <- df$pathway_long_name[order(df$pvalue,decreasing=TRUE) ] tmp$pathway_long_name <- factor(tmp$pathway_long_name, levels=order_pathways) tmp_filt$pathway_long_name <- factor(tmp_filt$pathway_long_name, levels=order_pathways) p <- ggplot(tmp, aes(x=.data[["pathway_long_name"]], y=.data[["feature.statistic"]])) + geom_text_repel(aes(x=.data[["pathway_long_name"]], y=.data[["feature.statistic"]], label=.data$feature), size=text_size, color="black", force=1, data=tmp_filt) + geom_point(size=0.5, color="lightgrey") + geom_point(aes(x=.data[["pathway_long_name"]], y=.data[["feature.statistic"]]), size=1, color="black", data=tmp_filt) + labs(x="", y="Weight (scaled)", title="") + coord_flip() + theme( axis.line = element_line(color="black"), axis.text.y = element_text(size=rel(0.75), hjust=1, color='black'), axis.text.x = element_text(size=rel(1.0), vjust=0.5, color='black'), axis.title.y=element_blank(), legend.position='none', panel.background = element_blank() ) return(p) } ############################################################# ## Internal methods for enrichment analysis (not exported) ## ############################################################# # This is a modified version of the PCGSE module .pcgse = function(data, prcomp.output, feature.sets, pc.indexes, set.statistic, set.test) { # Sanity checks if (is.null(feature.sets)) stop("'feature.sets' must be specified!") if (!(set.statistic %in% c("mean.diff", "rank.sum"))) stop("set.statistic must be 'mean.diff' or 'rank.sum'") if (!(set.test %in% c("parametric", "cor.adj.parametric", "permutation"))) stop("set.test must be one of 'parametric', 'cor.adj.parametric', 'permutation'") # Turn the feature set matrix into list form set.indexes <- feature.sets if (is.matrix(feature.sets)) { set.indexes <- .createVarGroupList(var.groups=feature.sets) } # Compute the feature statistics. feature.statistics <- matrix(0, nrow=ncol(data), ncol=length(pc.indexes)) for (i in seq_along(pc.indexes)) { feature.statistics[,i] <- .compute_feature_statistics( data = data, prcomp.output = prcomp.output, pc.index = pc.indexes[i] ) } # Compute the set statistics. if (set.test == "parametric" || set.test == "cor.adj.parametric") { if (set.statistic == "mean.diff") { results <- .pcgse_ttest( data = data, prcomp.output = prcomp.output, pc.indexes = pc.indexes, set.indexes = set.indexes, feature.statistics = feature.statistics, cor.adjustment = (set.test == "cor.adj.parametric") ) } else if (set.statistic == "rank.sum") { results <- .pcgse_wmw( data = data, prcomp.output = prcomp.output, pc.indexes = pc.indexes, set.indexes = set.indexes, feature.statistics = feature.statistics, cor.adjustment = (set.test == "cor.adj.parametric") ) } } # Add feature.statistics to the results results$feature.statistics <- feature.statistics return (results) } # Turn the annotation matrix into a list of var group indexes for the valid sized var groups .createVarGroupList <- function(var.groups) { var.group.indexes <- list() for (i in seq_len(nrow(var.groups))) { member.indexes <- which(var.groups[i,]==1) var.group.indexes[[i]] <- member.indexes } names(var.group.indexes) <- rownames(var.groups) return (var.group.indexes) } # Computes the feature-level statistics .compute_feature_statistics <- function(data, prcomp.output, pc.index) { feature.statistics <- prcomp.output$rotation[,pc.index] feature.statistics <- vapply(feature.statistics, abs, numeric(1)) return (feature.statistics) } # Compute enrichment via t-test #' @importFrom stats pt var .pcgse_ttest <- function(data, prcomp.output, pc.indexes, set.indexes, feature.statistics, cor.adjustment) { num.feature.sets <- length(set.indexes) # Create matrix for p-values p.values <- matrix(0, nrow=num.feature.sets, ncol=length(pc.indexes)) rownames(p.values) <- names(set.indexes) # Create matrix for set statistics set.statistics <- matrix(TRUE, nrow=num.feature.sets, ncol=length(pc.indexes)) rownames(set.statistics) <- names(set.indexes) for (i in seq_len(num.feature.sets)) { indexes.for.feature.set <- set.indexes[[i]] m1 <- length(indexes.for.feature.set) not.set.indexes <- which(!(seq_len(ncol(data)) %in% indexes.for.feature.set)) m2 <- length(not.set.indexes) if (cor.adjustment) { # compute sample correlation matrix for members of feature set cor.mat <- cor(data[,indexes.for.feature.set], use = "complete.obs") # compute the mean pair-wise correlation mean.cor <- (sum(cor.mat) - m1)/(m1*(m1-1)) # compute the VIF, using CAMERA formula from Wu et al., based on Barry et al. vif <- 1 + (m1 -1)*mean.cor } for (j in seq_along(pc.indexes)) { # get the feature statistics for this PC pc.feature.stats <- feature.statistics[,j] # compute the mean difference of the feature-level statistics mean.diff <- mean(pc.feature.stats[indexes.for.feature.set],na.rm=TRUE) - mean(pc.feature.stats[not.set.indexes], na.rm=TRUE) # compute the pooled standard deviation pooled.sd <- sqrt(((m1-1)*var(pc.feature.stats[indexes.for.feature.set], na.rm=TRUE) + (m2-1)*var(pc.feature.stats[not.set.indexes], na.rm=TRUE))/(m1+m2-2)) # compute the t-statistic if (cor.adjustment) { t.stat <- mean.diff/(pooled.sd*sqrt(vif/m1 + 1/m2)) df <- nrow(data)-2 } else { t.stat <- mean.diff/(pooled.sd*sqrt(1/m1 + 1/m2)) df <- m1+m2-2 } set.statistics[i,j] <- t.stat # compute the p-value via a two-sided test lower.p <- pt(t.stat, df=df, lower.tail=TRUE) upper.p <- pt(t.stat, df=df, lower.tail=FALSE) p.values[i,j] <- 2*min(lower.p, upper.p) } } # Build the result list results <- list() results$p.values <- p.values results$statistics <- set.statistics return (results) } # Compute enrichment via Wilcoxon Mann Whitney #' @importFrom stats wilcox.test pnorm .pcgse_wmw <- function(data, prcomp.output, pc.indexes, set.indexes, feature.statistics, cor.adjustment) { num.feature.sets <- length(set.indexes) # Create matrix for p-values p.values <- matrix(0, nrow=num.feature.sets, ncol=length(pc.indexes)) rownames(p.values) <- names(set.indexes) # Create matrix for set statistics set.statistics <- matrix(TRUE, nrow=num.feature.sets, ncol=length(pc.indexes)) rownames(set.statistics) <- names(set.indexes) for (i in seq_len(num.feature.sets)) { indexes.for.feature.set <- set.indexes[[i]] m1 <- length(indexes.for.feature.set) not.set.indexes <- which(!(seq_len(ncol(data)) %in% indexes.for.feature.set)) m2 <- length(not.set.indexes) if (cor.adjustment) { # compute sample correlation matrix for members of feature set cor.mat <- cor(data[,indexes.for.feature.set], use="complete.obs") # compute the mean pair-wise correlation mean.cor <- (sum(cor.mat) - m1)/(m1*(m1-1)) } for (j in seq_along(pc.indexes)) { # get the feature-level statistics for this PC pc.feature.stats <- feature.statistics[,j] # compute the rank sum statistic feature-level statistics wilcox.results <- wilcox.test(x=pc.feature.stats[indexes.for.feature.set], y=pc.feature.stats[not.set.indexes], alternative="two.sided", exact=FALSE, correct=FALSE) rank.sum = wilcox.results$statistic if (cor.adjustment) { # Using correlation-adjusted formula from Wu et al. var.rank.sum <- ((m1*m2)/(2*pi))* (asin(1) + (m2 - 1)*asin(.5) + (m1-1)*(m2-1)*asin(mean.cor/2) +(m1-1)*asin((mean.cor+1)/2)) } else { var.rank.sum <- m1*m2*(m1+m2+1)/12 } z.stat <- (rank.sum - (m1*m2)/2)/sqrt(var.rank.sum) set.statistics[i,j] <- z.stat # compute the p-value via a two-sided z-test lower.p <- pnorm(z.stat, lower.tail=TRUE) upper.p <- pnorm(z.stat, lower.tail=FALSE) p.values[i,j] <- 2*min(lower.p, upper.p) } } # Build the result list results <- list() results$p.values <- p.values results$statistics <- set.statistics return (results) } ================================================ FILE: R/get_methods.R ================================================ ################################################ ## Get functions to fetch data from the model ## ################################################ #' @title Get dimensions #' @name get_dimensions #' @description Extract dimensionalities from the model. #' @details K indicates the number of factors, M indicates the number of views, D indicates the number of features (per view), #' N indicates the number of samples (per group) and C indicates the number of covariates. #' @param object a \code{\link{MOFA}} object. #' @return list containing the dimensionalities of the model #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' dims <- get_dimensions(model) get_dimensions <- function(object) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") return(object@dimensions) } #' @title Get ELBO #' @name get_elbo #' @description Extract the value of the ELBO statistics after model training. This can be useful for model selection. #' @details This can be useful for model selection. #' @param object a \code{\link{MOFA}} object. #' @return Value of the ELBO #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' elbo <- get_elbo(model) get_elbo <- function(object) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") return(max(object@training_stats$elbo, na.rm=TRUE)) } #' @title Get lengthscales #' @name get_lengthscales #' @description Extract the inferred lengthscale for each factor after model training. #' @details This can be used only if covariates are passed to the MOFAobject upon creation and GP_factors is set to True. #' @param object a \code{\link{MOFA}} object. #' @return A numeric vector containing the lengthscale for each factor. #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' ls <- get_lengthscales(model) get_lengthscales <- function(object) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if(!.hasSlot(object, "covariates") || is.null(object@covariates)) stop("No covariates specified in 'object'") if(is.null(object@training_stats$length_scales)) stop("No lengthscales saved in 'object' \n Make sure you specify the covariates and train setting the option 'GP_factors' to TRUE.") tmp <- object@training_stats$length_scales return(tmp) } #' @title Get scales #' @name get_scales #' @description Extract the inferred scale for each factor after model training. #' @details This can be used only if covariates are passed to the MOFAobject upon creation and GP_factors is set to True. #' @param object a \code{\link{MOFA}} object. #' @return A numeric vector containing the scale for each factor. #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' s <- get_scales(model) get_scales <- function(object) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if(!.hasSlot(object, "covariates") || is.null(object@covariates)) stop("No covariates specified in 'object'") if(is.null(object@training_stats$scales)) stop("No scales saved in 'object' \n Make sure you specify the covariates and train setting the option 'GP_factors' to TRUE.") tmp <- object@training_stats$scales return(tmp) } #' @title Get group covariance matrix #' @name get_group_kernel #' @description Extract the inferred group-group covariance matrix per factor #' @details This can be used only if covariates are passed to the MOFAobject upon creation and GP_factors is set to True. #' @param object a \code{\link{MOFA}} object. #' @return A list of group-group correlation matrices per factor #' @export get_group_kernel <- function(object) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if(is.null(object@covariates)) stop("No covariates specified in 'object'") if (is.null(object@mefisto_options)) stop("'object' does have MEFISTO training options.") if(!object@mefisto_options$model_groups || object@dimensions$G == 1) { tmp <- lapply(seq_len(dim(object@training_stats$Kg)[3]), function(x) { mat <- matrix(1, nrow = object@dimensions$G, ncol = object@dimensions$G) rownames(mat) <- colnames(mat) <- groups_names(object) mat }) } else { if(is.null(object@training_stats$Kg)) stop("No group kernel saved in 'object' \n Make sure you specify the covariates and train setting the option 'model_groups' to TRUE.") tmp <- lapply(seq_len(dim(object@training_stats$Kg)[3]), function(x) { mat <- object@training_stats$Kg[ , , x] rownames(mat) <- colnames(mat) <- groups_names(object) mat }) } names(tmp) <- factors_names(object) return(tmp) } #' @title Get interpolated factor values #' @name get_interpolated_factors #' @description Extract the interpolated factor values #' @details This can be used only if covariates are passed to the object upon creation, GP_factors is set to True and new covariates were passed for interpolation. #' @param object a \code{\link{MOFA}} object #' @param as.data.frame logical indicating whether to return data as a data.frame #' @param only_mean logical indicating whether include only mean or also uncertainties #' @return By default, a nested list containing for each group a list with a matrix with the interpolated factor values ("mean"), #' their variance ("variance") and the values of the covariate at which interpolation took place ("new_values"). #' Alternatively, if \code{as.data.frame} is \code{TRUE}, returns a long-formatted data frame with columns containing the covariates #' and (factor, group, mean and variance). #' @import dplyr #' @import reshape2 #' @export get_interpolated_factors <- function(object, as.data.frame = FALSE, only_mean = FALSE) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if(is.null(object@interpolated_Z)) stop("No interpolated factors present in 'object'") if(length(object@interpolated_Z) == 0) stop("No interpolated factors present in 'object'") if(!as.data.frame){ return(object@interpolated_Z) } else { type <- NULL preds <- lapply(object@interpolated_Z, function(l) l[names(l)[names(l) != "new_values"]]) df_interpol <- reshape2::melt(preds, varnames = c("factor", "sample_id")) df_interpol <- dplyr::rename(df_interpol, group = L1, type = L2) if(only_mean){ df_interpol <- filter(df_interpol, type == "mean") } if("new_values" %in% names(object@interpolated_Z[[1]])) { new_vals <- lapply(object@interpolated_Z, function(l) l[names(l)[names(l) == "new_values"]]) new_vals <- reshape2::melt(new_vals, varnames = c("covariate","sample_id")) new_vals <- mutate(new_vals, covariate = covariates_names(object)) new_vals <- rename(new_vals, group = L1, covariate_value = value) new_vals <- spread(new_vals, key = covariate, value = covariate_value) new_vals <- select(new_vals, -L2) df_interpol <- left_join(df_interpol, new_vals, by = c("group", "sample_id")) df_interpol <- select(df_interpol, -sample_id) } else { # compatibility to older objects df_interpol <- rename(df_interpol, covariate_value = sample_id) df_interpol <- mutate(df_interpol, covariate = covariates_names(object)) } df_interpol <- mutate(df_interpol, factor = factors_names(object)[factor]) df_interpol <- spread(df_interpol, key = type, value = value) return(df_interpol) } } #' @title Get factors #' @name get_factors #' @description Extract the latent factors from the model. #' @param object a trained \code{\link{MOFA}} object. #' @param factors character vector with the factor name(s), or numeric vector with the factor index(es). #' Default is "all". #' @param groups character vector with the group name(s), or numeric vector with the group index(es). #' Default is "all". #' @param scale logical indicating whether to scale factor values. #' @param as.data.frame logical indicating whether to return a long data frame instead of a matrix. #' Default is \code{FALSE}. #' @return By default it returns the latent factor matrix of dimensionality (N,K), where N is number of samples and K is number of factors. \cr #' Alternatively, if \code{as.data.frame} is \code{TRUE}, returns a long-formatted data frame with columns (sample,factor,value). #' @export #' #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Fetch factors in matrix format (a list, one matrix per group) #' factors <- get_factors(model) #' #' # Concatenate groups #' factors <- do.call("rbind",factors) #' #' # Fetch factors in data.frame format instead of matrix format #' factors <- get_factors(model, as.data.frame = TRUE) get_factors <- function(object, groups = "all", factors = "all", scale = FALSE, as.data.frame = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get groups groups <- .check_and_get_groups(object, groups) # Get factors factors <- .check_and_get_factors(object, factors) # Collect factors Z <- get_expectations(object, "Z", as.data.frame) if (as.data.frame) { Z <- Z[Z$factor%in%factors & Z$group%in%groups,] if (scale) Z$value <- Z$value/max(abs(Z$value),na.rm=TRUE) } else { Z <- lapply(Z[groups], function(z) z[,factors, drop=FALSE]) if (scale) Z <- lapply(Z, function(x) x/max(abs(x)) ) names(Z) <- groups } return(Z) } #' @title Get weights #' @name get_weights #' @description Extract the weights from the model. #' @param object a trained \code{\link{MOFA}} object. #' @param views character vector with the view name(s), or numeric vector with the view index(es). #' Default is "all". #' @param factors character vector with the factor name(s) or numeric vector with the factor index(es). \cr #' Default is "all". #' @param abs logical indicating whether to take the absolute value of the weights. #' @param scale logical indicating whether to scale all weights from -1 to 1 (or from 0 to 1 if \code{abs=TRUE}). #' @param as.data.frame logical indicating whether to return a long data frame instead of a list of matrices. #' Default is \code{FALSE}. #' @return By default it returns a list where each element is a loading matrix with dimensionality (D,K), #' where D is the number of features and K is the number of factors. \cr #' Alternatively, if \code{as.data.frame} is \code{TRUE}, returns a long-formatted data frame with columns (view,feature,factor,value). #' @export #' #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Fetch weights in matrix format (a list, one matrix per view) #' weights <- get_weights(model) #' #' # Fetch weights for factor 1 and 2 and view 1 #' weights <- get_weights(model, views = 1, factors = c(1,2)) #' #' # Fetch weights in data.frame format #' weights <- get_weights(model, as.data.frame = TRUE) get_weights <- function(object, views = "all", factors = "all", abs = FALSE, scale = FALSE, as.data.frame = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get views views <- .check_and_get_views(object, views) factors <- .check_and_get_factors(object, factors) # Fetch weights weights <- get_expectations(object, "W", as.data.frame) if (as.data.frame) { weights <- weights[weights$view %in% views & weights$factor %in% factors, ] if (abs) weights$value <- abs(weights$value) if (scale) weights$value <- weights$value/max(abs(weights$value)) } else { weights <- lapply(weights[views], function(x) x[,factors,drop=FALSE]) if (abs) weights <- lapply(weights, abs) if (scale) weights <- lapply(weights, function(x) x/max(abs(x)) ) names(weights) <- views } return(weights) } #' @title Get data #' @name get_data #' @description Fetch the input data #' @param object a \code{\link{MOFA}} object. #' @param views character vector with the view name(s), or numeric vector with the view index(es). #' Default is "all". #' @param groups character vector with the group name(s), or numeric vector with the group index(es). #' Default is "all". #' @param features a *named* list of character vectors. Example: list("view1"=c("feature_1","feature_2"), "view2"=c("feature_3","feature_4")) #' Default is "all". #' @param as.data.frame logical indicating whether to return a long data frame instead of a list of matrices. Default is \code{FALSE}. #' @param add_intercept logical indicating whether to add feature intercepts to the data. Default is \code{TRUE}. #' @param denoise logical indicating whether to return the denoised data (i.e. the model predictions). Default is \code{FALSE}. #' @param na.rm remove NAs from the data.frame (only if as.data.frame is \code{TRUE}). #' @details By default this function returns a list where each element is a data matrix with dimensionality (D,N) #' where D is the number of features and N is the number of samples. \cr #' Alternatively, if \code{as.data.frame} is \code{TRUE}, the function returns a long-formatted data frame with columns (view,feature,sample,value). #' Missing values are not included in the the long data.frame format by default. To include them use the argument \code{na.rm=FALSE}. #' @return A list of data matrices with dimensionality (D,N) or a \code{data.frame} (if \code{as.data.frame} is TRUE) #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Fetch data #' data <- get_data(model) #' #' # Fetch a specific view #' data <- get_data(model, views = "view_0") #' #' # Fetch data in data.frame format instead of matrix format #' data <- get_data(model, as.data.frame = TRUE) #' #' # Fetch centered data (do not add the feature intercepts) #' data <- get_data(model, as.data.frame = FALSE) #' #' # Fetch denoised data (do not add the feature intercepts) #' data <- get_data(model, denoise = TRUE) get_data <- function(object, views = "all", groups = "all", features = "all", as.data.frame = FALSE, add_intercept = TRUE, denoise = FALSE, na.rm = TRUE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get views and groups views <- .check_and_get_views(object, views) groups <- .check_and_get_groups(object, groups) # Get features if (is(features, "list")) { if (is.null(names(features))) stop("features has to be a *named* list of character vectors. Please see the documentation") if (!(names(features)%in%views_names(object))) stop("Views not recognised") if (!all(sapply(names(features), function(i) all(features[[i]] %in% features_names(object)[[i]]) ))) stop("features not recognised") if (any(sapply(features,length)<1)) stop("features not recognised, please read the documentation") views <- names(features) } else { if (paste0(features, collapse="") == "all") { features <- features_names(object)[views] } else { stop("features not recognised, please read the documentation") } } # Fetch data if (denoise) { data <- predict(object, views=views, groups=groups) } else { data <- lapply(object@data[views], function(x) x[groups]) } data <- lapply(views, function(m) lapply(seq_len(length(data[[1]])), function(p) data[[m]][[p]][as.character(features[[m]]),,drop=FALSE])) data <- .name_views_and_groups(data, views, groups) # Add feature intercepts (only for gaussian likelihoods) tryCatch( { if (add_intercept & length(object@intercepts[[1]])>0) { intercepts <- lapply(object@intercepts[views], function(x) x[groups]) intercepts <- lapply(seq_len(length(intercepts)), function(m) lapply(seq_len(length(intercepts[[1]])), function(p) intercepts[[m]][[p]][as.character(features[[m]])])) intercepts <- .name_views_and_groups(intercepts, views, groups) for (m in names(data)) { if (object@model_options$likelihoods[[m]]=="gaussian") { for (g in names(data[[m]])) { data[[m]][[g]] <- data[[m]][[g]] + intercepts[[m]][[g]][as.character(features[[m]])] } } } } }, error = function(e) { NULL }) # Convert to long data frame if (as.data.frame) { tmp <- lapply(views, function(m) { lapply(groups, function(p) { tmp <- reshape2::melt(data[[m]][[p]], na.rm=na.rm) if(nrow(tmp) >0 & !is.null(tmp)) { colnames(tmp) <- c("feature", "sample", "value") tmp <- cbind(view = m, group = p, tmp) return(tmp) } }) }) data <- do.call(rbind, do.call(rbind, tmp)) factor.cols <- c("view","group","feature","sample") data[factor.cols] <- lapply(data[factor.cols], factor) } return(data) } #' @title Get imputed data #' @name get_imputed_data #' @description Function to get the imputed data. It requires the previous use of the \code{\link{impute}} method. #' @param object a trained \code{\link{MOFA}} object. #' @param views character vector with the view name(s), or numeric vector with the view index(es). #' Default is "all". #' @param groups character vector with the group name(s), or numeric vector with the group index(es). #' Default is "all". #' @param features list of character vectors with the feature names or list of numeric vectors with the feature indices. #' Default is "all". #' @param as.data.frame logical indicating whether to return a long-formatted data frame instead of a list of matrices. #' Default is \code{FALSE}. #' @details Data is imputed from the generative model of MOFA. #' @return A list containing the imputed valued or a data.frame if as.data.frame is TRUE #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' model <- impute(model) #' imputed <- get_imputed_data(model) get_imputed_data <- function(object, views = "all", groups = "all", features = "all", as.data.frame = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (length(object@imputed_data)==0) stop("imputed data not found, did you run: 'object <- impute(object)'?") # Get views and groups views <- .check_and_get_views(object, views) groups <- .check_and_get_groups(object, groups) # Get features if (is(features, "list")) { stopifnot(all(sapply(seq_len(length(features)), function(i) all(features[[i]] %in% features_names(object)[[views[i]]])))) stopifnot(length(features)==length(views)) if (is.null(names(features))) names(features) <- views } else { if (paste0(features, collapse="") == "all") { features <- features_names(object)[views] } else { stop("features not recognised, please read the documentation") } } # Fetch mean imputed_data <- lapply(object@imputed_data[views], function(x) x[groups] ) imputed_data <- lapply(seq_len(length(imputed_data)), function(m) lapply(seq_len(length(imputed_data[[1]])), function(p) imputed_data[[m]][[p]][as.character(features[[m]]),,drop=FALSE])) imputed_data <- .name_views_and_groups(imputed_data, views, groups) # Add feature intercepts # tryCatch( { # # if (add_intercept & length(object@intercepts[[1]])>0) { # intercepts <- lapply(object@intercepts[views], function(x) x[groups]) # intercepts <- .name_views_and_groups(intercepts, views, groups) # # for (m in names(imputed_data)) { # for (g in names(imputed_data[[m]])) { # imputed_data[[m]][[g]] <- imputed_data[[m]][[g]] + intercepts[[m]][[g]][as.character(features[[m]])] # } # } # } }, error = function(e) { NULL }) # Convert to long data frame if (isTRUE(as.data.frame)) { imputed_data <- lapply(views, function(m) { lapply(groups, function(g) { tmp <- reshape2::melt(imputed_data[[m]][[g]]) colnames(tmp) <- c("feature", "sample", "value") tmp <- cbind(view = m, group = g, tmp) return(tmp) }) }) imputed_data <- do.call(rbind, do.call(rbind, imputed_data)) factor.cols <- c("view","group","feature","sample") imputed_data[factor.cols] <- lapply(imputed_data[factor.cols], factor) } return(imputed_data) } #' @title Get expectations #' @name get_expectations #' @description Function to extract the expectations from the (variational) posterior distributions of a trained \code{\link{MOFA}} object. #' @param object a trained \code{\link{MOFA}} object. #' @param variable variable name: 'Z' for factors and 'W' for weights. #' @param as.data.frame logical indicating whether to output the result as a long data frame, default is \code{FALSE}. #' @details Technical note: MOFA is a Bayesian model where each variable has a prior distribution and a posterior distribution. #' In particular, to achieve scalability we used the variational inference framework, thus true posterior distributions are replaced by approximated variational distributions. #' This function extracts the expectations of the variational distributions, which can be used as final point estimates to analyse the results of the model. \cr #' The priors and variational distributions of each variable are extensively described in the supplementary methods of the original paper. #' @return the output varies depending on the variable of interest: \cr #' \itemize{ #' \item{\strong{"Z"}: a matrix with dimensions (samples,factors). If \code{as.data.frame} is \code{TRUE}, a long-formatted data frame with columns (sample,factor,value)} #' \item{\strong{"W"}: a list of length (views) where each element is a matrix with dimensions (features,factors). If \code{as.data.frame} is \code{TRUE}, a long-formatted data frame with columns (view,feature,factor,value)} #' } #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' factors <- get_expectations(model, "Z") #' weights <- get_expectations(model, "W") get_expectations <- function(object, variable, as.data.frame = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(variable %in% names(object@expectations)) # Get expectations in single matrix or list of matrices (for multi-view nodes) exp <- object@expectations[[variable]] # unlist single view nodes - single Sigma node across all groups using time warping if(variable == "Sigma") exp <- exp[[1]] # For memory and space efficiency, Y expectations are not saved to the model file when using only gaussian likelihoods. if (variable == "Y") { if ((length(object@expectations$Y) == 0) && all(object@model_options$likelihood == "gaussian")) { # message("Using training data slot as Y expectations since all the likelihoods are gaussian.") exp <- object@data } } # Convert to long data frame if (as.data.frame) { # Z node if (variable=="Z") { tmp <- reshape2::melt(exp, na.rm=TRUE) colnames(tmp) <- c("sample", "factor", "value", "group") tmp$sample <- as.character(tmp$sample) factor.cols <- c("sample", "factor", "group") factor.cols[factor.cols] <- lapply(factor.cols[factor.cols], factor) } # W node else if (variable=="W") { tmp <- lapply(names(exp), function(m) { tmp <- reshape2::melt(exp[[m]], na.rm=TRUE) colnames(tmp) <- c("feature","factor","value") tmp$view <- m factor.cols <- c("view", "feature", "factor") tmp[factor.cols] <- lapply(tmp[factor.cols], factor) return(tmp) }) tmp <- do.call(rbind.data.frame,tmp) } # Y node else if (variable=="Y") { tmp <- lapply(names(exp), function(m) { tmp <- lapply(names(exp[[m]]), function(g) { tmp <- reshape2::melt(exp[[m]][[g]], na.rm=TRUE) colnames(tmp) <- c("sample", "feature", "value") tmp$view <- m tmp$group <- g factor.cols <- c("view", "group", "feature", "factor") tmp[factor.cols] <- lapply(tmp[factor.cols], factor) return(tmp) }) }) tmp <- do.call(rbind, tmp) } exp <- tmp } return(exp) } #' @title Get variance explained values #' @name get_variance_explained #' @description Extract the latent factors from the model. #' @param object a trained \code{\link{MOFA}} object. #' @param factors character vector with the factor name(s), or numeric vector with the factor index(es). #' Default is "all". #' @param groups character vector with the group name(s), or numeric vector with the group index(es). #' Default is "all". #' @param views character vector with the view name(s), or numeric vector with the view index(es). #' Default is "all". #' @param as.data.frame logical indicating whether to return a long data frame instead of a matrix. #' Default is \code{FALSE}. #' @return A list of data matrices with variance explained per group or a \code{data.frame} (if \code{as.data.frame} is TRUE) #' @export #' #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Fetch variance explained values (in matrix format) #' r2 <- get_variance_explained(model) #' #' # Fetch variance explained values (in data.frame format) #' r2 <- get_variance_explained(model, as.data.frame = TRUE) #' get_variance_explained <- function(object, groups = "all", views = "all", factors = "all", as.data.frame = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get factors and groups groups <- .check_and_get_groups(object, groups) views <- .check_and_get_views(object, views) factors <- .check_and_get_factors(object, factors) # Fetch R2 if (.hasSlot(object, "cache") && ("variance_explained" %in% names(object@cache))) { r2_list <- object@cache$variance_explained } else { r2_list <- calculate_variance_explained(object, factors = factors, views = views, groups = groups) } # Convert to data.frame format if (as.data.frame) { # total R2 r2_total <- reshape2::melt( do.call("rbind",r2_list[["r2_total"]]) ) colnames(r2_total) <- c("group", "view", "value") # R2 per factor r2_per_factor <- lapply(names(r2_list[["r2_per_factor"]]), function(g) { x <- reshape2::melt( r2_list[["r2_per_factor"]][[g]] ) colnames(x) <- c("factor", "view", "value") x$factor <- as.factor(x$factor) x$group <- g return(x) }) r2_per_factor <- do.call("rbind",r2_per_factor)[,c("group","view","factor","value")] r2 <- list("r2_per_factor"=r2_per_factor, "r2_total"=r2_total) } else { r2 <- r2_list } return(r2) } ================================================ FILE: R/imports.R ================================================ #' Re-exporting the pipe operator #' See \code{magrittr::\link[magrittr]{\%>\%}} for details. #' #' @name %>% #' @rdname pipe #' @param lhs see \code{magrittr::\link[magrittr]{\%>\%}} #' @param rhs see \code{magrittr::\link[magrittr]{\%>\%}} #' @export #' @importFrom magrittr %>% #' @usage lhs \%>\% rhs #' @return depending on lhs and rhs NULL ================================================ FILE: R/impute.R ================================================ ####################################################### ## Functions to perform imputation of missing values ## ####################################################### #' @title Impute missing values from a fitted MOFA #' @name impute #' @description This function uses the latent factors and the loadings to impute missing values. #' @param object a \code{\link{MOFA}} object. #' @param views character vector with the view name(s), or numeric vector with view index(es). #' @param groups character vector with the group name(s), or numeric vector with group index(es). #' @param factors character vector with the factor names, or numeric vector with the factor index(es). #' @param add_intercept add feature intercepts to the imputation (default is TRUE). #' @details MOFA generates a denoised and condensed low-dimensional representation of the data that captures the main sources of heterogeneity of the data. #' This representation can be used to reconstruct the data, simply using the equation \code{Y = WX}. #' For more details read the supplementary methods of the manuscript. \cr #' Note that with \code{\link{impute}} you can only generate the point estimates (the means of the posterior distributions). #' If you want to add uncertainty estimates (the variance) you need to set \code{impute=TRUE} in the training options. #' See \code{\link{get_default_training_options}}. #' @return This method fills the \code{imputed_data} slot by replacing the missing values in the input data with the model predictions. #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Impute missing values in all data modalities #' imputed_data <- impute(model, views = "all") #' #' # Impute missing values in all data modalities using factors 1:3 #' imputed_data <- impute(model, views = "all", factors = 1:3) impute <- function(object, views = "all", groups = "all", factors = "all", add_intercept = TRUE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (length(object@imputed_data)>0) warning("imputed_data slot is already filled. It will be replaced and the variance estimates will be lost...") # Get views and groups views <- .check_and_get_views(object, views, non_gaussian=FALSE) groups <- .check_and_get_groups(object, groups) # Do predictions pred <- predict(object, views=views, factors=factors, add_intercept=add_intercept) # Replace NAs with predicted values imputed <- get_data(object, views=views, groups=groups, add_intercept = add_intercept) for (m in views) { for (g in groups) { imputed[[m]][[g]] <- imputed[[m]][[g]] non_observed <- is.na(imputed[[m]][[g]]) imputed[[m]][[g]][non_observed] <- pred[[m]][[g]][non_observed] } } # Save imputed data in the corresponding slot object@imputed_data <- imputed return(object) } ================================================ FILE: R/load_model.R ================================================ ############################################ ## Functions to load a trained MOFA model ## ############################################ #' @title Load a trained MOFA #' @name load_model #' @description Method to load a trained MOFA \cr #' The training of mofa is done using a Python framework, and the model output is saved as an .hdf5 file, which has to be loaded in the R package. #' @param file an hdf5 file saved by the mofa Python framework #' @param sort_factors logical indicating whether factors should be sorted by variance explained (default is TRUE) #' @param on_disk logical indicating whether to work from memory (FALSE) or disk (TRUE). \cr #' This should be set to TRUE when the training data is so big that cannot fit into memory. \cr #' On-disk operations are performed using the \code{\link{HDF5Array}} and \code{\link{DelayedArray}} framework. #' @param load_data logical indicating whether to load the training data (default is TRUE, it can be memory expensive) #' @param remove_outliers logical indicating whether to mask outlier values. #' @param remove_inactive_factors logical indicating whether to remove inactive factors from the model. # #' @param remove_intercept_factors logical indicating whether to remove intercept factors for non-Gaussian views. #' @param verbose logical indicating whether to print verbose output (default is FALSE) #' @param load_interpol_Z (MEFISTO) logical indicating whether to load predictions for factor values based on latent processed (only #' relevant for models trained with covariates and Gaussian processes, where prediction was enabled) #' @return a \code{\link{MOFA}} model #' @importFrom rhdf5 h5read h5ls #' @importFrom HDF5Array HDF5ArraySeed #' @importFrom DelayedArray DelayedArray #' @importFrom dplyr bind_rows #' @export #' @examples #' #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) load_model <- function(file, sort_factors = TRUE, on_disk = FALSE, load_data = TRUE, remove_outliers = FALSE, remove_inactive_factors = TRUE, verbose = FALSE, load_interpol_Z = FALSE) { # Create new MOFAodel object object <- new("MOFA") object@status <- "trained" # Set on_disk option if (on_disk) { object@on_disk <- TRUE } else { object@on_disk <- FALSE } # Get groups and data set names from the hdf5 file object h5ls.out <- h5ls(file, datasetinfo = FALSE) ######################## ## Load training data ## ######################## # Load names if ("views" %in% h5ls.out$name) { view_names <- as.character( h5read(file, "views")[[1]] ) group_names <- as.character( h5read(file, "groups")[[1]] ) feature_names <- h5read(file, "features")[view_names] sample_names <- h5read(file, "samples")[group_names] } else { # for old models feature_names <- h5read(file, "features") sample_names <- h5read(file, "samples") view_names <- names(feature_names) group_names <- names(sample_names) h5ls.out <- h5ls.out[grep("variance_explained", h5ls.out$name, invert = TRUE),] } if("covariates" %in% h5ls.out$name){ covariate_names <- as.character( h5read(file, "covariates")[[1]]) } else { covariate_names <- NULL } # Load training data (as nested list of matrices) data <- list(); intercepts <- list() if (load_data && "data"%in%h5ls.out$name) { object@data_options[["loaded"]] <- TRUE if (verbose) message("Loading data...") for (m in view_names) { data[[m]] <- list() intercepts[[m]] <- list() for (g in group_names) { if (on_disk) { # as DelayedArrays data[[m]][[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("data/%s/%s", m, g) ) ) } else { # as matrices data[[m]][[g]] <- h5read(file, sprintf("data/%s/%s", m, g) ) tryCatch(intercepts[[m]][[g]] <- as.numeric( h5read(file, sprintf("intercepts/%s/%s", m, g) ) ), error = function(e) { NULL }) } # Replace NaN by NA data[[m]][[g]][is.nan(data[[m]][[g]])] <- NA # this realised into memory, TO FIX } } # Create empty training data (as nested list of empty matrices, with the correct dimensions) } else { object@data_options[["loaded"]] <- FALSE for (m in view_names) { data[[m]] <- list() for (g in group_names) { data[[m]][[g]] <- .create_matrix_placeholder(rownames = feature_names[[m]], colnames = sample_names[[g]]) } } } object@data <- data object@intercepts <- intercepts # Load metadata if any if ("samples_metadata" %in% h5ls.out$name) { object@samples_metadata <- bind_rows(lapply(group_names, function(g) as.data.frame(h5read(file, sprintf("samples_metadata/%s", g))))) } if ("features_metadata" %in% h5ls.out$name) { object@features_metadata <- bind_rows(lapply(view_names, function(m) as.data.frame(h5read(file, sprintf("features_metadata/%s", m))))) } ############################ ## Load sample covariates ## ############################ if (any(grepl("cov_samples", h5ls.out$group))){ covariates <- list() for (g in group_names) { if (on_disk) { # as DelayedArrays covariates[[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("cov_samples/%s", g) ) ) } else { # as matrices covariates[[g]] <- h5read(file, sprintf("cov_samples/%s", g) ) } } } else covariates <- NULL object@covariates <- covariates if (any(grepl("cov_samples_transformed", h5ls.out$group))){ covariates_warped <- list() for (g in group_names) { if (on_disk) { # as DelayedArrays covariates_warped[[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("cov_samples_transformed/%s", g) ) ) } else { # as matrices covariates_warped[[g]] <- h5read(file, sprintf("cov_samples_transformed/%s", g) ) } } } else covariates_warped <- NULL object@covariates_warped <- covariates_warped ####################### ## Load interpolated factor values ## ####################### interpolated_Z <- list() if (isTRUE(load_interpol_Z)) { if (isTRUE(verbose)) message("Loading interpolated factor values...") for (g in group_names) { interpolated_Z[[g]] <- list() if (on_disk) { # as DelayedArrays # interpolated_Z[[g]] <- DelayedArray::DelayedArray( HDF5ArraySeed(file, name = sprintf("Z_predictions/%s", g) ) ) } else { # as matrices tryCatch( { interpolated_Z[[g]][["mean"]] <- h5read(file, sprintf("Z_predictions/%s/mean", g) ) }, error = function(x) { print("Predictions of Z not found, not loading it...") }) tryCatch( { interpolated_Z[[g]][["variance"]] <- h5read(file, sprintf("Z_predictions/%s/variance", g) ) }, error = function(x) { print("Variance of predictions of Z not found, not loading it...") }) tryCatch( { interpolated_Z[[g]][["new_values"]] <- h5read(file, "Z_predictions/new_values") }, error = function(x) { print("New values of Z not found, not loading it...") }) } } } object@interpolated_Z <- interpolated_Z ####################### ## Load expectations ## ####################### expectations <- list() node_names <- h5ls.out[h5ls.out$group=="/expectations","name"] if (verbose) message(paste0("Loading expectations for ", length(node_names), " nodes...")) if ("AlphaW" %in% node_names) expectations[["AlphaW"]] <- h5read(file, "expectations/AlphaW")[view_names] if ("AlphaZ" %in% node_names) expectations[["AlphaZ"]] <- h5read(file, "expectations/AlphaZ")[group_names] if ("Sigma" %in% node_names) expectations[["Sigma"]] <- h5read(file, "expectations/Sigma") if ("Z" %in% node_names) expectations[["Z"]] <- h5read(file, "expectations/Z")[group_names] if ("W" %in% node_names) expectations[["W"]] <- h5read(file, "expectations/W")[view_names] if ("ThetaW" %in% node_names) expectations[["ThetaW"]] <- h5read(file, "expectations/ThetaW")[view_names] if ("ThetaZ" %in% node_names) expectations[["ThetaZ"]] <- h5read(file, "expectations/ThetaZ")[group_names] # if ("Tau" %in% node_names) # expectations[["Tau"]] <- h5read(file, "expectations/Tau") object@expectations <- expectations ######################## ## Load model options ## ######################## if (verbose) message("Loading model options...") tryCatch( { object@model_options <- as.list(h5read(file, 'model_options', read.attributes = TRUE)) }, error = function(x) { print("Model options not found, not loading it...") }) # Convert True/False strings to logical values for (i in names(object@model_options)) { if (object@model_options[i] == "False" || object@model_options[i] == "True") { object@model_options[i] <- as.logical(object@model_options[i]) } else { object@model_options[i] <- object@model_options[i] } } ########################################## ## Load training options and statistics ## ########################################## if (verbose) message("Loading training options and statistics...") # Load training options if (length(object@training_options) == 0) { tryCatch( { object@training_options <- as.list(h5read(file, 'training_opts', read.attributes = TRUE)) }, error = function(x) { print("Training opts not found, not loading it...") }) } # Load training statistics tryCatch( { object@training_stats <- h5read(file, 'training_stats', read.attributes = TRUE) object@training_stats <- h5read(file, 'training_stats', read.attributes = TRUE) }, error = function(x) { print("Training stats not found, not loading it...") }) ############################# ## Load covariates options ## ############################# if (any(grepl("cov_samples", h5ls.out$group))) { if (isTRUE(verbose)) message("Loading covariates options...") tryCatch( { object@mefisto_options <- as.list(h5read(file, 'smooth_opts', read.attributes = TRUE)) }, error = function(x) { print("Covariates options not found, not loading it...") }) # Convert True/False strings to logical values for (i in names(object@mefisto_options)) { if (object@mefisto_options[i] == "False" | object@mefisto_options[i] == "True") { object@mefisto_options[i] <- as.logical(object@mefisto_options[i]) } else { object@mefisto_options[i] <- object@mefisto_options[i] } } } ####################################### ## Load variance explained estimates ## ####################################### if ("variance_explained" %in% h5ls.out$name) { r2_list <- list( r2_total = h5read(file, "variance_explained/r2_total")[group_names], r2_per_factor = h5read(file, "variance_explained/r2_per_factor")[group_names] ) object@cache[["variance_explained"]] <- r2_list } # Hack to fix the problems where variance explained values range from 0 to 1 (%) if (max(sapply(object@cache$variance_explained$r2_total,max,na.rm=TRUE),na.rm=TRUE)<1) { for (m in 1:length(view_names)) { for (g in 1:length(group_names)) { object@cache$variance_explained$r2_total[[g]][[m]] <- 100 * object@cache$variance_explained$r2_total[[g]][[m]] object@cache$variance_explained$r2_per_factor[[g]][,m] <- 100 * object@cache$variance_explained$r2_per_factor[[g]][,m] } } } ############################## ## Specify dimensionalities ## ############################## # Specify dimensionality of the data object@dimensions[["M"]] <- length(data) # number of views object@dimensions[["G"]] <- length(data[[1]]) # number of groups object@dimensions[["N"]] <- sapply(data[[1]], ncol) # number of samples (per group) object@dimensions[["D"]] <- sapply(data, function(e) nrow(e[[1]])) # number of features (per view) object@dimensions[["C"]] <- nrow(covariates[[1]]) # number of covariates object@dimensions[["K"]] <- ncol(object@expectations$Z[[1]]) # number of factors # Assign sample and feature names (slow for large matrices) if (verbose) message("Assigning names to the different dimensions...") # Create default features names if they are null if (is.null(feature_names)) { print("Features names not found, generating default: feature1_view1, ..., featureD_viewM") feature_names <- lapply(seq_len(object@dimensions[["M"]]), function(m) sprintf("feature%d_view_&d", as.character(seq_len(object@dimensions[["D"]][m])), m)) } else { # Check duplicated features names all_names <- unname(unlist(feature_names)) duplicated_names <- unique(all_names[duplicated(all_names)]) if (length(duplicated_names)>0) warning("There are duplicated features names across different views. We will add the suffix *_view* only for those features Example: if you have both TP53 in mRNA and mutation data it will be renamed to TP53_mRNA, TP53_mutation") for (m in names(feature_names)) { tmp <- which(feature_names[[m]] %in% duplicated_names) if (length(tmp)>0) feature_names[[m]][tmp] <- paste(feature_names[[m]][tmp], m, sep="_") } } features_names(object) <- feature_names # Create default samples names if they are null if (is.null(sample_names)) { print("Samples names not found, generating default: sample1, ..., sampleN") sample_names <- lapply(object@dimensions[["N"]], function(n) paste0("sample", as.character(seq_len(n)))) } samples_names(object) <- sample_names # Add covariates names if(!is.null(object@covariates)){ # Create default covariates names if they are null if (is.null(covariate_names)) { print("Covariate names not found, generating default: covariate1, ..., covariateC") covariate_names <- paste0("sample", as.character(seq_len(object@dimensions[["C"]]))) } covariates_names(object) <- covariate_names } # Set views names if (is.null(names(object@data))) { print("Views names not found, generating default: view1, ..., viewM") view_names <- paste0("view", as.character(seq_len(object@dimensions[["M"]]))) } views_names(object) <- view_names # Set groups names if (is.null(names(object@data[[1]]))) { print("Groups names not found, generating default: group1, ..., groupG") group_names <- paste0("group", as.character(seq_len(object@dimensions[["G"]]))) } groups_names(object) <- group_names # Set factors names factors_names(object) <- paste0("Factor", as.character(seq_len(object@dimensions[["K"]]))) ################### ## Parse factors ## ################### # Calculate variance explained estimates per factor if (is.null(object@cache[["variance_explained"]])) { object@cache[["variance_explained"]] <- calculate_variance_explained(object) } # Remove inactive factors if (remove_inactive_factors) { r2 <- rowSums(do.call('cbind', lapply(object@cache[["variance_explained"]]$r2_per_factor, rowSums, na.rm=TRUE))) var.threshold <- 0.0001 if (all(r2 < var.threshold)) { warning(sprintf("All %s factors were found to explain little or no variance so remove_inactive_factors option has been disabled.", length(r2))) } else if (any(r2 < var.threshold)) { object <- subset_factors(object, which(r2>=var.threshold), recalculate_variance_explained=FALSE) message(sprintf("%s factors were found to explain no variance and they were removed for downstream analysis. You can disable this option by setting load_model(..., remove_inactive_factors = FALSE)", sum(r2 < var.threshold))) } } # [Done in mofapy2] Sort factors by total variance explained if (sort_factors && object@dimensions$K>1) { # Sanity checks if (verbose) message("Re-ordering factors by their variance explained...") # Calculate variance explained per factor across all views r2 <- rowSums(sapply(object@cache[["variance_explained"]]$r2_per_factor, function(e) rowSums(e, na.rm = TRUE))) order_factors <- c(names(r2)[order(r2, decreasing = TRUE)]) # re-order factors object <- subset_factors(object, order_factors) } # Mask outliers if (remove_outliers) { if (verbose) message("Removing outliers...") object <- .detect_outliers(object) } # Mask intercepts for non-Gaussian data if (any(object@model_options$likelihoods!="gaussian")) { for (m in names(which(object@model_options$likelihoods!="gaussian"))) { for (g in names(object@intercepts[[m]])) { object@intercepts[[m]][[g]] <- NA } } } ###################### ## Quality controls ## ###################### if (verbose) message("Doing quality control...") object <- .quality_control(object, verbose = verbose) return(object) } ================================================ FILE: R/make_example_data.R ================================================ #' @title Simulate a data set using the generative model of MOFA #' @name make_example_data #' @description Function to simulate an example multi-view multi-group data set according to the generative model of MOFA2. #' @param n_views number of views #' @param n_features number of features in each view #' @param n_samples number of samples in each group #' @param n_groups number of groups #' @param n_factors number of factors #' @param likelihood likelihood for each view, one of "gaussian" (default), "bernoulli", "poisson", #' or a character vector of length n_views #' @param lscales vector of lengthscales, needs to be of length n_factors (default is 0 - no smooth factors) #' @param sample_cov (only for use with MEFISTO) matrix of sample covariates for one group with covariates in rows and samples in columns #' or "equidistant" for sequential ordering, default is NULL (no smooth factors) #' @param as.data.frame return data and covariates as long dataframe #' @return Returns a list containing the simulated data and simulation parameters. #' @importFrom stats rnorm rbinom rpois #' @importFrom dplyr left_join #' @importFrom stats dist #' @export #' @examples #' # Generate a simulated data set #' MOFAexample <- make_example_data() make_example_data <- function(n_views=3, n_features=100, n_samples = 50, n_groups = 1, n_factors = 5, likelihood = "gaussian", lscales = 1, sample_cov = NULL, as.data.frame = FALSE) { # Sanity checks if (!all(likelihood %in% c("gaussian", "bernoulli", "poisson"))) stop("Likelihood not implemented: Use either gaussian, bernoulli or poisson") if(length(lscales) == 1) lscales = rep(lscales, n_factors) if(!length(lscales) == n_factors) stop("Lengthscales lscales need to be of length n_factors") if(all(lscales == 0)){ sample_cov <- NULL } if (length(likelihood)==1) likelihood <- rep(likelihood, n_views) if (!length(likelihood) == n_views) stop("Likelihood needs to be a single string or matching the number of views!") if(!is.null(sample_cov)){ if(sample_cov[1] == "equidistant") { sample_cov <- seq_len(n_samples) } if(is.null(dim(sample_cov))) sample_cov <- matrix(sample_cov, nrow = 1) if(ncol(sample_cov) != n_samples){ stop("Number of columns in sample_cov must match number of samples n_samples.") } # Simulate covariance for factors Sigma = lapply(lscales, function(ls) { if(ls == 0) diag(1, n_samples) else (1) * exp(-as.matrix(stats::dist(t(sample_cov)))^2/(2*ls^2)) # else (1-0.001) * exp(-as.matrix(stats::dist(t(sample_cov)))^2/(2*ls^2)) + diag(0.001, n_samples) }) # simulate factors alpha_z <- NULL S_z <- lapply(seq_len(n_groups), function(vw) matrix(1, nrow=n_samples, ncol=n_factors)) Z <- vapply(seq_len(n_factors), function(fc) mvtnorm::rmvnorm(1, rep(0, n_samples), Sigma[[fc]]), numeric(n_samples)) colnames(Z) <- paste0("simulated_factor_", 1:ncol(Z)) Z <- lapply(seq_len(n_groups), function(gr) Z) sample_cov <- Reduce(cbind, lapply(seq_len(n_groups), function(gr) sample_cov)) } else { # set sparsity for factors theta_z <- 0.5 # set ARD prior for factors, each factor being active in at least one group alpha_z <- vapply(seq_len(n_factors), function(fc) { active_gw <- sample(seq_len(n_groups), 1) alpha_fc <- sample(c(1, 1000), n_groups, replace = TRUE) if(all(alpha_fc==1000)) alpha_fc[active_gw] <- 1 alpha_fc }, numeric(n_groups)) alpha_z <- matrix(alpha_z, nrow=n_factors, ncol=n_groups, byrow=TRUE) # simulate factors S_z <- lapply(seq_len(n_groups), function(vw) matrix(rbinom(n_samples * n_factors, 1, theta_z), nrow=n_samples, ncol=n_factors)) Z <- lapply(seq_len(n_groups), function(vw) vapply(seq_len(n_factors), function(fc) rnorm(n_samples, 0, sqrt(1/alpha_z[fc,vw])), numeric(n_samples))) } # set sparsity for weights theta_w <- 0.5 # set ARD prior, each factor being active in at least one view alpha_w <- vapply(seq_len(n_factors), function(fc) { active_vw <- sample(seq_len(n_views), 1) alpha_fc <- sample(c(1, 1000), n_views, replace = TRUE) if(all(alpha_fc==1000)) alpha_fc[active_vw] <- 1 alpha_fc }, numeric(n_views)) alpha_w <- matrix(alpha_w, nrow=n_factors, ncol=n_views, byrow=TRUE) # simulate weights S_w <- lapply(seq_len(n_views), function(vw) matrix(rbinom(n_features*n_factors, 1, theta_w), nrow=n_features, ncol=n_factors)) W <- lapply(seq_len(n_views), function(vw) vapply(seq_len(n_factors), function(fc) rnorm(n_features, 0, sqrt(1/alpha_w[fc,vw])), numeric(n_features))) # set noise level (for gaussian likelihood) tau <- 10 # pre-compute linear term and rbind groups mu <- lapply(seq_len(n_views), function(vw) lapply(seq_len(n_groups), function(gw) (S_z[[gw]]*Z[[gw]]) %*% t(S_w[[vw]]*W[[vw]]))) mu <- lapply(mu, function(l) Reduce(rbind, l)) groups <- rep(paste("group",seq_len(n_groups), sep = "_"), each = n_samples) # simulate data according to the likelihood data <- lapply(seq_len(n_views), function(vw){ lk <- likelihood[vw] if (lk == "gaussian"){ dd <- t(mu[[vw]] + rnorm(length(mu[[vw]]),0,sqrt(1/tau))) } else if (lk == "poisson"){ term <- log(1+exp(mu[[vw]])) dd <- t(apply(term, 2, function(tt) rpois(length(tt),tt))) } else if (lk == "bernoulli") { term <- 1/(1+exp(-mu[[vw]])) dd <- t(apply(term, 2, function(tt) rbinom(length(tt),1,tt))) } colnames(dd) <- paste0("sample_", seq_len(ncol(dd))) rownames(dd) <- paste0("feature_", seq_len(nrow(dd)),"_view", vw) dd }) if(!is.null(sample_cov)) { colnames(sample_cov) <- colnames(data[[1]]) rownames(sample_cov) <- paste0("covariate_", seq_len(nrow(sample_cov))) } names(data) <- paste0("view_", seq_len(n_views)) if(as.data.frame){ gr_df <- data.frame(group = groups, sample = colnames(data[[1]])) dat <- lapply(names(data), function(vw){ tmp <- data[[vw]] df <- melt(tmp, varnames = c("feature", "sample")) df$view <- vw df }) data <- bind_rows(dat) data <- dplyr::left_join(data, gr_df, by = "sample") sample_cov <- melt(sample_cov, varnames = c("covariate", "sample")) } return(list(data = data, groups = groups, alpha_w=alpha_w, alpha_z =alpha_z, lscales = lscales, sample_cov = sample_cov, Z = Z)) } ================================================ FILE: R/mefisto.R ================================================ ########################################################################## ## Functions to use continuous covariates, as part of the MEFISTO framework ## ########################################################################## #' @title Add covariates to a MOFA model #' @name set_covariates #' @description Function to add continuous covariate(s) to a \code{\link{MOFA}} object for training with MEFISTO #' @param object an untrained \code{\link{MOFA}} #' @param covariates Sample-covariates to be passed to the model. #' This can be either: #' \itemize{ #' \item{a character, specifying columns already present in the samples_metadata of the object} #' \item{a data.frame with columns "sample", "covariate", "value". Sample names need to match those present in the data} #' \item{a matrix with samples in columns and covariate(s) in row(s)} #' } #' Note that the covariate should be numeric and continuous. #' @return Returns an untrained \code{\link{MOFA}} with covariates filled in the corresponding slots #' @details To activate the functional MEFISTO framework, specify mefisto_options when preparing the training using \code{prepare_mofa} #' @export #' @examples #' #' # Simulate data #' dd <- make_example_data(sample_cov = seq(0,1,length.out = 100), n_samples = 100, n_factors = 4) #' #' # Create MOFA object #' sm <- create_mofa(data = dd$data) #' #' # Add a covariate #' sm <- set_covariates(sm, covariates = dd$sample_cov) #' sm set_covariates <- function(object, covariates) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (object@status=="trained") stop("The model is already trained! Covariates must be added before training") # get sample names samples_data <- lapply(object@data[[1]], colnames) # samples <- unlist(samples_data) samples_data_vec <- unlist(samples_names(object)) # covariates passed as characters: extract from the metadata as dataframe if (is(covariates, "character")) { if (!all(covariates %in% colnames(samples_metadata(object)))) { stop("Columns specified in covariates do not exist in the MOFA object metadata slot.") } covariates <- samples_metadata(object)[,c("sample",covariates),drop=FALSE] covariates <- gather(covariates, key = "covariate", value = "value", -sample) if(!is.numeric(covariates$value)){ stop("Covariates need to be numeric") } # TO-DO: Check that they continuous # covariates passed in data.frame format } if (any(class(covariates) %in% c("data.frame", "tibble", "Data.Frame"))) { # TO-DO: USE is() if (!all(c("sample", "covariate", "value") %in% colnames(covariates))) stop("If covariates is provided as data.frame it needs to contain the columns: sample, covariate, value") if (!is.numeric(covariates$value)) { stop("Values in covariates need to be numeric") } samples <- covariates$sample # covariates <- covariates[!duplicated(covariates), ] covariates <- reshape2::acast(covariates, covariate ~ sample) # covariates passed in matrix format # TO-DO: CHECK THIS } else if (all(is.numeric(covariates)) || class(covariates) %in% c("dgTMatrix", "dgCMatrix")) { samples <- colnames(covariates) if (!is.null(samples)) { if(!(all(samples %in% samples_data_vec) && all(samples_data_vec %in% samples))) stop("Sample names of the data and the sample covariates do not match.") covariates <- covariates[ , samples_data_vec, drop = FALSE] } else { # warnings and checks if no matching sample names if(sum(object@dimensions[['N']]) != ncol(covariates)) stop("Number of columns in sample covariates does not match the number of samples") if(!is.null(samples_data) && length(samples_data_vec) > 0) { warning("No sample names in covariates - we will use the sample names in data. Please ensure that the order matches.") colnames(covariates) <- samples } else { stop("No sample names found!") } } # covariates format not recognised } else { stop("covariates needs to be a character vector, a dataframe, a matrix or NULL.") } # Set covariate dimensionality object@dimensions[["C"]] <- nrow(covariates) # Set covariate names if (is.null(rownames(covariates))) { message("No covariates names provided - using generic: covariate1, covariate2, ...") rownames(covariates) <- paste0("covariate", seq_len(nrow(covariates))) } # split covariates by groups covariates <- lapply(samples_names(object), function(i) covariates[, i, drop = FALSE]) names(covariates) <- groups_names(object) # Sanity checks stopifnot(all(sapply(covariates, ncol) == object@dimensions[["N"]])) # add covariates to the MOFA object object@covariates <- covariates return(object) } #' @title Get sample covariates #' @name get_covariates #' @description Function to extract the covariates from a \code{\link{MOFA}} object using MEFISTO. #' @param object a \code{\link{MOFA}} object. #' @param covariates character vector with the covariate name(s), or numeric vector with the covariate index(es). #' @param as.data.frame logical indicating whether to output the result as a long data frame, default is \code{FALSE}. #' @param warped logical indicating whether to extract the aligned covariates #' @return a matrix with dimensions (samples,covariates). If \code{as.data.frame} is \code{TRUE}, a long-formatted data frame with columns (sample,factor,value) #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' covariates <- get_covariates(model) get_covariates <- function(object, covariates = "all", as.data.frame = FALSE, warped = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get and check covariate names covariates <- .check_and_get_covariates(object, covariates) # Get covariates if(warped){ sample_cov <- lapply(object@covariates_warped, function(cmat) cmat[covariates,,drop=FALSE]) } else { sample_cov <- lapply(object@covariates, function(cmat) cmat[covariates,,drop=FALSE]) } if (as.data.frame) { if(!is.null(rownames(sample_cov[[1]]))){ nms <- rownames(sample_cov[[1]]) } else { nms <- paste0("covariate_", seq_along(covariates)) } sample_cov <- Reduce(cbind, sample_cov) # remove group info sample_cov <- melt(sample_cov, varnames = c("covariate", "sample")) } return(sample_cov) } #' @title Get default options for MEFISTO covariates #' @name get_default_mefisto_options #' @description Function to obtain the default options for the usage of MEFISTO covariates with MEFISTO #' @param object an untrained \code{\link{MOFA}} object #' @details The options are the following: \cr #' \itemize{ #' \item{\strong{scale_cov}:} logical: Scale covariates? #' \item{\strong{start_opt}:} integer: First iteration to start the optimisation of GP hyperparameters #' \item{\strong{n_grid}:} integer: Number of points for the grid search in the optimisation of GP hyperparameters #' \item{\strong{opt_freq}:} integer: Frequency of optimisation of GP hyperparameters #' \item{\strong{sparseGP}:} logical: Use sparse GPs to speed up the optimisation of the GP parameters? #' \item{\strong{frac_inducing}:} numeric between 0 and 1: Fraction of samples to use as inducing points (only relevant if sparseGP is \code{TRUE}) #' \item{\strong{warping}:} logical: Activate warping functionality to align covariates between groups (requires a multi-group design) #' \item{\strong{warping_freq}:} numeric: frequency of the warping (only relevant if warping is \code{TRUE}) #' \item{\strong{warping_ref}:} A character specifying the reference group for warping (only relevant if warping is \code{TRUE}) #' \item{\strong{warping_open_begin}:} logical: Warping: Allow for open beginning? (only relevant warping is \code{TRUE}) #' \item{\strong{warping_open_end}:} logical: Warping: Allow for open end? (only relevant warping is \code{TRUE}) #' \item{\strong{warping_groups}:} Assignment of groups to classes used for alignment (advanced option). #' Needs to be a vector of length number of samples, e.g. a column of samples_metadata, which needs to have the same value within each group. #' By default groups are used specified in `create_mofa`. #' \item{\strong{model_groups}:} logical: Model covariance structure across groups (for more than one group, otherwise FALSE)? If FALSE, we assume the same patterns in all groups. #' \item{\strong{new_values}:} Values for which to predict the factor values (for interpolation / extrapolation). #' This should be numeric matrix in the same format with covariate(s) in rows and new values in columns. #' Default is NULL, leading to no interpolation. #' } #' @return Returns a list with default options for the MEFISTO covariate(s) functionality. #' @importFrom utils modifyList #' @export #' @examples #' # generate example data #' dd <- make_example_data(sample_cov = seq(0,1,length.out = 200), n_samples = 200, #' n_factors = 4, n_features = 200, n_views = 4, lscales = c(0.5, 0.2, 0, 0)) #' # input data #' data <- dd$data #' # covariate matrix with samples in columns #' time <- dd$sample_cov #' rownames(time) <- "time" #' #' # create mofa and set covariates #' sm <- create_mofa(data = dd$data) #' sm <- set_covariates(sm, covariates = time) #' #' MEFISTO_opt <- get_default_mefisto_options(sm) get_default_mefisto_options <- function(object) { mefisto_options <- list( # Standard options scale_cov = FALSE, # (logical) Scale covariates? start_opt = 20, # (integer) First iteration to start the optimisation of GP hyperparameters n_grid = 20, # (integer) Number of points for the grid search in the optimisation of GP hyperparameters opt_freq = 10, # (integer) Frequency of optimisation of GP hyperparameters model_groups = TRUE, # (logical) model covariance structure across groups # sparse GP options sparseGP = FALSE, # (logical) Use sparse GPs to speed up the optimisation of the GP parameters? frac_inducing = 0.75, # (numeric) Fraction of samples to use as inducing points # warping warping = FALSE, # (logical) Activate warping functionality to align covariates between groups (requires a multi-group design) warping_freq = 20, # (numeric) Warping: frequency of the optimisation warping_ref = groups_names(object)[[1]], # (character) Warping: reference group warping_open_begin = TRUE, # (logical) Warping: Allow for open beginning? warping_open_end = TRUE, # (logical) Warping: Allow for open ending? warping_groups = NULL, new_values = NULL # new values if interpolation/extrapolation is wanted ) # model_groups is set to FALSE if only one group present if (object@dimensions$G == 1) mefisto_options$model_groups <- FALSE # if mefisto_options already exist, replace the default values but keep the additional ones if (length(object@mefisto_options)>0) mefisto_options <- modifyList(mefisto_options, object@mefisto_options) return(mefisto_options) } #' @title Heatmap plot showing the group-group correlations per factor #' @name plot_group_kernel #' @description Heatmap plot showing the group-group correlations inferred by the model per factor #' @param object a trained \code{\link{MOFA}} object using MEFISTO. #' @param factors character vector with the factors names, or numeric vector indicating the indices of the factors to use #' @param groups character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups. #' @param ... additional parameters that can be passed to \code{pheatmap} #' @details The heatmap gives insight into the clustering of the patterns that factors display along the covariate in each group. #' A correlation of 1 indicates that the module captured by a factor shows identical patterns across groups, a correlation of zero that it shows distinct patterns, #' a negative correlation that the patterns go in opposite directions. #' @return Returns a \code{ggplot,gg} object containing the heatmaps #' @import pheatmap #' @import cowplot #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_group_kernel(model) plot_group_kernel <- function(object, factors = "all", groups = "all", ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Define factors factors <- .check_and_get_factors(object, factors) # Define groups groups <- .check_and_get_groups(object, groups) # Get group kernels Kg <- get_group_kernel(object) hmlist <- lapply(factors, function(f){ tmp <- Kg[[f]][groups,groups] # set breaks for heatmaps ncols <- 100 seq_breaks <- c(seq(-1, 0, 1/ncols * 2), seq(0, 1, 1/ncols * 2)[-1]) p <- pheatmap::pheatmap(tmp, color = rev(colorRampPalette((RColorBrewer::brewer.pal(n = 7, name ="RdBu")))(ncols)), breaks = seq_breaks, silent = TRUE,...) p$gtable }) # subset to groups p <- cowplot::plot_grid(plotlist = hmlist) return(p) } #' @title Barplot showing the smoothness per factor #' @name plot_smoothness #' @description Barplot indicating a smoothness score (between 0 (non-smooth) and 1 (smooth)) per factor #' @param object a trained \code{\link{MOFA}} object using MEFISTO. #' @param factors character vector with the factors names, or numeric vector indicating the indices of the factors to use #' @param color for the smooth part of the bar #' @details The smoothness score is given by the scale parameter for the underlying Gaussian process of each factor. #' @return Returns a \code{ggplot2} object #' @import ggplot2 #' @importFrom tidyr gather #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' smoothness_bars <- plot_smoothness(model) plot_smoothness <- function(object, factors = "all", color = "cadetblue") { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Define factors factors <- .check_and_get_factors(object, factors) # Get scale parameters ss <- get_scales(object)[factors] df <- data.frame(factor = names(ss), smooth = ss, non_smooth = 1- ss) df$factor <- factor(df$factor, levels=factors) df <- gather(df, -factor, key = "smoothness", value = "value") gg_bar <- ggplot(df, aes(x= 1, y = value, fill = smoothness)) + geom_bar(stat="identity") + facet_wrap(~factor, nrow = 1) + theme_void() + coord_flip() + guides(fill=FALSE) + scale_fill_manual(values = c("non_smooth" = "gray", "smooth" = color)) + geom_text(x=1, y = 0.5, label = "smoothness", size = 3) return(gg_bar) } #' @title Barplot showing the sharedness per factor #' @name plot_sharedness #' @description Barplot indicating a sharedness score (between 0 (non-shared) and 1 (shared)) per factor #' @param object a trained \code{\link{MOFA}} object using MEFISTO. #' @param factors character vector with the factors names, or numeric vector indicating the indices of the factors to use #' @param color for the shared part of the bar #' @details The sharedness score is calculated as the distance of the learnt group correlation matrix to the identity matrix #' in terms of the mean absolute distance on the off-diagonal elements. #' @return Returns a \code{ggplot2} object #' @import ggplot2 #' @export plot_sharedness <- function(object, factors = "all", color = "#B8CF87") { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (object@dimensions$G == 1) stop("'object' has only one group, more than one group are required to determine sharedness.") # Define factors factors <- .check_and_get_factors(object, factors) # Get group kernels Kgs <- get_group_kernel(object)[factors] # Calculate distance idmat <- diag(1, ncol(Kgs[[1]])) gr <- sapply(Kgs, function(k) mean(abs(k - idmat)[lower.tri(idmat)])) # make plot df <- data.frame(factor = names(gr), group = gr, non_group = 1-gr) df$factor <- factor(df$factor, levels=factors) df <- gather(df, -factor, key = "sharedness", value = "value") df <- mutate(df, sharedness = factor(sharedness, levels = rev(c("group", "non_group")))) gg_bar <- ggplot(df, aes(x= 1, y=value, fill = sharedness)) + geom_bar(stat="identity") + facet_wrap(~factor, nrow = 1) + theme_void() + coord_flip() + guides(fill=FALSE) + scale_fill_manual(values = c("non_group" = "gray", "group" = color)) + geom_text(x=1, y = 0.5, label = "sharedness", size = 3) return(gg_bar) } #' @title Plot interpolated factors versus covariate (1-dimensional) #' @name plot_interpolation_vs_covariate #' @description make a plot of interpolated covariates versus covariate #' @param object a trained \code{\link{MOFA}} object using MEFISTO. #' @param covariate covariate to use for plotting #' @param factors character or numeric specifying the factor(s) to plot, default is "all" #' @param only_mean show only mean or include uncertainties? #' @param show_observed include observed factor values as dots on the plot #' @details to be filled #' @return Returns a \code{ggplot2} object #' @import ggplot2 #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' model <- interpolate_factors(model, new_values = seq(0,1.1,0.1)) #' plot_interpolation_vs_covariate(model, covariate = "time", factors = 1) plot_interpolation_vs_covariate <- function(object, covariate = 1, factors = "all", only_mean = TRUE, show_observed = TRUE){ # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # get and check covariate covariate <- .check_and_get_covariates(object, covariate) # get and check factor factors <- .check_and_get_factors(object, factors) # get interpolated factor df <- get_interpolated_factors(object, as.data.frame = TRUE, only_mean = only_mean) df <- filter(df, factor %in% factors) df$factor <- factor(df$factor, levels = factors) # calculate ribbon borders if(!only_mean) { df <- df %>% mutate(sd = sqrt(variance), ymin = mean -1.96 * sd, ymax = mean + 1.96 * sd) } if(show_observed) { # add the factor values of the observed time point to the plot df_observed <- plot_factors_vs_cov(object, covariates = covariate, return_data = TRUE) df_observed <- filter(df_observed, factor %in% factors) df_observed$factor <- factor(df_observed$factor, levels = factors) } gg_interpol <- ggplot(df, aes(x=.data[[covariate]], y = .data$mean, col = .data$group)) + geom_line(aes(y=mean, col = group)) + facet_wrap(~ factor) + theme_classic() + ylab("factor value") if(show_observed) { gg_interpol <- gg_interpol + geom_point(data = df_observed, aes(x= value.covariate, y = value.factor, col = group), size = 1) } if(!only_mean) { gg_interpol <- gg_interpol + geom_ribbon(aes(ymin=ymin, ymax = ymax, fill = group), alpha = .2, col = "gray", size = 0.1) } gg_interpol } #' @title Scatterplots of feature values against sample covariates #' @name plot_data_vs_cov #' @description Function to do a scatterplot of features against sample covariate values. #' @param object a \code{\link{MOFA}} object using MEFISTO. #' @param covariate string with the covariate name or a samples_metadata column, or an integer with the index of the covariate #' @param warped logical indicating whether to show the aligned covariate (default: TRUE), #' only relevant if warping has been used to align multiple sample groups #' @param factor string with the factor name, or an integer with the index of the factor to take top features from #' @param view string with the view name, or an integer with the index of the view. Default is the first view. #' @param groups groups to plot. Default is "all". #' @param features if an integer (default), the total number of features to plot (given by highest weights). If a character vector, a set of manually-defined features. #' @param sign can be 'positive', 'negative' or 'all' (default) to show only features with highest positive, negative or all weights, respectively. #' @param color_by specifies groups or values (either discrete or continuous) used to color the dots (samples). This can be either: #' \itemize{ #' \item the string "group": dots are coloured with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the same of a column in the sample metadata slot #' \item a vector of the same length as the number of samples specifying the value for each sample. #' \item a dataframe with two columns: "sample" and "color" #' } #' @param shape_by specifies groups or values (only discrete) used to shape the dots (samples). This can be either: #' \itemize{ #' \item the string "group": dots are shaped with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the same of a column in the sample metadata slot #' \item a vector of the same length as the number of samples specifying the value for each sample. #' \item a dataframe with two columns: "sample" and "shape" #' } #' @param legend logical indicating whether to add a legend #' @param dot_size numeric indicating dot size (default is 5). #' @param text_size numeric indicating text size (default is 5). #' @param stroke numeric indicating the stroke size (the black border around the dots, default is NULL, inferred automatically). #' @param alpha numeric indicating dot transparency (default is 1). #' @param add_lm logical indicating whether to add a linear regression line for each plot #' @param lm_per_group logical indicating whether to add a linear regression line separately for each group #' @param imputed logical indicating whether to include imputed measurements #' @param return_data logical indicating whether to return a data frame instead of a plot #' @details One of the first steps for the annotation of factors is to visualise the weights using \code{\link{plot_weights}} or \code{\link{plot_top_weights}} #' and inspect the relationship of the factor to the covariate(s) using \code{\link{plot_factors_vs_cov}}. #' However, one might also be interested in visualising the direct relationship between features and covariate(s), rather than looking at "abstract" weights and #' possibly look at the interpolated and extrapolated values by setting imputed to True. #' @import ggplot2 # #' @importFrom ggpubr stat_cor #' @importFrom dplyr left_join #' @importFrom utils tail #' @importFrom stats quantile #' @return Returns a \code{ggplot2} object or the underlying dataframe if return_data is set to \code{TRUE}. #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_data_vs_cov(model, factor = 3, features = 2) plot_data_vs_cov <- function(object, covariate = 1, warped = TRUE, factor = 1, view = 1, groups = "all", features = 10, sign = "all", color_by = "group", legend = TRUE, alpha = 1, shape_by = NULL, stroke = NULL, dot_size = 2.5, text_size = NULL, add_lm = FALSE, lm_per_group = FALSE, imputed = FALSE, return_data = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(length(factor)==1) stopifnot(length(covariate)==1) stopifnot(length(view)==1) if (lm_per_group) add_lm = TRUE # Define views, factors and groups groups <- .check_and_get_groups(object, groups) factor <- .check_and_get_factors(object, factor) view <- .check_and_get_views(object, view) # Check and fetch covariates df1 <- get_covariates(object, covariate, as.data.frame = TRUE, warped = warped) covariate_name <- unique(df1$covariate) if(!warped){ covariate_name <- paste(covariate_name, "(unaligned)") } # Collect relevant data N <- get_dimensions(object)[["N"]] W <- get_weights(object)[[view]][,factor] # Get features if (sign=="all") { W <- abs(W) } else if (sign=="positive") { W <- W[W>0] } else if (sign=="negative") { W <- W[W<0] } if (is(features, "numeric")) { if (length(features) == 1) { features <- names(tail(sort(abs(W)), n=features)) } else { features <- names(sort(-abs(W))[features]) } stopifnot(all(features %in% features_names(object)[[view]])) } else if (is(features, "character")) { stopifnot(all(features %in% features_names(object)[[view]])) } else { stop("Features need to be either a numeric or character vector") } # Set group/color/shape if (length(color_by)==1 && is.character(color_by)) color_name <- color_by if (length(shape_by)==1 && is.character(shape_by)) shape_name <- shape_by color_by <- .set_colorby(object, color_by) shape_by <- .set_shapeby(object, shape_by) # Merge factor values with color and shape information df1 <- merge(df1, color_by, by="sample") df1 <- merge(df1, shape_by, by="sample") # Create data frame foo <- list(features); names(foo) <- view if (imputed) { df2 <- get_imputed_data(object, groups = groups, views = view, features = foo, as.data.frame = TRUE) } else { df2 <- get_data(object, groups = groups, features = foo, as.data.frame = TRUE) } df2$sample <- as.character(df2$sample) df <- left_join(df1, df2, by = "sample", suffix = c(".covariate",".data")) # (Q) Remove samples with missing values in Factor values df <- df[!is.na(df$value.covariate) & !is.na(df$value.data) ,] if(return_data){ return(df) } # Set stroke if (is.null(stroke)) { stroke <- .select_stroke(N=length(unique(df$sample))) } # Set Pearson text size if (add_lm && is.null(text_size)) { text_size <- .select_pearson_text_size(N=length(unique(df$feature))) } # Set axis text size axis.text.size <- .select_axis.text.size(N=length(unique(df$feature))) # Generate plot p <- ggplot(df, aes(x = .data[["value.covariate"]], y = .data[["value.data"]])) + geom_point(aes(fill = .data$color_by, shape = .data$shape_by), colour = "black", size = dot_size, stroke = stroke, alpha = alpha) + labs(x=covariate_name, y="") + facet_wrap(~feature, scales="free_y") + theme_classic() + theme( axis.text = element_text(size = rel(axis.text.size), color = "black"), axis.title = element_text(size = rel(1.0), color="black") ) # Add linear regression line if (add_lm) { if (lm_per_group && length(groups)>1) { p <- p + stat_smooth(formula=y~x, aes(color=.data$group), method="lm", alpha=0.4) + ggpubr::stat_cor(aes(color=.data$group, label = .data[["..r.label.."]]), method = "pearson", label.sep="\n", output.type = "latex", size = text_size)# + # guides(color = FALSE) } else { p <- p + stat_smooth(formula=y~x, method="lm", color="grey", fill="grey", alpha=0.4) + ggpubr::stat_cor(method = "pearson", label.sep="\n", output.type = "latex", size = text_size, color = "black") } } # Add legend p <- .add_legend(p, df, legend, color_name, shape_name) return(p) } #' @title Scatterplots of a factor's values against the sample covariates #' @name plot_factors_vs_cov #' @description Scatterplots of a factor's values against the sample covariates #' @param object a trained \code{\link{MOFA}} object using MEFISTO. #' @param factors character or numeric specifying the factor(s) to plot, default is "all" #' @param covariates specifies sample covariate(s) to plot against: #' (1) a character giving the name of a column present in the sample covariates or sample metadata. #' (2) a character giving the name of a feature present in the training data. #' (3) a vector of the same length as the number of samples specifying continuous numeric values per sample. #' Default is the first sample covariates in covariates slot #' @param warped logical indicating whether to show the aligned covariate (default: TRUE), #' only relevant if warping has been used to align multiple sample groups #' @param scale logical indicating whether to scale factor values. #' @param show_missing (for 1-dim covariates) logical indicating whether to include samples for which \code{shape_by} or \code{color_by} is missing #' @param color_by (for 1-dim covariates) specifies groups or values used to color the samples. This can be either: #' (1) a character giving the name of a feature present in the training data. #' (2) a character giving the same of a column present in the sample metadata. #' (3) a vector of the same length as the number of samples specifying discrete groups or continuous numeric values. #' @param shape_by (for 1-dim covariates) specifies groups or values used to shape the samples. This can be either: #' (1) a character giving the name of a feature present in the training data, #' (2) a character giving the same of a column present in the sample metadata. #' (3) a vector of the same length as the number of samples specifying discrete groups. #' @param color_name (for 1-dim covariates) name for color legend. #' @param shape_name (for 1-dim covariates) name for shape legend. #' @param dot_size (for 1-dim covariates) numeric indicating dot size. #' @param alpha (for 1-dim covariates) numeric indicating dot transparency. #' @param stroke (for 1-dim covariates) numeric indicating the stroke size #' @param legend (for 1-dim covariates) logical indicating whether to add legend. #' @param rotate_x (for spatial, 2-dim covariates) Rotate covariate on x-axis #' @param rotate_y (for spatial, 2-dim covariates) Rotate covariate on y-axis #' @param return_data logical indicating whether to return the data frame to plot instead of plotting #' @param show_variance (for 1-dim covariates) logical indicating whether to show the marginal variance of inferred factor values #' (only relevant for 1-dimensional covariates) #' @details To investigate the factors pattern along the covariates (such as time or a spatial coordinate) #' this function an be used to plot a scatterplot of the factor against the values of each covariate #' @return Returns a \code{ggplot2} object #' @import ggplot2 dplyr #' @importFrom stats complete.cases #' @importFrom tidyr spread #' @importFrom magrittr %>% set_colnames #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_factors_vs_cov(model) plot_factors_vs_cov <- function(object, factors = "all", covariates = NULL, warped = TRUE, show_missing = TRUE, scale = FALSE, color_by = NULL, shape_by = NULL, color_name = NULL, shape_name = NULL, dot_size = 1.5, alpha = 1, stroke = NULL, legend = TRUE, rotate_x = FALSE, rotate_y = FALSE, return_data = FALSE, show_variance = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Define covariates if (is.null(covariates)) { if (!.hasSlot(object, "covariates") || any(object@dimensions[["C"]] < 1, is.null(object@covariates))) stop("No covariates found in object. Please specify one.") covariates <- covariates_names(object) } # Get factors factors <- .check_and_get_factors(object, factors) Z <- get_factors(object, factors=factors, as.data.frame=TRUE) # Remove samples with missing values Z <- Z[complete.cases(Z),] # Get covariates df <- get_covariates(object, covariates, as.data.frame = TRUE, warped = warped) %>% merge(Z, by="sample", suffixes = c(".covariate",".factor")) # Remember color_name and shape_name if not provided if (!is.null(color_by) && (length(color_by) == 1) && is.null(color_name)) color_name <- color_by if (!is.null(shape_by) && (length(shape_by) == 1) && is.null(shape_name)) shape_name <- shape_by # Set color and shape color_by <- .set_colorby(object, color_by) shape_by <- .set_shapeby(object, shape_by ) # Merge factor values with color and shape information df <- df %>% merge(color_by, by="sample") %>% merge(shape_by, by="sample") %>% mutate(shape_by = as.character(shape_by)) # Remove missing values if (!show_missing) df <- filter(df, !is.na(color_by) && !is.na(shape_by)) # Return data if requested instead of plotting if (return_data) return(df) # Set stroke if (is.null(stroke)) stroke <- .select_stroke(N=length(unique(df$sample))) # Select 1D or 2D plots if (length(covariates) == 1) { # Include marginal variance if (show_variance) { if("E2" %in% names(object@expectations$Z)){ ZZ = object@expectations$Z$E2 ZZ <- reshape2::melt(ZZ, na.rm=TRUE) colnames(ZZ) <- c("sample", "factor", "E2") df <- left_join(df, ZZ, by = c("sample", "factor")) df <- mutate(df, var = E2 - value^2) } else { show_variance <- FALSE warning("No second moments saved in the trained model - variance can not be shown.") } } p <- .plot_factors_vs_cov_1d(df, color_name = color_name, shape_name = shape_name, scale = scale, dot_size = dot_size, alpha = alpha, stroke = stroke, show_variance = show_variance, legend = legend, warped = warped ) } else if (length(covariates) == 2) { p <- .plot_factors_vs_cov_2d(df, scale = scale, rotate_x = rotate_x, rotate_y = rotate_y ) } else { stop("too many covariates provided") } return(p) } .plot_factors_vs_cov_1d <- function(df, color_name = "", shape_name = "", scale = FALSE, dot_size = 1.5, alpha = 1, stroke = 1, show_variance = FALSE, legend = TRUE, warped = TRUE) { # Sanity checks stopifnot(length(unique(df$covariate))==1) covariate_name <- unique(df$covariate) if(!warped){ covariate_name <- paste(covariate_name, "(unaligned)") } # Scale values from 0 to 1 if (scale) { df <- df %>% group_by(factor) %>% mutate(value_scaled = value.factor/max(abs(value.factor))) if(show_variance) df <- mutate(df, var = var/(max(abs(value.factor))^2)) df <- df %>% mutate(value.factor = value_scaled) %>% select(-value_scaled) %>% ungroup } # Generate plot p <- ggplot(df, aes(x=value.covariate, y=value.factor)) + geom_point(aes(fill = .data$color_by, shape = .data$shape_by), colour="black", stroke = stroke, size=dot_size, alpha=alpha) + facet_wrap(~ factor) + theme_classic() + theme( axis.text = element_text(size = rel(0.9), color = "black"), axis.title = element_text(size = rel(1.2), color = "black"), axis.line = element_line(color = "black", linewidth = 0.5), axis.ticks = element_line(color = "black", linewidth = 0.5) ) + xlab(covariate_name) + ylab("factor value") if (show_variance){ p <- p + geom_errorbar(aes(ymin = value - sqrt(var)*1.96, ymax =value + sqrt(var)*1.96), col = "red", alpha = 0.7) } p <- .add_legend(p, df, legend, color_name, shape_name) return(p) } .plot_factors_vs_cov_2d <- function(df, scale = FALSE, rotate_x = FALSE, rotate_y= FALSE) { # Sanity checks stopifnot(length(unique(df$covariate))==2) # pivot covariate values covariates_dt <- df %>% tidyr::pivot_wider(names_from="covariate", values_from="value.covariate") covariates.names <- c(colnames(covariates_dt)[ncol(covariates_dt)-1], colnames(covariates_dt)[ncol(covariates_dt)]) # Scale factor values from 0 to 1 if (scale) { covariates_dt <- covariates_dt %>% group_by(factor) %>% mutate(value.factor = value.factor/max(abs(value.factor))) %>% ungroup } covariates_dt <- mutate(covariates_dt, color_by = value.factor) # for compatibility with .add_legend p <- ggplot(covariates_dt, aes(x=.data[[covariates.names[1]]], y=.data[[covariates.names[2]]], col = .data$color_by)) + geom_point() + scale_color_gradient2() + geom_point(col = "gray", alpha =0.05) + facet_wrap( ~ factor) + coord_fixed() + theme_bw() + theme( axis.text = element_text(size = rel(0.9), color = "black"), axis.title = element_text(size = rel(1.0), color = "black"), axis.line = element_line(color = "black", linewidth = 0.5), axis.ticks = element_line(color = "black", linewidth = 0.5) ) + guides(col = guide_colorbar(title = "Factor value")) if(rotate_x){ p <- p + scale_x_reverse() } if(rotate_y){ p <- p + scale_y_reverse() } return(p) } #' @title Interpolate factors in MEFISTO based on new covariate values #' @name interpolate_factors #' @description Function to interpolate factors in MEFISTO based on new covariate values. #' @param object a \code{\link{MOFA}} object trained with MEFISTO options and a covariate #' @param new_values a matrix containing the new covariate values to inter/extrapolate to. Should be #' in the same format as the covariates used for training. #' @return Returns the \code{\link{MOFA}} with interpolated factor values filled in the corresponding slot (interpolatedZ) #' @details This function requires the functional MEFISTO framework to be used in training. #' Use \code{set_covariates} and specify mefisto_options when preparing the training using \code{prepare_mofa}. #' Currently, only the mean of the interpolation is provided from R. #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' model <- interpolate_factors(model, new_values = seq(0,1.1,0.01)) interpolate_factors <- function(object, new_values) { # TODO check this function # message("We recommend doing interpolation from python where additionally uncertainties are provided for the interpolation.") if(length(object@interpolated_Z) != 0){ warning("Object already contains interpolated factor values, overwriting it.") } # sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (is.null(object@covariates)) stop("'object' does not contain any covariates.") if (is.null(object@mefisto_options)) stop("'object' does have MEFISTO training options.") if (is.null(object@expectations$Sigma)) stop("'object' does not have any expectations of Sigma.") if (!is.numeric(new_values)) stop("'new_values' should be numeric.") # restructure 1d covariate if(is.null(dim(new_values))){ new_values <- matrix(new_values, nrow = 1) } # get kernel parameters ls <- get_lengthscales(object) Kgs <- get_group_kernel(object) s <- get_scales(object) Sigma <- object@expectations$Sigma$E Sigma_inv <- lapply(seq_along(factors_names(object)), function(k) solve(Sigma[k,,])) # all covariates if (!all(sapply(nrow(object@covariates_warped), function(c) nrow(c) == nrow(new_values)))) { stop("Number of covariates in new_values does not match covariates in model") } # get covariates of old and new values if(object@mefisto_options$warping){ old_covariates <- samples_metadata(object)[, paste(covariates_names(object), "warped", sep = "_"), drop = FALSE] %>% t() } else{ old_covariates <- samples_metadata(object)[, covariates_names(object), drop = FALSE] %>% t() } all_covariates <- cbind(new_values, old_covariates) %>% unique.matrix(., MARGIN = 2) old_groups <- as.character(samples_metadata(object)$group) old <- rbind(old_groups, old_covariates) all <- rbind(rep(groups_names(object), each = ncol(all_covariates)), t(apply(all_covariates, 1,function(x) rep(x, object@dimensions$G)))) new <- rbind(rep(groups_names(object), each = ncol(new_values)), t(apply(new_values, 1,function(x) rep(x, object@dimensions$G)))) oldidx <- match(data.frame(old), data.frame(all)) newidx <- match(data.frame(new), data.frame(all)) # get factor values Z <- get_factors(object) %>% Reduce(rbind,.) means <- sapply(seq_along(factors_names(object)), function(k) { if(ls[k] == 0 || s[k] == 0){ means <- matrix( rep(NA, length(new_values) * object@dimensions$G), ncol = 1) } else { Kc_new <- exp(- as.matrix(dist(t(all_covariates))) ^ 2 / (2 * ls[k]^2)) K_new_k <- s[k] * Kgs[[k]] %x% Kc_new mean <- K_new_k[newidx, oldidx] %*% Sigma_inv[[k]] %*% Z[,k] } }) %>% t() res <- lapply(groups_names(object), function(g){ list(mean = means[,new[1,] == g], new_values = new_values, variance = rep(NA, nrow = object@dimensions$K, # variances only provided from python ncol = length(new_values))) }) names(res) <- groups_names(object) object@interpolated_Z <- res return(object) } #' @title Plot covariate alignment across groups #' @name plot_alignment #' @description Function to plot the alignment learnt by MEFISTO for the #' covariate values between different groups #' @param object a \code{\link{MOFA}} object using MEFISTO with warping #' @return ggplot object showing the alignment #' @details This function requires the functional MEFISTO framework to be used in training. #' Use \code{set_covariates} and specify mefisto_options when preparing the training using \code{prepare_mofa}. #' @export #' plot_alignment <- function(object){ # sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (is.null(object@covariates)) stop("'object' does not contain any covariates.") if (is.null(object@mefisto_options)) stop("'object' does have MEFISTO training options.") if (!object@mefisto_options$warping) stop("No warping applied in this MOFA object") df_w <- get_covariates(object, 1, as.data.frame = TRUE, warped = TRUE) df_nw <- get_covariates(object, 1, as.data.frame = TRUE, warped = FALSE) df <- left_join(df_w, df_nw, by = c("sample"), suffix = c(".warped", ".unaligned")) df <- left_join(df, select(samples_metadata(object), group, sample), by = "sample") yname <- object@mefisto_options$warping_ref if(!yname %in% groups_names(object)){ yname <- "reference_value" } ggplot(df, aes(y = value.warped, x = value.unaligned)) + geom_point() + facet_wrap(~group) + theme_bw() + ylab(yname) } #' @title Plot variance explained by the smooth components of the model #' @description This function plots the variance explained by the smooth components (Gaussian processes) underlying the factors in MEFISTO across different views and groups, as specified by the user. #' @name plot_variance_explained_by_covariates #' @param object a \code{\link{MOFA}} object #' @param x character specifying the dimension for the x-axis ("view", "factor", or "group"). #' @param y character specifying the dimension for the y-axis ("view", "factor", or "group"). #' @param split_by character specifying the dimension to be faceted ("view", "factor", or "group"). #' @param factors character vector with a factor name(s), or numeric vector with the index(es) of the factor(s). Default is "all". #' @param min_r2 minimum variance explained for the color scheme (default is 0). #' @param max_r2 maximum variance explained for the color scheme. #' @param compare_total plot corresponding variance explained in total in addition #' @param legend logical indicating whether to add a legend to the plot (default is TRUE). #' @import ggplot2 #' @importFrom cowplot plot_grid #' @importFrom reshape2 melt #' @details Note that this function requires the use of MEFISTO. #' To activate the functional MEFISTO framework, specify mefisto_options when preparing the training using \code{prepare_mofa} #' @return A list of \code{\link{ggplot}} objects (if \code{compare_total} is TRUE) or a single \code{\link{ggplot}} object. #' Consider using cowplot::plot_grid(plotlist = ...) to combine the multiple plots that this function generates. #' @export #' @examples #' # load_model #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_variance_explained_by_covariates(model) #' #' # compare to total variance explained #' plist <- plot_variance_explained_by_covariates(model, compare_total = TRUE) #' cowplot::plot_grid(plotlist = plist) plot_variance_explained_by_covariates <- function(object, factors = "all", x = "view", y = "factor", split_by = NA, min_r2 = 0, max_r2 = NULL, compare_total = FALSE, legend = TRUE){ # Sanity checks if (length(unique(c(x, y, split_by))) != 3) { stop(paste0("Please ensure x, y, and split_by arguments are different.\n", " Possible values are `view`, `group`, and `factor`.")) } # Automatically fill split_by in if (is.na(split_by)) split_by <- setdiff(c("view", "factor", "group"), c(x, y, split_by)) views <- .check_and_get_views(object, "all") groups <- .check_and_get_groups(object, "all") factors <- .check_and_get_factors(object, factors) # Collect relevant expectations W <- get_weights(object, views=views, factors=factors) Z <- get_factors(object, groups=groups, factors=factors) Z_interpol <- lapply(groups, function(g) { if(all(object@covariates_warped[[g]] %in% object@interpolated_Z[[g]]$new_values)){ idx <- match(object@covariates_warped[[g]], object@interpolated_Z[[g]]$new_values) mat <- t(get_interpolated_factors(object, only_mean = TRUE)[[g]]$mean)[idx,] } else { message("No interpolations found in object, recalculating them...") mm_tmp <- object mm_tmp@interpolated_Z <- list() mm_tmp <- interpolate_factors(mm_tmp, mm_tmp@covariates_warped[[g]]) mat <- t(get_interpolated_factors(mm_tmp, only_mean = TRUE)[[g]]$mean) rm(mm_tmp) } mat[is.na(mat)] <- 0 colnames(mat) <- factors_names(object) rownames(mat) <- samples_names(object)[[g]] mat[, factors] }) names(Z_interpol) <- groups Y <- lapply(get_data(object, add_intercept = FALSE)[views], function(view) view[groups]) Y <- lapply(Y, function(x) lapply(x,t)) r2_GP <- lapply(groups, function(g) { tmp_Z <- sapply(views, function(m) { sapply(factors, function(k) { a <- sum((as.matrix(Y[[m]][[g]]) - tcrossprod(Z[[g]][,k], W[[m]][,k]))**2, na.rm = TRUE) b <- sum(Y[[m]][[g]]**2, na.rm = TRUE) return(1 - a/b) }) }) tmp_Z <- matrix(tmp_Z, ncol = length(views), nrow = length(factors)) colnames(tmp_Z) <- views rownames(tmp_Z) <- factors tmp_GP <- sapply(views, function(m) { sapply(factors, function(k) { a <- sum((as.matrix(Y[[m]][[g]]) - tcrossprod(Z_interpol[[g]][,k], W[[m]][,k]))**2, na.rm = TRUE) b <- sum(Y[[m]][[g]]**2, na.rm = TRUE) return(1 - a/b) }) }) tmp_GP <- matrix(tmp_GP, ncol = length(views), nrow = length(factors)) colnames(tmp_GP) <- views rownames(tmp_GP) <- factors return(tmp_GP * 100) # return(pmax(tmp_GP - tmp_Z,0)) }) names(r2_GP) <- groups r2_GP_df <- melt( lapply(r2_GP, function(x) melt(as.matrix(x), varnames = c("factor", "view")) ), id.vars=c("factor", "view", "value") ) colnames(r2_GP_df)[ncol(r2_GP_df)] <- "group" r2_GP_df$factor <- factor(r2_GP_df$factor, levels = factors) r2_GP_df$group <- factor(r2_GP_df$group, levels = groups) r2_GP_df$view <- factor(r2_GP_df$view, levels = views) # Set R2 limits r2_Z <- calculate_variance_explained(object) if (!is.null(min_r2)) r2_GP_df$value[r2_GP_df$valuemax_r2] <- max_r2 } else { max_r2 = max(max(Reduce(c,r2_Z$r2_per_factor)), max(r2_GP_df$value)) } p1 <- ggplot(r2_GP_df, aes(x=.data[[x]], y=.data[[y]])) + geom_tile(aes(fill=.data$value), color="black") + facet_wrap(as.formula(sprintf('~%s',split_by)), nrow=1) + labs(x="", y="", title="") + scale_fill_gradientn(colors=c("gray97","darkblue"), guide="colorbar", limits=c(min_r2,max_r2)) + guides(fill=guide_colorbar("Var. (%)")) + theme( axis.text.x = element_text(size=rel(1.0), color="black"), axis.text.y = element_text(size=rel(1.1), color="black"), axis.line = element_blank(), axis.ticks = element_blank(), panel.background = element_blank(), strip.background = element_blank(), strip.text = element_text(size=rel(1.0)) ) if (!legend) p1 <- p1 + theme(legend.position = "none") # remove facet title if (length(unique(r2_GP_df[,split_by]))==1) p1 <- p1 + theme(strip.text = element_blank()) if(!compare_total){ return(p1) } else{ list(p1 + ggtitle("smooth part"), plot_variance_explained(object, min_r2 = min_r2, max_r2 = max_r2, x= x, y=y, split_by=split_by, factors = factors) + ggtitle("total")) } } ================================================ FILE: R/plot_data.R ================================================ ########################################### ## Functions to visualise the input data ## ########################################### #' @title Plot heatmap of relevant features #' @name plot_data_heatmap #' @description Function to plot a heatmap of the data for relevant features, typically the ones with high weights. #' @param object a \code{\link{MOFA}} object. #' @param factor a string with the factor name, or an integer with the index of the factor. #' @param view a string with the view name, or an integer with the index of the view. Default is the first view. #' @param groups groups to plot. Default is "all". #' @param features if an integer (default), the total number of features to plot based on the absolute value of the weights. #' If a character vector, a set of manually defined features. #' @param annotation_samples annotation metadata for samples (columns). #' Either a character vector specifying columns in the sample metadata, or a data.frame that will be passed to \code{\link[pheatmap]{pheatmap}} as \code{annotation_row} #' @param annotation_features annotation metadata for features (rows). #' Either a character vector specifying columns in the feature metadata, or a data.frame that will be passed to \code{\link[pheatmap]{pheatmap}} as \code{annotation_col} #' @param transpose logical indicating whether to transpose the heatmap. #' Default corresponds to features as rows and samples as columns. #' @param imputed logical indicating whether to plot the imputed data instead of the original data. Default is FALSE. #' @param denoise logical indicating whether to plot a denoised version of the data reconstructed using the MOFA factors. #' @param max.value numeric indicating the maximum value to display in the heatmap (i.e. the matrix values will be capped at \code{max.value} ). #' @param min.value numeric indicating the minimum value to display in the heatmap (i.e. the matrix values will be capped at \code{min.value} ). #' See \code{\link{predict}}. Default is FALSE. #' @param ... further arguments that can be passed to \code{\link[pheatmap]{pheatmap}} #' @details One of the first steps for the annotation of a given factor is to visualise the corresponding weights, #' using for example \code{\link{plot_weights}} or \code{\link{plot_top_weights}}. \cr #' However, one might also be interested in visualising the direct relationship between features and factors, rather than looking at "abstract" weights. \cr #' This function generates a heatmap for selected features, which should reveal the underlying pattern that is captured by the latent factor. \cr #' A similar function for doing scatterplots rather than heatmaps is \code{\link{plot_data_scatter}}. #' @return A \code{\link[pheatmap]{pheatmap}} object #' @importFrom pheatmap pheatmap #' @importFrom utils tail #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_data_heatmap(model, factor = 1, show_rownames = FALSE, show_colnames = FALSE) plot_data_heatmap <- function(object, factor, view = 1, groups = "all", features = 50, annotation_features = NULL, annotation_samples = NULL, transpose = FALSE, imputed = FALSE, denoise = FALSE, max.value = NULL, min.value = NULL, ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(length(factor)==1) stopifnot(length(view)==1) # Define views, factors and groups groups <- .check_and_get_groups(object, groups) factor <- .check_and_get_factors(object, factor) view <- .check_and_get_views(object, view) # Get weights W <- do.call(rbind, get_weights(object, views=view, factors=factor, as.data.frame = FALSE)) # NOTE: By default concatenate all the groups Z <- lapply(get_factors(object)[groups], function(z) as.matrix(z[,factor])) Z <- do.call(rbind, Z)[,1] Z <- Z[!is.na(Z)] # Get data if (isTRUE(denoise)) { data <- predict(object, views=view, groups=groups)[[1]] } else { if (isTRUE(imputed)) { data <- get_imputed_data(object, view, groups)[[1]] } else { data <- get_data(object, views=view, groups=groups)[[1]] } } # Concatenate groups if (is(data, "list")) { data <- do.call(cbind, data) } # Subset features if (is(features, "numeric")) { if (length(features)==1) { features <- rownames(W)[tail(order(abs(W)), n=features)] } else { features <- rownames(W)[order(-abs(W))[features]] } # Sort features according to the weights features <- names(W[features,])[order(W[features,])] } else if (is(features, "character")) { stopifnot(all(features %in% features_names(object)[[view]])) } else { stop("Features need to be either a numeric or character vector") } data <- data[features,] # Select respective samples data <- data[,names(Z)] # Ignore samples with full missing views data <- data[, apply(data, 2, function(x) !all(is.na(x)))] # By default, sort samples according to the factor values order_samples <- names(sort(Z, decreasing = TRUE)) order_samples <- order_samples[order_samples %in% colnames(data)] data <- data[,order_samples] # Add sample annotations if (!is.null(annotation_samples)) { # Predefined data.frame if (is.data.frame(annotation_samples)) { message("'annotation_samples' provided as a data.frame, please make sure that the rownames match the sample names") if (any(!colnames(data)%in%rownames(annotation_samples))) { stop("There are rownames in annotation_samples that do not correspond to sample names in the model") } annotation_samples <- annotation_samples[colnames(data), , drop = FALSE] # Extract metadata from the sample metadata } else if (is.character(annotation_samples)) { stopifnot(annotation_samples%in%colnames(object@samples_metadata)) # tmp <- tibble::column_to_rownames(object@samples_metadata,"sample")[order_samples,,drop=F] tmp <- object@samples_metadata rownames(tmp) <- tmp$sample tmp$sample <- NULL tmp <- tmp[order_samples,,drop=FALSE] annotation_samples <- tmp[,annotation_samples, drop=FALSE] rownames(annotation_samples) <- rownames(tmp) } else { stop("Input format for 'annotation_samples' not recognised ") } # Convert character columns to factors foo <- sapply(annotation_samples, function(x) is.logical(x) || is.character(x)) if (any(foo)) annotation_samples[,which(foo)] <- lapply(annotation_samples[,which(foo),drop=FALSE], as.factor) } # Add feature annotations if (!is.null(annotation_features)) { stop("'annotation_features' is currently not implemented") } # Transpose the data if (transpose) { data <- t(data) if (!is.null(annotation_samples)) { annotation_features <- annotation_samples annotation_samples <- NULL } if (!is.null(annotation_features)) { annotation_samples <- annotation_features annotation_features <- NULL } } # Cap values if (!is.null(max.value)) data[data>=max.value] <- max.value if (!is.null(min.value)) data[data<=min.value] <- min.value # Plot heatmap pheatmap(data, annotation_row = annotation_features, annotation_col = annotation_samples, ... ) } #' @title Scatterplots of feature values against latent factors #' @name plot_data_scatter #' @description Function to do a scatterplot of features against factor values. #' @param object a \code{\link{MOFA}} object. #' @param factor string with the factor name, or an integer with the index of the factor. #' @param view string with the view name, or an integer with the index of the view. Default is the first view. #' @param groups groups to plot. Default is "all". #' @param features if an integer (default), the total number of features to plot. If a character vector, a set of manually-defined features. #' @param sign can be 'positive', 'negative' or 'all' (default) to show only positive, negative or all weights, respectively. #' @param color_by specifies groups or values (either discrete or continuous) used to color the dots (samples). This can be either: #' \itemize{ #' \item the string "group": dots are coloured with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the same of a column in the sample metadata slot #' \item a vector of the same length as the number of samples specifying the value for each sample. #' \item a dataframe with two columns: "sample" and "color" #' } #' @param shape_by specifies groups or values (only discrete) used to shape the dots (samples). This can be either: #' \itemize{ #' \item the string "group": dots are shaped with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the same of a column in the sample metadata slot #' \item a vector of the same length as the number of samples specifying the value for each sample. #' \item a dataframe with two columns: "sample" and "shape" #' } #' @param legend logical indicating whether to add a legend #' @param dot_size numeric indicating dot size (default is 5). #' @param text_size numeric indicating text size (default is 5). #' @param stroke numeric indicating the stroke size (the black border around the dots, default is NULL, inferred automatically). #' @param alpha numeric indicating dot transparency (default is 1). #' @param add_lm logical indicating whether to add a linear regression line for each plot #' @param lm_per_group logical indicating whether to add a linear regression line separately for each group #' @param imputed logical indicating whether to include imputed measurements #' @details One of the first steps for the annotation of factors is to visualise the weights using \code{\link{plot_weights}} or \code{\link{plot_top_weights}}. #' However, one might also be interested in visualising the direct relationship between features and factors, rather than looking at "abstract" weights. \cr #' A similar function for doing heatmaps rather than scatterplots is \code{\link{plot_data_heatmap}}. #' @import ggplot2 # #' @importFrom ggpubr stat_cor #' @importFrom dplyr left_join #' @importFrom utils tail #' @importFrom stats quantile #' @return A \code{\link{ggplot}} object #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_data_scatter(model) plot_data_scatter <- function(object, factor = 1, view = 1, groups = "all", features = 10, sign = "all", color_by = "group", legend = TRUE, alpha = 1, shape_by = NULL, stroke = NULL, dot_size = 2.5, text_size = NULL, add_lm = TRUE, lm_per_group = TRUE, imputed = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(length(factor)==1) stopifnot(length(view)==1) if (lm_per_group) add_lm = TRUE # Define views, factors and groups groups <- .check_and_get_groups(object, groups) factor <- .check_and_get_factors(object, factor) view <- .check_and_get_views(object, view) # Collect relevant data N <- get_dimensions(object)[["N"]] W <- get_weights(object)[[view]][,factor] if (imputed) { Y <- do.call(cbind, object@imputed_data[[view]][groups]) } else { Y <- do.call(cbind, object@data[[view]][groups]) } # Fetch factors Z <- get_factors(object, factors = factor, groups = groups, as.data.frame = TRUE) Z <- Z[,c("sample","value")] colnames(Z) <- c("sample","x") # Get features if (sign=="all") { W <- abs(W) } else if (sign=="positive") { W <- W[W>0] } else if (sign=="negative") { W <- W[W<0] } if (is(features, "numeric")) { if (length(features) == 1) { features <- names(tail(sort(abs(W)), n=features)) } else { features <- names(sort(-abs(W))[features]) } stopifnot(all(features %in% features_names(object)[[view]])) } else if (is(features, "character")) { stopifnot(all(features %in% features_names(object)[[view]])) } else { stop("Features need to be either a numeric or character vector") } W <- W[features] # Set group/color/shape if (length(color_by)==1 & is.character(color_by)) color_name <- color_by if (length(shape_by)==1 & is.character(shape_by)) shape_name <- shape_by color_by <- .set_colorby(object, color_by) shape_by <- .set_shapeby(object, shape_by) # Merge factor values with color and shape information df1 <- merge(Z, color_by, by="sample") df1 <- merge(df1, shape_by, by="sample") # Create data frame foo <- list(features); names(foo) <- view if (isTRUE(imputed)) { df2 <- get_imputed_data(object, groups = groups, views = view, features = foo, as.data.frame = TRUE) } else { df2 <- get_data(object, groups = groups, features = foo, as.data.frame = TRUE) } df2$sample <- as.character(df2$sample) df <- dplyr::left_join(df1, df2, by = "sample") # (Q) Remove samples with missing values in Factor values df <- df[!is.na(df$value),] # Set stroke if (is.null(stroke)) { stroke <- .select_stroke(N=length(unique(df$sample))) } # Set Pearson text size if (add_lm && is.null(text_size)) { text_size <- .select_pearson_text_size(N=length(unique(df$feature))) } # Set axis text size axis.text.size <- .select_axis.text.size(N=length(unique(df$feature))) # Generate plot p <- ggplot(df, aes(x = .data$x, y = .data$value)) + geom_point(aes(fill = .data$color_by, shape = .data$shape_by), colour = "black", size = dot_size, stroke = stroke, alpha = alpha) + labs(x="Factor values", y="") + facet_wrap(~feature, scales="free_y") + theme_classic() + theme( axis.text = element_text(size = rel(axis.text.size), color = "black"), axis.title = element_text(size = rel(1.0), color="black") ) # Add linear regression line if (add_lm) { if (lm_per_group && length(groups)>1) { p <- p + stat_smooth(formula=y~x, aes(color=.data$group), method="lm", alpha=0.4) + ggpubr::stat_cor(aes(color=.data$group, label = .data[["..r.label.."]]), method = "pearson", label.sep="\n", output.type = "latex", size = text_size)# + # guides(color = "none") } else { p <- p + stat_smooth(formula=y~x, method="lm", color="grey", fill="grey", alpha=0.4) + ggpubr::stat_cor(method = "pearson", label.sep="\n", output.type = "latex", size = text_size, color = "black") } } # Add legend p <- .add_legend(p, df, legend, color_name, shape_name) return(p) } #' @title Overview of the input data #' @name plot_data_overview #' @description Function to do a tile plot showing the missing value structure of the input data #' @param object a \code{\link{MOFA}} object. #' @param covariate (only for MEFISTO) specifies sample covariate to order samples by in the plot. This should be #' a character or a numeric index giving the name or position of a column present in the covariates slot of the object. #' Default is the first sample covariate in covariates slot. \code{NULL} does not order by covariate #' @param colors a vector specifying the colors per view (see example for details). #' @param show_covariate (only for MEFISTO) boolean specifying whether to include the covariate in the plot #' @param show_dimensions logical indicating whether to plot the dimensions of the data (default is TRUE). #' @details This function is helpful to get an overview of the structure of the data. #' It shows the model dimensionalities (number of samples, groups, views and features) #' and it indicates which measurements are missing. #' @import ggplot2 #' @importFrom reshape2 melt # #' @importFrom rlang .data #' @importFrom dplyr mutate left_join #' @return A \code{\link{ggplot}} object #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_data_overview(model) plot_data_overview <- function(object, covariate = 1, colors = NULL, show_covariate = FALSE, show_dimensions = TRUE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if(sum(get_dimensions(object)[["N"]]) > 1e4) warning("This function is inefficient with large number of cells...") if (length(object@data)==0) stop("Data not found") M <- get_dimensions(object)[["M"]] G <- get_dimensions(object)[["G"]] if (M==1 & G==1) warning("This function is not useful when there is just one view and one group") # Collect MEFISTO covariates if(!.hasSlot(object, "covariates") || any(object@dimensions[["C"]] < 1, is.null(object@covariates))) covariate <- NULL if (!is.null(covariate)) { if(is.numeric(covariate)){ if(covariate > object@dimensions[["C"]]) stop("Covariate index out of range") covariate <- covariates_names(object)[covariate] } if(!is.character(covariate) | !covariate %in% covariates_names(object)) stop("Covariate misspecified. Please read the documentation") covari <- .set_xax(object, covariate) } # Define colors if (is.null(colors)) { palette <- c("#FF7F50", "#D95F02", "#377EB8", "#E6AB02", "#31A354", "#7570B3", "#E7298A", "#66A61E", "#A6761D", "#666666", "#E41A1C", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#1B9E77") if (M < 18) colors <- palette[seq_len(M)] else colors <- rainbow(M) names(colors) <- views_names(object) } else { if (length(colors) != M) stop("Length of 'colors' does not match the number of views") if(is.null(names(colors))) { names(colors) <- views_names(object) } else { stopifnot(sort(names(colors))==sort(views_names(object))) } } # Define availability binary matrix to indicate whether assay j is profiled in sample i tmp <- lapply(object@data, function(m) sapply(m, function(g) apply(g, 2, function(x) !all(is.na(x))))) ovw <- do.call(cbind, lapply(seq_len(M), function(m) { do.call(rbind, lapply(tmp[[m]], as.data.frame)) })) rownames(ovw) <- object@samples_metadata$sample colnames(ovw) <- views_names(object) ovw$sample <- object@samples_metadata$sample ovw$group <- object@samples_metadata$group # Melt to data.frame to.plot <- reshape2::melt(ovw, id.vars = c("sample", "group"), var=c("view")) if(!is.null(covariate)) { to.plot <- left_join(to.plot, covari, by= "sample") to.plot$sample <- factor(to.plot$sample, levels = unique(to.plot$sample[order(to.plot$covariate_value)])) } else { to.plot$sample <- factor(to.plot$sample, levels = rownames(ovw)) } n <- length(unique(to.plot$sample)) # Add number of samples and features per view/group to.plot$combi <- ifelse(to.plot$value, as.character(to.plot$view), "missing") if (show_dimensions) { to.plot$ntotal <- paste("N=", sapply(object@data[[1]], function(e) ncol(e))[ as.character(to.plot$group) ], sep="") to.plot$ptotal <- paste("D=", sapply(object@data, function(e) nrow(e[[1]]))[ as.character(to.plot$view) ], sep="") if (length(unique(to.plot$group))==1) { to.plot <- mutate(to.plot, view_label = paste(view, ptotal, sep="\n"), group_label = ntotal) } else { to.plot <- mutate(to.plot, view_label = paste(view, ptotal, sep="\n"), group_label = paste(group, ntotal, sep="\n")) } } else { to.plot <- mutate(to.plot, view_label = view, group_label = group) } # Order groups to.plot$group_label <- factor(to.plot$group_label, levels=unique(to.plot$group_label)) # Plot p <- ggplot(to.plot, aes(x=.data$sample, y=.data$view_label, fill=.data$combi)) + geom_tile() + scale_fill_manual(values = c("missing"="grey", colors)) + # xlab(paste0("Samples (N=", n, ")")) + ylab("") + guides(fill = "none") + # facet_wrap(~group_label, scales="free_x", nrow=length(unique(to.plot$view_label))) + facet_wrap(vars(group_label), scales="free_x", nrow=length(unique(to.plot$view_label))) + theme( panel.background = element_rect(fill="white"), text = element_text(size=14), axis.line = element_blank(), axis.ticks = element_blank(), axis.title = element_blank(), axis.text.x = element_blank(), axis.text.y = element_text(color="black"), strip.background = element_blank(), panel.grid = element_blank() ) if(show_covariate){ p2 <- ggplot(to.plot, aes(x=.data$sample, y=.data$covariate_value)) + geom_point(size = 0.5) + theme_bw() +theme( text = element_text(size=10), axis.ticks.x = element_blank(), axis.title.x = element_blank(), axis.text.x = element_blank(), strip.background = element_blank(), strip.text = element_blank() ) + ylab(covariate) + facet_wrap(~group_label, ncol =1, scales="free_x") if(object@dimensions["G"] == 1) { p <- cowplot::plot_grid(p, p2, align = "v", ncol = 1, rel_heights = c(1,0.2) ) } else{ p <- cowplot::plot_grid(p, p2, align = "h", nrow = 1, rel_widths = c(1,1) ) } } return(p) } #' @title Visualize the structure of the data in the terminal #' @name plot_ascii_data #' @description A Fancy printing method #' @param object a \code{\link{MOFA}} object #' @param nonzero a logical value specifying whether to calculate the fraction of non-zero values (non-NA values by default) #' @details This function is helpful to get an overview of the structure of the data as a text output #' @return None #' @export #' @examples #' # Using an existing trained model #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_ascii_data(model) plot_ascii_data <- function(object, nonzero = FALSE) { stopifnot(is(object, "MOFA")) if (!.hasSlot(object, "dimensions") || length(object@dimensions) == 0) stop("Error: dimensions not defined") if (!.hasSlot(object, "status") || length(object@status) == 0) stop("Error: status not defined") vis_lines <- "" lpad <- max(sapply(views_names(object), function(v) nchar(v))) wlim <- max(sapply(groups_names(object), function(v) nchar(v))) igr_sp <- .rep_string(5, " ") s <- 8 # extra lpadding shift w <- max(8, wlim) # width of one block (minus 2 walls) hat <- paste0(" ", .rep_string(w, "_"), " ") walls <- paste0("|", .rep_string(w, " "), "|") ground <- paste0("|", .rep_string(w, "_"), "|") groups_line <- .pad_left(lpad + s, .cpaste(groups_names(object), w+2, collapse = igr_sp)) nsamples_line <- .pad_left(lpad + s, .cpaste(get_dimensions(object)$N, w+2, collapse = igr_sp)) vis_lines <- c(vis_lines, groups_line, nsamples_line) # Calculate percentage of missing values in every view and every group if (nonzero) { content_pct <- lapply(object@data, function(view) sapply(view, function(group) sum(group == 0))) } else { content_pct <- lapply(object@data, function(view) sapply(view, function(group) sum(is.na(group)))) } content_pct <- lapply(seq_len(length(content_pct)), function(m) { paste0(as.character(round(100 - content_pct[[m]] / object@dimensions$N / object@dimensions$D[m] * 100)), sep = "%") }) for (m in seq_len(length(views_names(object)))) { # browser() toprect_line <- .pad_left(lpad + s, paste(.rep_string(get_dimensions(object)$G, hat, collapse = igr_sp))) midrect_line <- .pad_left(lpad + s, paste(.rep_string(get_dimensions(object)$G, walls, collapse = igr_sp))) dfeatures_line <- .pad_left_with(lpad + s, paste(.insert_inside(content_pct[[m]], rep(walls, get_dimensions(object)$G)), collapse = igr_sp), with = paste(c(views_names(object)[m], .cpaste(get_dimensions(object)$D[m], s)), collapse = "")) botrect_line <- .pad_left(lpad + s, paste(.rep_string(get_dimensions(object)$G, ground, collapse = igr_sp))) vis_lines <- c(vis_lines, toprect_line, midrect_line, dfeatures_line, botrect_line) } cat(paste(vis_lines, collapse = "\n")) cat("\n\n") } .rep_string <- function(times, string, collapse = "") { paste(replicate(times, string), collapse = collapse) } .pad_left_with <- function(len, string, with = "") { wlen <- nchar(with) len <- max(len - wlen, 0) paste0(with, paste(replicate(len, " "), collapse = ""), string) } .pad_left <- function(len, string) { .pad_left_with(len, string, with = "") } .insert_inside <- function(values, boxes) { sapply(seq_len(length(boxes)), function(i) { box <- boxes[i] v <- values[i] paste0(substr(box, 1, 1), .cpaste(v, nchar(box) - 2), substr(box, length(box), length(box))) }) } # Center and paste .cpaste <- function(vals, cwidth, collapse = "") { vals <- sapply(vals, function(e) { e <- toString(e) lendiff <- cwidth - nchar(e) if (lendiff > 1) { paste0(.rep_string(ceiling(lendiff / 2), " "), e, .rep_string(floor(lendiff / 2), " ")) } else { e } }) paste(vals, collapse = collapse) } # Function to define the axis text size for plot_data_scatter .select_axis.text.size <- function(N) { if (N>=4) { return(0.5) } else if (N>=2 & N<4) { return(0.6) } else if (N==1) { return(0.8) } } # Function to define the text size for the pearson correlation coefficient .select_pearson_text_size <- function(N) { if (N>=4) { return(3) } else if (N>=2 & N<4) { return(4) } else if (N==1) { return(5) } } ================================================ FILE: R/plot_factors.R ================================================ ########################################### ## Functions to visualise latent factors ## ########################################### #' @title Beeswarm plot of factor values #' @name plot_factor #' @description Beeswarm plot of the latent factor values. #' @param object a trained \code{\link{MOFA}} object. #' @param factors character vector with the factor names, or numeric vector with the indices of the factors to use, or "all" to plot all factors. #' @param groups character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups. #' @param scale logical indicating whether to scale factor values. #' @param group_by specifies grouping of samples: #' \itemize{ #' \item (default) the string "group": in this case, the plot will color samples with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the name of a column in the sample metadata slot #' \item a vector of the same length as the number of samples specifying the value for each sample. #'} #' @param color_by specifies color of samples. This can be either: #' \itemize{ #' \item (default) the string "group": in this case, the plot will color the dots with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the name of a column in the sample metadata slot #' \item a vector of the same length as the number of samples specifying the value for each sample. #' } #' @param shape_by specifies shape of samples. This can be either: #' \itemize{ #' \item (default) the string "group": in this case, the plot will shape the dots with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the name of a column in the sample metadata slot #' \item a vector of the same length as the number of samples specifying the value for each sample. #' } #' @param add_dots logical indicating whether to add dots. #' @param add_violin logical indicating whether to add violin plots #' @param add_boxplot logical indicating whether to add box plots #' @param dodge logical indicating whether to dodge the dots (default is FALSE). #' @param show_missing logical indicating whether to remove samples for which \code{shape_by} or \code{color_by} is missing. #' @param dot_size numeric indicating dot size. #' @param dot_alpha numeric indicating dot transparency. #' @param violin_alpha numeric indicating violin plot transparency. #' @param color_violin logical indicating whether to color violin plots. #' @param boxplot_alpha numeric indicating boxplot transparency. #' @param color_boxplot logical indicating whether to color box plots. #' @param color_name name for color legend (usually only used if color_by is not a character itself). #' @param shape_name name for shape legend (usually only used if shape_by is not a character itself). #' @param stroke numeric indicating the stroke size (the black border around the dots). #' @param legend logical indicating whether to add a legend to the plot (default is TRUE). #' @param rasterize logical indicating whether to rasterize the plot (default is FALSE). #' @details One of the main steps for the annotation of factors is to visualise and color them using known covariates or phenotypic data. \cr #' This function generates a Beeswarm plot of the sample values in a given latent factor. \cr #' Similar functions are \code{\link{plot_factors}} for doing scatter plots. #' @return Returns a \code{ggplot2} #' @import ggplot2 grDevices #' @importFrom stats complete.cases #' @importFrom forcats fct_na_value_to_level #' @importFrom RColorBrewer brewer.pal #' @importFrom dplyr summarise group_by #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Plot Factors 1 and 2 and colour by "group" #' plot_factor(model, factors = c(1,2), color_by="group") #' #' # Plot Factor 3 and colour by the value of a specific feature #' plot_factor(model, factors = 3, color_by="feature_981_view_1") #' #' # Add violin plots #' plot_factor(model, factors = c(1,2), color_by="group", add_violin = TRUE) #' #' # Scale factor values from -1 to 1 #' plot_factor(model, factors = c(1,2), scale = TRUE) #' plot_factor <- function(object, factors = 1, groups = "all", group_by = "group", color_by = "group", shape_by = NULL, add_dots = TRUE, dot_size = 2, dot_alpha = 1, add_violin = FALSE, violin_alpha = 0.5, color_violin = TRUE, add_boxplot = FALSE, boxplot_alpha = 0.5, color_boxplot = TRUE, show_missing = TRUE, scale = FALSE, dodge = FALSE, color_name = "", shape_name = "", stroke = NULL, legend = TRUE, rasterize = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Define factors factors <- .check_and_get_factors(object, factors) # Get factor values Z <- get_factors(object, factors=factors, groups = groups, as.data.frame=TRUE) Z$factor <- factor(Z$factor, levels=factors) # Set group/color/shape if (length(color_by)==1 & is.character(color_by)) color_name <- color_by if (length(shape_by)==1 & is.character(shape_by)) shape_name <- shape_by object_group_by <- .set_groupby(object, group_by) object_color_by <- .set_colorby(object, color_by) object_shape_by <- .set_shapeby(object, shape_by) # Remove samples with missing values Z <- Z[complete.cases(Z),] # Merge factor values with group/color/shape information df <- merge(Z, object_group_by, by="sample") df <- merge(df, object_color_by, by="sample") df <- merge(df, object_shape_by, by="sample") # QC if (length(unique(df$color_by))>10 & is.numeric(df$color_by)) { add_violin <- FALSE add_boxplot <- FALSE dodge <- FALSE } if (length(unique(df$shape_by))>5) { warning("Maximum number of shapes is 5") df$shape_by <- "1" } # if (all(unique(df$color_by)==unique(df$group_by))) dodge <- TRUE # Remove samples with no sample metadata if (!show_missing) df <- filter(df, !is.na(color_by) & !is.na(shape_by)) if (is.factor(df$color_by)) df$color_by <- forcats::fct_na_value_to_level(df$color_by) if (is.factor(df$shape_by)) df$shape_by <- forcats::fct_na_value_to_level(df$shape_by) # Scale values if (scale) df$value <- df$value/max(abs(df$value)) # Generate plot p <- ggplot(df, aes(x=.data$group_by, y=.data$value, fill=.data$color_by, shape=.data$shape_by)) + theme_classic() # Define facets as factors or groups if (length(factors) == 1) { p <- p + facet_wrap(~group_by, nrow=1, scales="free_x") + labs(x=group_by, y=as.character(factors)) if (length(unique(df$group_by))==1) p <- p + theme(strip.text = element_blank()) # remove facet title } else { p <- p + facet_wrap(~factor, nrow=1, scales="free_x") + labs(x=group_by, y="Factor value") if (length(unique(df$factor))==1) p <- p + theme(strip.text = element_blank()) # remove facet title } # Add dots if (add_dots) { # Set stroke if (is.null(stroke)) stroke <- .select_stroke(N=length(unique(df$sample))) if (rasterize) { warning("geom_jitter is not available with rasterize==TRUE. We use instead ggrastr::geom_quasirandom_rast()") if (dodge) { p <- p + ggrastr::geom_quasirandom_rast(size = dot_size, position = "dodge", stroke = stroke, alpha = dot_alpha, dodge.width = 1) } else { p <- p + ggrastr::geom_quasirandom_rast(size = dot_size, stroke = stroke, alpha = dot_alpha) } } else { if (dodge) { p <- p + geom_jitter(colour = "black", size = dot_size, stroke = stroke, alpha = dot_alpha, position = position_jitterdodge(dodge.width=1, jitter.width=0.2)) } else { p <- p + geom_jitter(colour = "black", size = dot_size, stroke = stroke, alpha = dot_alpha) } } } # Add violin plot if (add_violin) { if (color_violin && dodge) { tmp <- summarise(group_by(df, factor, color_by), n=n()) if (min(tmp$n)==1) { warning("Warning: some 'color_by' groups have only one observation, violin plots cannot be added. Adding boxplots instead...") add_boxplot <- TRUE # p <- p + geom_violin(color="black", fill="grey", alpha=violin_alpha, trim=TRUE, scale="width", show.legend = FALSE) # p <- p + geom_violin(color="black", alpha=violin_alpha, trim=TRUE, scale="width", show.legend = FALSE) } else { p <- p + geom_violin(alpha=violin_alpha, trim=TRUE, scale="width", position=position_dodge(width=1)) } # p <- p + geom_violin(color="black", alpha=violin_alpha, trim=TRUE, scale="width", position=position_dodge(width=1), show.legend = FALSE) } else { p <- p + geom_violin(color="black", fill="grey", alpha=violin_alpha, trim=TRUE, scale="width", show.legend = FALSE) } } # Add boxplot plot if (add_boxplot) { if (color_boxplot && dodge) { tmp <- summarise(group_by(df, factor, color_by), n=n()) # if (min(tmp$n)==1) { # warning("Warning: some 'color_by' groups have only one observation, boxplot plots cannot be coloured") # p <- p + geom_boxplot(color="black", alpha=boxplot_alpha, show.legend = FALSE) # } else { # p <- p + geom_boxplot(alpha=boxplot_alpha, position=position_dodge(width=1), show.legend = FALSE) # } p <- p + geom_boxplot(color="black", alpha=boxplot_alpha, position=position_dodge(width=1)) } else { p <- p + geom_boxplot(color="black", fill="grey", alpha=boxplot_alpha, show.legend = FALSE) } } # Add theme p <- p + geom_hline(yintercept=0, linetype="dashed", linewidth=0.2, alpha=0.5) + theme( panel.border = element_rect(color="black", linewidth=0.1, fill=NA), strip.background = element_rect(colour = "black", linewidth=0.25), panel.spacing = unit(0,"lines"), # axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), axis.text = element_text(size=rel(0.75), color="black"), axis.title.x = element_blank(), axis.title.y = element_text(size=rel(1.0), color="black"), axis.line = element_line(color="black", linewidth=0.25), axis.ticks = element_line(color = "black") ) if (length(unique(df$factor))>1) { # p <- p + scale_y_continuous(breaks=NULL) } else { # Remove strip labels for groups, they are labelled along X axis if (isFALSE(dodge)) { p <- p + theme(strip.text.x = element_blank()) } } # If group_by has a single value, remove text if (length(unique(df$group_by))==1) { p <- p + theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) } # Add legend p <- .add_legend(p, df, legend, color_name, shape_name) return(p) } #' @title Scatterplots of two factor values #' @name plot_factors #' @description Scatterplot of the values of two latent factors. #' @param object a trained \code{\link{MOFA}} object. #' @param factors a vector of length two with the factors to plot. Factors can be specified either as a characters #' @param groups character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups. #' @param show_missing logical indicating whether to include samples for which \code{shape_by} or \code{color_by} is missing #' @param scale logical indicating whether to scale factor values. #' @param color_by specifies groups or values used to color the samples. This can be either: #' (1) a character giving the name of a feature present in the training data. #' (2) a character giving the name of a column present in the sample metadata. #' (3) a vector of the same length as the number of samples specifying discrete groups or continuous numeric values. #' @param shape_by specifies groups or values used to shape the samples. This can be either: #' (1) a character giving the name of a feature present in the training data, #' (2) a character giving the name of a column present in the sample metadata. #' (3) a vector of the same length as the number of samples specifying discrete groups. #' @param color_name name for color legend. #' @param shape_name name for shape legend. #' @param dot_size numeric indicating dot size (default is 2). #' @param stroke numeric indicating the stroke size (the black border around the dots, default is NULL, inferred automatically). #' @param alpha numeric indicating dot transparency (default is 1). #' @param legend logical indicating whether to add legend. #' @param return_data logical indicating whether to return the data frame to plot instead of plotting #' @details One of the first steps for the annotation of factors is to visualise and group/color them using known covariates such as phenotypic or clinical data. #' This method generates a single scatterplot for the combination of two latent factors. #' TO-FINISH... #' \code{\link{plot_factors}} for doing Beeswarm plots for factors. #' @return Returns a \code{ggplot2} object #' @import ggplot2 dplyr #' @importFrom stats complete.cases #' @importFrom tidyr spread #' @importFrom magrittr %>% set_colnames #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Scatterplot of factors 1 and 2 #' plot_factors(model, factors = c(1,2)) #' #' # Shape dots by a column in the metadata #' plot_factors(model, factors = c(1,2), shape_by="group") #' #' # Scale factor values from -1 to 1 #' plot_factors(model, factors = c(1,2), scale = TRUE) #' plot_factors <- function(object, factors = c(1, 2), groups = "all", show_missing = TRUE, scale = FALSE, color_by = NULL, shape_by = NULL, color_name = NULL, shape_name = NULL, dot_size = 2, alpha = 1, legend = TRUE, stroke = NULL, return_data = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # If plotting one or multiple factors, re-direct to other functions if (length(unique(factors)) == 1) { .args <- as.list(match.call()[-1]) .args <- .args[names(.args) != "factors"] return(do.call(plot_factor, c(.args, list(factors = unique(factors))))) } else if (length(factors) > 2) { .args <- as.list(match.call()[-1]) p <- do.call(.plot_multiple_factors, .args) return(p) } # Remember color_name and shape_name if not provided if (!is.null(color_by) && (length(color_by) == 1) && is.null(color_name)) color_name <- color_by if (!is.null(shape_by) && (length(shape_by) == 1) && is.null(shape_name)) shape_name <- shape_by # Define factors factors <- .check_and_get_factors(object, factors) # Get factors Z <- get_factors(object, factors=factors, groups = groups, as.data.frame=TRUE) # Set color and shape color_by <- .set_colorby(object, color_by) shape_by <- .set_shapeby(object, shape_by) # Remove samples with missing values Z <- Z[complete.cases(Z),] # Merge factor values with color and shape information df <- merge(Z, color_by, by="sample") df <- merge(df, shape_by, by="sample") df$shape_by <- as.character(df$shape_by) # Remove missing values if(isFALSE(show_missing)) df <- filter(df, !is.na(color_by) & !is.na(shape_by)) # spread over factors df <- spread(df, key="factor", value="value") df <- df[,c(colnames(df)[seq_len(4)], factors)] df <- set_colnames(df, c(colnames(df)[seq_len(4)], "x", "y")) # Scale values from 0 to 1 if (scale) { df$x <- df$x/max(abs(df$x)) df$y <- df$y/max(abs(df$y)) } # Return data if requested instead of plotting if (return_data) return(df) # Set stroke if (is.null(stroke)) { stroke <- .select_stroke(N=length(unique(df$sample))) } # Generate plot p <- ggplot(df, aes(x=.data$x, y=.data$y, fill=.data$color_by, shape=.data$shape_by)) + geom_point(size=dot_size, alpha=alpha, stroke = stroke) + labs(x=factors[1], y=factors[2]) + theme_classic() + theme( axis.text = element_text(size = rel(0.8), color = "black"), axis.title = element_text(size = rel(1.1), color = "black"), axis.line = element_line(color = "black", linewidth = 0.5), axis.ticks = element_line(color = "black", linewidth = 0.5) ) p <- .add_legend(p, df, legend, color_name, shape_name) # Fix legend labels if (!is.null(color_name)) { p <- p + labs(fill = color_name) } if (!is.null(shape_name)) { p <- p + labs(shape = shape_name) } return(p) } # Plot multiple factors as pairwise scatterplots #' @importFrom stats complete.cases .plot_multiple_factors <- function(object, factors = "all", show_missing = TRUE, dot_size = 1, color_by = NULL, color_name = "", shape_by = NULL, shape_name = "", legend = TRUE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Define factors factors <- .check_and_get_factors(object, factors) # Collect relevant data Z <- get_factors(object, factors=factors, as.data.frame=TRUE) # Set color and shape color_by <- .set_colorby(object, color_by) shape_by <- .set_shapeby(object, shape_by) # Remove samples with missing factor values Z <- Z[complete.cases(Z),] # Merge factor values with color and shape information df <- merge(Z, color_by, by="sample") df <- merge(df, shape_by, by="sample") # Remove missing values if(!show_missing) df <- filter(df, !is.na(color_by) & !is.na(shape_by)) # Spread over factors df <- tidyr::spread(df, key="factor", value="value") # Prepare the legend p_legend <- ggplot(df, aes(x=.data[[factors[1]]], y=.data[[factors[2]]], color=.data$color_by, shape=.data$shape_by)) + geom_point() + theme( legend.key = element_rect(fill = "white"), legend.text = element_text(size=rel(1.2)), legend.title = element_text(size=rel(1.2)) ) colorscale <- NULL if (length(unique(df$color))>1 && isTRUE(legend)) { p_legend <- p_legend + labs(color=color_name) } else { p_legend <- p_legend + guides(color="none") colorscale <- scale_color_manual(values="black") } if (length(unique(df$color))>1 && isFALSE(legend)) { p_legend <- p_legend + guides(color="none") } if (length(unique(df$shape))>1) { p_legend <- p_legend + labs(shape=shape_name) } else { p_legend <- p_legend + guides(shape = "none") } if (is.numeric(df$color)) colorscale <- scale_color_gradientn(colors=colorRampPalette(rev(brewer.pal(n=5, name="RdYlBu")))(10)) # Apply scale early so ggpairs builds correct legend if (!is.null(colorscale)) p_legend <- p_legend + colorscale # Grab legend if needed if ((length(unique(df$color))>1 || length(unique(df$shape))>1) && isTRUE(legend)) { legend <- GGally::grab_legend(p_legend) } else { legend <- NULL } # Generate the final plot p <- GGally::ggpairs(df, columns = factors, lower = list(continuous=GGally::wrap("points", size=dot_size)), diag = list(continuous='densityDiag'), upper = list(continuous=GGally::wrap("points", size=dot_size)), mapping = aes(color=.data$color_by, shape=.data$shape_by), title = "", legend = legend ) p <- p + theme_bw() + theme( panel.grid.major = element_blank(), axis.ticks = element_blank(), axis.text = element_blank() ) # Apply colorscale to panels for consistency if (!is.null(colorscale)) p <- p + colorscale return(p) } #' @title Plot correlation matrix between latent factors #' @name plot_factor_cor #' @description Function to plot the correlation matrix between the latent factors. #' @param object a trained \code{\link{MOFA}} object. #' @param method a character indicating the type of correlation coefficient to be computed: pearson (default), kendall, or spearman. #' @param ... arguments passed to \code{\link[corrplot]{corrplot}} #' @details This method plots the correlation matrix between the latent factors. \cr #' The model encourages the factors to be uncorrelated, so this function usually yields a diagonal correlation matrix. \cr #' However, it is not a hard constraint such as in Principal Component Analysis and correlations between factors can occur, #' particularly with large number factors. \cr #' Generally, correlated factors are redundant and should be avoided, as they make interpretation harder. Therefore, #' if you have too many correlated factors we suggest you try reducing the number of factors. #' @return Returns a symmetric matrix with the correlation coefficient between every pair of factors. # #' @importFrom corrplot corrplot #' @importFrom corrplot corrplot #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Plot correlation between all factors #' plot_factor_cor(model) #' plot_factor_cor <- function(object, method = "pearson", ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Fetch factors Z <- get_factors(object) # Compute and plot correlation r <- abs(cor(x=do.call(rbind, Z), y=do.call(rbind, Z), method=method, use = "complete.obs")) corrplot(r, tl.col = "black", ...) } ================================================ FILE: R/plot_weights.R ================================================ ######################################## ## Functions to visualise the weights ## ######################################## #' @title Plot heatmap of the weights #' @name plot_weights_heatmap #' @description Function to visualize the weights for a given set of factors in a given view. \cr #' This is useful to visualize the overall pattern of the weights but not to individually characterise the factors. \cr #' To inspect the weights of individual factors, use the functions \code{\link{plot_weights}} and \code{\link{plot_top_weights}} #' @param object a trained \code{\link{MOFA}} object. #' @param view character vector with the view name(s), or numeric vector with the index of the view(s) to use. #' Default is the first view. #' @param features character vector with the feature name(s), or numeric vector with the index of the feature(s) to use. #' Default is 'all'. #' @param factors character vector with the factor name(s), or numeric vector with the index of the factor(s) to use. #' Default is 'all'. #' @param threshold threshold on absolute weight values, so that weights with a magnitude below this threshold (in all factors) are removed #' @param ... extra arguments passed to \code{\link[pheatmap]{pheatmap}}. #' @importFrom pheatmap pheatmap #' @return A \code{\link{pheatmap}} object #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' plot_weights_heatmap(model) plot_weights_heatmap <- function(object, view = 1, features = "all", factors = "all", threshold = 0, ...) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (is.numeric(view)) view <- views_names(object)[view] stopifnot(all(view %in% views_names(object))) # Define factors factors <- .check_and_get_factors(object, factors) # Define features if (paste(features, collapse="") =="all") { features <- features_names(object)[[view]] } else if (is.numeric(features)) { features <- features_names(object)[[view]][features] } else { stopifnot(all(features %in% features_names(object)[[view]])) } # Get relevant data W <- get_weights(object, views=view, factors=factors)[[1]][features,] # apply thresholding of weights W <- W[!apply(W,1,function(r) all(abs(r)1 || length(unique(df$shape_by))>1) && legend) { p <- p + labs(color = name_color, shape = name_shape) + theme( legend.key = element_rect(fill = "white"), legend.text = element_text(size=16), legend.title = element_text(size=16) ) } else { p <- p + theme( legend.position = "none" ) } return(p) } #' @title Plot distribution of feature weights (weights) #' @name plot_weights #' @description An important step to annotate factors is to visualise the corresponding feature weights. \cr #' This function plots all weights for a given latent factor and view, labeling the top ones. \cr #' In contrast, the function \code{\link{plot_top_weights}} displays only the top features with highest loading. #' @param object a \code{\link{MOFA}} object. #' @param view a string with the view name, or an integer with the index of the view. #' @param factors character vector with the factor name(s), or numeric vector with the index of the factor(s). #' @param nfeatures number of top features to label. #' @param color_by specifies groups or values (either discrete or continuous) used to color the dots (features). This can be either: #' \itemize{ #' \item (default) the string "group": in this case, the plot will color the dots with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the same of a column in the features metadata slot #' \item a vector of the same length as the number of features specifying the value for each feature #' \item a dataframe with two columns: "feature" and "color" #' } #' @param shape_by specifies groups or values (only discrete) used to shape the dots (features). This can be either: #' \itemize{ #' \item (default) the string "group": in this case, the plot will shape the dots with respect to their predefined groups. #' \item a character giving the name of a feature that is present in the input data #' \item a character giving the same of a column in the features metadata slot #' \item a vector of the same length as the number of features specifying the value for each feature #' \item a dataframe with two columns: "feature" and "shape" #' } #' @param abs logical indicating whether to take the absolute value of the weights. #' @param manual A nested list of character vectors with features to be manually labelled (see the example for details). #' @param color_manual a character vector with colors, one for each element of 'manual' #' @param scale logical indicating whether to scale all weights from -1 to 1 (or from 0 to 1 if abs=TRUE). #' @param dot_size numeric indicating the dot size. #' @param text_size numeric indicating the text size. #' @param legend logical indicating whether to add legend. #' @param return_data logical indicating whether to return the data frame to plot instead of plotting #' @import ggplot2 dplyr tidyr #' @importFrom magrittr %>% #' @importFrom ggrepel geom_text_repel #' @return A \code{\link{ggplot}} object or a \code{data.frame} if return_data is TRUE #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Plot distribution of weights for Factor 1 and View 1 #' plot_weights(model, view = 1, factors = 1) #' #' # Plot distribution of weights for Factors 1 to 3 and View 1 #' plot_weights(model, view = 1, factors = 1:3) #' #' # Take the absolute value and highlight the top 10 features #' plot_weights(model, view = 1, factors = 1, nfeatures = 10, abs = TRUE) #' #' # Change size of dots and text #' plot_weights(model, view = 1, factors = 1, text_size = 5, dot_size = 1) #' plot_weights <- function(object, view = 1, factors = 1, nfeatures = 10, color_by = NULL, shape_by = NULL, abs = FALSE, manual = NULL, color_manual = NULL, scale = TRUE, dot_size = 1, text_size = 5, legend = TRUE, return_data = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(length(view)==1) # Get views view <- .check_and_get_views(object, view) # Get factor names factors <- .check_and_get_factors(object, factors) # Collect expectations W <- get_weights(object, views = view, factors = factors, as.data.frame = TRUE) # Convert factor names to a factor to preserve order W$factor <- factor(W$factor, levels = unique(factors)) ################ ## Parse data ## ################ # Scale values if (scale && sum(W$value>0)>0) W$value <- W$value / max(abs(W$value)) # Take the absolute value if (abs) W$value <- abs(W$value) # Define groups for labelling W$labelling_group <- "0" # Define group of features to color according to the loading if (is.null(manual) & nfeatures>0) { for (f in factors) { features <- W[W$factor==f,] %>% group_by(view) %>% top_n(n=nfeatures, abs(value)) %>% .$feature W[(W$feature %in% features) & (W$factor==f), "labelling_group"] <- "1" } } # Define group of features to label manually if(!is.null(manual)) { if (is.null(color_manual)) { if (length(manual)>1) { # color_manual <- hcl(h = seq(15, 375, length=length(manual)+1), l=65, c=100)[seq_len(length(manual))] color_manual <- RColorBrewer::brewer.pal(n=length(manual)+1, "Dark2") } else { color_manual <- "black" } } else { stopifnot(length(color_manual)==length(manual)) } # Add labelling group (0 for non-labelled, >= 1 for labelled) for (m in seq_len(length(manual))) W$labelling_group[W$feature %in% manual[[m]]] <- as.character(m+1) } # Make features names unique W$feature_id <- W$feature if ((length(unique(W$view)) > 1) && (nfeatures > 0) && (any(duplicated(W[W$factor == factors[1],]$feature_id)))) { message("Duplicated feature names across views, we will add the view name as a prefix") W$feature_id <- paste(W$view, W$feature, sep="_") } # labelling_indicator is TRUE for labelled, FALSE for non-labelled W$labelling_indicator <- as.factor(W$labelling_group != "0") # Set color and shape if (length(color_by)==1 & is.character(color_by)) color_name <- color_by if (length(shape_by)==1 & is.character(shape_by)) shape_name <- shape_by obj_color_by <- .set_colorby_features(object, color_by, view) obj_shape_by <- .set_shapeby_features(object, shape_by, view) # Merge factor values with group/color/shape information W <- merge(W, obj_color_by, by=c("feature", "view")) W <- merge(W, obj_shape_by, by=c("feature", "view")) # Sort features by weight W <- by(W, list(W$factor), function(x) x[order(x$value),]) W <- do.call(rbind, W) # In order to re-order features across multiple factors, make them unique for different factors W$feature_id <- paste(W$feature_id, W$factor, sep="_") W$feature_id <- factor(W$feature_id, levels = unique(W$feature_id)) # Return data if requested instead of plotting if (return_data) return(W) # Generate plot p <- ggplot(W, aes(x = .data$value, y = .data$feature_id, col = .data$labelling_group)) + scale_y_discrete(expand = c(0.03,0.03)) + geom_point(aes(shape = .data$shape_by, size=.data$labelling_indicator)) + labs(x="Weight", y="Rank", size=dot_size) # Add labels to the top features if (nfeatures>0 || length(unique(W$labelling_group))>0) { p <- p + geom_text_repel( data = W[W$labelling_group != "0",], aes(label = .data$feature, col = .data$labelling_group), size = text_size, segment.alpha = 0.25, segment.color = "black", segment.size = 0.3, show.legend = FALSE, max.overlaps = Inf) } # Configure axis if (scale) { if (abs) { p <- p + coord_cartesian(xlim=c(0,1)) + scale_x_continuous(breaks=c(0,1)) + expand_limits(x=c(0,1)) } else { p <- p + coord_cartesian(xlim=c(-1,1)) + scale_x_continuous(breaks=c(-1,0,1)) + expand_limits(x=c(-1,1)) } } # Define dot size p <- p + scale_size_manual(values=c(dot_size/2,dot_size*2)) + guides(size = "none") # Define dot colours and legend for colours if (!is.null(color_by)) { p <- p + labs(color=color_name) } else { foo <- c("grey","black",color_manual); names(foo) <- as.character(0:(length(foo)-1)) p <- p + guides(color="none") + scale_color_manual(values=foo) } # Add legend for shape if (!is.null(shape_by)) { p <- p + labs(shape=shape_name) } else { p <- p + guides(shape="none") } # Facet if multiple factors if (length(unique(W$factor)) > 1) { p <- p + facet_wrap(~factor, nrow=1, scales="free") } # Add Theme p <- p + theme_bw() + theme( plot.title = element_text(size=rel(1.3), hjust=0.5), axis.title = element_text(size=rel(1.3), color="black"), axis.text.x = element_text(size=rel(1.3), color="black"), axis.text.y = element_blank(), axis.ticks.y = element_blank(), # facets strip.text = element_text(size=rel(1.2)), panel.spacing = unit(1,"lines"), # gridlines panel.grid.major.y = element_blank(), ) # Configure the legend if (legend) { p <- p + theme( legend.text = element_text(size=rel(1.2)), legend.title = element_text(size=rel(1.2)) ) } else { p <- p + theme(legend.position = "none") } return(p) } #' @title Plot top weights #' @name plot_top_weights #' @description Plot top weights for a given factor and view. #' @param object a trained \code{\link{MOFA}} object. #' @param view a string with the view name, or an integer with the index of the view. #' @param factors a character string with factors names, or an integer vector with factors indices. #' @param nfeatures number of top features to display. #' Default is 10 #' @param abs logical indicating whether to use the absolute value of the weights (Default is FALSE). #' @param sign can be 'positive', 'negative' or 'all' to show only positive, negative or all weights, respectively. Default is 'all'. #' @param scale logical indicating whether to scale all weights from -1 to 1 (or from 0 to 1 if abs=TRUE). Default is TRUE. #' @details An important step to annotate factors is to visualise the corresponding feature weights. \cr #' This function displays the top features with highest loading whereas the function \code{\link{plot_weights}} plots all weights for a given latent factor and view. \cr #' Importantly, the weights of the features within a view have relative values and they should not be interpreted in an absolute scale. #' Therefore, for interpretability purposes we always recommend to scale the weights with \code{scale=TRUE}. #' @import ggplot2 #' @importFrom dplyr group_by top_n desc #' @return Returns a \code{ggplot2} object #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Plot top weights for Factors 1 and 2 and View 1 #' plot_top_weights(model, view = 1, factors = c(1,2)) #' #' # Do not take absolute value #' plot_weights(model, abs = FALSE) #' plot_top_weights <- function(object, view = 1, factors = 1, nfeatures = 10, abs = TRUE, scale = TRUE, sign = "all") { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (nfeatures <= 0) stop("'nfeatures' has to be greater than 0") if (sign=="all") { abs <- TRUE} if (is.numeric(view)) view <- views_names(object)[view] stopifnot(view %in% views_names(object)) # Get views view <- .check_and_get_views(object, view) # Get factor names factors <- .check_and_get_factors(object, factors) # Collect expectations W <- get_weights(object, factors = factors, views = view, as.data.frame=TRUE) # Scale values by weight with highest (absolute) value if (scale) W$value <- W$value/max(abs(W$value)) # Store sign W <- W[W$value!=0,] W$sign <- ifelse(W$value>0, "+", "-") # Select subset of only positive or negative weights if (sign=="positive") { W <- W[W$value>0,] } else if (sign=="negative") { W <- W[W$value<0,] } # Absolute value if (abs) W$value <- abs(W$value) # Extract relevant features W <- W[with(W, order(-abs(value))), ] # Sort according to weights for each factor W <- as.data.frame(top_n(group_by(W, factor), n = nfeatures, wt = value)) # # Make features names unique W$feature_id <- W$feature if ((length(unique(W$view)) > 1) && (nfeatures > 0) && (any(duplicated(W[W$factor == factors[1],]$feature_id)))) { message("Duplicated feature names across views, we will add the view name as a prefix") W$feature_id <- paste(W$view, W$feature, sep="_") } # In order to re-order features across multiple factors, # make them unique for different factors W$feature_id <- factor(W$feature_id, levels = rev(unique(W$feature_id))) p <- ggplot(W, aes(x=.data$feature_id, y=.data$value)) + geom_point(size=2) + geom_segment(aes(xend=.data$feature_id), linewidth=0.75, yend=0) + scale_colour_gradient(low="grey", high="black") + coord_flip() + labs(y="Weight") + # Theme theme_bw() + theme( axis.title.x = element_text(color='black'), axis.title.y = element_blank(), axis.text.y = element_text(size=rel(1.1), hjust=1, color='black'), axis.text.x = element_text(color='black'), axis.ticks.y = element_blank(), axis.ticks.x = element_line(), legend.position = 'top', legend.title = element_blank(), legend.text = element_text(color="black"), legend.key = element_rect(fill='transparent'), # facets strip.text = element_text(size=rel(1.2)), panel.background = element_blank(), panel.spacing = unit(1,"lines"), # gridlines panel.grid.major.y = element_blank(), ) + facet_wrap(~factor, nrow=1, scales="free") if (sign=="negative") p <- p + scale_x_discrete(position = "top") # If absolute values are used, add the corresponding signs to the plot if (abs) { p <- p + ylim(0,max(W$value)+0.1) + geom_text(label=W$sign,y=max(W$value)+0.1, size=10) } return(p) } # (Hidden) function to define the shape .set_shapeby_features <- function(object, shape_by, view) { # Option 1: no color if (is.null(shape_by)) { shape_by <- rep("1",sum(object@dimensions[["D"]][view])) # Option 2: input is a data.frame with columns (feature,color) } else if (is(shape_by,"data.frame")) { stopifnot(all(colnames(shape_by) %in% c("feature","color"))) stopifnot(all(unique(shape_by$feature) %in% features_names(object)[[view]])) # Option 3: by a feature_metadata column } else if ((length(shape_by)==1) && is.character(shape_by) & (shape_by %in% colnames(features_metadata(object)))) { tmp <- features_metadata(object) shape_by <- tmp[tmp$view==view,shape_by] # Option 4: shape_by is a vector of length D } else if (length(shape_by) > 1) { stopifnot(length(shape_by) == object@dimensions[["D"]][[view]]) # Option not recognised } else { stop("'shape_by' was specified but it was not recognised, please read the documentation") } # Create data.frame with columns (feature,shape) if (!is(shape_by,"data.frame")) { df = data.frame( feature = features_names(object)[[view]], shape_by = shape_by, view = view ) } return(df) } # (Hidden) function to define the color .set_colorby_features <- function(object, color_by, view) { # Option 1: no color if (is.null(color_by)) { color_by <- rep("1",sum(object@dimensions[["D"]][view])) # Option 2: input is a data.frame with columns (feature,color) } else if (is(color_by,"data.frame")) { stopifnot(all(colnames(color_by) %in% c("feature","color"))) stopifnot(all(unique(color_by$feature) %in% features_names(object)[[view]])) # Option 3: by a feature_metadata column } else if ((length(color_by)==1) && is.character(color_by) && (color_by %in% colnames(features_metadata(object)))) { tmp <- features_metadata(object) color_by <- tmp[tmp$view==view,color_by] # Option 4: color_by is a vector of length D } else if (length(color_by) > 1) { stopifnot(length(color_by) == object@dimensions[["D"]][[view]]) # Option not recognised } else { stop("'color_by' was specified but it was not recognised, please read the documentation") } # Create data.frame with columns (feature,color) if (!is(color_by,"data.frame")) { df = data.frame( feature = features_names(object)[[view]], color_by = color_by, view = view ) } return(df) } ================================================ FILE: R/predict.R ================================================ ###################################### ## Functions to perform predictions ## ###################################### #' @title Do predictions using a fitted MOFA #' @name predict #' @description This function uses the latent factors and the weights to do data predictions. #' @param object a \code{\link{MOFA}} object. #' @param views character vector with the view name(s), or numeric vector with the view index(es). #' Default is "all". #' @param groups character vector with the group name(s), or numeric vector with the group index(es). #' Default is "all". #' @param factors character vector with the factor name(s) or numeric vector with the factor index(es). #' Default is "all". #' @param add_intercept add feature intercepts to the prediction (default is TRUE). #' @details MOFA generates a denoised and condensed low-dimensional representation of the data that captures the main sources of heterogeneity of the data. #' This representation can be used to reconstruct a denoised representation of the data, simply using the equation \code{Y = WX}. #' For more mathematical details read the supplementary methods of the manuscript. #' @return Returns a list with the data reconstructed by the model predictions. #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Predict observations for all data modalities #' predictions <- predict(model) predict <- function(object, views = "all", groups = "all", factors = "all", add_intercept = TRUE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Get views views <- .check_and_get_views(object, views, non_gaussian=FALSE) groups <- .check_and_get_groups(object, groups) # Sanity check if (any(views %in% names(which(object@model_options$likelihoods!="gaussian")))) stop("predict does not work for non-gaussian modalities") # Get factors if (paste0(factors, collapse="") == "all") { factors <- factors_names(object) } else if (is.numeric(factors)) { factors <- factors_names(object)[factors] } else { stopifnot(all(factors %in% factors_names(object))) } # Collect weights W <- get_weights(object, views = views, factors = factors) # Collect factors Z <- get_factors(object, groups = groups, factors = factors) Z[is.na(Z)] <- 0 # set missing values in Z to 0 to exclude from imputations # Do predictions predicted_data <- lapply(views, function(m) { lapply(groups, function(g) { # calculate terms based on linear model pred <- t(Z[[g]] %*% t(W[[m]])) # add feature-wise intercepts (i think this does not work for non-gaussian likelihood, needs some verification) tryCatch( { if (add_intercept & length(object@intercepts[[1]])>0) { intercepts <- object@intercepts[[m]][[g]] intercepts[is.na(intercepts)] <- 0 pred <- pred + object@intercepts[[m]][[g]] } }, error = function(e) { NULL }) return(pred) }) }) predicted_data <- .name_views_and_groups(predicted_data, views, groups) return(predicted_data) } ================================================ FILE: R/prepare_mofa.R ================================================ ####################################################### ## Functions to prepare a MOFA object for training ## ####################################################### #' @title Prepare a MOFA for training #' @name prepare_mofa #' @description Function to prepare a \code{\link{MOFA}} object for training. #' It requires defining data, model and training options. #' @param object an untrained \code{\link{MOFA}} #' @param data_options list of data_options (see \code{\link{get_default_data_options}} details). #' If NULL, default options are used. #' @param model_options list of model options (see \code{\link{get_default_model_options}} for details). #' If NULL, default options are used. #' @param training_options list of training options (see \code{\link{get_default_training_options}} for details). #' If NULL, default options are used. #' @param stochastic_options list of options for stochastic variational inference (see \code{\link{get_default_stochastic_options}} for details). #' If NULL, default options are used. #' @param mefisto_options list of options for mefisto (see \code{\link{get_default_mefisto_options}} for details). #' If NULL, default options are used. #' @return Returns an untrained \code{\link{MOFA}} with specified options filled in the corresponding slots #' @details This function is called after creating a \code{\link{MOFA}} object (using \code{\link{create_mofa}}) #' and before starting the training (using \code{\link{run_mofa}}). Here, we can specify different options for #' the data (data_options), the model (model_options) and the training (training_options, stochastic_options). Take a look at the #' individual default options for an overview using the get_default_XXX_options functions above. #' @export #' @examples #' # Using an existing simulated data with two groups and two views #' file <- system.file("extdata", "test_data.RData", package = "MOFA2") #' #' # Load data dt (in data.frame format) #' load(file) #' #' # Create the MOFA object #' MOFAmodel <- create_mofa(dt) #' #' # Prepare MOFA object using default options #' MOFAmodel <- prepare_mofa(MOFAmodel) #' #' # Prepare MOFA object changing some of the default model options values #' model_opts <- get_default_model_options(MOFAmodel) #' model_opts$num_factors <- 10 #' MOFAmodel <- prepare_mofa(MOFAmodel, model_options = model_opts) prepare_mofa <- function(object, data_options = NULL, model_options = NULL, training_options = NULL, stochastic_options = NULL, mefisto_options = NULL) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (any(object@dimensions$N<10) & !(.hasSlot(object, "covariates") && length(object@covariates)>=1)) warning("Some group(s) have less than 10 samples, MOFA will have little power to learn meaningful factors for these group(s)...") if (any(object@dimensions$D<15)) warning("Some view(s) have less than 15 features, MOFA will have little power to to learn meaningful factors for these view(s)....") if (any(object@dimensions$D>1e4)) warning("Some view(s) have a lot of features, it is recommended to perform a more stringent feature selection before creating the MOFA object....") if (length(object@samples_metadata)>0) { stopifnot(c("sample","group") %in% colnames(object@samples_metadata)) } else { stop("object@samples_metadata not found") } if (length(object@features_metadata)>0) { stopifnot(c("feature","view") %in% colnames(object@features_metadata)) } else { stop("object@features_metadata not found") } if (object@dimensions$G>1) { message("\n# Multi-group mode requested.") message("\nThis is an advanced option, if this is the first time that you are running MOFA, we suggest that you try do some exploration first without specifying groups. Two important remarks:") message("\n - The aim of the multi-group framework is to identify the sources of variability *within* the groups. If your aim is to find a factor that 'separates' the groups, you DO NOT want to use the multi-group framework. Please see the FAQ on the MOFA2 webpage.") message("\n - It is important to account for the group effect before selecting highly variable features (HVFs). We suggest that either you calculate HVFs per group and then take the union, or regress out the group effect before HVF selection") } # Get data options message("Checking data options...") if (is.null(data_options)) { message("No data options specified, using default...") object@data_options <- get_default_data_options(object) } else { if (!is(data_options,"list") || !setequal(names(data_options), names(get_default_data_options(object)) )) stop("data_options are incorrectly specified, please read the documentation in get_default_data_options") object@data_options <- data_options } # if (any(nchar(unlist(samples_names(object)))>50)) # warning("Due to string size limitations in the HDF5 format, sample names will be trimmed to less than 50 characters") # Get training options if (is.null(training_options)) { message("No training options specified, using default...") object@training_options <- get_default_training_options(object) } else { message("Checking training options...") if (!is(training_options,"list") || !setequal(names(training_options), names(get_default_training_options(object)) )) stop("training_options are incorrectly specified, please read the documentation in get_default_training_options") object@training_options <- training_options if (object@training_options$maxiter<=100) warning("Maximum number of iterations is very small\n") if (object@training_options$startELBO<1) object@training_options$startELBO <- 1 if (object@training_options$freqELBO<1) object@training_options$freqELBO <- 1 if (!object@training_options$convergence_mode %in% c("fast","medium","slow")) stop("Convergence mode has to be either 'fast', 'medium', or 'slow'") } # Get stochastic options if (is.null(stochastic_options)) { object@stochastic_options <- list() } else { if (isFALSE(object@training_options[["stochastic"]])) stop("stochastic_options have been provided but training_opts$stochastic is FALSE. If you want to use stochastic inference you have to set training_opts$stochastic = TRUE") # object@training_options$stochastic <- TRUE } if (object@training_options$stochastic) { message("Stochastic inference activated. Note that this is only recommended if you have a very large sample size (>1e4) and access to a GPU") if (is.null(stochastic_options)) { message("No stochastic options specified, using default...") object@stochastic_options <- get_default_stochastic_options(object) } else { object@training_options$stochastic <- TRUE message("Checking stochastic inference options...") if (!is(stochastic_options,"list") || !setequal(names(stochastic_options), names(get_default_stochastic_options(object)) )) stop("stochastic_options are incorrectly specified, please read the documentation in get_default_stochastic_options") if (!stochastic_options$batch_size %in% c(0.05,0.10,0.15,0.20,0.25,0.50)) stop("Batch size has to be one of the following numeric values: 0.05, 0.10, 0.15, 0.20, 0.25, 0.50") if (stochastic_options$batch_size==1) warning("A batch size equal to 1 is equivalent to non-stochastic inference.") if (stochastic_options$learning_rate<=0 || stochastic_options$learning_rate>1) stop("The learning rate has to be a value between 0 and 1") if (stochastic_options$forgetting_rate<=0 || stochastic_options$forgetting_rate>1) stop("The forgetting rate has to be a value between 0 and 1") if (sum(object@dimensions$N)<1e4) warning("Stochastic inference is only recommended when you have a lot of samples (at least N>10,000))\n") object@stochastic_options <- stochastic_options } } # Get model options if (is.null(model_options)) { message("No model options specified, using default...") object@model_options <- get_default_model_options(object) } else { message("Checking model options...") if (!is(model_options,"list") || !setequal(names(model_options), names(get_default_model_options(object)) )) stop("model_options are incorrectly specified, please read the documentation in get_default_model_options") object@model_options <- model_options } if (object@model_options$num_factors > 50) warning("The number of factors is very large, training will be slow...") # if (!object@model_options$ard_weights) warning("model_options$ard_weights should always be set to TRUE") if (sum(object@dimensions$N) < 4 * object@model_options$num_factors) { warning(sprintf("The total number of samples is very small for learning %s factors. Try to reduce the number of factors to obtain meaningful results. It should not exceed ~%s.", object@model_options$num_factors, floor(min(object@dimensions$N/4)))) } # Get mefisto covariates options if (.hasSlot(object, "covariates") && length(object@covariates)>=1) { if (is.null(mefisto_options)) { message("Covariates provided but no mefisto options specified, using default...") object@mefisto_options <- get_default_mefisto_options(object) } else { message("Checking inference options for mefisto covariates...") # message("mefisto covariates have been provided as prior information.") if (!is(mefisto_options,"list") || !setequal(names(mefisto_options), names(get_default_mefisto_options(object)) )) stop("mefisto_options are incorrectly specified, please read the documentation in get_default_mefisto_options") if (isTRUE(mefisto_options$sparseGP)) { if (object@dimensions[["N"]] < 1000) warning("Warning: sparseGPs should only be used when having a large sample size (>1e3)") if (isTRUE(mefisto_options$warping)) stop("Warping is not implemented in conjunction with sparseGPs") } # Check warping options if (isTRUE(mefisto_options$warping)) { stopifnot(object@dimensions[['G']] > 1) # check that multi-group is TRUE if (!is.null(mefisto_options$warping_ref)) { stopifnot(length(mefisto_options$warping_ref)==1) stopifnot(is.character(mefisto_options$warping_ref)) stopifnot(mefisto_options$warping_ref %in% groups_names(object)) } if (!is.null(mefisto_options$warping_groups)) { # check that warping groups are a partition of groups groups_ok <- sapply(unique(object@samples_metadata$group), function(g) { length(unique(mefisto_options$warping_groups[object@samples_metadata$group == g])) == 1 }) if (!all(groups_ok)) stop("Warping group assignment needs to be unique within each indiviudal group.") } } # Disable spike-slab on the factors if(isTRUE(model_options$spikeslab_factors)) { print("Spike-and-Slab sparsity prior on the factors is not available when using MEFISTO, setting to False") model_options$spikeslab_factors <- FALSE } # Disable stochastic inference if (isTRUE(model_options$stochastic)) { print("Stochastic inference is not available when using MEFISTO, setting to False") model_options$stochastic <- FALSE object@stochastic_options <- list() } # TO-DO: CHECKS ON MODEL_GROUPS object@mefisto_options <- mefisto_options } } else { object@mefisto_options <- list() } # Center the data # message("Centering the features (per group, this is a mandatory requirement)...") # for (m in views_names(object)) { # if (model_options$likelihoods[[m]] == "gaussian") { # for (g in groups_names(object)) { # object@data[[m]][[g]] <- scale(object@data[[m]][[g]], center=T, scale=F) # } # } # } # Transform sparse matrices into dense ones # See https://github.com/rstudio/reticulate/issues/72 for (m in views_names(object)) { for (g in groups_names(object)) { if (is(object@data[[m]][[g]], "dgCMatrix") || is(object@data[[m]][[g]], "dgTMatrix")) object@data[[m]][[g]] <- as(object@data[[m]][[g]], "matrix") } } return(object) } #' @title Get default training options #' @name get_default_training_options #' @description Function to obtain the default training options. #' @param object an untrained \code{\link{MOFA}} #' @details This function provides a default set of training options that can be modified and passed to the \code{\link{MOFA}} object #' in the \code{\link{prepare_mofa}} step (see example), i.e. after creating a \code{\link{MOFA}} object #' (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}) #' The training options are the following: \cr #' \itemize{ #' \item{\strong{maxiter}: numeric value indicating the maximum number of iterations. #' Default is 1000. Convergence is assessed using the ELBO statistic.} #' \item{\strong{drop_factor_threshold}: numeric indicating the threshold on fraction of variance explained to consider a factor inactive and drop it from the model. #' For example, a value of 0.01 implies that factors explaining less than 1\% of variance (in each view) will be dropped. Default is -1 (no dropping of factors)} #' \item{\strong{convergence_mode}: character indicating the convergence criteria, either "fast", "medium" or "slow", corresponding to 0.0005\%, 0.00005\% or 0.000005\% deltaELBO change. } #' \item{\strong{verbose}: logical indicating whether to generate a verbose output.} #' \item{\strong{startELBO}: integer indicating the first iteration to compute the ELBO (default is 1). } #' \item{\strong{freqELBO}: integer indicating the first iteration to compute the ELBO (default is 1). } #' \item{\strong{stochastic}: logical indicating whether to use stochastic variational inference (only required for very big data sets, default is \code{FALSE}).} #' \item{\strong{gpu_mode}: logical indicating whether to use GPUs (see details).} #' \item{\strong{gpu_device}: integer indicating which GPU to use.} #' \item{\strong{seed}: numeric indicating the seed for reproducibility (default is 42).} #' } #' @return Returns a list with default training options #' @importFrom utils modifyList #' @export #' @examples #' # Using an existing simulated data with two groups and two views #' file <- system.file("extdata", "test_data.RData", package = "MOFA2") #' #' # Load data dt (in data.frame format) #' load(file) #' #' # Create the MOFA object #' MOFAmodel <- create_mofa(dt) #' #' # Load default training options #' train_opts <- get_default_training_options(MOFAmodel) #' #' # Edit some of the training options #' train_opts$convergence_mode <- "medium" #' train_opts$startELBO <- 100 #' train_opts$seed <- 42 #' #' # Prepare the MOFA object #' MOFAmodel <- prepare_mofa(MOFAmodel, training_options = train_opts) get_default_training_options <- function(object) { # Get default train options training_options <- list( maxiter = 1000, # (numeric) Maximum number of iterations convergence_mode = 'fast', # (string) Convergence mode based on change in the ELBO ("slow","medium","fast") drop_factor_threshold = -1, # (numeric) Threshold on fraction of variance explained to drop a factor verbose = FALSE, # (logical) Verbosity startELBO = 1, # (numeric) First iteration to compute the ELBO freqELBO = 5, # (numeric) Frequency of ELBO calculation stochastic = FALSE, # (logical) Do stochastic variational inference? gpu_mode = FALSE, # (logical) Use GPU? gpu_device = NULL, # (integer) Which GPU to use? seed = 42, # (numeric) random seed outfile = NULL, # (string) Output file name weight_views = FALSE, # (logical) Weight the ELBO based on the number of features per view? save_interrupted = FALSE # (logical) Save partially trained model when training is interrupted? ) # if training_options already exist, replace the default values but keep the additional ones if (length(object@training_options)>0) training_options <- modifyList(training_options, object@training_options) return(training_options) } #' @title Get default data options #' @name get_default_data_options #' @description Function to obtain the default data options. #' @param object an untrained \code{\link{MOFA}} object #' @details This function provides a default set of data options that can be modified and passed to the \code{\link{MOFA}} object #' in the \code{\link{prepare_mofa}} step (see example), i.e. after creating a \code{\link{MOFA}} object #' (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}) #' The data options are the following: \cr #' \itemize{ #' \item{\strong{scale_views}: logical indicating whether to scale views to have the same unit variance. #' As long as the scale differences between the views is not too high, this is not required. Default is FALSE.} #' \item{\strong{scale_groups}: logical indicating whether to scale groups to have the same unit variance. #' As long as the scale differences between the groups is not too high, this is not required. Default is FALSE.} #' \item{\strong{use_float32}: logical indicating whether use float32 instead of float64 arrays to increase speed and memory usage. Default is FALSE.} #' } #' @return Returns a list with the default data options. #' @importFrom utils modifyList #' @export #' @examples #' # Using an existing simulated data with two groups and two views #' file <- system.file("extdata", "test_data.RData", package = "MOFA2") #' #' # Load data dt (in data.frame format) #' load(file) #' #' # Create the MOFA object #' MOFAmodel <- create_mofa(dt) #' #' # Load default data options #' data_opts <- get_default_data_options(MOFAmodel) #' #' # Edit some of the data options #' data_opts$scale_views <- TRUE #' #' # Prepare the MOFA object #' MOFAmodel <- prepare_mofa(MOFAmodel, data_options = data_opts) get_default_data_options <- function(object) { # Define default data options data_options <- list( scale_views = FALSE, # (logical) Scale views to unit variance? scale_groups = FALSE, # (logical) Scale groups to unit variance? center_groups = TRUE, # (logical) Center groups? use_float32 = TRUE # (logical) Use float32 instead of float64 arrays to increase speed and memory usage ) # Activate float32 arrays for large sample sizes if (sum(object@dimensions$N)>1e5) { message("A lot of samples detected, using float32 arrays instead of float64 arrays to increase speed and memory usage. You can modify this using the `data_options` argument of the `prepare_mofa` function.") data_options$use_float32 <- TRUE } # if data_options already exists, replace the default values but keep the additional ones if (length(object@data_options)>0) data_options <- modifyList(data_options, object@data_options) return(data_options) } #' @title Get default model options #' @name get_default_model_options #' @description Function to obtain the default model options. #' @param object an untrained \code{\link{MOFA}} object #' @details This function provides a default set of model options that can be modified and passed to the \code{\link{MOFA}} object #' in the \code{\link{prepare_mofa}} step (see example), i.e. after creating a \code{\link{MOFA}} object #' (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}) #' The model options are the following: \cr #' \itemize{ #' \item{\strong{likelihoods}: character vector with data likelihoods per view: #' 'gaussian' for continuous data (Default for all views), 'bernoulli' for binary data and 'poisson' for count data.} #' \item{\strong{num_factors}: numeric value indicating the (initial) number of factors. Default is 15.} #' \item{\strong{spikeslab_factors}: logical indicating whether to use spike and slab sparsity on the factors (Default is FALSE)} #' \item{\strong{spikeslab_weights}: logical indicating whether to use spike and slab sparsity on the weights (Default is TRUE)} #' \item{\strong{ard_factors}: logical indicating whether to use ARD sparsity on the factors (Default is TRUE only if using multiple groups)} #' \item{\strong{ard_weights}: logical indicating whether to use ARD sparsity on the weights (Default is TRUE)} #' } #' @return Returns a list with the default model options. #' @importFrom utils modifyList #' @export #' @examples #' # Using an existing simulated data with two groups and two views #' file <- system.file("extdata", "test_data.RData", package = "MOFA2") #' #' # Load data dt (in data.frame format) #' load(file) #' #' # Create the MOFA object #' MOFAmodel <- create_mofa(dt) #' #' # Load default model options #' model_opts <- get_default_model_options(MOFAmodel) #' #' # Edit some of the model options #' model_opts$num_factors <- 10 #' model_opts$spikeslab_weights <- FALSE #' #' # Prepare the MOFA object #' MOFAmodel <- prepare_mofa(MOFAmodel, model_options = model_opts) get_default_model_options <- function(object) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (!.hasSlot(object,"dimensions") | length(object@dimensions) == 0) stop("dimensions of object need to be defined before getting the model options") if (.hasSlot(object,"data")) { if (length(object@data)==0) stop("data slot is empty") } else { stop("data slot not found") } # Guess likelihoods from the data # likelihoods <- .infer_likelihoods(object) likelihoods <- rep(x="gaussian", times=object@dimensions$M) names(likelihoods) <- views_names(object) # Define default model options model_options <- list( likelihoods = likelihoods, # (character vector) likelihood per view [gaussian/bernoulli/poisson] num_factors = 10, # (numeric) initial number of latent factors spikeslab_factors = FALSE, # (logical) Spike and Slab sparsity on the factors spikeslab_weights = FALSE, # (logical) Spike and Slab sparsity on the weights ard_factors = FALSE, # (logical) Group-wise ARD sparsity on the factors ard_weights = TRUE # (logical) View-wise ARD sparsity on the weights ) # (Heuristic) set the number of factors depending on the sample size N <- sum(object@dimensions$N) if (N<=25) { model_options$num_factors <- 5 } else if (N>25 & N<=1e3) { model_options$num_factors <- 15 } else if (N>1e3 & N<=1e4) { model_options$num_factors <- 20 } else if (N>1e4) { model_options$num_factors <- 25 } # Group-wise ARD sparsity on the factors only if there are multiple groups if (object@dimensions$G>1) model_options$ard_factors <- TRUE # if model_options already exist, replace the default values but keep the additional ones if (length(object@model_options)>0) model_options <- modifyList(model_options, object@model_options) return(model_options) } #' @title Get default stochastic options #' @name get_default_stochastic_options #' @description Function to obtain the default options for stochastic variational inference. #' @param object an untrained \code{\link{MOFA}} #' @details This function provides a default set of stochastic inference options that can be modified and passed to the \code{\link{MOFA}} object #' in the \code{\link{prepare_mofa}} step), i.e. after creating a \code{\link{MOFA}} object #' (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}) #' These options are only relevant when activating stochastic inference in training_options (see example). #' The stochastic inference options are the following: \cr #' \itemize{ #' \item{\strong{batch_size}: numeric value indicating the batch size (as a fraction)}. #' Default is 0.5 (half of the data set). #' \item{\strong{learning_rate}: numeric value indicating the learning rate. } #' Default is 1.0 #' \item{\strong{forgetting_rate}: numeric indicating the forgetting rate.} #' Default is 0.5 #' \item{\strong{start_stochastic}: integer indicating the first iteration to start stochastic inference} #' Default is 1 #' } #' @return Returns a list with default options #' @importFrom utils modifyList #' @export #' @examples #' # Using an existing simulated data with two groups and two views #' file <- system.file("extdata", "test_data.RData", package = "MOFA2") #' #' # Load data dt (in data.frame format) #' load(file) #' #' # Create the MOFA object #' MOFAmodel <- create_mofa(dt) #' #' # activate stochastic inference in training options #' train_opts <- get_default_training_options(MOFAmodel) #' train_opts$stochastic <- TRUE #' #' # Load default stochastic options #' stochastic_opts <- get_default_stochastic_options(MOFAmodel) #' #' # Edit some of the stochastic options #' stochastic_opts$learning_rate <- 0.75 #' stochastic_opts$batch_size <- 0.25 #' #' # Prepare the MOFA object #' MOFAmodel <- prepare_mofa(MOFAmodel, #' training_options = train_opts, #' stochastic_options = stochastic_opts #' ) #' get_default_stochastic_options <- function(object) { # Get default stochastic options stochastic_options <- list( batch_size = 0.5, # Batch size (as a fraction) learning_rate = 1.0, # Starting learning rate forgetting_rate = 0.5, # Forgetting rate start_stochastic = 1 # First iteration to start stochastic inference ) # if stochastic_options already exist, replace the default values but keep the additional ones if (length(object@stochastic_options)>0) stochastic_options <- modifyList(stochastic_options, object@stochastic_options) return(stochastic_options) } ================================================ FILE: R/run_mofa.R ================================================ ####################################### ## Functions to train a MOFA model ## ####################################### #' @title Train a MOFA model #' @name run_mofa #' @description Function to train an untrained \code{\link{MOFA}} object. #' @details This function is called once a MOFA object has been prepared (using \code{\link{prepare_mofa}}) #' In this step the R package calls the \code{mofapy2} Python package, where model training is performed. \cr #' The interface with Python is done with the \code{\link{reticulate}} package. #' If you have several versions of Python installed and R is not detecting the correct one, #' you can change it using \code{reticulate::use_python} when loading the R session. #' Alternatively, you can let us install mofapy2 for you using \code{basilisk} if you set use_basilisk to \code{TRUE} #' @param object an untrained \code{\link{MOFA}} object #' @param save_data logical indicating whether to save the training data in the hdf5 file. #' This is useful for some downstream analysis (mainly functions with the prefix \code{plot_data}), but it can take a lot of disk space. #' @param outfile output file for the model (.hdf5 format). If \code{NULL}, a temporary file is created. #' @param use_basilisk use \code{basilisk} to automatically install a conda environment with mofapy2 and all dependencies? #' If \code{FALSE} (default), you should specify the right python binary when loading R with \code{reticulate::use_python(..., force=TRUE)} #' or the right conda environment with \code{reticulate::use_condaenv(..., force=TRUE)}. #' @return a trained \code{\link{MOFA}} object #' @import reticulate #' @import basilisk #' @export #' @examples #' # Load data (in data.frame format) #' file <- system.file("extdata", "test_data.RData", package = "MOFA2") #' load(file) #' #' # Create the MOFA object #' MOFAmodel <- create_mofa(dt) #' #' # Prepare the MOFA object with default options #' MOFAmodel <- prepare_mofa(MOFAmodel) #' #' # Run the MOFA model #' \dontrun{ MOFAmodel <- run_mofa(MOFAmodel, use_basilisk = TRUE) } run_mofa <- function(object, outfile = NULL, save_data = TRUE, use_basilisk = FALSE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (object@status=="trained") stop("The model is already trained! If you want to retrain, create a new untrained MOFA") if (length(object@model_options)==0 | length(object@training_options)==0) { stop("The model is not prepared for training, you have to run `prepare_mofa` before `run_mofa`") } # If no outfile is provided, store a file in a temporary folder with the respective timestamp if (is.null(outfile) || is.na(outfile) || (outfile == "")) { outfile <- object@training_options$outfile if (is.null(outfile) || is.na(outfile) || (outfile == "")) { outfile <- file.path(tempdir(), paste0("mofa_", format(Sys.time(), format = "%Y%m%d-%H%M%S"), ".hdf5")) warning(paste0("No output filename provided. Using ", outfile, " to store the trained model.\n\n")) } } if (file.exists(outfile)) message(paste0("Warning: Output file ", outfile, " already exists, it will be replaced")) # Connect to mofapy2 using reticulate (default) if (!use_basilisk) { message("Connecting to the mofapy2 python package using reticulate (use_basilisk = FALSE)... Please make sure to manually specify the right python binary when loading R with reticulate::use_python(..., force=TRUE) or the right conda environment with reticulate::use_condaenv(..., force=TRUE) If you prefer to let us automatically install a conda environment with 'mofapy2' installed using the 'basilisk' package, please use the argument 'use_basilisk = TRUE'\n") # Sanity checks have_mofa2 <- py_module_available("mofapy2") if (have_mofa2) { mofa <- import("mofapy2") tryCatch(tmp <- strsplit(mofa$version$`__version__`,"\\.")[[1]], error = function(e) { stop(sprintf("mofapy2 is not detected in the specified python binary, see reticulate::py_config(). Consider setting use_basilisk = TRUE to create a python environment with basilisk (https://bioconductor.org/packages/release/bioc/html/basilisk.html)")) }) v_major_reticulate = tmp[1]; v_minor_reticulate = tmp[2]; v_patch_reticulate = tmp[3] tmp <- strsplit(.mofapy2_version,"\\.")[[1]] v_major_pypi = tmp[1]; v_minor_pypi = tmp[2]; v_patch_pypi = tmp[3] # return error if major or minor versions do not agree if ((v_major_reticulate!=v_major_pypi) | (v_minor_reticulate!=v_minor_pypi)) { warning(sprintf("The latest mofapy2 version is %s, you are using %s. Please upgrade with 'pip install mofapy2'",.mofapy2_version, mofa$version$`__version__`)) warning("Connecting to the latest mofapy2 python package using reticulate (use_basilisk = FALSE)") have_mofa2 <- FALSE } # return warning if patch versions do not agree if (v_patch_reticulate!=v_patch_pypi) { warning(sprintf("The latest mofapy2 version is %s, you are using %s. Please upgrade with 'pip install mofapy2'",.mofapy2_version, mofa$version$`__version__`)) } } if (have_mofa2) { .run_mofa_reticulate(object, outfile, save_data) } else { stop(sprintf("mofapy2_%s is not detected in the specified python binary, see reticulate::py_config(). Consider setting use_basilisk = TRUE to create a python environment with basilisk (https://bioconductor.org/packages/release/bioc/html/basilisk.html)", .mofapy2_version)) # use_basilisk <- TRUE } } # Connect to mofapy2 using basilisk (optional) if (use_basilisk) { message("Connecting to the mofapy2 package using basilisk. Set 'use_basilisk' to FALSE if you prefer to manually set the python binary using 'reticulate'.") proc <- basiliskStart(mofa_env) on.exit(basiliskStop(proc)) tmp <- basiliskRun(proc, function(object, outfile, save_data) { .run_mofa_reticulate(object, outfile, save_data) }, object=object, outfile=outfile, save_data=save_data) } # Load the trained model object <- load_model(outfile) return(object) } .run_mofa_reticulate <- function(object, outfile, save_data) { # sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") if (!requireNamespace("reticulate", quietly = TRUE)) { stop("Package \"reticulate\" is required but is not installed.", call. = FALSE) } # Initiate reticulate mofa <- import("mofapy2") # Call entry point mofa_entrypoint <- mofa$run.entry_point$entry_point() # Set data options mofa_entrypoint$set_data_options( scale_views = object@data_options$scale_views, scale_groups = object@data_options$scale_groups, center_groups = object@data_options$center_groups, use_float32 = object@data_options$use_float32 ) # Set samples metadata if (.hasSlot(object, "samples_metadata")) { mofa_entrypoint$data_opts$samples_metadata <- r_to_py(lapply(object@data_options$groups, function(g) object@samples_metadata[object@samples_metadata$group == g,])) } # Set features metadata if (.hasSlot(object, "features_metadata")) { mofa_entrypoint$data_opts$features_metadata <- r_to_py(unname(lapply(object@data_options$views, function(m) object@features_metadata[object@features_metadata$view == m,]))) } # r_to_py will convert a list with a single name to a string, # hence those are to be wrapped in `list()` maybe_list <- function(xs) { if (length(xs) > 1) { xs } else { list(xs) } } # Set the data mofa_entrypoint$set_data_matrix( data = r_to_py( unname(lapply(object@data, function(x) unname( lapply(x, function(y) r_to_py(t(y)) ))) ) ), likelihoods = unname(object@model_options$likelihoods), views_names = r_to_py(as.list(object@data_options$views)), groups_names = r_to_py(as.list(object@data_options$groups)), samples_names = r_to_py(lapply(unname(lapply(object@data[[1]], colnames)), maybe_list)), features_names = r_to_py(lapply(unname(lapply(object@data, function(x) rownames(x[[1]]))), maybe_list)) ) # Set covariates if (.hasSlot(object, "covariates") && !is.null(object@covariates)) { sample_cov_to_py <- r_to_py(unname(lapply(object@covariates, function(x) unname(r_to_py(t(x)))))) cov_names_2_py <- r_to_py(covariates_names(object)) mofa_entrypoint$set_covariates(sample_cov_to_py, cov_names_2_py) } # Set model options mofa_entrypoint$set_model_options( factors = object@model_options$num_factors, spikeslab_factors = object@model_options$spikeslab_factors, spikeslab_weights = object@model_options$spikeslab_weights, ard_factors = object@model_options$ard_factors, ard_weights = object@model_options$ard_weights ) # Set training options mofa_entrypoint$set_train_options( iter = object@training_options$maxiter, convergence_mode = object@training_options$convergence_mode, dropR2 = object@training_options$drop_factor_threshold, startELBO = object@training_options$startELBO, freqELBO = object@training_options$freqELBO, seed = object@training_options$seed, gpu_mode = object@training_options$gpu_mode, gpu_device = object@training_options$gpu_device, verbose = object@training_options$verbose, outfile = object@training_options$outfile, weight_views = object@training_options$weight_views, save_interrupted = object@training_options$save_interrupted ) # Set stochastic options if (object@training_options$stochastic) { mofa_entrypoint$set_stochastic_options( learning_rate = object@stochastic_options$learning_rate, forgetting_rate = object@stochastic_options$forgetting_rate, batch_size = object@stochastic_options$batch_size, start_stochastic = object@stochastic_options$start_stochastic ) } # Set mefisto options if (.hasSlot(object, "covariates") && !is.null(object@covariates) & length(object@mefisto_options)>1) { warping_ref <- which(groups_names(object) == object@mefisto_options$warping_ref) mofa_entrypoint$set_smooth_options( scale_cov = object@mefisto_options$scale_cov, start_opt = as.integer(object@mefisto_options$start_opt), n_grid = as.integer(object@mefisto_options$n_grid), opt_freq = as.integer(object@mefisto_options$opt_freq), model_groups = object@mefisto_options$model_groups, sparseGP = object@mefisto_options$sparseGP, frac_inducing = object@mefisto_options$frac_inducing, warping = object@mefisto_options$warping, warping_freq = as.integer(object@mefisto_options$warping_freq), warping_ref = warping_ref-1, # 0-based python indexing warping_open_begin = object@mefisto_options$warping_open_begin, warping_open_end = object@mefisto_options$warping_open_end, warping_groups = r_to_py(object@mefisto_options$warping_groups) ) } # Build the model mofa_entrypoint$build() # Run the model mofa_entrypoint$run() # Interpolate if (.hasSlot(object, "covariates") && !is.null(object@covariates) & length(object@mefisto_options)>1) { if(!is.null(object@mefisto_options$new_values)) { new_values <- object@mefisto_options$new_values if(is.null(dim(new_values))){ new_values <- matrix(new_values, nrow = 1) } mofa_entrypoint$predict_factor(new_covariates = r_to_py(t(new_values))) } } # Save the model output as an hdf5 file mofa_entrypoint$save(outfile, save_data = save_data) } ================================================ FILE: R/set_methods.R ================================================ #################################### ## Set and retrieve factors names ## #################################### #' @rdname factors_names #' @param object a \code{\link{MOFA}} object. #' @aliases factors_names,MOFA-method #' @return character vector with the factor names #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' factors_names(model) setMethod("factors_names", signature(object="MOFA"), function(object) { colnames(object@expectations$Z[[1]]) } ) #' @rdname factors_names #' @param value a character vector of factor names #' @import methods #' @export setReplaceMethod("factors_names", signature(object="MOFA", value="vector"), function(object, value) { if (!methods::.hasSlot(object, "expectations") || length(object@expectations) == 0) stop("Before assigning factor names you have to assign expectations") if (methods::.hasSlot(object, "dimensions") && length(object@dimensions) != 0) if (length(value) != object@dimensions["K"]) stop("Length of factor names does not match the dimensionality of the latent variable matrix") # Modify expectations object <- .set_expectations_names(object, entity = 'factors', value) # Modify interpolated values if(length(object@interpolated_Z) > 0) { object@interpolated_Z <- lapply(object@interpolated_Z, function(g) { if(!is.null(g$mean)) { rownames(g$mean) <- value } if(!is.null(g$variance)) { rownames(g$variance) <- value } return(g) } ) } # Modify cache if ((methods::.hasSlot(object, "cache")) && ("variance_explained" %in% names(object@cache))) { for (i in seq_len(length(object@cache$variance_explained$r2_per_factor))) { rownames(object@cache$variance_explained$r2_per_factor[[i]]) <- value } } # Modify training stats per factor if (!is.null(object@training_stats$structural_sig)) { rownames(object@training_stats$structural_sig) <- value } if (!is.null(object@training_stats$length_scales)) { rownames(object@training_stats$length_scales) <- value } if (!is.null(object@training_stats$scales)) { rownames(object@training_stats$scales) <- value } object }) #################################### ## Set and retrieve covariate names ## #################################### #' @rdname covariates_names #' @param object a \code{\link{MOFA}} object. #' @aliases covariates,MOFA-method #' @return character vector with the covariate names #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") #' model <- load_model(file) #' covariates_names(model) setMethod("covariates_names", signature(object="MOFA"), function(object) { if(!.hasSlot(object, "covariates") || is.null(object@covariates)) stop("No covariates present in the given MOFA object.") rownames(object@covariates[[1]]) } ) #' @rdname covariates_names #' @param value a character vector of covariate names #' @import methods #' @importFrom dplyr left_join #' @export setReplaceMethod("covariates_names", signature(object="MOFA", value="vector"), function(object, value) { if(!.hasSlot(object, "covariates") || is.null(object@covariates)) stop("No covariates present in the given MOFA object.") if (methods::.hasSlot(object, "dimensions") && length(object@dimensions) != 0) if (length(value) != object@dimensions["C"]) stop("Length of covariate names does not match the dimensionality of the covariate matrix") # Modify covariate names old_names <- rownames(object@covariates[[1]]) for(c in seq_along(object@covariates)) { rownames(object@covariates[[c]]) <- value if(!is.null(object@covariates_warped)) rownames(object@covariates_warped[[c]]) <- value } # Modify meta.data if (methods::.hasSlot(object, "samples_metadata")) { if(!is.null(old_names)) { if(! all(old_names %in% colnames(object@samples_metadata))) stop("Mismatch of covariate names in sample meta data and covariate slot") object@samples_metadata <- object@samples_metadata[,-old_names] } df <- as.data.frame(Reduce(rbind, unname(lapply(object@covariates,t)))) colnames(df) <- value df$sample <- rownames(df) object@samples_metadata <- dplyr::left_join(object@samples_metadata, df, by = "sample", suffix = c("", "_scaled")) if(!is.null(object@covariates_warped)) { df <- as.data.frame(Reduce(rbind, unname(lapply(object@covariates_warped,t)))) colnames(df) <- value df$sample <- rownames(df) object@samples_metadata <- dplyr::left_join(object@samples_metadata, df, by = "sample", suffix = c("", "_warped")) } } object }) #################################### ## Set and retrieve samples names ## #################################### #' @rdname samples_names #' @param object a \code{\link{MOFA}} object. #' @aliases samples_names,MOFA-method #' @return list of character vectors with the sample names for each group #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' samples_names(model) setMethod("samples_names", signature(object="MOFA"), function(object) { # When the model is not trained, the samples slot is not initialized yet if (!("samples_metadata" %in% slotNames(object)) || (length(samples_metadata(object)) == 0)) { return(list()) } # The default case when samples are initialized (trained model) samples_list <- lapply(object@data_options$groups, function(g) { with(object@samples_metadata, object@samples_metadata[group == g, "sample"]) }) names(samples_list) <- object@data_options$groups return(samples_list) }) #' @rdname samples_names #' @param value list of character vectors with the sample names for every group #' @import methods #' @export setReplaceMethod("samples_names", signature(object="MOFA", value="list"), function(object, value) { if (!methods::.hasSlot(object, "data") || length(object@data) == 0 || length(object@data[[1]]) == 0) stop("Before assigning sample names you have to assign the training data") if (!methods::.hasSlot(object, "expectations") || length(object@expectations) == 0) stop("Before assigning sample names you have to assign the expectations") if (methods::.hasSlot(object, "dimensions") && length(object@dimensions) != 0) if (!all(sapply(value, length) == object@dimensions[["N"]])) stop("Length of sample names does not match the dimensionality of the model") if (!all(sapply(value, length) == sapply(object@data[[1]], ncol))) stop("sample names do not match the dimensionality of the data (columns)") value_groups <- rep(names(value), lengths(value)) # Modify sample names in the sample metadata object@samples_metadata$sample <- unlist(value, use.names = FALSE) object@samples_metadata$group <- as.factor( value_groups ) if (is(object@samples_metadata, "list")) { object@samples_metadata <- data.frame(object@samples_metadata, stringsAsFactors = FALSE) } # Add samples names to the expectations object <- .set_expectations_names(object, entity = 'samples', value) # Add samples names to the data if (length(object@data)>0) object <- .set_data_names(object, entity = 'samples', value) # Add sample names to covariates if (.hasSlot(object, "covariates") && !is.null(object@covariates)) { for (m in seq_along(object@covariates)) colnames(object@covariates[[m]]) <- value[[m]] } if (.hasSlot(object, "covariates_warped") && !is.null(object@covariates_warped)) { for (m in seq_along(object@covariates_warped)) colnames(object@covariates_warped[[m]]) <- value[[m]] } # Add samples names to the imputed data if (length(object@imputed_data)>0) object <- .set_imputed_data_names(object, entity = 'samples', value) object }) ###################################### ## Set and retrieve sample metadata ## ###################################### #' @rdname samples_metadata #' @param object a \code{\link{MOFA}} object. #' @return a data frame with sample metadata #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' samples_metadata(model) setMethod("samples_metadata", signature(object="MOFA"), function(object) { object@samples_metadata }) #' @rdname samples_metadata #' @param value data frame with sample metadata, it must at least contain the columns \code{sample} and \code{group}. #' The order of the rows must match the order of \code{samples_names(object)} #' @import methods #' @export setReplaceMethod("samples_metadata", signature(object="MOFA", value="data.frame"), function(object, value) { if (!methods::.hasSlot(object, "data") || length(object@data) == 0 || length(object@data[[1]]) == 0) stop("Before assigning samples metadata you have to assign the input data") if (methods::.hasSlot(object, "dimensions") && length(object@dimensions) != 0) if (nrow(value) != sum(object@dimensions[["N"]])) stop("Number of rows in samples metadata does not match the dimensionality of the model") if (nrow(value) != sum(sapply(object@data[[1]], ncol))) stop("sample names do not match the dimensionality of the data (columns)") if (!("sample" %in% colnames(value))) stop("Metadata has to contain the column 'sample'") if (any(sort(value$sample) != sort(unname(unlist(samples_names(object)))) )) stop("Samples names in the model (see `samples(MOFAobject)`) and in the metadata do not match") if (!("group" %in% colnames(value))) { if (length(unique(object@data_options$groups))==1) { value$group <- groups_names(object) } else { stop("Metadata has to contain the column 'group'") } } if (any(sort(unique(as.character(value$group))) != sort(groups_names(object)))) stop("Groups names in the model (see `groups(MOFAobject)`) and in the metadata do not match") # Make sure that the order of samples metadata match the order of samples # samples <- unname(unlist(samples_names(object))) samples <- unname(unlist(lapply(object@data[[1]],colnames))) value <- value[match(samples, value$sample),] object@samples_metadata <- as.data.frame(value) object }) ##################################### ## Set and retrieve features names ## ##################################### #' @rdname features_names #' @param object a \code{\link{MOFA}} object. #' @aliases features_names,MOFA-method #' @return list of character vectors with the feature names for each view #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' features_names(model) setMethod("features_names", signature(object="MOFA"), function(object) { # When the model is not trained, the features slot is not initialized yet if (!("features_metadata" %in% slotNames(object)) || (length(object@features_metadata) == 0)) { return(list()) } # The default case when features are initialized (trained model) features_list <- lapply(object@data_options$views, function(m) { with(object@features_metadata, object@features_metadata[view == m, "feature"]) }) names(features_list) <- object@data_options$views return(features_list) }) #' @rdname features_names #' @param value list of character vectors with the feature names for every view #' @import methods #' @export setReplaceMethod("features_names", signature(object="MOFA", value="list"), function(object, value) { if (!methods::.hasSlot(object, "data") || length(object@data) == 0) stop("Before assigning feature names you have to assign the training data") if (!methods::.hasSlot(object, "expectations") || length(object@expectations) == 0) stop("Before assigning feature names you have to assign the expectations") if (methods::.hasSlot(object, "dimensions") || length(object@dimensions) == 0) if (!all(sapply(value, length) == object@dimensions[["D"]])) stop("Length of feature names does not match the dimensionality of the model") if (!all(sapply(value, length) == sapply(object@data, function(e) nrow(e[[1]])))) stop("Feature names do not match the dimensionality of the data (rows)") value_groups <- rep(names(value), lengths(value)) object@features_metadata$feature <- unlist(value, use.names = FALSE) object@features_metadata$view <- value_groups if (is(object@features_metadata, "list")) { object@features_metadata <- data.frame(object@features_metadata, stringsAsFactors = FALSE) } # Add features names to the expectations matrices object <- .set_expectations_names(object, entity = 'features', value) # Add features names to the data if (length(object@data)>0) object <- .set_data_names(object, entity = 'features', value) # Add samples names to the imputed data if (length(object@imputed_data)>0) object <- .set_imputed_data_names(object, entity = 'features', value) object }) ####################################### ## Set and retrieve feature metadata ## ####################################### #' @rdname features_metadata #' @param object a \code{\link{MOFA}} object. #' @return a data frame with sample metadata #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' features_metadata(model) setMethod("features_metadata", signature(object="MOFA"), function(object) { object@features_metadata }) #' @rdname features_metadata #' @param value data frame with feature information, it at least must contain the columns \code{feature} and \code{view} #' @import methods #' @export setReplaceMethod("features_metadata", signature(object="MOFA", value="data.frame"), function(object, value) { if (!methods::.hasSlot(object, "data") || length(object@data) == 0 || length(object@data[[1]]) == 0) stop("Before assigning features metadata you have to assign the training data") # if (!methods::.hasSlot(object, "expectations") || length(object@expectations) == 0) # stop("Before assigning features metadata you have to assign the expectations") if (methods::.hasSlot(object, "dimensions") && length(object@dimensions) != 0) if (nrow(value) != sum(object@dimensions[["D"]])) stop("Number of rows in features metadata does not match the dimensionality of the model") if (nrow(value) != sum(sapply(object@data, function(e) nrow(e[[1]])))) stop("Features names do not match the dimensionality of the data (rows)") if (!("feature" %in% colnames(value))) stop("Metadata has to contain the column feature") if (!("view" %in% colnames(value))) stop("Metadata has to contain the column view") if (colnames(value)[1] != "feature") message("Note that feature is currently not the first column of the features metadata.") object@features_metadata <- value object }) ################################## ## Set and retrieve views names ## ################################## #' @rdname views_names #' @param object a \code{\link{MOFA}} object. #' @return character vector with the names for each view #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' views_names(model) #' views_names(model) <- c("viewA", "viewB") setMethod("views_names", signature(object="MOFA"), function(object) { object@data_options$views }) #' @rdname views_names #' @param value character vector with the names for each view #' @import methods #' @export setMethod("views_names<-", signature(object="MOFA", value="character"), function(object, value) { # if (!methods::.hasSlot(object, "data") || length(object@data) == 0) # stop("Before assigning view names you have to assign the training data") if (methods::.hasSlot(object, "dimensions") && length(object@dimensions) != 0) if (length(value) != object@dimensions[["M"]]) stop("Length of view names does not match the dimensionality of the model") # if (length(value) != length(object@data)) # stop("View names do not match the number of views in the training data") # Define types of nodes nodes_types <- .get_nodes_types() # Set view names in data options old_views <- object@data_options$views object@data_options$views <- value # Set view names in model options if (length(object@model_options$likelihoods)>0) names(object@model_options$likelihoods) <- value # Set view names in features_metadata if (!is.null(object@features_metadata) && (length(object@features_metadata) != 0)) { # object@features_metadata$view <- as.character(object@features_metadata$view) for (i in seq_len(object@dimensions[["M"]])) { old_name <- old_views[i] new_name <- value[i] object@features_metadata[object@features_metadata$view == old_name, "view"] <- new_name } } # Set view names in cache if (!is.null(object@cache$variance_explained)) { for (i in names(object@cache$variance_explained$r2_total)) { names(object@cache$variance_explained$r2_total[[i]]) <- value } for (i in names(object@cache$variance_explained$r2_per_factor)) { colnames(object@cache$variance_explained$r2_per_factor[[i]]) <- value } } # Set view names in expectations for (node in names(object@expectations)) { if (node %in% nodes_types$multiview_nodes || node %in% nodes_types$twodim_nodes) { if (is(object@expectations[[node]], "list") && length(object@expectations[[node]]) == object@dimensions["M"]) { names(object@expectations[[node]]) <- value } } } # Set view names in the training data if (length(object@data)>0) names(object@data) <- value # Set view names in the intercepts if (length(object@intercepts)>0) names(object@intercepts) <- value # Set view names in the imputed data if (length(object@imputed_data)>0) names(object@imputed_data) <- value # Set view names in the dimensionalities names(object@dimensions$D) <- value return(object) }) ################################### ## Set and retrieve groups names ## ################################### #' @rdname groups_names #' @param object a \code{\link{MOFA}} object. #' @return character vector with the names for each sample group #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' groups_names(model) #' groups_names(model) <- c("my_group") setMethod("groups_names", signature(object="MOFA"), function(object) { object@data_options$groups }) #' @rdname groups_names #' @param value character vector with the names for each group #' @import methods #' @export setMethod("groups_names<-", signature(object="MOFA", value="character"), function(object, value) { # if (!methods::.hasSlot(object, "data") || length(object@data) == 0) # stop("Before assigning group names you have to assign the training data") if (methods::.hasSlot(object, "dimensions") && length(object@dimensions) != 0) if(length(value) != object@dimensions[["G"]]) stop("Length of group names does not match the dimensionality of the model") # if (length(value) != length(object@data[[1]])) # stop("Group names do not match the number of groups in the training data") # Define types of nodes nodes_types <- .get_nodes_types() # Set sample group names in data options old_groups <- object@data_options$groups object@data_options$groups <- value # Set sample group names in samples_metadata if (!is.null(object@samples_metadata) && (length(object@samples_metadata) != 0)) { object@samples_metadata$group <- as.character(object@samples_metadata$group) for (i in seq_len(object@dimensions[["G"]])) { old_name <- old_groups[i] new_name <- value[i] object@samples_metadata[object@samples_metadata$group == old_name, "group"] <- new_name } object@samples_metadata$group <- factor(object@samples_metadata$group, levels=value) } # Set sample group names in cache if (!is.null(object@cache$variance_explained)) { names(object@cache$variance_explained$r2_total) <- value names(object@cache$variance_explained$r2_per_factor) <- value } # Set sample group names in expectations for (node in nodes_types$multigroup_nodes) { if (node %in% names(object@expectations)) { if (is(object@expectations[[node]], "list") && length(object@expectations[[node]])==object@dimensions["G"]) { names(object@expectations[[node]]) <- value } } } for (node in nodes_types$twodim_nodes) { if (node %in% names(object@expectations)) { for (m in seq_len(length(object@expectations[[node]]))) { if (is(object@expectations[[node]][[m]], "list") && length(object@expectations[[node]][[m]])==object@dimensions["G"]) { names(object@expectations[[node]][[m]]) <- value } } } } # Set sample group names in data if (length(object@data)>0) { for (m in names(object@data)) names(object@data[[m]]) <- value } # Set sample group names in covariates if (.hasSlot(object, "covariates") && !is.null(object@covariates)) { names(object@covariates) <- value } # Set sample group names in the intercepts if (length(object@intercepts)>0) { for (m in names(object@intercepts)) { if (length(object@intercepts[[m]])>0) names(object@intercepts[[m]]) <- value } } # Set sample group names in imputed data if (length(object@imputed_data)>0) { for (m in names(object@imputed_data)) { if (length(object@imputed_data[[m]])>0) names(object@imputed_data[[m]]) <- value } } # Set sample group names in dimensionalities stopifnot(length(object@dimensions$N)==length(value)) names(object@dimensions$N) <- value return(object) }) # (Hidden) General function to set dimension names for the expectations # Entity is features, samples, or factors .set_expectations_names <- function(object, entity, values, views="all", groups="all") { # Define types of nodes nodes_types <- .get_nodes_types() # Define what entities should be updated for which nodes # Notation for axes: 2 is for columns, 1 is for rows, 0 is for vectors, 3 for 3-rd dim in tensors stopifnot(entity %in% c("features", "samples", "factors")) node_lists_options <- list( # features = list(nodes = c("Y", "Tau", "W"), axes = c(1, 1, 1, 1)), # samples = list(nodes = c("Y", "Tau", "Z"), axes = c(2, 2, 1, 1)), features = list(nodes = c("Y", "Tau", "W"), axes = c(1, 2, 1)), samples = list(nodes = c("Y", "Tau", "Z", "Sigma", "Sigma"), axes = c(2, 1, 1, 2, 3)), factors = list(nodes = c("Z", "W", "AlphaZ", "AlphaW", "ThetaZ", "ThetaW", "Sigma"), axes = c(2, 2, 0, 0, 0, 0, 1)) ) if (paste0(views, collapse = "") == "all") { views <- names(object@dimensions$D) } else { stopifnot(all(views %in% names(object@dimensions$D))) } if (paste0(groups, collapse = "") == "all") { groups <- names(object@dimensions$N) } else { stopifnot(all(groups %in% names(object@dimensions$N))) } # Iterate over node list depending on the entity nodes <- node_lists_options[[entity]]$nodes axes <- node_lists_options[[entity]]$axes for (i in seq_len(length(nodes))) { node <- nodes[i] axis <- axes[i] # Update the nodes for which expectations do exist if (node %in% names(object@expectations)) { # Update nodes with one level of nestedness (e.g. W or Z) if (any(node %in% nodes_types$multiview_node, node %in% nodes_types$multigroup_nodes)) { sub_dim <- length(object@expectations[[node]]) for (ind in seq_len(sub_dim)) { # No nestedness in values if factors vals <- if (entity == "factors") values else values[[ind]] dim <- length(vals) # Set names for rows if (axis == 1) { stopifnot(nrow(object@expectations[[node]][[ind]]) == dim) rownames(object@expectations[[node]][[ind]]) <- vals # ... or set names for columns } else if (axis == 2) { stopifnot(ncol(object@expectations[[node]][[ind]]) == dim) colnames(object@expectations[[node]][[ind]]) <- vals # ... or set vector names } else if (axis == 0) { stopifnot(length(object@expectations[[node]][[ind]]) == dim) names(object@expectations[[node]][[ind]]) <- vals } } # Update nodes with two levels of nestedness (e.g. Y or Tau) } else if (node %in% nodes_types$twodim_nodes) { sub_dim <- length(object@expectations[[node]]) for (ind in seq_len(sub_dim)) { sub_dim2 <- length(object@expectations[[node]][[ind]]) for (ind2 in seq_len(sub_dim2)) { # Infer which index to use to iterate over a provided list of values deduced_ind <- if (entity == "features") ind else ind2 # since ind corresponds to views (groups of features) dim <- length(values[[deduced_ind]]) # Set names for rows if (axis == 1) { stopifnot(nrow(object@expectations[[node]][[ind]][[ind2]]) == dim) rownames(object@expectations[[node]][[ind]][[ind2]]) <- values[[deduced_ind]] # ... or set names for columns } else if (axis == 2) { stopifnot(ncol(object@expectations[[node]][[ind]][[ind2]]) == dim) colnames(object@expectations[[node]][[ind]][[ind2]]) <- values[[deduced_ind]] # ... or set vector names } else { stopifnot(length(object@expectations[[node]][[ind]]) == dim) names(object@expectations[[node]][[ind]]) <- vals } } } # Update nodes with multivariate components (e.g. Sigma) } else if (node %in% nodes_types$multivariate_singleview_node) { # Set names for rows if (axis != 0) { dimnames(object@expectations[[node]][[1]])[[axis]] <- unlist(values) } else { # names(object@expectations[[node]][[1]]) <- values # no group structure in Sigma (full covariance across all samples) } } else { print(paste0("DEV :: NOTE: There are no expectations for the node ", node)) } } } object } # (Hidden) Function to set dimensions names for the data and intercepts .set_data_names <- function(object, entity, values) { stopifnot(entity %in% c("features", "samples")) axes_options <- list(features = 1, samples = 2) for (m in seq_len(length(object@data))) { for (g in seq_len(length(object@data[[m]]))) { deduced_ind <- if (entity == "features") m else g # since ind corresponds to views (groups of features) if (axes_options[[entity]] == 1) { rownames(object@data[[m]][[g]]) <- values[[deduced_ind]] } else { colnames(object@data[[m]][[g]]) <- values[[deduced_ind]] } if (entity=="features") tryCatch(names(object@intercepts[[m]][[g]]) <- values[[deduced_ind]], error = function(e) { NULL }) } } object } # (Hidden) Function to set dimensions names for the imputed data .set_imputed_data_names <- function(object, entity, values) { stopifnot(entity %in% c("features", "samples")) axes_options <- list(features = 1, samples = 2) for (m in seq_len(length(object@data))) { for (g in seq_len(length(object@data[[m]]))) { deduced_ind <- if (entity == "features") m else g # since ind corresponds to views (groups of features) if (axes_options[[entity]] == 1) { rownames(object@imputed_data[[m]][[g]]) <- values[[deduced_ind]] # rownames(object@imputed_data[[m]][[g]][["mean"]]) <- values[[deduced_ind]] # rownames(object@imputed_data[[m]][[g]][["variance"]]) <- values[[deduced_ind]] } else { colnames(object@imputed_data[[m]][[g]]) <- values[[deduced_ind]] # colnames(object@imputed_data[[m]][[g]][["mean"]]) <- values[[deduced_ind]] # colnames(object@imputed_data[[m]][[g]][["variance"]]) <- values[[deduced_ind]] } } } object } ================================================ FILE: R/subset.R ================================================ ################################ ## Functions to do subsetting ## ################################ #' @title Subset groups #' @name subset_groups #' @description Method to subset (or sort) groups #' @param object a \code{\link{MOFA}} object. #' @param groups character vector with the groups names, numeric vector with the groups indices #' or logical vector with the groups to be kept as TRUE. #' @return A \code{\link{MOFA}} object #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Subset the first group #' model <- subset_groups(model, groups = 1) subset_groups <- function(object, groups) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(length(groups) <= object@dimensions[["G"]]) # Define groups groups <- .check_and_get_groups(object, groups) # Subset expectations if (length(object@expectations)>0) { if ("Z" %in% names(object@expectations) & length(object@expectations$Z)>0) object@expectations$Z <- object@expectations$Z[groups] if ("Y" %in% names(object@expectations) & length(object@expectations$Y)>0) object@expectations$Y <- sapply(object@expectations$Y, function(x) x[groups], simplify = FALSE, USE.NAMES = TRUE) } # Subset data if (length(object@data)>0) { object@data <- sapply(object@data, function(x) x[groups], simplify = FALSE, USE.NAMES = TRUE) } # Subset imputed data if (length(object@imputed_data)>0) { object@imputed_data <- sapply(object@imputed_data, function(x) x[groups], simplify = FALSE, USE.NAMES = TRUE) } # Subset intercepts if (length(object@intercepts[[1]])>0) { object@intercepts <- sapply(object@intercepts, function(x) x[groups], simplify = FALSE, USE.NAMES = TRUE) } # Update dimensionality object@dimensions[["G"]] <- length(groups) object@dimensions[["N"]] <- object@dimensions[["N"]][groups] # Subset sample metadata stopifnot(groups%in%unique(object@samples_metadata$group)) object@samples_metadata <- object@samples_metadata[object@samples_metadata$group %in% groups,] object@samples_metadata$group <- factor(object@samples_metadata$group, levels=groups) # Re-order samples samples <- unname(unlist(lapply(object@data[[1]],colnames))) object@samples_metadata <- object@samples_metadata[match(samples, object@samples_metadata$sample),] # Sanity checks stopifnot(object@samples_metadata$sample == unlist(lapply(object@data[[1]],colnames))) stopifnot(object@samples_metadata$sample == unlist(lapply(object@expectations$Z,rownames))) stopifnot(object@samples_metadata$sample == unlist(lapply(object@expectations$Y,colnames))) # Update groups names # groups_names(object) <- groups # don't need to run this object@data_options$groups <- groups # Subset variance explained object@cache[["variance_explained"]]$r2_per_factor <- object@cache[["variance_explained"]]$r2_per_factor[groups] object@cache[["variance_explained"]]$r2_total <- object@cache[["variance_explained"]]$r2_total[groups] return(object) } #' @title Subset views #' @name subset_views #' @description Method to subset (or sort) views #' @param object a \code{\link{MOFA}} object. #' @param views character vector with the views names, numeric vector with the views indices, #' or logical vector with the views to be kept as TRUE. #' @return A \code{\link{MOFA}} object #' @export #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Subset the first view #' model <- subset_views(model, views = 1) subset_views <- function(object, views) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(length(views) <= object@dimensions[["M"]]) # warning("Removing views a posteriori is fine for an exploratory analysis, but you should removing them before training!") # Define views views <- .check_and_get_views(object, views) # Subset relevant slots if (length(object@expectations)>0) { object@expectations$W <- object@expectations$W[views] object@expectations$Y <- object@expectations$Y[views] } # Subset data if (length(object@data)>0) { object@data <- object@data[views] } # Subset imputed data if (length(object@imputed_data)>0) { object@imputed_data <- object@imputed_data[views] } # Subset intercepts if (length(object@intercepts[[1]])>0) { object@intercepts <- object@intercepts[views] } # Subset feature metadata if (length(object@features_metadata)>0) { object@features_metadata <- object@features_metadata[object@features_metadata$view %in% views,] } # Subset likelihoods object@model_options$likelihoods <- object@model_options$likelihoods[views] # Update dimensionality object@dimensions[["M"]] <- length(views) object@dimensions[["D"]] <- object@dimensions[["D"]][views] # Update view names object@data_options$views <- views # Subset variance explained if ((methods::.hasSlot(object, "cache")) && ("variance_explained" %in% names(object@cache))) { object@cache[["variance_explained"]]$r2_per_factor <- lapply(object@cache[["variance_explained"]]$r2_per_factor, function(x) x[,views,drop=FALSE]) object@cache[["variance_explained"]]$r2_total <- lapply(object@cache[["variance_explained"]]$r2_total, function(x) x[views]) } return(object) } #' @title Subset factors #' @name subset_factors #' @description Method to subset (or sort) factors #' @param object a \code{\link{MOFA}} object. #' @param factors character vector with the factor names, or numeric vector with the index of the factors. #' @param recalculate_variance_explained logical indicating whether to recalculate variance explained values. Default is \code{TRUE}. #' @export #' @return A \code{\link{MOFA}} object #' @examples #' # Using an existing trained model on simulated data #' file <- system.file("extdata", "model.hdf5", package = "MOFA2") #' model <- load_model(file) #' #' # Subset factors 1 to 3 #' model <- subset_factors(model, factors = 1:3) subset_factors <- function(object, factors, recalculate_variance_explained = TRUE) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(length(factors) <= object@dimensions[["K"]]) # Define factors factors <- .check_and_get_factors(object, factors) # Subset expectations nodes_with_factors <- list(nodes = c("Z", "W", "Sigma", "AlphaZ", "AlphaW", "ThetaZ", "ThetaW"), axes = c(2, 2, 1, 0, 0, 0, 0)) stopifnot(all(nodes_with_factors$axes %in% c(0, 1, 2))) if (length(object@expectations)>0) { for (i in seq_len(length(nodes_with_factors$nodes))) { node <- nodes_with_factors$nodes[i] axis <- nodes_with_factors$axes[i] if (node %in% names(object@expectations)) { if(node != "Sigma") { if (axis == 1) { object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[factors,,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE) } else if (axis == 2) { object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[,factors,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE) } else { object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[factors], simplify = FALSE, USE.NAMES = TRUE) } } else { if (axis == 1) { object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[factors,,,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE) } else if (axis == 2) { object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[,factors,,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE) } else if (axis == 3) { object@expectations[[node]] <- sapply(object@expectations[[node]], function(x) x[,,factors,drop=FALSE], simplify = FALSE, USE.NAMES = TRUE) } } } } } # Subset interpolations if(length(object@interpolated_Z) > 0) { object@interpolated_Z <- lapply(object@interpolated_Z, function(g) { if(!is.null(g$mean)) { m <- g$mean[factors, , drop = FALSE] } if(!is.null(g$variance)) { v <- g$variance[factors, , drop = FALSE] } list(mean = m, variance = v, new_values = g$new_values) }) } # Remove total variance explained estimates if (length(factors)0) { if ("Z" %in% names(object@expectations) & length(object@expectations$Z)>0) { object@expectations$Z[[g]] <- object@expectations$Z[[g]][samples_g,, drop=FALSE] } if ("Y" %in% names(object@expectations) & length(object@expectations$Y)>0) { for (m in views_names(object)) { object@expectations$Y[[m]][[g]] <- object@expectations$Y[[m]][[g]][,samples_g,drop=FALSE] } } if ("Tau" %in% names(object@expectations) & length(object@expectations$Tau)>0) { for (m in views_names(object)) { object@expectations$Tau[[m]][[g]] <- object@expectations$Tau[[m]][[g]][samples_g, , drop=FALSE] } } if(g == groups[1]) {# only one Sigma node if ("Sigma" %in% names(object@expectations) & length(object@expectations$Sigma)>0) { samples <- unique(unlist(tmp)) # TODO - make Sigma live on covariate level or expand to group-level object@expectations$Sigma[[1]] <- object@expectations$Sigma[[1]][,samples,samples,drop=FALSE] } } } # Subset data if (length(object@data)>0) { for (m in views_names(object)) { object@data[[m]][[g]] <- object@data[[m]][[g]][,samples_g,drop=FALSE] } } # Subset imputed data if (length(object@imputed_data)>0) { for (m in views_names(object)) { object@imputed_data[[m]][[g]] <- object@imputed_data[[m]][[g]][,samples_g,drop=FALSE] } } } # Subset sample metadata object@samples_metadata <- object@samples_metadata[object@samples_metadata$sample %in% samples,] # Update dimensionality object@dimensions[["N"]] <- sapply(tmp, length) # Update sample names samples_names(object) <- tmp # Sanity checks stopifnot(object@samples_metadata$sample == unlist(lapply(object@data[[1]],colnames))) stopifnot(object@samples_metadata$sample == unlist(lapply(object@expectations$Z,rownames))) stopifnot(object@samples_metadata$sample == unlist(lapply(object@expectations$Y,colnames))) # Remove variance explained estimates warning("After subsetting the samples the variance explained estimates are not valid anymore, removing them...") object@cache[["variance_explained"]] <- NULL return(object) } #' @title Subset features #' @name subset_features #' @description Method to subset (or sort) features #' @param object a \code{\link{MOFA}} object. #' @param view character vector with the view name or integer with the view index #' @param features character vector with the sample names, numeric vector with the feature indices #' or logical vector with the samples to be kept as TRUE. #' @return A \code{\link{MOFA}} object #' @export subset_features <- function(object, view, features) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(length(features) <= sapply(object@dimensions[["D"]], sum)) warning("Removing features a posteriori is fine for an exploratory analysis, but we recommend removing them before training!") if (is.numeric(view)) view <- views_names(object)[view] stopifnot(all(view %in% views_names(object))) # Define features if (is.character(features)) { stopifnot(all(features %in% features_names(object)[[view]])) } else { features <- features_names(object)[[view]][features] } # Subset relevant slots if (length(object@expectations)>0) { if ("W" %in% names(object@expectations) & length(object@expectations$W)>0) object@expectations$W <- lapply(object@expectations$W, function(x) x[features,, drop=FALSE]) if ("Y" %in% names(object@expectations) & length(object@expectations$Y)>0) object@expectations$Y[[view]] <- lapply(object@expectations$Y[[view]], function(x) x[features,]) if (length(object@data)>0) object@data <- lapply(object@data, function(x) sapply(x, function(y) y[features,], simplify = FALSE, USE.NAMES = TRUE)) if (length(object@expectations)>0) object@intercepts <- lapply(object@intercepts, function(x) sapply(x, function(y) y[features], simplify = FALSE, USE.NAMES = TRUE)) if (length(object@imputed_data) != 0) { stop() # object@imputed_data <- lapply(object@imputed_data, function(x) sapply(x, function(y) y[,samples], simplify = FALSE, USE.NAMES = TRUE)) } } # Update dimensionality object@dimensions[["D"]][[view]] <- length(features) # Update features names features_names(object)[[view]] <- features # Remove variance explained estimates warning("After subsetting the features the variance explained estimates are not valid anymore, removing them...") object@cache[["variance_explained"]] <- NULL return(object) } ================================================ FILE: R/utils.R ================================================ # Function to find "intercept" factors # .detectInterceptFactors <- function(object, cor_threshold = 0.75) { # # # Sanity checks # if (!is(object, "MOFAmodel")) stop("'object' has to be an instance of MOFAmodel") # # # Fetch data # data <- getTrainData(object) # factors <- getfactors_names(object) # # # Correlate the factors with global means per sample # r <- lapply(data, function(x) abs(cor(colSums(x,na.rm=T),factors, use="pairwise.complete.obs"))) # # token <- 0 # for (i in names(r)) { # if (any(r[[i]]>cor_threshold)) { # token <- 1 # message(paste0("Warning: Factor ",which(r[[i]]>cor_threshold)," is strongly correlated with the total expression for each sample in ",i)) # } # } # if (token==1) # message("Such (strong) factors usually appear when count-based assays are not properly normalised by library size.") # } # x: a named vector, where names correspond to sample names .add_column_to_metadata <- function(object, x, name) { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(!is.null(names(x))) stopifnot(names(x) %in% unlist(samples_names(object))) # sort vector to match samples names (fill with NA where applicable) vec <- rep(NA,sum(get_dimensions(object)[["N"]])) names(vec) <- object@samples_metadata$sample vec[names(x)] <- x # add to metadata object@samples_metadata[[name]] <- x return(object) } .infer_likelihoods <- function(object) { # Gaussian by default likelihood <- rep(x="gaussian", times=object@dimensions$M) names(likelihood) <- views_names(object) for (m in views_names(object)) { # data <- get_data(object, views=m)[[1]][[1]] # take only first group data <- object@data[[m]][[1]] # bernoulli if (length(unique(data[!is.na(data)]))==2) { likelihood[m] <- "bernoulli" # poisson } else if (all(data[!is.na(data)]%%1==0)) { likelihood[m] <- "poisson" } } return(likelihood) } # Set view names and group names for nested list objects (e.g. Y) .name_views_and_groups <- function(nested_list, view_names, group_names) { names(nested_list) <- view_names for (view in view_names) { names(nested_list[[view]]) <- group_names } nested_list } #' @importFrom stats sd .detect_outliers <- function(object, groups = "all", factors = "all") { # Sanity checks if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Define groups groups <- .check_and_get_groups(object, groups) H <- length(groups) # Define factors factors <- .check_and_get_factors(object, factors) for (k in factors) { for (g in groups) { Z <- get_factors(object, groups=g, factors=k)[[1]][,1] Z <- Z[!is.na(Z)] # warning("Outlier detection is independent of the inferred lengthscale currently - might lead to unwanted results") cutoff <- 2.5 * 1.96 tmp <- abs(Z - mean(Z)) / sd(Z) outliers <- names(which(tmp>cutoff & abs(Z)>0.5)) if (length(outliers)>0 & length(outliers)<5) { object@expectations$Z[[g]][,k][outliers] <- NA } } } # re-compute variance explained object@cache[["variance_explained"]] <- calculate_variance_explained(object) return(object) } .flip_factor <- function(model, factor){ for(g in names(model@expectations$Z)) { model@expectations$Z[[g]][,factor] <- - model@expectations$Z[[g]][,factor] } for(m in names(model@expectations$W)) { model@expectations$W[[m]][,factor] <- -model@expectations$W[[m]][,factor] } return(model) } .check_and_get_factors <- function(object, factors) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(!any(duplicated(factors))) if (is.numeric(factors)) { stopifnot(all(factors <= object@dimensions$K)) factors_names(object)[factors] } else { if (paste0(factors, collapse = "") == "all") { factors_names(object) } else { stopifnot(all(factors %in% factors_names(object))) factors } } } .check_and_get_covariates <- function(object, covariates) { if (!.hasSlot(object, "covariates") || is.null(object@covariates)) stop("No covariates found in object.") stopifnot(!any(duplicated(covariates))) if (is.numeric(covariates)) { stopifnot(all(covariates <= object@dimensions$C)) covariates_names(object)[covariates] } else { if (paste0(covariates, collapse = "") == "all") { covariates_names(object) } else { stopifnot(all(covariates %in% covariates_names(object))) covariates } } } .check_and_get_views <- function(object, views, non_gaussian=TRUE) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(!any(duplicated(views))) if (is.numeric(views)) { stopifnot(all(views <= object@dimensions$M)) views <- views_names(object)[views] } else { if (paste0(views, sep = "", collapse = "") == "all") { views <- views_names(object) } else { stopifnot(all(views %in% views_names(object))) } } # Ignore non-gaussian views if (isFALSE(non_gaussian)) { non_gaussian_views <- names(which(object@model_options$likelihoods!="gaussian")) views <- views[!views%in%non_gaussian_views] } return(views) } .check_and_get_groups <- function(object, groups) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(!any(duplicated(groups))) if (is.numeric(groups)) { stopifnot(all(groups <= object@dimensions$G)) groups_names(object)[groups] } else { if (paste0(groups, collapse = "") == "all") { groups_names(object) } else { stopifnot(all(groups %in% groups_names(object))) groups } } } .check_and_get_samples <- function(object, samples) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(!any(duplicated(samples))) if (is.numeric(samples)) { stopifnot(all(samples <= sum(object@dimensions$N))) unlist(samples_names(object))[samples] } else { if (paste0(samples, collapse = "") == "all") { unlist(samples_names(object)) } else { stopifnot(all(samples %in% unlist(samples_names(object)))) samples } } } .check_and_get_features_from_view <- function(object, view, features) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") stopifnot(!any(duplicated(features))) if (is.numeric(features)) { stopifnot(all(features <= sum(object@dimensions$D[view]))) unname(unlist(features_names(object)[[view]])[features]) } else { if (paste0(features, collapse = "") == "all") { unlist(features_names(object)[[view]]) } else { stopifnot(all(features %in% unlist(features_names(object)[[view]]))) features } } } .get_top_features_by_loading <- function(object, view, factors, nfeatures = 10) { if (!is(object, "MOFA")) stop("'object' has to be an instance of MOFA") # Collect expectations W <- get_weights(object, factors = factors, views = view, as.data.frame=TRUE) # Work with absolute values to sort them W$value <- abs(W$value) # Extract relevant features W <- W[with(W, order(-abs(value))), ] return(as.character(head(W$feature, nfeatures))) } .get_nodes_types <- function() { nodes_types <- list( multiview_nodes = c("W", "AlphaW", "ThetaW"), multigroup_nodes = c("Z", "AlphaZ", "ThetaZ"), twodim_nodes = c("Y", "Tau"), multivariate_singleview_node = "Sigma" ) } setClass("matrix_placeholder", slots=c(rownames = "ANY", colnames = "ANY", nrow = "integer", ncol = "integer") ) setMethod("rownames", "matrix_placeholder", function(x) { x@rownames }) setMethod("colnames", "matrix_placeholder", function(x) { x@colnames }) setMethod("nrow", "matrix_placeholder", function(x) { x@nrow }) setMethod("ncol", "matrix_placeholder", function(x) { x@ncol }) setReplaceMethod("rownames", signature(x = "matrix_placeholder"), function(x, value) { x@rownames <- value x@nrow <- length(value) x }) setReplaceMethod("colnames", signature(x = "matrix_placeholder"), function(x, value) { x@colnames <- value x@ncol <- length(value) x }) .create_matrix_placeholder <- function(rownames, colnames) { mx <- new("matrix_placeholder") mx@rownames <- rownames mx@colnames <- colnames mx@nrow <- length(rownames) mx@ncol <- length(colnames) mx } # (Hidden) function to define the group .set_groupby <- function(object, group_by) { # Option 0: no group if (is.null(group_by)) { group_by <- rep("1",sum(object@dimensions[["N"]])) # Option 1: by default group } else if (group_by[1] == "group") { # group_by = c() # for (group in names(samples_names(object))) { # group_by <- c(group_by,rep(group,length(samples_names(object)[[group]]))) # } # group_by = factor(group_by, levels=groups_names(object)) group_by <- samples_metadata(object)$group # Option 2: by a metadata column in object@samples$metadata } else if ((length(group_by) == 1) && (is.character(group_by)|is.factor(group_by)) & (group_by[1] %in% colnames(samples_metadata(object)))) { group_by <- samples_metadata(object)[,group_by] # if (is.character(group_by)) group_by <- as.factor( group_by ) # Option 3: input is a data.frame with columns (sample,group) } else if (is(group_by,"data.frame")) { stopifnot(all(colnames(group_by) %in% c("sample","group"))) stopifnot(all(unique(group_by$sample) %in% unlist(samples_names(object)))) # Option 4: group_by is a vector of length N } else if (length(group_by) > 1) { stopifnot(length(group_by) == sum(object@dimensions[["N"]])) # Option not recognised } else { stop("'group_by' was specified but it was not recognised, please read the documentation") } # Create data.frame with columns (sample,group) if (!is(group_by,"data.frame")) { df = data.frame( # sample = unlist(samples_names(object)), sample = samples_metadata(object)$sample, group_by = group_by, stringsAsFactors = FALSE ) } return(df) } # (Hidden) function to define the color .set_xax <- function(object, xax) { # Option 1: by a metadata column in object@samples_metadata if ((length(xax) == 1) && (is.character(xax)|is.factor(xax)) & (xax[1] %in% colnames(samples_metadata(object)))) { xax <- samples_metadata(object)[,xax] # Option 2: by a feature present in the training data } else if ((length(xax) == 1) && is.character(xax) && (xax[1] %in% unlist(features_names(object)))) { data <- lapply(get_data(object), function(l) Reduce(cbind, l)) features <- lapply(data, rownames) viewidx <- which(sapply(features, function(x) xax %in% x)) xax <- data[[viewidx]][xax,] # Option 5: input is a data.frame with columns (sample, value) } else if (is(xax, "data.frame")) { stopifnot(all(colnames(xax) %in% c("sample", "value"))) stopifnot(all(unique(xax$sample) %in% unlist(samples_names(object)))) xax <- dplyr::rename(xax, covariate_value = value) # Option 6: color_by is a vector of length N } else if (length(xax) > 1) { stopifnot(length(xax) == sum(get_dimensions(object)$N)) # Option not recognised } else { stop("'xax' was specified but it was not recognised, please read the documentation") } # Create data.frame with columns (sample,color) if (!is(xax,"data.frame")) { xax = data.frame( sample = unlist(samples_names(object)), covariate_value = xax, stringsAsFactors = FALSE ) } return(xax) } # (Hidden) function to define the color .set_colorby <- function(object, color_by) { # Option 0: no color if (is.null(color_by)) { color_by <- rep("1",sum(object@dimensions[["N"]])) # Option 1: by default group } else if (color_by[1] == "group") { color_by <- samples_metadata(object)$group # Option 2: by a metadata column in object@samples$metadata } else if ((length(color_by) == 1) && (is.character(color_by)|is.factor(color_by)) && (color_by[1] %in% colnames(samples_metadata(object)))) { color_by <- samples_metadata(object)[,color_by] # if (is.character(color_by)) color_by <- as.factor( color_by ) # Option 3: by a feature present in the training data } else if ((length(color_by) == 1) && is.character(color_by) && (color_by[1] %in% unlist(features_names(object)))) { viewidx <- which(sapply(features_names(object), function(x) color_by %in% x)) foo <- list(color_by); names(foo) <- names(viewidx) color_by <- lapply(get_data(object, features = foo), function(l) Reduce(cbind, l))[[1]][1,] # data <- lapply(get_data(object), function(l) Reduce(cbind, l)) # features <- lapply(data, rownames) # viewidx <- which(sapply(features, function(x) color_by %in% x)) # color_by <- data[[viewidx]][color_by,] # Option 4: by a factor value in object@expectations$Z } else if ((length(color_by) == 1) && is.character(color_by) && (color_by[1] %in% colnames(get_factors(object)[[1]]))) { color_by <- do.call(rbind, get_factors(object))[,color_by] # Option 5: input is a data.frame with columns (sample, color) } else if (is(color_by, "data.frame")) { stopifnot(all(colnames(color_by) %in% c("sample", "color"))) stopifnot(all(unique(color_by$sample) %in% unlist(samples_names(object)))) # Option 6: color_by is a vector of length N } else if (length(color_by) > 1) { stopifnot(length(color_by) == sum(get_dimensions(object)$N)) # Option not recognised } else { stop("'color_by' was specified but it was not recognised, please read the documentation") } # Create data.frame with columns (sample,color) if (!is(color_by,"data.frame")) { df <- data.frame( # sample = unlist(samples_names(object)), sample = samples_metadata(object)$sample, color_by = color_by, stringsAsFactors = FALSE ) } if (length(unique(df$color_by)) < 5) df$color_by <- as.factor(df$color_by) return(df) } # (Hidden) function to define the shape .set_shapeby <- function(object, shape_by) { # Option 0: no color if (is.null(shape_by)) { shape_by <- rep("1",sum(object@dimensions[["N"]])) # Option 1: by default group } else if (shape_by[1] == "group") { # shape_by = c() # for (group in names(samples_names(object))){ # shape_by <- c(shape_by,rep(group,length(samples_names(object)[[group]]))) # } shape_by <- samples_metadata(object)$group # Option 2: by a metadata column in object@samples$metadata } else if ((length(shape_by) == 1) && is.character(shape_by) & (shape_by %in% colnames(samples_metadata(object)))) { shape_by <- samples_metadata(object)[,shape_by] # Option 3: by a feature present in the training data } else if ((length(shape_by) == 1) && is.character(shape_by) && (shape_by[1] %in% unlist(features_names(object)))) { # data <- lapply(get_data(object), function(l) Reduce(cbind, l)) # features <- lapply(data, rownames) # viewidx <- which(sapply(features, function(x) shape_by %in% x)) # shape_by <- data[[viewidx]][shape_by,] viewidx <- which(sapply(features_names(object), function(x) shape_by %in% x)) foo <- list(shape_by); names(foo) <- names(viewidx) shape_by <- lapply(get_data(object, features = foo), function(l) Reduce(cbind, l))[[1]][1,] # Option 4: input is a data.frame with columns (sample,color) } else if (is(shape_by,"data.frame")) { stopifnot(all(colnames(shape_by) %in% c("sample","color"))) stopifnot(all(unique(shape_by$sample) %in% unlist(samples_names(object)))) # Option 5: shape_by is a vector of length N } else if (length(shape_by) > 1) { stopifnot(length(shape_by) == sum(object@dimensions[["N"]])) # Option not recognised } else { stop("'shape_by' was specified but it was not recognised, please read the documentation") } # Create data.frame with columns (sample,shape) if (!is(shape_by,"data.frame")) { df = data.frame( sample = samples_metadata(object)$sample, # sample = unlist(samples_names(object)), shape_by = as.factor(shape_by), stringsAsFactors = FALSE ) } return(df) } .add_legend <- function(p, df, legend, color_name, shape_name) { # Add legend for color if (is.numeric(df$color_by)) { p <- p + # guides(color="none") + scale_fill_gradientn(colors=colorRampPalette(rev(brewer.pal(n=5, name="RdYlBu")))(10)) + # scale_fill_gradientn(colours = c('lightgrey', 'blue')) labs(fill=color_name) } else { if (length(unique(df$color_by))>1) { p <- p + guides(fill=guide_legend(override.aes = list(shape=21))) + labs(fill=color_name) } else { p <- p + guides(fill="none", color="none") + scale_color_manual(values="black") + scale_fill_manual(values="gray60") } } # Add legend for shape if (length(unique(df$shape_by))>1) { p <- p + scale_shape_manual(values=c(21,23,24,25)[1:length(unique(df$shape_by))]) + guides(shape = guide_legend(override.aes = list(fill = "black"))) + labs(shape=shape_name) } else { p <- p + scale_shape_manual(values=c(21)) + guides(shape="none") } # Add legend theme if (legend) { p <- p + guides(color=guide_legend(override.aes = list(fill="white"))) + theme( legend.text = element_text(size=rel(0.8)), legend.title = element_text(size=rel(0.8)), legend.key = element_rect(fill = "white", color="white") # legend.background = element_rect(color = NA, fill=NA), # legend.box.background = element_blank() ) } else { p <- p + theme(legend.position = "none") } return(p) } # Function to define the stroke for each dot .select_stroke <- function(N) { if (N<=1000) { stroke <- 0.5 } else if (N>1000 & N<=10000) { stroke <- 0.2 } else { stroke <- 0.05 } } # # (Hidden) function to define the shape # .set_shapeby_features <- function(object, shape_by, view) { # # # Option 1: no color # if (is.null(shape_by)) { # shape_by <- rep("1",sum(object@dimensions[["D"]][view])) # # # Option 2: input is a data.frame with columns (feature,color) # } else if (is(shape_by,"data.frame")) { # stopifnot(all(colnames(shape_by) %in% c("feature","color"))) # stopifnot(all(unique(shape_by$feature) %in% features(object)[[view]])) # # # Option 3: by a feature_metadata column # } else if ((length(shape_by)==1) && is.character(shape_by) & (shape_by %in% colnames(features_metadata(object)))) { # tmp <- features_metadata(object) # shape_by <- tmp[tmp$view==view,shape_by] # # # Option 4: shape_by is a vector of length D # } else if (length(shape_by) > 1) { # stopifnot(length(shape_by) == object@dimensions[["D"]][[view]]) # # # Option not recognised # } else { # stop("'shape_by' was specified but it was not recognised, please read the documentation") # } # # # Create data.frame with columns (feature,shape) # if (!is(shape_by,"data.frame")) { # df = data.frame( # feature = features(object)[[view]], # shape_by = shape_by, # view = view # ) # } # # return(df) # } # # # # (Hidden) function to define the color # .set_colorby_features <- function(object, color_by, view) { # # # Option 1: no color # if (is.null(color_by)) { # color_by <- rep("1",sum(object@dimensions[["D"]][view])) # # # Option 2: input is a data.frame with columns (feature,color) # } else if (is(color_by,"data.frame")) { # stopifnot(all(colnames(color_by) %in% c("feature","color"))) # stopifnot(all(unique(color_by$feature) %in% features(object)[[view]])) # # # Option 3: by a feature_metadata column # } else if ((length(color_by)==1) && is.character(color_by) & (color_by %in% colnames(features_metadata(object)))) { # tmp <- features_metadata(object) # color_by <- tmp[tmp$view==view,color_by] # # # Option 4: color_by is a vector of length D # } else if (length(color_by) > 1) { # stopifnot(length(color_by) == object@dimensions[["D"]][[view]]) # # # Option not recognised # } else { # stop("'color_by' was specified but it was not recognised, please read the documentation") # } # # # Create data.frame with columns (feature,color) # if (!is(color_by,"data.frame")) { # df = data.frame( # feature = features(object)[[view]], # color_by = color_by, # view = view # ) # } # # return(df) # } #' @title Function to add the MOFA representation onto a Seurat object #' @name add_mofa_factors_to_seurat #' @description Function to add the MOFA latent representation to a Seurat object #' @param mofa_object a trained \code{\link{MOFA}} object. #' @param seurat_object a Seurat object #' @param views character vector with the view names, or numeric vector with view indexes. Default is 'all' #' @param factors character vector with the factor names, or numeric vector with the factor indexes. Default is 'all' #' @details This function calls the \code{CreateDimReducObject} function from Seurat to store the MOFA factors. #' @return Returns a Seurat object with the 'reductions' slot filled with the MOFA factors. Also adds, if calculated, the UMAP/TSNE obtained with the MOFA factors. #' @export #' @examples #' # Generate a simulated data set #' MOFAexample <- make_example_data() add_mofa_factors_to_seurat <- function(mofa_object, seurat_object, views = "all", factors = "all") { # Sanity checks if (!is(mofa_object, "MOFA")) stop("'object' has to be an instance of MOFA") if (!requireNamespace("Seurat", quietly = TRUE)) { stop("Package \"Seurat\" is required but is not installed.", call. = FALSE) } if (!all(colnames(seurat_object)==unlist(samples_names(mofa_object)))) { stop("Samples do not match between the MOFA object and the Seurat object") } # Get factors factors <- .check_and_get_factors(mofa_object, factors) Z <- get_factors(mofa_object, factors=factors) Z <- do.call("rbind",Z) # Get weights (currently not exported) views <- .check_and_get_views(mofa_object, views=views) W <- get_weights(mofa_object, views=views, factors=factors) # Collect MOFA options mofa_options <- list( "data_options" = mofa_object@data_options, "model_options" = mofa_object@model_options, "training_options" = mofa_object@training_options, "dimensions" = mofa_object@dimensions ) # Sanity checks stopifnot(rownames(Z) %in% colnames(seurat_object)) stopifnot(views_names(mofa_object) %in% names(seurat_object@assays)) # Add to seurat # Add "MOFA" with no view-specific weights to the default assay message("(1) Adding the MOFA factors to the 'reductions' slot of the default Seurat assay with the 'MOFA' key (no feature weights/loadings provided)...") seurat_object@reductions[["MOFA"]] <- CreateDimReducObject( embeddings = Z, key = "MOFA_", misc = mofa_options ) # Add a view-specific "MOFA_" that includes the weights # message("(2) Adding the MOFA representation to the 'reductions' slot of each assay, including the feature weights/loadings...") # for (m in views_names(mofa_object)) { # seurat_object@reductions[[sprintf("MOFA%s_",m)]] <- CreateDimReducObject( # embeddings = Z, # loadings = W[[m]], # assay = m, # key = sprintf("MOFA%s_",m), # misc = mofa_options # ) # } if (length(mofa_object@dim_red)>0) { if ("UMAP" %in% names(mofa_object@dim_red)) { message("(2) Adding the UMAP representation obtained with the MOFA factors to the 'reductions' slot of the default Seurat assay using the key 'MOFAUMAP'...") df <- mofa_object@dim_red$UMAP; mtx <- as.matrix(df[,-1]); rownames(mtx) <- df$sample colnames(df) <- paste0("MOFA_UMAP",1:ncol(df)) seurat_object@reductions[["MOFAUMAP"]] <- CreateDimReducObject(embeddings = mtx, key = "MOFAUMAP_") } if ("TSNE" %in% names(mofa_object@dim_red)) { message("(2) Adding the UMAP representation obtained with the MOFA factors to the 'reductions' slot of the default Seurat assay using the key 'MOFATSNE'...") df <- mofa_object@dim_red$UMAP; mtx <- as.matrix(df[,-1]); rownames(mtx) <- df$sample seurat_object@reductions[["MOFATSNE"]] <- CreateDimReducObject(embeddings = mtx, key = "MOFATSNE_") } } return(seurat_object) } ================================================ FILE: README.md ================================================ # Multi-Omics Factor Analysis MOFA is a factor analysis model that provides a general framework for the integration of multi-omic data sets in an unsupervised fashion. Please [visit our website](https://biofam.github.io/MOFA2/) for installation instructions, tutorials, and much more! ================================================ FILE: configure ================================================ #!/bin/sh ${R_HOME}/bin/Rscript -e "basilisk::configureBasiliskEnv()" ================================================ FILE: configure.win ================================================ #!/bin/sh ${R_HOME}/bin${R_ARCH_BIN}/Rscript.exe -e "basilisk::configureBasiliskEnv()" ================================================ FILE: inst/CITATION ================================================ citEntry(entry="article", title = "Multi‐Omics Factor Analysis—a framework for unsupervised integration of multi‐omics data sets", author = personList( as.person("Ricard Argelaguet"), as.person("Britta Velten"), as.person("Damien Arnol"), as.person("Sascha Dietrich"), as.person("Thorsten Zenz"), as.person("John C Marioni"), as.person("Florian Buettner"), as.person("Wolfgang Huber"), as.person("Oliver Stegle")), year = 2018, journal = "Molecular Systems Biology", doi = "10.15252/msb.20178124", volume = 14, textVersion = paste("Argelaguet, Velten, Arnol, Dietrich, Zenz, Marioni, Buettner, Huber and Stegle:", "Multi‐Omics Factor Analysis — a framework for unsupervised integration of multi‐omics data sets.", "Mol Syst Biol (2018)14:e8124")) citEntry(entry="article", title = "MOFA+: a statistical framework for comprehensive integration of multi-modal single-cell data.", author = personList( as.person("Ricard Argelaguet"), as.person("Damien Arnol"), as.person("Danila Bredikhin"), as.person("Yonatan Deloro"), as.person("Britta Velten"), as.person("John C Marioni"), as.person("Oliver Stegle")), year = 2020, journal = "Genome Biology", doi = "10.1186/s13059-020-02015-1", volume = 21, textVersion = paste("Argelaguet, Arnol, Bredikhin, Deloro, Velten, Marioni,and Stegle:", "MOFA+: a statistical framework for comprehensive integration of multi-modal single-cell data", "Genome Biology, 21(1), 1-17")) citEntry(entry="article", title = "Identifying temporal and spatial patterns of variation from multi-modal data using MEFISTO.", author = personList( as.person("Britta Velten"), as.person("Jana M. Braunger"), as.person("Damien Arnol"), as.person("Ricard Argelaguet"), as.person("Oliver Stegle")), year = 2020, journal = "bioRxiv", doi = "10.1101/2020.11.03.366674", textVersion = paste("Velten, Braunger, Arnol, Argelaguet and Stegle:", "Identifying temporal and spatial patterns of variation from multi-modal data using MEFISTO", "bioRxiv 2020")) ================================================ FILE: inst/scripts/template_script.R ================================================ library(MOFA2) library(data.table) # (Optional) set up reticulate connection with Python # library(reticulate) # reticulate::use_python("/Users/ricard/anaconda3/envs/base_new/bin/python", required = T) ############### ## Load data ## ############### # Multiple formats are allowed for the input data: ## -- Option 1 -- ## # nested list of matrices, where the first index refers to the view and the second index refers to the group. # samples are stored in the rows and features are stored in the columns. # Missing values must be filled with NAs, including samples missing an entire view # (...) ## -- Option 2 -- ## # data.frame with columns ["sample","feature","view","group","value"] # In this case there is no need to have missing values in the data.frame, # they will be automatically filled in when creating the corresponding matrices file = "ftp://ftp.ebi.ac.uk/pub/databases/mofa/getting_started/data.txt.gz" data = fread(file) ####################### # Create MOFA object ## ####################### MOFAobject <- create_mofa(data) # Visualise data structure plot_data_overview(MOFAobject) #################### ## Define options ## #################### # Data options # - scale_views: if views have very different ranges/variances, it is good practice to scale each view to unit variance (default is FALSE) data_opts <- get_default_data_options(MOFAobject) # Model options # - likelihoods: likelihood per view (options are "gaussian","poisson","bernoulli"). "gaussian" is used by default # - num_factors: number of factors. By default K=10 model_opts <- get_default_model_options(MOFAobject) model_opts$num_factors <- 10 # Training options # - maxiter: number of iterations # - convergence_mode: "fast", "medium", "slow". For exploration, the fast mode is good enough. # - drop_factor_threshold: minimum variance explained criteria to drop factors while training. Default is -1 (no dropping of factors) # - gpu_mode: use GPU mode? This needs cupy installed and a functional GPU, see https://biofam.github.io/MOFA2/gpu_training.html # - seed: random seed train_opts <- get_default_training_options(MOFAobject) train_opts$convergence_mode <- "medium" train_opts$seed <- 42 ######################### ## Prepare MOFA object ## ######################### MOFAobject <- prepare_mofa(MOFAobject, data_options = data_opts, model_options = model_opts, training_options = train_opts ) ##################### ## Train the model ## ##################### MOFAobject <- run_mofa(MOFAobject) #################### ## Save the model ## #################### outfile <- paste0(getwd(),"/model.hdf5") saveRDS(MOFAobject, outfile) ================================================ FILE: inst/scripts/template_script.py ================================================ ###################################################### ## Template script to train a MOFA+ model in Python ## ###################################################### from mofapy2.run.entry_point import entry_point import pandas as pd import io import requests # to download the online data ############### ## Load data ## ############### # Two formats are allowed for the input data: # Option 1: a nested list of matrices, where the first index refers to the view and the second index refers to the group. # samples are stored in the rows and features are stored in the columns. # Missing values must be filled with NAs, including samples missing an entire view # datadir = "/Users/ricard/data/mofaplus/test" # views = ["0","1"] # groups = ["0","1"] # data = [None]*len(views) # for m in range(len(views)): # data[m] = [None]*len(groups) # for g in range(len(groups)): # datafile = "%s/%s_%s.txt.gz" % (datadir, views[m], groups[g]) # data[m][g] = pd.read_csv(datafile, header=None, sep=' ') # Option 2: a data.frame with columns ["sample","feature","view","group","value"] # In this case there is no need to have missing values in the data.frame, # they will be automatically filled in when creating the corresponding matrices file = "ftp://ftp.ebi.ac.uk/pub/databases/mofa/getting_started/data.txt.gz" data = pd.read_csv(file, sep="\t") ########################### ## Initialise MOFA model ## ########################### ## (1) initialise the entry point ent = entry_point() ## (2) Set data options # - scale_views: if views have very different ranges, one can to scale each view to unit variance ent.set_data_options( scale_views = False ) # (3) Set data using the long data frame format ent.set_data_df(data) ## (3) Set data using the nested list of matrices format ## # views_names = ["view1","view2"] # groups_names = ["groupA","groupB"] # samples_names nested list with length NGROUPS. Each entry g is a list with the sample names for the g-th group # - if not provided, MOFA will fill it with default samples names # samples_names = (...) # features_names nested list with length NVIEWS. Each entry m is a list with the features names for the m-th view # - if not provided, MOFA will fill it with default features names # features_names = (...) # ent.set_data_matrix(data, # views_names = views_names, # groups_names = groups_names, # samples_names = samples_names, # features_names = features_names # ) ## (4) Set model options # - factors: number of factors. Default is 15 # - likelihods: likelihoods per view (options are "gaussian","poisson","bernoulli"). Default and recommended is "gaussian" # - spikeslab_weights: use spike-slab sparsity prior in the weights? (recommended TRUE) # - ard_weights: use automatic relevance determination prior in the weights? (TRUE if using multiple views) # using default values ent.set_model_options() # using personalised values ent.set_model_options( factors = 5, spikeslab_weights = True, ard_weights = True ) ## (5) Set training options ## # - iter: number of iterations # - convergence_mode: "fast", "medium", "slow". Fast mode is usually good enough. # - dropR2: minimum variance explained criteria to drop factors while training. Default is None, inactive factors are not dropped during training # - gpu_mode: use GPU mode? this functionality needs cupy installed and a functional GPU, see https://biofam.github.io/MOFA2/gpu_training.html # - seed: random seed # using default values ent.set_train_options() # using personalised values ent.set_train_options( iter = 100, convergence_mode = "fast", dropR2 = None, gpu_mode = False, seed = 42 ) #################################### ## Build and train the MOFA model ## #################################### # Build the model ent.build() # Run the model ent.run() #################### ## Save the model ## #################### outfile = "/Users/ricard/data/mofaplus/hdf5/test.hdf5" # - save_data: logical indicating whether to save the training data in the hdf5 file. # this is useful for some downstream analysis in R, but it can take a lot of disk space. ent.save(outfile, save_data=True) ######################### ## Downstream analysis ## ######################### # Check the mofax package for the downstream analysis in Python: https://github.com/bioFAM/mofax # Check the MOFA2 R package for the downstream analysis in R: https://www.bioconductor.org/packages/release/bioc/html/MOFA2.html # All tutorials: https://biofam.github.io/MOFA2/tutorials.html # Extract factor values (a list with one matrix per sample group) factors = ent.model.nodes["Z"].getExpectation() # Extract weights (a list with one matrix per view) weights = ent.model.nodes["W"].getExpectation() # Extract variance explained values r2 = ent.model.calculate_variance_explained() # Interact directly with the hdf5 file import h5py f = h5py.File(outfile, 'r') f.keys() # Extract factors f["expectations"]["Z"]["group_0"].value f["expectations"]["Z"]["group_1"].value # Extract weights f["expectations"]["W"]["view_0"].value f["expectations"]["W"]["view_1"].value # Extract variance explained estimates f["variance_explained"]["r2_per_factor"] f["variance_explained"]["r2_total"] ================================================ FILE: inst/scripts/template_script_dataframe.py ================================================ ###################################################### ## Template script to train a MOFA+ model in Python ## ###################################################### from mofapy2.run.entry_point import entry_point import pandas as pd import io import requests # to download the online data ############### ## Load data ## ############### # The input needs to be a data.frame with columns ["sample","feature","view","group","value"] # In this case there is no need to have missing values in the data.frame, they will be automatically filled in when creating the corresponding matrices file = "ftp://ftp.ebi.ac.uk/pub/databases/mofa/getting_started/data.txt.gz" data = pd.read_csv(file, sep="\t") ########################### ## Initialise MOFA model ## ########################### ## (1) initialise the entry point ent = entry_point() ## (2) Set data options # - scale_views: if views have very different ranges, one can to scale each view to unit variance ent.set_data_options( scale_views = False ) # (3) Set data using the data frame format ent.set_data_df(data) # using default values ent.set_model_options() # using personalised values ent.set_model_options( factors = 5, spikeslab_weights = True, ard_weights = True ) ## (5) Set training options ## # - iter: number of iterations # - convergence_mode: "fast", "medium", "slow". Fast mode is usually good enough. # - dropR2: minimum variance explained criteria to drop factors while training. Default is None, inactive factors are not dropped during training # - gpu_mode: use GPU mode? this functionality needs cupy installed and a functional GPU, see https://biofam.github.io/MOFA2/gpu_training.html # - seed: random seed # using default values ent.set_train_options() # using personalised values ent.set_train_options( iter = 100, convergence_mode = "fast", dropR2 = None, gpu_mode = False, seed = 42 ) #################################### ## Build and train the MOFA model ## #################################### # Build the model ent.build() # Run the model ent.run() #################### ## Save the model ## #################### outfile = "/Users/ricard/data/mofaplus/hdf5/test.hdf5" # - save_data: logical indicating whether to save the training data in the hdf5 file. # this is useful for some downstream analysis in R, but it can take a lot of disk space. ent.save(outfile, save_data=True) ######################### ## Downstream analysis ## ######################### # Check the mofax package for the downstream analysis in Python: https://github.com/bioFAM/mofax # Check the MOFA2 R package for the downstream analysis in R: https://www.bioconductor.org/packages/release/bioc/html/MOFA2.html # All tutorials: https://biofam.github.io/MOFA2/tutorials.html # Extract factor values (a list with one matrix per sample group) factors = ent.model.nodes["Z"].getExpectation() # Extract weights (a list with one matrix per view) weights = ent.model.nodes["W"].getExpectation() # Extract variance explained values r2 = ent.model.calculate_variance_explained() # Interact directly with the hdf5 file import h5py f = h5py.File(outfile, 'r') f.keys() # Extract factors f["expectations"]["Z"]["group_0"].value f["expectations"]["Z"]["group_1"].value # Extract weights f["expectations"]["W"]["view_0"].value f["expectations"]["W"]["view_1"].value # Extract variance explained estimates f["variance_explained"]["r2_per_factor"] f["variance_explained"]["r2_total"] ================================================ FILE: inst/scripts/template_script_matrix.py ================================================ ###################################################### ## Template script to train a MOFA+ model in Python ## ###################################################### from mofapy2.run.entry_point import entry_point import pandas as pd import io import requests # to download the online data ############### ## Load data ## ############### # The data format is a nested list of matrices, where the first index refers to the view and the second index refers to the group. # samples are stored in the rows and features are stored in the columns. # Missing values must be explicitly filled using NAs, including samples missing an entire view datadir = "/Users/ricard/data/mofaplus/test" views = ["0","1"] groups = ["0","1"] data = [None]*len(views) for m in range(len(views)): data[m] = [None]*len(groups) for g in range(len(groups)): datafile = "%s/%s_%s.txt.gz" % (datadir, views[m], groups[g]) data[m][g] = pd.read_csv(datafile, header=None, sep=' ') ########################### ## Initialise MOFA model ## ########################### ## (1) initialise the entry point ent = entry_point() ## (2) Set data options # - scale_views: if views have very different ranges, one can to scale each view to unit variance ent.set_data_options( scale_views = False ) ## (3) Define names views_names = ["view1","view2"] # groups_names = ["groupA","groupB"] # samples_names nested list with length n_groups. Each entry g is a list with the sample names for the g-th group # - if not provided, MOFA will fill it with default samples names samples_names = (...) # features_names nested list with length NVIEWS. Each entry m is a list with the features names for the m-th view # - if not provided, MOFA will fill it with default features names features_names = (...) ## (4) Set data matrix ent.set_data_matrix(data, views_names = views_names, groups_names = groups_names, samples_names = samples_names, features_names = features_names ) ## (5) Set model options # - factors: number of factors. Default is 15 # - likelihods: likelihoods per view (options are "gaussian","poisson","bernoulli"). Default and recommended is "gaussian" # - spikeslab_weights: use spike-slab sparsity prior in the weights? (recommended TRUE) # - ard_weights: use automatic relevance determination prior in the weights? (TRUE if using multiple views) # using default values ent.set_model_options() # using personalised values ent.set_model_options( factors = 5, spikeslab_weights = True, ard_weights = True ) ## (5) Set training options ## # - iter: number of iterations # - convergence_mode: "fast", "medium", "slow". Fast mode is usually good enough. # - dropR2: minimum variance explained criteria to drop factors while training. Default is None, inactive factors are not dropped during training # - gpu_mode: use GPU mode? this functionality needs cupy installed and a functional GPU, see https://biofam.github.io/MOFA2/gpu_training.html # - seed: random seed # using default values ent.set_train_options() # using personalised values ent.set_train_options( iter = 100, convergence_mode = "fast", dropR2 = None, gpu_mode = False, seed = 42 ) #################################### ## Build and train the MOFA model ## #################################### # Build the model ent.build() # Run the model ent.run() #################### ## Save the model ## #################### outfile = "/Users/ricard/data/mofaplus/hdf5/test.hdf5" # - save_data: logical indicating whether to save the training data in the hdf5 file. # this is useful for some downstream analysis in R, but it can take a lot of disk space. ent.save(outfile, save_data=True) ######################### ## Downstream analysis ## ######################### # Check the mofax package for the downstream analysis in Python: https://github.com/bioFAM/mofax # Check the MOFA2 R package for the downstream analysis in R: https://www.bioconductor.org/packages/release/bioc/html/MOFA2.html # All tutorials: https://biofam.github.io/MOFA2/tutorials.html # Extract factor values (a list with one matrix per sample group) factors = ent.model.nodes["Z"].getExpectation() # Extract weights (a list with one matrix per view) weights = ent.model.nodes["W"].getExpectation() # Extract variance explained values r2 = ent.model.calculate_variance_explained() # Interact directly with the hdf5 file import h5py f = h5py.File(outfile, 'r') f.keys() # Extract factors f["expectations"]["Z"]["group_0"].value f["expectations"]["Z"]["group_1"].value # Extract weights f["expectations"]["W"]["view_0"].value f["expectations"]["W"]["view_1"].value # Extract variance explained estimates f["variance_explained"]["r2_per_factor"] f["variance_explained"]["r2_total"] ================================================ FILE: man/.Rapp.history ================================================ ================================================ FILE: man/MOFA.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllClasses.R \docType{class} \name{MOFA} \alias{MOFA} \alias{MOFA-class} \title{Class to store a mofa model} \description{ The \code{MOFA} is an S4 class used to store all relevant data to analyse a MOFA model } \section{Slots}{ \describe{ \item{\code{data}}{The input data} \item{\code{intercepts}}{Feature intercepts} \item{\code{samples_metadata}}{Samples metadata} \item{\code{features_metadata}}{Features metadata.} \item{\code{imputed_data}}{The imputed data.} \item{\code{expectations}}{expected values of the factors and the loadings.} \item{\code{dim_red}}{non-linear dimensionality reduction manifolds.} \item{\code{training_stats}}{model training statistics.} \item{\code{data_options}}{Data processing options.} \item{\code{training_options}}{Model training options.} \item{\code{stochastic_options}}{Stochastic variational inference options.} \item{\code{model_options}}{Model options.} \item{\code{mefisto_options}}{Options for the use of MEFISO} \item{\code{dimensions}}{Dimensionalities of the model: M for the number of views, G for the number of groups, N for the number of samples (per group), C for the number of covariates per sample, D for the number of features (per view), K for the number of factors.} \item{\code{on_disk}}{Logical indicating whether data is loaded from disk.} \item{\code{cache}}{Cache.} \item{\code{status}}{Auxiliary variable indicating whether the model has been trained.} \item{\code{covariates}}{optional slot to store sample covariate for training in MEFISTO} \item{\code{covariates_warped}}{optional slot to store warped sample covariate for training in MEFISTO} \item{\code{interpolated_Z}}{optional slot to store interpolated factor values (used only with MEFISTO)} }} ================================================ FILE: man/add_mofa_factors_to_seurat.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/utils.R \name{add_mofa_factors_to_seurat} \alias{add_mofa_factors_to_seurat} \title{Function to add the MOFA representation onto a Seurat object} \usage{ add_mofa_factors_to_seurat( mofa_object, seurat_object, views = "all", factors = "all" ) } \arguments{ \item{mofa_object}{a trained \code{\link{MOFA}} object.} \item{seurat_object}{a Seurat object} \item{views}{character vector with the view names, or numeric vector with view indexes. Default is 'all'} \item{factors}{character vector with the factor names, or numeric vector with the factor indexes. Default is 'all'} } \value{ Returns a Seurat object with the 'reductions' slot filled with the MOFA factors. Also adds, if calculated, the UMAP/TSNE obtained with the MOFA factors. } \description{ Function to add the MOFA latent representation to a Seurat object } \details{ This function calls the \code{CreateDimReducObject} function from Seurat to store the MOFA factors. } \examples{ # Generate a simulated data set MOFAexample <- make_example_data() } ================================================ FILE: man/calculate_contribution_scores.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/contribution_scores.R \name{calculate_contribution_scores} \alias{calculate_contribution_scores} \title{Calculate contribution scores for each view in each sample} \usage{ calculate_contribution_scores( object, views = "all", groups = "all", factors = "all", scale = TRUE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{views}{character vector with the view names, or numeric vector with view indexes. Default is 'all'} \item{groups}{character vector with the group names, or numeric vector with group indexes. Default is 'all'} \item{factors}{character vector with the factor names, or numeric vector with the factor indexes. Default is 'all'} \item{scale}{logical indicating whether to scale the sample-wise variance explained values by the total amount of variance explained per view. This effectively normalises each view by its total variance explained. It is important when different amounts of variance is explained for each view (check with \code{plot_variance_explained(..., plot_total=TRUE)})} } \value{ adds the contribution scores to the metadata slot (\code{samples_metadata(MOFAobject)}) and to the \code{MOFAobject@cache} slot } \description{ This function calculates, *for each sample* how much each view contributes to its location in the latent manifold, what we call \emph{contribution scores} } \details{ Contribution scores are calculated in three steps: \itemize{ \item{\strong{Step 1}: calculate variance explained for each cell i and each view m (\eqn{R_{im}}), using all factors} \item{\strong{Step 2} (optional): scale values by the total variance explained for each view} \item{\strong{Step 3}: calculate contribution score (\eqn{C_{im}}) for cell i and view m as: \deqn{C_{im} = \frac{R2_{im}}{\sum_{m} R2_{im}} } } } Note that contribution scores can be calculated using any number of data modalities, but it is easier to interpret when you specify two. \cr Please note that this functionality is still experimental, contact the authors if you have questions. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) model <- calculate_contribution_scores(model) } ================================================ FILE: man/calculate_variance_explained.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calculate_variance_explained.R \name{calculate_variance_explained} \alias{calculate_variance_explained} \title{Calculate variance explained by the model} \usage{ calculate_variance_explained( object, views = "all", groups = "all", factors = "all" ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{views}{character vector with the view names, or numeric vector with view indexes. Default is 'all'} \item{groups}{character vector with the group names, or numeric vector with group indexes. Default is 'all'} \item{factors}{character vector with the factor names, or numeric vector with the factor indexes. Default is 'all'} } \value{ a list with matrices with the amount of variation explained per factor and view. } \description{ This function takes a trained MOFA model as input and calculates the proportion of variance explained (i.e. the coefficient of determinations (R^2)) by the MOFA factors across the different views. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Calculate variance explained (R2) r2 <- calculate_variance_explained(model) # Plot variance explained values (view as x-axis, and factor as y-axis) plot_variance_explained(model, x="view", y="factor") # Plot variance explained values (view as x-axis, and group as y-axis) plot_variance_explained(model, x="view", y="group") # Plot variance explained values for factors 1 to 3 plot_variance_explained(model, x="view", y="group", factors=1:3) # Scale R2 values plot_variance_explained(model, max_r2 = 0.25) } ================================================ FILE: man/calculate_variance_explained_per_sample.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calculate_variance_explained.R \name{calculate_variance_explained_per_sample} \alias{calculate_variance_explained_per_sample} \title{Calculate variance explained by the MOFA factors for each sample} \usage{ calculate_variance_explained_per_sample( object, views = "all", groups = "all", factors = "all" ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{views}{character vector with the view names, or numeric vector with view indexes. Default is 'all'} \item{groups}{character vector with the group names, or numeric vector with group indexes. Default is 'all'} \item{factors}{character vector with the factor names, or numeric vector with the factor indexes. Default is 'all'} } \value{ a list with matrices with the amount of variation explained per sample and view. } \description{ This function takes a trained MOFA model as input and calculates, **for each sample** the proportion of variance explained (i.e. the coefficient of determinations (R^2)) by the MOFA factors across the different views. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Calculate variance explained (R2) r2 <- calculate_variance_explained_per_sample(model) } ================================================ FILE: man/cluster_samples.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/cluster_samples.R \name{cluster_samples} \alias{cluster_samples} \title{K-means clustering on samples based on latent factors} \usage{ cluster_samples(object, k, factors = "all", ...) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{k}{number of clusters (integer).} \item{factors}{character vector with the factor name(s), or numeric vector with the index of the factor(s) to use. Default is 'all'} \item{...}{extra arguments passed to \code{\link{kmeans}}} } \value{ output from \code{\link{kmeans}} function } \description{ MOFA factors are continuous in nature but they can be used to predict discrete clusters of samples. \cr The clustering can be performed in a single factor, which is equivalent to setting a manual threshold. More interestingly, it can be done using multiple factors, where multiple sources of variation are aggregated. \cr Importantly, this type of clustering is not weighted and does not take into account the different importance of the latent factors. } \details{ In some cases, due to model technicalities, samples can have missing values in the latent factor space. In such a case, these samples are currently ignored in the clustering procedure. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Cluster samples in the factor space using factors 1 to 3 and K=2 clusters clusters <- cluster_samples(model, k=2, factors=1:3) } ================================================ FILE: man/compare_elbo.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare_models.R \name{compare_elbo} \alias{compare_elbo} \title{Compare different trained \code{\link{MOFA}} objects in terms of the final value of the ELBO statistics and number of inferred factors} \usage{ compare_elbo(models, log = FALSE, return_data = FALSE) } \arguments{ \item{models}{a list containing \code{\link{MOFA}} objects.} \item{log}{logical indicating whether to plot the log of the ELBO.} \item{return_data}{logical indicating whether to return a data.frame with the ELBO values per model} } \value{ A \code{\link{ggplot}} object or the underlying data.frame if return_data is TRUE } \description{ Different objects of \code{\link{MOFA}} are compared in terms of the final value of the ELBO statistics. For model selection the model with the highest ELBO value is selected. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model1 <- load_model(file) model2 <- load_model(file) # Compare ELBO between models \dontrun{compare_elbo(list(model1,model2))} } ================================================ FILE: man/compare_factors.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare_models.R \name{compare_factors} \alias{compare_factors} \title{Plot the correlation of factors between different models} \usage{ compare_factors(models, ...) } \arguments{ \item{models}{a list with \code{\link{MOFA}} objects.} \item{...}{extra arguments passed to pheatmap} } \value{ Plots a heatmap of the Pearson correlation between latent factors across all input models. } \description{ Different \code{\link{MOFA}} objects are compared in terms of correlation between their factors. } \details{ If assessing model robustness across trials, the output should look like a block diagonal matrix, suggesting that all factors are robustly detected in all model instances. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model1 <- load_model(file) model2 <- load_model(file) # Compare factors between models compare_factors(list(model1,model2)) } ================================================ FILE: man/correlate_factors_with_covariates.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/correlate_covariates.R \name{correlate_factors_with_covariates} \alias{correlate_factors_with_covariates} \title{Plot correlation of factors with external covariates} \usage{ correlate_factors_with_covariates( object, covariates, factors = "all", groups = "all", abs = FALSE, plot = c("log_pval", "r"), alpha = 0.05, return_data = FALSE, transpose = FALSE, ... ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{covariates}{\itemize{ \item{\strong{data.frame}: a data.frame where the samples are stored in the rows and the covariates are stored in the columns. Use row names for sample names and column names for covariate names. Columns values must be numeric. } \item{\strong{character vector}: character vector with names of columns that are present in the sample metadata (\code{samples_metadata(model)}} }} \item{factors}{character vector with the factor name(s), or numeric vector with the index of the factor(s) to use. Default is 'all'.} \item{groups}{character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups.} \item{abs}{logical indicating whether to take the absolute value of the correlation coefficient (default is \code{TRUE}).} \item{plot}{character indicating whether to plot Pearson correlation coefficients (\code{plot="r"}) or log10 adjusted p-values (\code{plot="log_pval"}).} \item{alpha}{p-value threshold} \item{return_data}{logical indicating whether to return the correlation results instead of plotting} \item{transpose}{logical indicating whether to transpose the plot} \item{...}{extra arguments passed to \code{\link[corrplot]{corrplot}} (if \code{plot=="r"}) or \code{\link[pheatmap]{pheatmap}} (if \code{plot=="log_pval"}).} } \value{ A \code{\link[corrplot]{corrplot}} (if \code{plot=="r"}) or \code{\link[pheatmap]{pheatmap}} (if \code{plot=="log_pval"}) or the underlying data.frame if return_data is TRUE } \description{ Function to correlate factor values with external covariates. } ================================================ FILE: man/covariates_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/set_methods.R \name{covariates_names} \alias{covariates_names} \alias{covariates_names<-} \alias{covariates_names,MOFA-method} \alias{covariates,MOFA-method} \alias{covariates_names<-,MOFA,vector-method} \title{covariates_names: set and retrieve covariate names} \usage{ covariates_names(object) covariates_names(object) <- value \S4method{covariates_names}{MOFA}(object) \S4method{covariates_names}{MOFA,vector}(object) <- value } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{value}{a character vector of covariate names} } \value{ character vector with the covariate names } \description{ covariates_names: set and retrieve covariate names } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) covariates_names(model) } ================================================ FILE: man/create_mofa.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_mofa.R \name{create_mofa} \alias{create_mofa} \title{create a MOFA object} \usage{ create_mofa(data, groups = NULL, extract_metadata = TRUE, ...) } \arguments{ \item{data}{one of the formats above} \item{groups}{group information, only relevant when using the multi-group framework.} \item{extract_metadata}{logical indicating whether to incorporate the sample metadata from the input object into the MOFA object ( not relevant when the input is a list of matrices). Default is \code{TRUE}.} \item{...}{further arguments that can be passed to the function depending on the input data format. See the documentation of above functions for details.} } \value{ Returns an untrained \code{\link{MOFA}} object } \description{ Method to create a \code{\link{MOFA}} object. Depending on the input data format, this method calls one of the following functions: \itemize{ \item{\strong{long data.frame}: \code{\link{create_mofa_from_df}}} \item{\strong{List of matrices}: \code{\link{create_mofa_from_matrix}}} \item{\strong{MultiAssayExperiment}: \code{\link{create_mofa_from_MultiAssayExperiment}}} \item{\strong{Seurat}: \code{\link{create_mofa_from_Seurat}}} \item{\strong{SingleCellExperiment}: \code{\link{create_mofa_from_SingleCellExperiment}}} } Please read the documentation of the corresponding function for more details on your specific data format. } \examples{ # Using an existing simulated data with two groups and two views file <- system.file("extdata", "test_data.RData", package = "MOFA2") # Load data (in long data.frame format) load(file) MOFAmodel <- create_mofa(dt) } ================================================ FILE: man/create_mofa_from_MultiAssayExperiment.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_mofa.R \name{create_mofa_from_MultiAssayExperiment} \alias{create_mofa_from_MultiAssayExperiment} \title{create a MOFA object from a MultiAssayExperiment object} \usage{ create_mofa_from_MultiAssayExperiment( mae, groups = NULL, extract_metadata = FALSE ) } \arguments{ \item{mae}{a MultiAssayExperiment object} \item{groups}{a string specifying column name of the colData to use it as a group variable. Alternatively, a character vector with group assignment for every sample. Default is \code{NULL} (no group structure).} \item{extract_metadata}{logical indicating whether to incorporate the metadata from the MultiAssayExperiment object into the MOFA object} } \value{ Returns an untrained \code{\link{MOFA}} object } \description{ Method to create a \code{\link{MOFA}} object from a MultiAssayExperiment object } ================================================ FILE: man/create_mofa_from_Seurat.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_mofa.R \name{create_mofa_from_Seurat} \alias{create_mofa_from_Seurat} \title{create a MOFA object from a Seurat object} \usage{ create_mofa_from_Seurat( seurat, groups = NULL, assays = NULL, layer = "data", features = NULL, extract_metadata = FALSE ) } \arguments{ \item{seurat}{Seurat object} \item{groups}{a string specifying column name of the samples metadata to use it as a group variable. Alternatively, a character vector with group assignment for every sample. Default is \code{NULL} (no group structure).} \item{assays}{assays to use, default is \code{NULL}, it fetched all assays available} \item{layer}{layer to be used (default is data).} \item{features}{a list with vectors, which are used to subset features, with names corresponding to assays; a vector can be provided when only one assay is used} \item{extract_metadata}{logical indicating whether to incorporate the metadata from the Seurat object into the MOFA object} } \value{ Returns an untrained \code{\link{MOFA}} object } \description{ Method to create a \code{\link{MOFA}} object from a Seurat object } ================================================ FILE: man/create_mofa_from_SingleCellExperiment.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_mofa.R \name{create_mofa_from_SingleCellExperiment} \alias{create_mofa_from_SingleCellExperiment} \title{create a MOFA object from a SingleCellExperiment object} \usage{ create_mofa_from_SingleCellExperiment( sce, groups = NULL, assay = "logcounts", extract_metadata = FALSE ) } \arguments{ \item{sce}{SingleCellExperiment object} \item{groups}{a string specifying column name of the colData to use it as a group variable. Alternatively, a character vector with group assignment for every sample. Default is \code{NULL} (no group structure).} \item{assay}{assay to use, default is \code{logcounts}.} \item{extract_metadata}{logical indicating whether to incorporate the metadata from the SingleCellExperiment object into the MOFA object} } \value{ Returns an untrained \code{\link{MOFA}} object } \description{ Method to create a \code{\link{MOFA}} object from a SingleCellExperiment object } ================================================ FILE: man/create_mofa_from_df.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_mofa.R \name{create_mofa_from_df} \alias{create_mofa_from_df} \title{create a MOFA object from a data.frame object} \usage{ create_mofa_from_df(df, extract_metadata = TRUE) } \arguments{ \item{df}{\code{data.frame} object with at most 5 columns: \code{sample}, \code{group}, \code{feature}, \code{view}, \code{value}. The \code{group} column (optional) indicates the group of each sample when using the multi-group framework. The \code{view} column (optional) indicates the view of each feature when having multi-view data.} \item{extract_metadata}{logical indicating whether to incorporate the extra columns as sample metadata into the MOFA object} } \value{ Returns an untrained \code{\link{MOFA}} object } \description{ Method to create a \code{\link{MOFA}} object from a data.frame object } \examples{ # Using an existing simulated data with two groups and two views file <- system.file("extdata", "test_data.RData", package = "MOFA2") # Load data (in long data.frame format) load(file) MOFAmodel <- create_mofa_from_df(dt) } ================================================ FILE: man/create_mofa_from_matrix.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/create_mofa.R \name{create_mofa_from_matrix} \alias{create_mofa_from_matrix} \title{create a MOFA object from a a list of matrices} \usage{ create_mofa_from_matrix(data, groups = NULL) } \arguments{ \item{data}{A list of matrices, where each entry corresponds to one view. Samples are stored in columns and features in rows. Missing values must be filled in prior to creating the MOFA object (see for example the CLL tutorial)} \item{groups}{A character vector with group assignment for every sample. Default is \code{NULL}, no group structure.} } \value{ Returns an untrained \code{\link{MOFA}} object } \description{ Method to create a \code{\link{MOFA}} object from a list of matrices } \examples{ m <- make_example_data() create_mofa_from_matrix(m$data) } ================================================ FILE: man/factors_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/set_methods.R \name{factors_names} \alias{factors_names} \alias{factors_names<-} \alias{factors_names,MOFA-method} \alias{factors_names<-,MOFA,vector-method} \title{factors_names: set and retrieve factor names} \usage{ factors_names(object) factors_names(object) <- value \S4method{factors_names}{MOFA}(object) \S4method{factors_names}{MOFA,vector}(object) <- value } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{value}{a character vector of factor names} } \value{ character vector with the factor names } \description{ factors_names: set and retrieve factor names } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) factors_names(model) } ================================================ FILE: man/features_metadata.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/set_methods.R \name{features_metadata} \alias{features_metadata} \alias{features_metadata<-} \alias{features_metadata,MOFA-method} \alias{features_metadata<-,MOFA,data.frame-method} \title{features_metadata: set and retrieve feature metadata} \usage{ features_metadata(object) features_metadata(object) <- value \S4method{features_metadata}{MOFA}(object) \S4method{features_metadata}{MOFA,data.frame}(object) <- value } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{value}{data frame with feature information, it at least must contain the columns \code{feature} and \code{view}} } \value{ a data frame with sample metadata } \description{ features_metadata: set and retrieve feature metadata } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) features_metadata(model) } ================================================ FILE: man/features_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/set_methods.R \name{features_names} \alias{features_names} \alias{features_names<-} \alias{features_names,MOFA-method} \alias{features_names<-,MOFA,list-method} \title{features_names: set and retrieve feature names} \usage{ features_names(object) features_names(object) <- value \S4method{features_names}{MOFA}(object) \S4method{features_names}{MOFA,list}(object) <- value } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{value}{list of character vectors with the feature names for every view} } \value{ list of character vectors with the feature names for each view } \description{ features_names: set and retrieve feature names } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) features_names(model) } ================================================ FILE: man/get_covariates.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{get_covariates} \alias{get_covariates} \title{Get sample covariates} \usage{ get_covariates( object, covariates = "all", as.data.frame = FALSE, warped = FALSE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{covariates}{character vector with the covariate name(s), or numeric vector with the covariate index(es).} \item{as.data.frame}{logical indicating whether to output the result as a long data frame, default is \code{FALSE}.} \item{warped}{logical indicating whether to extract the aligned covariates} } \value{ a matrix with dimensions (samples,covariates). If \code{as.data.frame} is \code{TRUE}, a long-formatted data frame with columns (sample,factor,value) } \description{ Function to extract the covariates from a \code{\link{MOFA}} object using MEFISTO. } \examples{ # Using an existing trained model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) covariates <- get_covariates(model) } ================================================ FILE: man/get_data.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_data} \alias{get_data} \title{Get data} \usage{ get_data( object, views = "all", groups = "all", features = "all", as.data.frame = FALSE, add_intercept = TRUE, denoise = FALSE, na.rm = TRUE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{views}{character vector with the view name(s), or numeric vector with the view index(es). Default is "all".} \item{groups}{character vector with the group name(s), or numeric vector with the group index(es). Default is "all".} \item{features}{a *named* list of character vectors. Example: list("view1"=c("feature_1","feature_2"), "view2"=c("feature_3","feature_4")) Default is "all".} \item{as.data.frame}{logical indicating whether to return a long data frame instead of a list of matrices. Default is \code{FALSE}.} \item{add_intercept}{logical indicating whether to add feature intercepts to the data. Default is \code{TRUE}.} \item{denoise}{logical indicating whether to return the denoised data (i.e. the model predictions). Default is \code{FALSE}.} \item{na.rm}{remove NAs from the data.frame (only if as.data.frame is \code{TRUE}).} } \value{ A list of data matrices with dimensionality (D,N) or a \code{data.frame} (if \code{as.data.frame} is TRUE) } \description{ Fetch the input data } \details{ By default this function returns a list where each element is a data matrix with dimensionality (D,N) where D is the number of features and N is the number of samples. \cr Alternatively, if \code{as.data.frame} is \code{TRUE}, the function returns a long-formatted data frame with columns (view,feature,sample,value). Missing values are not included in the the long data.frame format by default. To include them use the argument \code{na.rm=FALSE}. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Fetch data data <- get_data(model) # Fetch a specific view data <- get_data(model, views = "view_0") # Fetch data in data.frame format instead of matrix format data <- get_data(model, as.data.frame = TRUE) # Fetch centered data (do not add the feature intercepts) data <- get_data(model, as.data.frame = FALSE) # Fetch denoised data (do not add the feature intercepts) data <- get_data(model, denoise = TRUE) } ================================================ FILE: man/get_default_data_options.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/prepare_mofa.R \name{get_default_data_options} \alias{get_default_data_options} \title{Get default data options} \usage{ get_default_data_options(object) } \arguments{ \item{object}{an untrained \code{\link{MOFA}} object} } \value{ Returns a list with the default data options. } \description{ Function to obtain the default data options. } \details{ This function provides a default set of data options that can be modified and passed to the \code{\link{MOFA}} object in the \code{\link{prepare_mofa}} step (see example), i.e. after creating a \code{\link{MOFA}} object (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}) The data options are the following: \cr \itemize{ \item{\strong{scale_views}: logical indicating whether to scale views to have the same unit variance. As long as the scale differences between the views is not too high, this is not required. Default is FALSE.} \item{\strong{scale_groups}: logical indicating whether to scale groups to have the same unit variance. As long as the scale differences between the groups is not too high, this is not required. Default is FALSE.} \item{\strong{use_float32}: logical indicating whether use float32 instead of float64 arrays to increase speed and memory usage. Default is FALSE.} } } \examples{ # Using an existing simulated data with two groups and two views file <- system.file("extdata", "test_data.RData", package = "MOFA2") # Load data dt (in data.frame format) load(file) # Create the MOFA object MOFAmodel <- create_mofa(dt) # Load default data options data_opts <- get_default_data_options(MOFAmodel) # Edit some of the data options data_opts$scale_views <- TRUE # Prepare the MOFA object MOFAmodel <- prepare_mofa(MOFAmodel, data_options = data_opts) } ================================================ FILE: man/get_default_mefisto_options.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{get_default_mefisto_options} \alias{get_default_mefisto_options} \title{Get default options for MEFISTO covariates} \usage{ get_default_mefisto_options(object) } \arguments{ \item{object}{an untrained \code{\link{MOFA}} object} } \value{ Returns a list with default options for the MEFISTO covariate(s) functionality. } \description{ Function to obtain the default options for the usage of MEFISTO covariates with MEFISTO } \details{ The options are the following: \cr \itemize{ \item{\strong{scale_cov}:} logical: Scale covariates? \item{\strong{start_opt}:} integer: First iteration to start the optimisation of GP hyperparameters \item{\strong{n_grid}:} integer: Number of points for the grid search in the optimisation of GP hyperparameters \item{\strong{opt_freq}:} integer: Frequency of optimisation of GP hyperparameters \item{\strong{sparseGP}:} logical: Use sparse GPs to speed up the optimisation of the GP parameters? \item{\strong{frac_inducing}:} numeric between 0 and 1: Fraction of samples to use as inducing points (only relevant if sparseGP is \code{TRUE}) \item{\strong{warping}:} logical: Activate warping functionality to align covariates between groups (requires a multi-group design) \item{\strong{warping_freq}:} numeric: frequency of the warping (only relevant if warping is \code{TRUE}) \item{\strong{warping_ref}:} A character specifying the reference group for warping (only relevant if warping is \code{TRUE}) \item{\strong{warping_open_begin}:} logical: Warping: Allow for open beginning? (only relevant warping is \code{TRUE}) \item{\strong{warping_open_end}:} logical: Warping: Allow for open end? (only relevant warping is \code{TRUE}) \item{\strong{warping_groups}:} Assignment of groups to classes used for alignment (advanced option). Needs to be a vector of length number of samples, e.g. a column of samples_metadata, which needs to have the same value within each group. By default groups are used specified in `create_mofa`. \item{\strong{model_groups}:} logical: Model covariance structure across groups (for more than one group, otherwise FALSE)? If FALSE, we assume the same patterns in all groups. \item{\strong{new_values}:} Values for which to predict the factor values (for interpolation / extrapolation). This should be numeric matrix in the same format with covariate(s) in rows and new values in columns. Default is NULL, leading to no interpolation. } } \examples{ # generate example data dd <- make_example_data(sample_cov = seq(0,1,length.out = 200), n_samples = 200, n_factors = 4, n_features = 200, n_views = 4, lscales = c(0.5, 0.2, 0, 0)) # input data data <- dd$data # covariate matrix with samples in columns time <- dd$sample_cov rownames(time) <- "time" # create mofa and set covariates sm <- create_mofa(data = dd$data) sm <- set_covariates(sm, covariates = time) MEFISTO_opt <- get_default_mefisto_options(sm) } ================================================ FILE: man/get_default_model_options.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/prepare_mofa.R \name{get_default_model_options} \alias{get_default_model_options} \title{Get default model options} \usage{ get_default_model_options(object) } \arguments{ \item{object}{an untrained \code{\link{MOFA}} object} } \value{ Returns a list with the default model options. } \description{ Function to obtain the default model options. } \details{ This function provides a default set of model options that can be modified and passed to the \code{\link{MOFA}} object in the \code{\link{prepare_mofa}} step (see example), i.e. after creating a \code{\link{MOFA}} object (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}) The model options are the following: \cr \itemize{ \item{\strong{likelihoods}: character vector with data likelihoods per view: 'gaussian' for continuous data (Default for all views), 'bernoulli' for binary data and 'poisson' for count data.} \item{\strong{num_factors}: numeric value indicating the (initial) number of factors. Default is 15.} \item{\strong{spikeslab_factors}: logical indicating whether to use spike and slab sparsity on the factors (Default is FALSE)} \item{\strong{spikeslab_weights}: logical indicating whether to use spike and slab sparsity on the weights (Default is TRUE)} \item{\strong{ard_factors}: logical indicating whether to use ARD sparsity on the factors (Default is TRUE only if using multiple groups)} \item{\strong{ard_weights}: logical indicating whether to use ARD sparsity on the weights (Default is TRUE)} } } \examples{ # Using an existing simulated data with two groups and two views file <- system.file("extdata", "test_data.RData", package = "MOFA2") # Load data dt (in data.frame format) load(file) # Create the MOFA object MOFAmodel <- create_mofa(dt) # Load default model options model_opts <- get_default_model_options(MOFAmodel) # Edit some of the model options model_opts$num_factors <- 10 model_opts$spikeslab_weights <- FALSE # Prepare the MOFA object MOFAmodel <- prepare_mofa(MOFAmodel, model_options = model_opts) } ================================================ FILE: man/get_default_stochastic_options.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/prepare_mofa.R \name{get_default_stochastic_options} \alias{get_default_stochastic_options} \title{Get default stochastic options} \usage{ get_default_stochastic_options(object) } \arguments{ \item{object}{an untrained \code{\link{MOFA}}} } \value{ Returns a list with default options } \description{ Function to obtain the default options for stochastic variational inference. } \details{ This function provides a default set of stochastic inference options that can be modified and passed to the \code{\link{MOFA}} object in the \code{\link{prepare_mofa}} step), i.e. after creating a \code{\link{MOFA}} object (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}) These options are only relevant when activating stochastic inference in training_options (see example). The stochastic inference options are the following: \cr \itemize{ \item{\strong{batch_size}: numeric value indicating the batch size (as a fraction)}. Default is 0.5 (half of the data set). \item{\strong{learning_rate}: numeric value indicating the learning rate. } Default is 1.0 \item{\strong{forgetting_rate}: numeric indicating the forgetting rate.} Default is 0.5 \item{\strong{start_stochastic}: integer indicating the first iteration to start stochastic inference} Default is 1 } } \examples{ # Using an existing simulated data with two groups and two views file <- system.file("extdata", "test_data.RData", package = "MOFA2") # Load data dt (in data.frame format) load(file) # Create the MOFA object MOFAmodel <- create_mofa(dt) # activate stochastic inference in training options train_opts <- get_default_training_options(MOFAmodel) train_opts$stochastic <- TRUE # Load default stochastic options stochastic_opts <- get_default_stochastic_options(MOFAmodel) # Edit some of the stochastic options stochastic_opts$learning_rate <- 0.75 stochastic_opts$batch_size <- 0.25 # Prepare the MOFA object MOFAmodel <- prepare_mofa(MOFAmodel, training_options = train_opts, stochastic_options = stochastic_opts ) } ================================================ FILE: man/get_default_training_options.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/prepare_mofa.R \name{get_default_training_options} \alias{get_default_training_options} \title{Get default training options} \usage{ get_default_training_options(object) } \arguments{ \item{object}{an untrained \code{\link{MOFA}}} } \value{ Returns a list with default training options } \description{ Function to obtain the default training options. } \details{ This function provides a default set of training options that can be modified and passed to the \code{\link{MOFA}} object in the \code{\link{prepare_mofa}} step (see example), i.e. after creating a \code{\link{MOFA}} object (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}) The training options are the following: \cr \itemize{ \item{\strong{maxiter}: numeric value indicating the maximum number of iterations. Default is 1000. Convergence is assessed using the ELBO statistic.} \item{\strong{drop_factor_threshold}: numeric indicating the threshold on fraction of variance explained to consider a factor inactive and drop it from the model. For example, a value of 0.01 implies that factors explaining less than 1\% of variance (in each view) will be dropped. Default is -1 (no dropping of factors)} \item{\strong{convergence_mode}: character indicating the convergence criteria, either "fast", "medium" or "slow", corresponding to 0.0005\%, 0.00005\% or 0.000005\% deltaELBO change. } \item{\strong{verbose}: logical indicating whether to generate a verbose output.} \item{\strong{startELBO}: integer indicating the first iteration to compute the ELBO (default is 1). } \item{\strong{freqELBO}: integer indicating the first iteration to compute the ELBO (default is 1). } \item{\strong{stochastic}: logical indicating whether to use stochastic variational inference (only required for very big data sets, default is \code{FALSE}).} \item{\strong{gpu_mode}: logical indicating whether to use GPUs (see details).} \item{\strong{gpu_device}: integer indicating which GPU to use.} \item{\strong{seed}: numeric indicating the seed for reproducibility (default is 42).} } } \examples{ # Using an existing simulated data with two groups and two views file <- system.file("extdata", "test_data.RData", package = "MOFA2") # Load data dt (in data.frame format) load(file) # Create the MOFA object MOFAmodel <- create_mofa(dt) # Load default training options train_opts <- get_default_training_options(MOFAmodel) # Edit some of the training options train_opts$convergence_mode <- "medium" train_opts$startELBO <- 100 train_opts$seed <- 42 # Prepare the MOFA object MOFAmodel <- prepare_mofa(MOFAmodel, training_options = train_opts) } ================================================ FILE: man/get_dimensions.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_dimensions} \alias{get_dimensions} \title{Get dimensions} \usage{ get_dimensions(object) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} } \value{ list containing the dimensionalities of the model } \description{ Extract dimensionalities from the model. } \details{ K indicates the number of factors, M indicates the number of views, D indicates the number of features (per view), N indicates the number of samples (per group) and C indicates the number of covariates. } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) dims <- get_dimensions(model) } ================================================ FILE: man/get_elbo.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_elbo} \alias{get_elbo} \title{Get ELBO} \usage{ get_elbo(object) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} } \value{ Value of the ELBO } \description{ Extract the value of the ELBO statistics after model training. This can be useful for model selection. } \details{ This can be useful for model selection. } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) elbo <- get_elbo(model) } ================================================ FILE: man/get_expectations.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_expectations} \alias{get_expectations} \title{Get expectations} \usage{ get_expectations(object, variable, as.data.frame = FALSE) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{variable}{variable name: 'Z' for factors and 'W' for weights.} \item{as.data.frame}{logical indicating whether to output the result as a long data frame, default is \code{FALSE}.} } \value{ the output varies depending on the variable of interest: \cr \itemize{ \item{\strong{"Z"}: a matrix with dimensions (samples,factors). If \code{as.data.frame} is \code{TRUE}, a long-formatted data frame with columns (sample,factor,value)} \item{\strong{"W"}: a list of length (views) where each element is a matrix with dimensions (features,factors). If \code{as.data.frame} is \code{TRUE}, a long-formatted data frame with columns (view,feature,factor,value)} } } \description{ Function to extract the expectations from the (variational) posterior distributions of a trained \code{\link{MOFA}} object. } \details{ Technical note: MOFA is a Bayesian model where each variable has a prior distribution and a posterior distribution. In particular, to achieve scalability we used the variational inference framework, thus true posterior distributions are replaced by approximated variational distributions. This function extracts the expectations of the variational distributions, which can be used as final point estimates to analyse the results of the model. \cr The priors and variational distributions of each variable are extensively described in the supplementary methods of the original paper. } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) factors <- get_expectations(model, "Z") weights <- get_expectations(model, "W") } ================================================ FILE: man/get_factors.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_factors} \alias{get_factors} \title{Get factors} \usage{ get_factors( object, groups = "all", factors = "all", scale = FALSE, as.data.frame = FALSE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{groups}{character vector with the group name(s), or numeric vector with the group index(es). Default is "all".} \item{factors}{character vector with the factor name(s), or numeric vector with the factor index(es). Default is "all".} \item{scale}{logical indicating whether to scale factor values.} \item{as.data.frame}{logical indicating whether to return a long data frame instead of a matrix. Default is \code{FALSE}.} } \value{ By default it returns the latent factor matrix of dimensionality (N,K), where N is number of samples and K is number of factors. \cr Alternatively, if \code{as.data.frame} is \code{TRUE}, returns a long-formatted data frame with columns (sample,factor,value). } \description{ Extract the latent factors from the model. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Fetch factors in matrix format (a list, one matrix per group) factors <- get_factors(model) # Concatenate groups factors <- do.call("rbind",factors) # Fetch factors in data.frame format instead of matrix format factors <- get_factors(model, as.data.frame = TRUE) } ================================================ FILE: man/get_group_kernel.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_group_kernel} \alias{get_group_kernel} \title{Get group covariance matrix} \usage{ get_group_kernel(object) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} } \value{ A list of group-group correlation matrices per factor } \description{ Extract the inferred group-group covariance matrix per factor } \details{ This can be used only if covariates are passed to the MOFAobject upon creation and GP_factors is set to True. } ================================================ FILE: man/get_imputed_data.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_imputed_data} \alias{get_imputed_data} \title{Get imputed data} \usage{ get_imputed_data( object, views = "all", groups = "all", features = "all", as.data.frame = FALSE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{views}{character vector with the view name(s), or numeric vector with the view index(es). Default is "all".} \item{groups}{character vector with the group name(s), or numeric vector with the group index(es). Default is "all".} \item{features}{list of character vectors with the feature names or list of numeric vectors with the feature indices. Default is "all".} \item{as.data.frame}{logical indicating whether to return a long-formatted data frame instead of a list of matrices. Default is \code{FALSE}.} } \value{ A list containing the imputed valued or a data.frame if as.data.frame is TRUE } \description{ Function to get the imputed data. It requires the previous use of the \code{\link{impute}} method. } \details{ Data is imputed from the generative model of MOFA. } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) model <- impute(model) imputed <- get_imputed_data(model) } ================================================ FILE: man/get_interpolated_factors.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_interpolated_factors} \alias{get_interpolated_factors} \title{Get interpolated factor values} \usage{ get_interpolated_factors(object, as.data.frame = FALSE, only_mean = FALSE) } \arguments{ \item{object}{a \code{\link{MOFA}} object} \item{as.data.frame}{logical indicating whether to return data as a data.frame} \item{only_mean}{logical indicating whether include only mean or also uncertainties} } \value{ By default, a nested list containing for each group a list with a matrix with the interpolated factor values ("mean"), their variance ("variance") and the values of the covariate at which interpolation took place ("new_values"). Alternatively, if \code{as.data.frame} is \code{TRUE}, returns a long-formatted data frame with columns containing the covariates and (factor, group, mean and variance). } \description{ Extract the interpolated factor values } \details{ This can be used only if covariates are passed to the object upon creation, GP_factors is set to True and new covariates were passed for interpolation. } ================================================ FILE: man/get_lengthscales.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_lengthscales} \alias{get_lengthscales} \title{Get lengthscales} \usage{ get_lengthscales(object) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} } \value{ A numeric vector containing the lengthscale for each factor. } \description{ Extract the inferred lengthscale for each factor after model training. } \details{ This can be used only if covariates are passed to the MOFAobject upon creation and GP_factors is set to True. } \examples{ # Using an existing trained model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) ls <- get_lengthscales(model) } ================================================ FILE: man/get_scales.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_scales} \alias{get_scales} \title{Get scales} \usage{ get_scales(object) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} } \value{ A numeric vector containing the scale for each factor. } \description{ Extract the inferred scale for each factor after model training. } \details{ This can be used only if covariates are passed to the MOFAobject upon creation and GP_factors is set to True. } \examples{ # Using an existing trained model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) s <- get_scales(model) } ================================================ FILE: man/get_variance_explained.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_variance_explained} \alias{get_variance_explained} \title{Get variance explained values} \usage{ get_variance_explained( object, groups = "all", views = "all", factors = "all", as.data.frame = FALSE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{groups}{character vector with the group name(s), or numeric vector with the group index(es). Default is "all".} \item{views}{character vector with the view name(s), or numeric vector with the view index(es). Default is "all".} \item{factors}{character vector with the factor name(s), or numeric vector with the factor index(es). Default is "all".} \item{as.data.frame}{logical indicating whether to return a long data frame instead of a matrix. Default is \code{FALSE}.} } \value{ A list of data matrices with variance explained per group or a \code{data.frame} (if \code{as.data.frame} is TRUE) } \description{ Extract the latent factors from the model. } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Fetch variance explained values (in matrix format) r2 <- get_variance_explained(model) # Fetch variance explained values (in data.frame format) r2 <- get_variance_explained(model, as.data.frame = TRUE) } ================================================ FILE: man/get_weights.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_methods.R \name{get_weights} \alias{get_weights} \title{Get weights} \usage{ get_weights( object, views = "all", factors = "all", abs = FALSE, scale = FALSE, as.data.frame = FALSE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{views}{character vector with the view name(s), or numeric vector with the view index(es). Default is "all".} \item{factors}{character vector with the factor name(s) or numeric vector with the factor index(es). \cr Default is "all".} \item{abs}{logical indicating whether to take the absolute value of the weights.} \item{scale}{logical indicating whether to scale all weights from -1 to 1 (or from 0 to 1 if \code{abs=TRUE}).} \item{as.data.frame}{logical indicating whether to return a long data frame instead of a list of matrices. Default is \code{FALSE}.} } \value{ By default it returns a list where each element is a loading matrix with dimensionality (D,K), where D is the number of features and K is the number of factors. \cr Alternatively, if \code{as.data.frame} is \code{TRUE}, returns a long-formatted data frame with columns (view,feature,factor,value). } \description{ Extract the weights from the model. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Fetch weights in matrix format (a list, one matrix per view) weights <- get_weights(model) # Fetch weights for factor 1 and 2 and view 1 weights <- get_weights(model, views = 1, factors = c(1,2)) # Fetch weights in data.frame format weights <- get_weights(model, as.data.frame = TRUE) } ================================================ FILE: man/groups_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/set_methods.R \name{groups_names} \alias{groups_names} \alias{groups_names<-} \alias{groups_names,MOFA-method} \alias{groups_names<-,MOFA,character-method} \title{groups_names: set and retrieve group names} \usage{ groups_names(object) groups_names(object) <- value \S4method{groups_names}{MOFA}(object) \S4method{groups_names}{MOFA,character}(object) <- value } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{value}{character vector with the names for each group} } \value{ character vector with the names for each sample group } \description{ groups_names: set and retrieve group names } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) groups_names(model) groups_names(model) <- c("my_group") } ================================================ FILE: man/impute.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/impute.R \name{impute} \alias{impute} \title{Impute missing values from a fitted MOFA} \usage{ impute( object, views = "all", groups = "all", factors = "all", add_intercept = TRUE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{views}{character vector with the view name(s), or numeric vector with view index(es).} \item{groups}{character vector with the group name(s), or numeric vector with group index(es).} \item{factors}{character vector with the factor names, or numeric vector with the factor index(es).} \item{add_intercept}{add feature intercepts to the imputation (default is TRUE).} } \value{ This method fills the \code{imputed_data} slot by replacing the missing values in the input data with the model predictions. } \description{ This function uses the latent factors and the loadings to impute missing values. } \details{ MOFA generates a denoised and condensed low-dimensional representation of the data that captures the main sources of heterogeneity of the data. This representation can be used to reconstruct the data, simply using the equation \code{Y = WX}. For more details read the supplementary methods of the manuscript. \cr Note that with \code{\link{impute}} you can only generate the point estimates (the means of the posterior distributions). If you want to add uncertainty estimates (the variance) you need to set \code{impute=TRUE} in the training options. See \code{\link{get_default_training_options}}. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Impute missing values in all data modalities imputed_data <- impute(model, views = "all") # Impute missing values in all data modalities using factors 1:3 imputed_data <- impute(model, views = "all", factors = 1:3) } ================================================ FILE: man/interpolate_factors.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{interpolate_factors} \alias{interpolate_factors} \title{Interpolate factors in MEFISTO based on new covariate values} \usage{ interpolate_factors(object, new_values) } \arguments{ \item{object}{a \code{\link{MOFA}} object trained with MEFISTO options and a covariate} \item{new_values}{a matrix containing the new covariate values to inter/extrapolate to. Should be in the same format as the covariates used for training.} } \value{ Returns the \code{\link{MOFA}} with interpolated factor values filled in the corresponding slot (interpolatedZ) } \description{ Function to interpolate factors in MEFISTO based on new covariate values. } \details{ This function requires the functional MEFISTO framework to be used in training. Use \code{set_covariates} and specify mefisto_options when preparing the training using \code{prepare_mofa}. Currently, only the mean of the interpolation is provided from R. } \examples{ # Using an existing trained model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) model <- interpolate_factors(model, new_values = seq(0,1.1,0.01)) } ================================================ FILE: man/load_model.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/load_model.R \name{load_model} \alias{load_model} \title{Load a trained MOFA} \usage{ load_model( file, sort_factors = TRUE, on_disk = FALSE, load_data = TRUE, remove_outliers = FALSE, remove_inactive_factors = TRUE, verbose = FALSE, load_interpol_Z = FALSE ) } \arguments{ \item{file}{an hdf5 file saved by the mofa Python framework} \item{sort_factors}{logical indicating whether factors should be sorted by variance explained (default is TRUE)} \item{on_disk}{logical indicating whether to work from memory (FALSE) or disk (TRUE). \cr This should be set to TRUE when the training data is so big that cannot fit into memory. \cr On-disk operations are performed using the \code{\link{HDF5Array}} and \code{\link{DelayedArray}} framework.} \item{load_data}{logical indicating whether to load the training data (default is TRUE, it can be memory expensive)} \item{remove_outliers}{logical indicating whether to mask outlier values.} \item{remove_inactive_factors}{logical indicating whether to remove inactive factors from the model.} \item{verbose}{logical indicating whether to print verbose output (default is FALSE)} \item{load_interpol_Z}{(MEFISTO) logical indicating whether to load predictions for factor values based on latent processed (only relevant for models trained with covariates and Gaussian processes, where prediction was enabled)} } \value{ a \code{\link{MOFA}} model } \description{ Method to load a trained MOFA \cr The training of mofa is done using a Python framework, and the model output is saved as an .hdf5 file, which has to be loaded in the R package. } \examples{ #' # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) } ================================================ FILE: man/make_example_data.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/make_example_data.R \name{make_example_data} \alias{make_example_data} \title{Simulate a data set using the generative model of MOFA} \usage{ make_example_data( n_views = 3, n_features = 100, n_samples = 50, n_groups = 1, n_factors = 5, likelihood = "gaussian", lscales = 1, sample_cov = NULL, as.data.frame = FALSE ) } \arguments{ \item{n_views}{number of views} \item{n_features}{number of features in each view} \item{n_samples}{number of samples in each group} \item{n_groups}{number of groups} \item{n_factors}{number of factors} \item{likelihood}{likelihood for each view, one of "gaussian" (default), "bernoulli", "poisson", or a character vector of length n_views} \item{lscales}{vector of lengthscales, needs to be of length n_factors (default is 0 - no smooth factors)} \item{sample_cov}{(only for use with MEFISTO) matrix of sample covariates for one group with covariates in rows and samples in columns or "equidistant" for sequential ordering, default is NULL (no smooth factors)} \item{as.data.frame}{return data and covariates as long dataframe} } \value{ Returns a list containing the simulated data and simulation parameters. } \description{ Function to simulate an example multi-view multi-group data set according to the generative model of MOFA2. } \examples{ # Generate a simulated data set MOFAexample <- make_example_data() } ================================================ FILE: man/pipe.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/imports.R \name{\%>\%} \alias{\%>\%} \title{Re-exporting the pipe operator See \code{magrittr::\link[magrittr]{\%>\%}} for details.} \usage{ lhs \%>\% rhs } \arguments{ \item{lhs}{see \code{magrittr::\link[magrittr]{\%>\%}}} \item{rhs}{see \code{magrittr::\link[magrittr]{\%>\%}}} } \value{ depending on lhs and rhs } \description{ Re-exporting the pipe operator See \code{magrittr::\link[magrittr]{\%>\%}} for details. } ================================================ FILE: man/plot_alignment.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{plot_alignment} \alias{plot_alignment} \title{Plot covariate alignment across groups} \usage{ plot_alignment(object) } \arguments{ \item{object}{a \code{\link{MOFA}} object using MEFISTO with warping} } \value{ ggplot object showing the alignment } \description{ Function to plot the alignment learnt by MEFISTO for the covariate values between different groups } \details{ This function requires the functional MEFISTO framework to be used in training. Use \code{set_covariates} and specify mefisto_options when preparing the training using \code{prepare_mofa}. } ================================================ FILE: man/plot_ascii_data.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_data.R \name{plot_ascii_data} \alias{plot_ascii_data} \title{Visualize the structure of the data in the terminal} \usage{ plot_ascii_data(object, nonzero = FALSE) } \arguments{ \item{object}{a \code{\link{MOFA}} object} \item{nonzero}{a logical value specifying whether to calculate the fraction of non-zero values (non-NA values by default)} } \value{ None } \description{ A Fancy printing method } \details{ This function is helpful to get an overview of the structure of the data as a text output } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) plot_ascii_data(model) } ================================================ FILE: man/plot_data_heatmap.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_data.R \name{plot_data_heatmap} \alias{plot_data_heatmap} \title{Plot heatmap of relevant features} \usage{ plot_data_heatmap( object, factor, view = 1, groups = "all", features = 50, annotation_features = NULL, annotation_samples = NULL, transpose = FALSE, imputed = FALSE, denoise = FALSE, max.value = NULL, min.value = NULL, ... ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{factor}{a string with the factor name, or an integer with the index of the factor.} \item{view}{a string with the view name, or an integer with the index of the view. Default is the first view.} \item{groups}{groups to plot. Default is "all".} \item{features}{if an integer (default), the total number of features to plot based on the absolute value of the weights. If a character vector, a set of manually defined features.} \item{annotation_features}{annotation metadata for features (rows). Either a character vector specifying columns in the feature metadata, or a data.frame that will be passed to \code{\link[pheatmap]{pheatmap}} as \code{annotation_col}} \item{annotation_samples}{annotation metadata for samples (columns). Either a character vector specifying columns in the sample metadata, or a data.frame that will be passed to \code{\link[pheatmap]{pheatmap}} as \code{annotation_row}} \item{transpose}{logical indicating whether to transpose the heatmap. Default corresponds to features as rows and samples as columns.} \item{imputed}{logical indicating whether to plot the imputed data instead of the original data. Default is FALSE.} \item{denoise}{logical indicating whether to plot a denoised version of the data reconstructed using the MOFA factors.} \item{max.value}{numeric indicating the maximum value to display in the heatmap (i.e. the matrix values will be capped at \code{max.value} ).} \item{min.value}{numeric indicating the minimum value to display in the heatmap (i.e. the matrix values will be capped at \code{min.value} ). See \code{\link{predict}}. Default is FALSE.} \item{...}{further arguments that can be passed to \code{\link[pheatmap]{pheatmap}}} } \value{ A \code{\link[pheatmap]{pheatmap}} object } \description{ Function to plot a heatmap of the data for relevant features, typically the ones with high weights. } \details{ One of the first steps for the annotation of a given factor is to visualise the corresponding weights, using for example \code{\link{plot_weights}} or \code{\link{plot_top_weights}}. \cr However, one might also be interested in visualising the direct relationship between features and factors, rather than looking at "abstract" weights. \cr This function generates a heatmap for selected features, which should reveal the underlying pattern that is captured by the latent factor. \cr A similar function for doing scatterplots rather than heatmaps is \code{\link{plot_data_scatter}}. } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) plot_data_heatmap(model, factor = 1, show_rownames = FALSE, show_colnames = FALSE) } ================================================ FILE: man/plot_data_overview.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_data.R \name{plot_data_overview} \alias{plot_data_overview} \title{Overview of the input data} \usage{ plot_data_overview( object, covariate = 1, colors = NULL, show_covariate = FALSE, show_dimensions = TRUE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{covariate}{(only for MEFISTO) specifies sample covariate to order samples by in the plot. This should be a character or a numeric index giving the name or position of a column present in the covariates slot of the object. Default is the first sample covariate in covariates slot. \code{NULL} does not order by covariate} \item{colors}{a vector specifying the colors per view (see example for details).} \item{show_covariate}{(only for MEFISTO) boolean specifying whether to include the covariate in the plot} \item{show_dimensions}{logical indicating whether to plot the dimensions of the data (default is TRUE).} } \value{ A \code{\link{ggplot}} object } \description{ Function to do a tile plot showing the missing value structure of the input data } \details{ This function is helpful to get an overview of the structure of the data. It shows the model dimensionalities (number of samples, groups, views and features) and it indicates which measurements are missing. } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) plot_data_overview(model) } ================================================ FILE: man/plot_data_scatter.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_data.R \name{plot_data_scatter} \alias{plot_data_scatter} \title{Scatterplots of feature values against latent factors} \usage{ plot_data_scatter( object, factor = 1, view = 1, groups = "all", features = 10, sign = "all", color_by = "group", legend = TRUE, alpha = 1, shape_by = NULL, stroke = NULL, dot_size = 2.5, text_size = NULL, add_lm = TRUE, lm_per_group = TRUE, imputed = FALSE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{factor}{string with the factor name, or an integer with the index of the factor.} \item{view}{string with the view name, or an integer with the index of the view. Default is the first view.} \item{groups}{groups to plot. Default is "all".} \item{features}{if an integer (default), the total number of features to plot. If a character vector, a set of manually-defined features.} \item{sign}{can be 'positive', 'negative' or 'all' (default) to show only positive, negative or all weights, respectively.} \item{color_by}{specifies groups or values (either discrete or continuous) used to color the dots (samples). This can be either: \itemize{ \item the string "group": dots are coloured with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the same of a column in the sample metadata slot \item a vector of the same length as the number of samples specifying the value for each sample. \item a dataframe with two columns: "sample" and "color" }} \item{legend}{logical indicating whether to add a legend} \item{alpha}{numeric indicating dot transparency (default is 1).} \item{shape_by}{specifies groups or values (only discrete) used to shape the dots (samples). This can be either: \itemize{ \item the string "group": dots are shaped with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the same of a column in the sample metadata slot \item a vector of the same length as the number of samples specifying the value for each sample. \item a dataframe with two columns: "sample" and "shape" }} \item{stroke}{numeric indicating the stroke size (the black border around the dots, default is NULL, inferred automatically).} \item{dot_size}{numeric indicating dot size (default is 5).} \item{text_size}{numeric indicating text size (default is 5).} \item{add_lm}{logical indicating whether to add a linear regression line for each plot} \item{lm_per_group}{logical indicating whether to add a linear regression line separately for each group} \item{imputed}{logical indicating whether to include imputed measurements} } \value{ A \code{\link{ggplot}} object } \description{ Function to do a scatterplot of features against factor values. } \details{ One of the first steps for the annotation of factors is to visualise the weights using \code{\link{plot_weights}} or \code{\link{plot_top_weights}}. However, one might also be interested in visualising the direct relationship between features and factors, rather than looking at "abstract" weights. \cr A similar function for doing heatmaps rather than scatterplots is \code{\link{plot_data_heatmap}}. } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) plot_data_scatter(model) } ================================================ FILE: man/plot_data_vs_cov.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{plot_data_vs_cov} \alias{plot_data_vs_cov} \title{Scatterplots of feature values against sample covariates} \usage{ plot_data_vs_cov( object, covariate = 1, warped = TRUE, factor = 1, view = 1, groups = "all", features = 10, sign = "all", color_by = "group", legend = TRUE, alpha = 1, shape_by = NULL, stroke = NULL, dot_size = 2.5, text_size = NULL, add_lm = FALSE, lm_per_group = FALSE, imputed = FALSE, return_data = FALSE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object using MEFISTO.} \item{covariate}{string with the covariate name or a samples_metadata column, or an integer with the index of the covariate} \item{warped}{logical indicating whether to show the aligned covariate (default: TRUE), only relevant if warping has been used to align multiple sample groups} \item{factor}{string with the factor name, or an integer with the index of the factor to take top features from} \item{view}{string with the view name, or an integer with the index of the view. Default is the first view.} \item{groups}{groups to plot. Default is "all".} \item{features}{if an integer (default), the total number of features to plot (given by highest weights). If a character vector, a set of manually-defined features.} \item{sign}{can be 'positive', 'negative' or 'all' (default) to show only features with highest positive, negative or all weights, respectively.} \item{color_by}{specifies groups or values (either discrete or continuous) used to color the dots (samples). This can be either: \itemize{ \item the string "group": dots are coloured with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the same of a column in the sample metadata slot \item a vector of the same length as the number of samples specifying the value for each sample. \item a dataframe with two columns: "sample" and "color" }} \item{legend}{logical indicating whether to add a legend} \item{alpha}{numeric indicating dot transparency (default is 1).} \item{shape_by}{specifies groups or values (only discrete) used to shape the dots (samples). This can be either: \itemize{ \item the string "group": dots are shaped with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the same of a column in the sample metadata slot \item a vector of the same length as the number of samples specifying the value for each sample. \item a dataframe with two columns: "sample" and "shape" }} \item{stroke}{numeric indicating the stroke size (the black border around the dots, default is NULL, inferred automatically).} \item{dot_size}{numeric indicating dot size (default is 5).} \item{text_size}{numeric indicating text size (default is 5).} \item{add_lm}{logical indicating whether to add a linear regression line for each plot} \item{lm_per_group}{logical indicating whether to add a linear regression line separately for each group} \item{imputed}{logical indicating whether to include imputed measurements} \item{return_data}{logical indicating whether to return a data frame instead of a plot} } \value{ Returns a \code{ggplot2} object or the underlying dataframe if return_data is set to \code{TRUE}. } \description{ Function to do a scatterplot of features against sample covariate values. } \details{ One of the first steps for the annotation of factors is to visualise the weights using \code{\link{plot_weights}} or \code{\link{plot_top_weights}} and inspect the relationship of the factor to the covariate(s) using \code{\link{plot_factors_vs_cov}}. However, one might also be interested in visualising the direct relationship between features and covariate(s), rather than looking at "abstract" weights and possibly look at the interpolated and extrapolated values by setting imputed to True. } \examples{ # Using an existing trained model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) plot_data_vs_cov(model, factor = 3, features = 2) } ================================================ FILE: man/plot_dimred.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimensionality_reduction.R \name{plot_dimred} \alias{plot_dimred} \title{Plot dimensionality reduction based on MOFA factors} \usage{ plot_dimred( object, method = c("UMAP", "TSNE"), groups = "all", show_missing = TRUE, color_by = NULL, shape_by = NULL, color_name = NULL, shape_name = NULL, label = FALSE, dot_size = 1.5, stroke = NULL, alpha_missing = 1, legend = TRUE, rasterize = FALSE, return_data = FALSE, ... ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{method}{string indicating which method has been used for non-linear dimensionality reduction (either 'umap' or 'tsne')} \item{groups}{character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups.} \item{show_missing}{logical indicating whether to include samples for which \code{shape_by} or \code{color_by} is missing} \item{color_by}{specifies groups or values used to color the samples. This can be either: (1) a character giving the name of a feature present in the training data. (2) a character giving the same of a column present in the sample metadata. (3) a vector of the same length as the number of samples specifying discrete groups or continuous numeric values.} \item{shape_by}{specifies groups or values used to shape the samples. This can be either: (1) a character giving the name of a feature present in the training data, (2) a character giving the same of a column present in the sample metadata. (3) a vector of the same length as the number of samples specifying discrete groups.} \item{color_name}{name for color legend.} \item{shape_name}{name for shape legend.} \item{label}{logical indicating whether to label the medians of the clusters. Only if color_by is specified} \item{dot_size}{numeric indicating dot size.} \item{stroke}{numeric indicating the stroke size (the black border around the dots, default is NULL, inferred automatically).} \item{alpha_missing}{numeric indicating dot transparency of missing data.} \item{legend}{logical indicating whether to add legend.} \item{rasterize}{logical indicating whether to rasterize plot using \code{\link[ggrastr]{geom_point_rast}}} \item{return_data}{logical indicating whether to return the long data frame to plot instead of plotting} \item{...}{extra arguments passed to \code{\link{run_umap}} or \code{\link{run_tsne}}.} } \value{ Returns a \code{ggplot2} object or a long data.frame (if return_data is TRUE) } \description{ Plot dimensionality reduction based on MOFA factors } \details{ This function plots dimensionality reduction projections that are stored in the \code{dim_red} slot. Typically this contains UMAP or t-SNE projections computed using \code{\link{run_tsne}} or \code{\link{run_umap}}, respectively. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Run UMAP model <- run_umap(model) # Plot UMAP plot_dimred(model, method = "UMAP") # Plot UMAP, colour by Factor 1 values plot_dimred(model, method = "UMAP", color_by = "Factor1") # Plot UMAP, colour by the values of a specific feature plot_dimred(model, method = "UMAP", color_by = "feature_0_view_0") } ================================================ FILE: man/plot_enrichment.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/enrichment.R \name{plot_enrichment} \alias{plot_enrichment} \title{Plot output of gene set Enrichment Analysis} \usage{ plot_enrichment( enrichment.results, factor, alpha = 0.1, max.pathways = 25, text_size = 1, dot_size = 5 ) } \arguments{ \item{enrichment.results}{output of \link{run_enrichment} function} \item{factor}{a string with the factor name or an integer with the factor index} \item{alpha}{p.value threshold to filter out gene sets} \item{max.pathways}{maximum number of enriched pathways to display} \item{text_size}{text size} \item{dot_size}{dot size} } \value{ a \code{ggplot2} object } \description{ Method to plot the results of the gene set Enrichment Analysis } \details{ it requires \code{\link{run_enrichment}} to be run beforehand. } ================================================ FILE: man/plot_enrichment_detailed.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/enrichment.R \name{plot_enrichment_detailed} \alias{plot_enrichment_detailed} \title{Plot detailed output of the Feature Set Enrichment Analysis} \usage{ plot_enrichment_detailed( enrichment.results, factor, alpha = 0.1, max.genes = 5, max.pathways = 10, text_size = 3 ) } \arguments{ \item{enrichment.results}{output of \link{run_enrichment} function} \item{factor}{string with factor name or numeric with factor index} \item{alpha}{p.value threshold to filter out feature sets} \item{max.genes}{maximum number of genes to display, per pathway} \item{max.pathways}{maximum number of enriched pathways to display} \item{text_size}{size of the text to label the top genes} } \value{ a \code{ggplot2} object } \description{ Method to plot a detailed output of the Feature Set Enrichment Analysis (FSEA). \cr Each row corresponds to a significant pathway, sorted by statistical significance, and each dot corresponds to a gene. \cr For each pathway, we display the top genes of the pathway sorted by the corresponding feature statistic (by default, the absolute value of the weight) The top genes with the highest statistic (max.genes argument) are displayed and labelled in black. The remaining genes are colored in grey. } ================================================ FILE: man/plot_enrichment_heatmap.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/enrichment.R \name{plot_enrichment_heatmap} \alias{plot_enrichment_heatmap} \title{Heatmap of Feature Set Enrichment Analysis results} \usage{ plot_enrichment_heatmap( enrichment.results, alpha = 0.1, cap = 1e-50, log_scale = TRUE, ... ) } \arguments{ \item{enrichment.results}{output of \link{run_enrichment} function} \item{alpha}{FDR threshold to filter out unsignificant feature sets which are not represented in the heatmap. Default is 0.10.} \item{cap}{cap p-values below this threshold} \item{log_scale}{logical indicating whether to plot the -log of the p.values.} \item{...}{extra arguments to be passed to the \link{pheatmap} function} } \value{ produces a heatmap } \description{ This method generates a heatmap with the adjusted p.values that result from the the feature set enrichment analysis. Rows are feature sets and columns are factors. } ================================================ FILE: man/plot_factor.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_factors.R \name{plot_factor} \alias{plot_factor} \title{Beeswarm plot of factor values} \usage{ plot_factor( object, factors = 1, groups = "all", group_by = "group", color_by = "group", shape_by = NULL, add_dots = TRUE, dot_size = 2, dot_alpha = 1, add_violin = FALSE, violin_alpha = 0.5, color_violin = TRUE, add_boxplot = FALSE, boxplot_alpha = 0.5, color_boxplot = TRUE, show_missing = TRUE, scale = FALSE, dodge = FALSE, color_name = "", shape_name = "", stroke = NULL, legend = TRUE, rasterize = FALSE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{factors}{character vector with the factor names, or numeric vector with the indices of the factors to use, or "all" to plot all factors.} \item{groups}{character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups.} \item{group_by}{specifies grouping of samples: \itemize{ \item (default) the string "group": in this case, the plot will color samples with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the name of a column in the sample metadata slot \item a vector of the same length as the number of samples specifying the value for each sample. }} \item{color_by}{specifies color of samples. This can be either: \itemize{ \item (default) the string "group": in this case, the plot will color the dots with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the name of a column in the sample metadata slot \item a vector of the same length as the number of samples specifying the value for each sample. }} \item{shape_by}{specifies shape of samples. This can be either: \itemize{ \item (default) the string "group": in this case, the plot will shape the dots with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the name of a column in the sample metadata slot \item a vector of the same length as the number of samples specifying the value for each sample. }} \item{add_dots}{logical indicating whether to add dots.} \item{dot_size}{numeric indicating dot size.} \item{dot_alpha}{numeric indicating dot transparency.} \item{add_violin}{logical indicating whether to add violin plots} \item{violin_alpha}{numeric indicating violin plot transparency.} \item{color_violin}{logical indicating whether to color violin plots.} \item{add_boxplot}{logical indicating whether to add box plots} \item{boxplot_alpha}{numeric indicating boxplot transparency.} \item{color_boxplot}{logical indicating whether to color box plots.} \item{show_missing}{logical indicating whether to remove samples for which \code{shape_by} or \code{color_by} is missing.} \item{scale}{logical indicating whether to scale factor values.} \item{dodge}{logical indicating whether to dodge the dots (default is FALSE).} \item{color_name}{name for color legend (usually only used if color_by is not a character itself).} \item{shape_name}{name for shape legend (usually only used if shape_by is not a character itself).} \item{stroke}{numeric indicating the stroke size (the black border around the dots).} \item{legend}{logical indicating whether to add a legend to the plot (default is TRUE).} \item{rasterize}{logical indicating whether to rasterize the plot (default is FALSE).} } \value{ Returns a \code{ggplot2} } \description{ Beeswarm plot of the latent factor values. } \details{ One of the main steps for the annotation of factors is to visualise and color them using known covariates or phenotypic data. \cr This function generates a Beeswarm plot of the sample values in a given latent factor. \cr Similar functions are \code{\link{plot_factors}} for doing scatter plots. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Plot Factors 1 and 2 and colour by "group" plot_factor(model, factors = c(1,2), color_by="group") # Plot Factor 3 and colour by the value of a specific feature plot_factor(model, factors = 3, color_by="feature_981_view_1") # Add violin plots plot_factor(model, factors = c(1,2), color_by="group", add_violin = TRUE) # Scale factor values from -1 to 1 plot_factor(model, factors = c(1,2), scale = TRUE) } ================================================ FILE: man/plot_factor_cor.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_factors.R \name{plot_factor_cor} \alias{plot_factor_cor} \title{Plot correlation matrix between latent factors} \usage{ plot_factor_cor(object, method = "pearson", ...) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{method}{a character indicating the type of correlation coefficient to be computed: pearson (default), kendall, or spearman.} \item{...}{arguments passed to \code{\link[corrplot]{corrplot}}} } \value{ Returns a symmetric matrix with the correlation coefficient between every pair of factors. } \description{ Function to plot the correlation matrix between the latent factors. } \details{ This method plots the correlation matrix between the latent factors. \cr The model encourages the factors to be uncorrelated, so this function usually yields a diagonal correlation matrix. \cr However, it is not a hard constraint such as in Principal Component Analysis and correlations between factors can occur, particularly with large number factors. \cr Generally, correlated factors are redundant and should be avoided, as they make interpretation harder. Therefore, if you have too many correlated factors we suggest you try reducing the number of factors. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Plot correlation between all factors plot_factor_cor(model) } ================================================ FILE: man/plot_factors.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_factors.R \name{plot_factors} \alias{plot_factors} \title{Scatterplots of two factor values} \usage{ plot_factors( object, factors = c(1, 2), groups = "all", show_missing = TRUE, scale = FALSE, color_by = NULL, shape_by = NULL, color_name = NULL, shape_name = NULL, dot_size = 2, alpha = 1, legend = TRUE, stroke = NULL, return_data = FALSE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{factors}{a vector of length two with the factors to plot. Factors can be specified either as a characters} \item{groups}{character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups.} \item{show_missing}{logical indicating whether to include samples for which \code{shape_by} or \code{color_by} is missing} \item{scale}{logical indicating whether to scale factor values.} \item{color_by}{specifies groups or values used to color the samples. This can be either: (1) a character giving the name of a feature present in the training data. (2) a character giving the name of a column present in the sample metadata. (3) a vector of the same length as the number of samples specifying discrete groups or continuous numeric values.} \item{shape_by}{specifies groups or values used to shape the samples. This can be either: (1) a character giving the name of a feature present in the training data, (2) a character giving the name of a column present in the sample metadata. (3) a vector of the same length as the number of samples specifying discrete groups.} \item{color_name}{name for color legend.} \item{shape_name}{name for shape legend.} \item{dot_size}{numeric indicating dot size (default is 2).} \item{alpha}{numeric indicating dot transparency (default is 1).} \item{legend}{logical indicating whether to add legend.} \item{stroke}{numeric indicating the stroke size (the black border around the dots, default is NULL, inferred automatically).} \item{return_data}{logical indicating whether to return the data frame to plot instead of plotting} } \value{ Returns a \code{ggplot2} object } \description{ Scatterplot of the values of two latent factors. } \details{ One of the first steps for the annotation of factors is to visualise and group/color them using known covariates such as phenotypic or clinical data. This method generates a single scatterplot for the combination of two latent factors. TO-FINISH... \code{\link{plot_factors}} for doing Beeswarm plots for factors. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Scatterplot of factors 1 and 2 plot_factors(model, factors = c(1,2)) # Shape dots by a column in the metadata plot_factors(model, factors = c(1,2), shape_by="group") # Scale factor values from -1 to 1 plot_factors(model, factors = c(1,2), scale = TRUE) } ================================================ FILE: man/plot_factors_vs_cov.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{plot_factors_vs_cov} \alias{plot_factors_vs_cov} \title{Scatterplots of a factor's values against the sample covariates} \usage{ plot_factors_vs_cov( object, factors = "all", covariates = NULL, warped = TRUE, show_missing = TRUE, scale = FALSE, color_by = NULL, shape_by = NULL, color_name = NULL, shape_name = NULL, dot_size = 1.5, alpha = 1, stroke = NULL, legend = TRUE, rotate_x = FALSE, rotate_y = FALSE, return_data = FALSE, show_variance = FALSE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object using MEFISTO.} \item{factors}{character or numeric specifying the factor(s) to plot, default is "all"} \item{covariates}{specifies sample covariate(s) to plot against: (1) a character giving the name of a column present in the sample covariates or sample metadata. (2) a character giving the name of a feature present in the training data. (3) a vector of the same length as the number of samples specifying continuous numeric values per sample. Default is the first sample covariates in covariates slot} \item{warped}{logical indicating whether to show the aligned covariate (default: TRUE), only relevant if warping has been used to align multiple sample groups} \item{show_missing}{(for 1-dim covariates) logical indicating whether to include samples for which \code{shape_by} or \code{color_by} is missing} \item{scale}{logical indicating whether to scale factor values.} \item{color_by}{(for 1-dim covariates) specifies groups or values used to color the samples. This can be either: (1) a character giving the name of a feature present in the training data. (2) a character giving the same of a column present in the sample metadata. (3) a vector of the same length as the number of samples specifying discrete groups or continuous numeric values.} \item{shape_by}{(for 1-dim covariates) specifies groups or values used to shape the samples. This can be either: (1) a character giving the name of a feature present in the training data, (2) a character giving the same of a column present in the sample metadata. (3) a vector of the same length as the number of samples specifying discrete groups.} \item{color_name}{(for 1-dim covariates) name for color legend.} \item{shape_name}{(for 1-dim covariates) name for shape legend.} \item{dot_size}{(for 1-dim covariates) numeric indicating dot size.} \item{alpha}{(for 1-dim covariates) numeric indicating dot transparency.} \item{stroke}{(for 1-dim covariates) numeric indicating the stroke size} \item{legend}{(for 1-dim covariates) logical indicating whether to add legend.} \item{rotate_x}{(for spatial, 2-dim covariates) Rotate covariate on x-axis} \item{rotate_y}{(for spatial, 2-dim covariates) Rotate covariate on y-axis} \item{return_data}{logical indicating whether to return the data frame to plot instead of plotting} \item{show_variance}{(for 1-dim covariates) logical indicating whether to show the marginal variance of inferred factor values (only relevant for 1-dimensional covariates)} } \value{ Returns a \code{ggplot2} object } \description{ Scatterplots of a factor's values against the sample covariates } \details{ To investigate the factors pattern along the covariates (such as time or a spatial coordinate) this function an be used to plot a scatterplot of the factor against the values of each covariate } \examples{ # Using an existing trained model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) plot_factors_vs_cov(model) } ================================================ FILE: man/plot_group_kernel.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{plot_group_kernel} \alias{plot_group_kernel} \title{Heatmap plot showing the group-group correlations per factor} \usage{ plot_group_kernel(object, factors = "all", groups = "all", ...) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object using MEFISTO.} \item{factors}{character vector with the factors names, or numeric vector indicating the indices of the factors to use} \item{groups}{character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups.} \item{...}{additional parameters that can be passed to \code{pheatmap}} } \value{ Returns a \code{ggplot,gg} object containing the heatmaps } \description{ Heatmap plot showing the group-group correlations inferred by the model per factor } \details{ The heatmap gives insight into the clustering of the patterns that factors display along the covariate in each group. A correlation of 1 indicates that the module captured by a factor shows identical patterns across groups, a correlation of zero that it shows distinct patterns, a negative correlation that the patterns go in opposite directions. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) plot_group_kernel(model) } ================================================ FILE: man/plot_interpolation_vs_covariate.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{plot_interpolation_vs_covariate} \alias{plot_interpolation_vs_covariate} \title{Plot interpolated factors versus covariate (1-dimensional)} \usage{ plot_interpolation_vs_covariate( object, covariate = 1, factors = "all", only_mean = TRUE, show_observed = TRUE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object using MEFISTO.} \item{covariate}{covariate to use for plotting} \item{factors}{character or numeric specifying the factor(s) to plot, default is "all"} \item{only_mean}{show only mean or include uncertainties?} \item{show_observed}{include observed factor values as dots on the plot} } \value{ Returns a \code{ggplot2} object } \description{ make a plot of interpolated covariates versus covariate } \details{ to be filled } \examples{ # Using an existing trained model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) model <- interpolate_factors(model, new_values = seq(0,1.1,0.1)) plot_interpolation_vs_covariate(model, covariate = "time", factors = 1) } ================================================ FILE: man/plot_sharedness.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{plot_sharedness} \alias{plot_sharedness} \title{Barplot showing the sharedness per factor} \usage{ plot_sharedness(object, factors = "all", color = "#B8CF87") } \arguments{ \item{object}{a trained \code{\link{MOFA}} object using MEFISTO.} \item{factors}{character vector with the factors names, or numeric vector indicating the indices of the factors to use} \item{color}{for the shared part of the bar} } \value{ Returns a \code{ggplot2} object } \description{ Barplot indicating a sharedness score (between 0 (non-shared) and 1 (shared)) per factor } \details{ The sharedness score is calculated as the distance of the learnt group correlation matrix to the identity matrix in terms of the mean absolute distance on the off-diagonal elements. } ================================================ FILE: man/plot_smoothness.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{plot_smoothness} \alias{plot_smoothness} \title{Barplot showing the smoothness per factor} \usage{ plot_smoothness(object, factors = "all", color = "cadetblue") } \arguments{ \item{object}{a trained \code{\link{MOFA}} object using MEFISTO.} \item{factors}{character vector with the factors names, or numeric vector indicating the indices of the factors to use} \item{color}{for the smooth part of the bar} } \value{ Returns a \code{ggplot2} object } \description{ Barplot indicating a smoothness score (between 0 (non-smooth) and 1 (smooth)) per factor } \details{ The smoothness score is given by the scale parameter for the underlying Gaussian process of each factor. } \examples{ # Using an existing trained model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) smoothness_bars <- plot_smoothness(model) } ================================================ FILE: man/plot_top_weights.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_weights.R \name{plot_top_weights} \alias{plot_top_weights} \title{Plot top weights} \usage{ plot_top_weights( object, view = 1, factors = 1, nfeatures = 10, abs = TRUE, scale = TRUE, sign = "all" ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{view}{a string with the view name, or an integer with the index of the view.} \item{factors}{a character string with factors names, or an integer vector with factors indices.} \item{nfeatures}{number of top features to display. Default is 10} \item{abs}{logical indicating whether to use the absolute value of the weights (Default is FALSE).} \item{scale}{logical indicating whether to scale all weights from -1 to 1 (or from 0 to 1 if abs=TRUE). Default is TRUE.} \item{sign}{can be 'positive', 'negative' or 'all' to show only positive, negative or all weights, respectively. Default is 'all'.} } \value{ Returns a \code{ggplot2} object } \description{ Plot top weights for a given factor and view. } \details{ An important step to annotate factors is to visualise the corresponding feature weights. \cr This function displays the top features with highest loading whereas the function \code{\link{plot_weights}} plots all weights for a given latent factor and view. \cr Importantly, the weights of the features within a view have relative values and they should not be interpreted in an absolute scale. Therefore, for interpretability purposes we always recommend to scale the weights with \code{scale=TRUE}. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Plot top weights for Factors 1 and 2 and View 1 plot_top_weights(model, view = 1, factors = c(1,2)) # Do not take absolute value plot_weights(model, abs = FALSE) } ================================================ FILE: man/plot_variance_explained.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calculate_variance_explained.R \name{plot_variance_explained} \alias{plot_variance_explained} \title{Plot variance explained by the model} \usage{ plot_variance_explained( object, x = "view", y = "factor", split_by = NA, plot_total = FALSE, factors = "all", min_r2 = 0, max_r2 = NULL, legend = TRUE, use_cache = TRUE, ... ) } \arguments{ \item{object}{a \code{\link{MOFA}} object} \item{x}{character specifying the dimension for the x-axis ("view", "factor", or "group").} \item{y}{character specifying the dimension for the y-axis ("view", "factor", or "group").} \item{split_by}{character specifying the dimension to be faceted ("view", "factor", or "group").} \item{plot_total}{logical value to indicate if to plot the total variance explained (for the variable in the x-axis)} \item{factors}{character vector with a factor name(s), or numeric vector with the index(es) of the factor(s). Default is "all".} \item{min_r2}{minimum variance explained for the color scheme (default is 0).} \item{max_r2}{maximum variance explained for the color scheme.} \item{legend}{logical indicating whether to add a legend to the plot (default is TRUE).} \item{use_cache}{logical indicating whether to use cache (default is TRUE)} \item{...}{extra arguments to be passed to \code{\link{calculate_variance_explained}}} } \value{ A list of \code{\link{ggplot}} objects (if \code{plot_total} is TRUE) or a single \code{\link{ggplot}} object } \description{ plots the variance explained by the MOFA factors across different views and groups, as specified by the user. Consider using cowplot::plot_grid(plotlist = ...) to combine the multiple plots that this function generates. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Calculate variance explained (R2) r2 <- calculate_variance_explained(model) # Plot variance explained values (view as x-axis, and factor as y-axis) plot_variance_explained(model, x="view", y="factor") # Plot variance explained values (view as x-axis, and group as y-axis) plot_variance_explained(model, x="view", y="group") # Plot variance explained values for factors 1 to 3 plot_variance_explained(model, x="view", y="group", factors=1:3) # Scale R2 values plot_variance_explained(model, max_r2=0.25) } ================================================ FILE: man/plot_variance_explained_by_covariates.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{plot_variance_explained_by_covariates} \alias{plot_variance_explained_by_covariates} \title{Plot variance explained by the smooth components of the model} \usage{ plot_variance_explained_by_covariates( object, factors = "all", x = "view", y = "factor", split_by = NA, min_r2 = 0, max_r2 = NULL, compare_total = FALSE, legend = TRUE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object} \item{factors}{character vector with a factor name(s), or numeric vector with the index(es) of the factor(s). Default is "all".} \item{x}{character specifying the dimension for the x-axis ("view", "factor", or "group").} \item{y}{character specifying the dimension for the y-axis ("view", "factor", or "group").} \item{split_by}{character specifying the dimension to be faceted ("view", "factor", or "group").} \item{min_r2}{minimum variance explained for the color scheme (default is 0).} \item{max_r2}{maximum variance explained for the color scheme.} \item{compare_total}{plot corresponding variance explained in total in addition} \item{legend}{logical indicating whether to add a legend to the plot (default is TRUE).} } \value{ A list of \code{\link{ggplot}} objects (if \code{compare_total} is TRUE) or a single \code{\link{ggplot}} object. Consider using cowplot::plot_grid(plotlist = ...) to combine the multiple plots that this function generates. } \description{ This function plots the variance explained by the smooth components (Gaussian processes) underlying the factors in MEFISTO across different views and groups, as specified by the user. } \details{ Note that this function requires the use of MEFISTO. To activate the functional MEFISTO framework, specify mefisto_options when preparing the training using \code{prepare_mofa} } \examples{ # load_model file <- system.file("extdata", "MEFISTO_model.hdf5", package = "MOFA2") model <- load_model(file) plot_variance_explained_by_covariates(model) # compare to total variance explained plist <- plot_variance_explained_by_covariates(model, compare_total = TRUE) cowplot::plot_grid(plotlist = plist) } ================================================ FILE: man/plot_variance_explained_per_feature.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/calculate_variance_explained.R \name{plot_variance_explained_per_feature} \alias{plot_variance_explained_per_feature} \title{Plot variance explained by the model for a set of features} \usage{ plot_variance_explained_per_feature( object, view, features = 10, split_by_factor = FALSE, group_features_by = NULL, groups = "all", factors = "all", min_r2 = 0, max_r2 = NULL, legend = TRUE, return_data = FALSE, ... ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{view}{a view name or index.} \item{features}{a vector with indices or names for features from the respective view, or number of top features to be fetched by their loadings across specified factors. "all" to plot all features.} \item{split_by_factor}{logical indicating whether to split R2 per factor or plot R2 jointly} \item{group_features_by}{column name of features metadata to group features by} \item{groups}{a vector with indices or names for sample groups (default is all)} \item{factors}{a vector with indices or names for factors (default is all)} \item{min_r2}{minimum variance explained for the color scheme (default is 0).} \item{max_r2}{maximum variance explained for the color scheme.} \item{legend}{logical indicating whether to add a legend to the plot (default is TRUE).} \item{return_data}{logical indicating whether to return the data frame to plot instead of plotting} \item{...}{extra arguments to be passed to \code{\link{calculate_variance_explained}}} } \value{ ggplot object } \description{ Returns a tile plot with a group on the X axis and a feature along the Y axis } \examples{ # Using an existing trained model file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) plot_variance_explained_per_feature(model, view = 1) } ================================================ FILE: man/plot_weights.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_weights.R \name{plot_weights} \alias{plot_weights} \title{Plot distribution of feature weights (weights)} \usage{ plot_weights( object, view = 1, factors = 1, nfeatures = 10, color_by = NULL, shape_by = NULL, abs = FALSE, manual = NULL, color_manual = NULL, scale = TRUE, dot_size = 1, text_size = 5, legend = TRUE, return_data = FALSE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{view}{a string with the view name, or an integer with the index of the view.} \item{factors}{character vector with the factor name(s), or numeric vector with the index of the factor(s).} \item{nfeatures}{number of top features to label.} \item{color_by}{specifies groups or values (either discrete or continuous) used to color the dots (features). This can be either: \itemize{ \item (default) the string "group": in this case, the plot will color the dots with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the same of a column in the features metadata slot \item a vector of the same length as the number of features specifying the value for each feature \item a dataframe with two columns: "feature" and "color" }} \item{shape_by}{specifies groups or values (only discrete) used to shape the dots (features). This can be either: \itemize{ \item (default) the string "group": in this case, the plot will shape the dots with respect to their predefined groups. \item a character giving the name of a feature that is present in the input data \item a character giving the same of a column in the features metadata slot \item a vector of the same length as the number of features specifying the value for each feature \item a dataframe with two columns: "feature" and "shape" }} \item{abs}{logical indicating whether to take the absolute value of the weights.} \item{manual}{A nested list of character vectors with features to be manually labelled (see the example for details).} \item{color_manual}{a character vector with colors, one for each element of 'manual'} \item{scale}{logical indicating whether to scale all weights from -1 to 1 (or from 0 to 1 if abs=TRUE).} \item{dot_size}{numeric indicating the dot size.} \item{text_size}{numeric indicating the text size.} \item{legend}{logical indicating whether to add legend.} \item{return_data}{logical indicating whether to return the data frame to plot instead of plotting} } \value{ A \code{\link{ggplot}} object or a \code{data.frame} if return_data is TRUE } \description{ An important step to annotate factors is to visualise the corresponding feature weights. \cr This function plots all weights for a given latent factor and view, labeling the top ones. \cr In contrast, the function \code{\link{plot_top_weights}} displays only the top features with highest loading. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Plot distribution of weights for Factor 1 and View 1 plot_weights(model, view = 1, factors = 1) # Plot distribution of weights for Factors 1 to 3 and View 1 plot_weights(model, view = 1, factors = 1:3) # Take the absolute value and highlight the top 10 features plot_weights(model, view = 1, factors = 1, nfeatures = 10, abs = TRUE) # Change size of dots and text plot_weights(model, view = 1, factors = 1, text_size = 5, dot_size = 1) } ================================================ FILE: man/plot_weights_heatmap.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_weights.R \name{plot_weights_heatmap} \alias{plot_weights_heatmap} \title{Plot heatmap of the weights} \usage{ plot_weights_heatmap( object, view = 1, features = "all", factors = "all", threshold = 0, ... ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{view}{character vector with the view name(s), or numeric vector with the index of the view(s) to use. Default is the first view.} \item{features}{character vector with the feature name(s), or numeric vector with the index of the feature(s) to use. Default is 'all'.} \item{factors}{character vector with the factor name(s), or numeric vector with the index of the factor(s) to use. Default is 'all'.} \item{threshold}{threshold on absolute weight values, so that weights with a magnitude below this threshold (in all factors) are removed} \item{...}{extra arguments passed to \code{\link[pheatmap]{pheatmap}}.} } \value{ A \code{\link{pheatmap}} object } \description{ Function to visualize the weights for a given set of factors in a given view. \cr This is useful to visualize the overall pattern of the weights but not to individually characterise the factors. \cr To inspect the weights of individual factors, use the functions \code{\link{plot_weights}} and \code{\link{plot_top_weights}} } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) plot_weights_heatmap(model) } ================================================ FILE: man/plot_weights_scatter.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_weights.R \name{plot_weights_scatter} \alias{plot_weights_scatter} \title{Scatterplots of weights} \usage{ plot_weights_scatter( object, factors, view = 1, color_by = NULL, shape_by = NULL, dot_size = 1, name_color = "", name_shape = "", show_missing = TRUE, abs = FALSE, scale = TRUE, legend = TRUE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{factors}{a vector of length two with the factors to plot. Factors can be specified either as a characters using the factor names, or as numeric with the index of the factors} \item{view}{character vector with the view name, or numeric vector with the index of the view to use. Default is the first view.} \item{color_by}{specifies groups or values used to color the features. This can be either \itemize{ \item a character giving the same of a column in the feature metadata slot \item a vector specifying the value for each feature. \item a dataframe with two columns: "feature" and "color" }} \item{shape_by}{specifies groups or values used to shape the features. This can be either \itemize{ \item a character giving the same of a column in the feature metadata slot \item a vector specifying the value for each feature. \item a dataframe with two columns: "feature" and "shape" }} \item{dot_size}{numeric indicating dot size.} \item{name_color}{name for color legend (usually only used if color_by is not a character itself)} \item{name_shape}{name for shape legend (usually only used if shape_by is not a character itself)} \item{show_missing}{logical indicating whether to include dots for which \code{shape_by} or \code{color_by} is missing} \item{abs}{logical indicating whether to take the absolute value of the weights.} \item{scale}{logical indicating whether to scale all weights from -1 to 1 (or from 0 to 1 if \code{abs=TRUE}).} \item{legend}{logical indicating whether to add a legend to the plot (default is TRUE).} } \value{ Returns a \code{ggplot2} object } \description{ Scatterplot of the weights values for two factors } \details{ One of the first steps for the annotation of factors is to visualise and group/color them using known covariates such as phenotypic or clinical data. This method generates a single scatterplot for the combination of two latent factors. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) plot_weights_scatter(model, factors = 1:2) } ================================================ FILE: man/predict.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/predict.R \name{predict} \alias{predict} \title{Do predictions using a fitted MOFA} \usage{ predict( object, views = "all", groups = "all", factors = "all", add_intercept = TRUE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{views}{character vector with the view name(s), or numeric vector with the view index(es). Default is "all".} \item{groups}{character vector with the group name(s), or numeric vector with the group index(es). Default is "all".} \item{factors}{character vector with the factor name(s) or numeric vector with the factor index(es). Default is "all".} \item{add_intercept}{add feature intercepts to the prediction (default is TRUE).} } \value{ Returns a list with the data reconstructed by the model predictions. } \description{ This function uses the latent factors and the weights to do data predictions. } \details{ MOFA generates a denoised and condensed low-dimensional representation of the data that captures the main sources of heterogeneity of the data. This representation can be used to reconstruct a denoised representation of the data, simply using the equation \code{Y = WX}. For more mathematical details read the supplementary methods of the manuscript. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Predict observations for all data modalities predictions <- predict(model) } ================================================ FILE: man/prepare_mofa.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/prepare_mofa.R \name{prepare_mofa} \alias{prepare_mofa} \title{Prepare a MOFA for training} \usage{ prepare_mofa( object, data_options = NULL, model_options = NULL, training_options = NULL, stochastic_options = NULL, mefisto_options = NULL ) } \arguments{ \item{object}{an untrained \code{\link{MOFA}}} \item{data_options}{list of data_options (see \code{\link{get_default_data_options}} details). If NULL, default options are used.} \item{model_options}{list of model options (see \code{\link{get_default_model_options}} for details). If NULL, default options are used.} \item{training_options}{list of training options (see \code{\link{get_default_training_options}} for details). If NULL, default options are used.} \item{stochastic_options}{list of options for stochastic variational inference (see \code{\link{get_default_stochastic_options}} for details). If NULL, default options are used.} \item{mefisto_options}{list of options for mefisto (see \code{\link{get_default_mefisto_options}} for details). If NULL, default options are used.} } \value{ Returns an untrained \code{\link{MOFA}} with specified options filled in the corresponding slots } \description{ Function to prepare a \code{\link{MOFA}} object for training. It requires defining data, model and training options. } \details{ This function is called after creating a \code{\link{MOFA}} object (using \code{\link{create_mofa}}) and before starting the training (using \code{\link{run_mofa}}). Here, we can specify different options for the data (data_options), the model (model_options) and the training (training_options, stochastic_options). Take a look at the individual default options for an overview using the get_default_XXX_options functions above. } \examples{ # Using an existing simulated data with two groups and two views file <- system.file("extdata", "test_data.RData", package = "MOFA2") # Load data dt (in data.frame format) load(file) # Create the MOFA object MOFAmodel <- create_mofa(dt) # Prepare MOFA object using default options MOFAmodel <- prepare_mofa(MOFAmodel) # Prepare MOFA object changing some of the default model options values model_opts <- get_default_model_options(MOFAmodel) model_opts$num_factors <- 10 MOFAmodel <- prepare_mofa(MOFAmodel, model_options = model_opts) } ================================================ FILE: man/run_enrichment.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/enrichment.R \name{run_enrichment} \alias{run_enrichment} \title{Run feature set Enrichment Analysis} \usage{ run_enrichment( object, view, feature.sets, factors = "all", set.statistic = c("mean.diff", "rank.sum"), statistical.test = c("parametric", "cor.adj.parametric", "permutation"), sign = c("all", "positive", "negative"), min.size = 10, nperm = 1000, p.adj.method = "BH", alpha = 0.1, verbose = TRUE ) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{view}{a character with the view name, or a numeric vector with the index of the view to use.} \item{feature.sets}{data structure that holds feature set membership information. Must be a binary membership matrix (rows are feature sets and columns are features). See details below for some pre-built gene set matrices.} \item{factors}{character vector with the factor names, or numeric vector with the index of the factors for which to perform the enrichment.} \item{set.statistic}{the set statistic computed from the feature statistics. Must be one of the following: "mean.diff" (default) or "rank.sum".} \item{statistical.test}{the statistical test used to compute the significance of the feature set statistics under a competitive null hypothesis. Must be one of the following: "parametric" (default), "cor.adj.parametric", "permutation".} \item{sign}{use only "positive" or "negative" weights. Default is "all".} \item{min.size}{Minimum size of a feature set (default is 10).} \item{nperm}{number of permutations. Only relevant if statistical.test is set to "permutation". Default is 1000} \item{p.adj.method}{Method to adjust p-values factor-wise for multiple testing. Can be any method in p.adjust.methods(). Default uses Benjamini-Hochberg procedure.} \item{alpha}{FDR threshold to generate lists of significant pathways. Default is 0.1} \item{verbose}{boolean indicating whether to print messages on progress} } \value{ a list with five elements: \item{\strong{pval}:}{ matrices with nominal p-values. } \item{\strong{pval.adj}:}{ matrices with FDR-adjusted p-values. } \item{\strong{feature.statistics}:}{ matrices with the local (feature-wise) statistics. } \item{\strong{set.statistics}:}{ matrices with the global (gene set-wise) statistics. } \item{\strong{sigPathways}}{ list with significant pathways per factor. } } \description{ Method to perform feature set enrichment analysis. Here we use a slightly modified version of the \code{\link[PCGSE]{pcgse}} function. } \details{ The aim of this function is to relate each factor to pre-defined biological pathways by performing a gene set enrichment analysis on the feature weights. \cr This function is particularly useful when a factor is difficult to characterise based only on the genes with the highest weight. \cr We provide a few pre-built gene set matrices in the MOFAdata package. See \code{https://github.com/bioFAM/MOFAdata} for details. \cr The function we implemented is based on the \code{\link[PCGSE]{pcgse}} function with some modifications. Please read this paper https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4543476 for details on the math. } ================================================ FILE: man/run_mofa.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/run_mofa.R \name{run_mofa} \alias{run_mofa} \title{Train a MOFA model} \usage{ run_mofa(object, outfile = NULL, save_data = TRUE, use_basilisk = FALSE) } \arguments{ \item{object}{an untrained \code{\link{MOFA}} object} \item{outfile}{output file for the model (.hdf5 format). If \code{NULL}, a temporary file is created.} \item{save_data}{logical indicating whether to save the training data in the hdf5 file. This is useful for some downstream analysis (mainly functions with the prefix \code{plot_data}), but it can take a lot of disk space.} \item{use_basilisk}{use \code{basilisk} to automatically install a conda environment with mofapy2 and all dependencies? If \code{FALSE} (default), you should specify the right python binary when loading R with \code{reticulate::use_python(..., force=TRUE)} or the right conda environment with \code{reticulate::use_condaenv(..., force=TRUE)}.} } \value{ a trained \code{\link{MOFA}} object } \description{ Function to train an untrained \code{\link{MOFA}} object. } \details{ This function is called once a MOFA object has been prepared (using \code{\link{prepare_mofa}}) In this step the R package calls the \code{mofapy2} Python package, where model training is performed. \cr The interface with Python is done with the \code{\link{reticulate}} package. If you have several versions of Python installed and R is not detecting the correct one, you can change it using \code{reticulate::use_python} when loading the R session. Alternatively, you can let us install mofapy2 for you using \code{basilisk} if you set use_basilisk to \code{TRUE} } \examples{ # Load data (in data.frame format) file <- system.file("extdata", "test_data.RData", package = "MOFA2") load(file) # Create the MOFA object MOFAmodel <- create_mofa(dt) # Prepare the MOFA object with default options MOFAmodel <- prepare_mofa(MOFAmodel) # Run the MOFA model \dontrun{ MOFAmodel <- run_mofa(MOFAmodel, use_basilisk = TRUE) } } ================================================ FILE: man/run_tsne.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimensionality_reduction.R \name{run_tsne} \alias{run_tsne} \title{Run t-SNE on the MOFA factors} \usage{ run_tsne(object, factors = "all", groups = "all", ...) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{factors}{character vector with the factor names, or numeric vector with the indices of the factors to use, or "all" to use all factors (default).} \item{groups}{character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use all groups (default).} \item{...}{arguments passed to \code{\link{Rtsne}}} } \value{ Returns a \code{\link{MOFA}} object with the \code{MOFAobject@dim_red} slot filled with the t-SNE output } \description{ Run t-SNE on the MOFA factors } \details{ This function calls \code{\link[Rtsne]{Rtsne}} to calculate a TSNE representation from the MOFA factors. Subsequently, you can plot the TSNE representation with \code{\link{plot_dimred}} or fetch the coordinates using \code{plot_dimred(..., method="TSNE", return_data=TRUE)}. Remember to use set.seed before the function call to get reproducible results. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Run \dontrun{ model <- run_tsne(model, perplexity = 15) } # Plot \dontrun{ model <- plot_dimred(model, method="TSNE") } # Fetch data \dontrun{ tsne.df <- plot_dimred(model, method="TSNE", return_data=TRUE) } } ================================================ FILE: man/run_umap.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/dimensionality_reduction.R \name{run_umap} \alias{run_umap} \title{Run UMAP on the MOFA factors} \usage{ run_umap( object, factors = "all", groups = "all", n_neighbors = 30, min_dist = 0.3, metric = "cosine", ... ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{factors}{character vector with the factor names, or numeric vector with the indices of the factors to use, or "all" to use all factors (default).} \item{groups}{character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use all groups (default).} \item{n_neighbors}{number of neighbouring points used in local approximations of manifold structure. Larger values will result in more global structure being preserved at the loss of detailed local structure. In general this parameter should often be in the range 5 to 50.} \item{min_dist}{This controls how tightly the embedding is allowed compress points together. Larger values ensure embedded points are more evenly distributed, while smaller values allow the algorithm to optimise more accurately with regard to local structure. Sensible values are in the range 0.01 to 0.5} \item{metric}{choice of metric used to measure distance in the input space} \item{...}{arguments passed to \code{\link[uwot]{umap}}} } \value{ Returns a \code{\link{MOFA}} object with the \code{MOFAobject@dim_red} slot filled with the UMAP output } \description{ Run UMAP on the MOFA factors } \details{ This function calls \code{\link[uwot]{umap}} to calculate a UMAP representation from the MOFA factors For details on the hyperparameters of UMAP see the documentation of \code{\link[uwot]{umap}}. Subsequently, you can plot the UMAP representation with \code{\link{plot_dimred}} or fetch the coordinates using \code{plot_dimred(..., method="UMAP", return_data=TRUE)}. Remember to use set.seed before the function call to get reproducible results. } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Change hyperparameters passed to umap \dontrun{ model <- run_umap(model, min_dist = 0.01, n_neighbors = 10) } # Plot \dontrun{ model <- plot_dimred(model, method="UMAP") } # Fetch data \dontrun{ umap.df <- plot_dimred(model, method="UMAP", return_data=TRUE) } } ================================================ FILE: man/samples_metadata.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/set_methods.R \name{samples_metadata} \alias{samples_metadata} \alias{samples_metadata<-} \alias{samples_metadata,MOFA-method} \alias{samples_metadata<-,MOFA,data.frame-method} \title{samples_metadata: retrieve sample metadata} \usage{ samples_metadata(object) samples_metadata(object) <- value \S4method{samples_metadata}{MOFA}(object) \S4method{samples_metadata}{MOFA,data.frame}(object) <- value } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{value}{data frame with sample metadata, it must at least contain the columns \code{sample} and \code{group}. The order of the rows must match the order of \code{samples_names(object)}} } \value{ a data frame with sample metadata } \description{ samples_metadata: retrieve sample metadata } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) samples_metadata(model) } ================================================ FILE: man/samples_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/set_methods.R \name{samples_names} \alias{samples_names} \alias{samples_names<-} \alias{samples_names,MOFA-method} \alias{samples_names<-,MOFA,list-method} \title{samples_names: set and retrieve sample names} \usage{ samples_names(object) samples_names(object) <- value \S4method{samples_names}{MOFA}(object) \S4method{samples_names}{MOFA,list}(object) <- value } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{value}{list of character vectors with the sample names for every group} } \value{ list of character vectors with the sample names for each group } \description{ samples_names: set and retrieve sample names } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) samples_names(model) } ================================================ FILE: man/select_model.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/compare_models.R \name{select_model} \alias{select_model} \title{Select a model from a list of trained \code{\link{MOFA}} objects based on the best ELBO value} \usage{ select_model(models, plot = FALSE) } \arguments{ \item{models}{a list containing \code{\link{MOFA}} objects.} \item{plot}{boolean indicating whether to show a plot of the ELBO for each model instance} } \value{ A \code{\link{MOFA}} object } \description{ Different objects of \code{\link{MOFA}} are compared in terms of the final value of the ELBO statistics and the model with the highest ELBO value is selected. } ================================================ FILE: man/set_covariates.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/mefisto.R \name{set_covariates} \alias{set_covariates} \title{Add covariates to a MOFA model} \usage{ set_covariates(object, covariates) } \arguments{ \item{object}{an untrained \code{\link{MOFA}}} \item{covariates}{Sample-covariates to be passed to the model. This can be either: \itemize{ \item{a character, specifying columns already present in the samples_metadata of the object} \item{a data.frame with columns "sample", "covariate", "value". Sample names need to match those present in the data} \item{a matrix with samples in columns and covariate(s) in row(s)} } Note that the covariate should be numeric and continuous.} } \value{ Returns an untrained \code{\link{MOFA}} with covariates filled in the corresponding slots } \description{ Function to add continuous covariate(s) to a \code{\link{MOFA}} object for training with MEFISTO } \details{ To activate the functional MEFISTO framework, specify mefisto_options when preparing the training using \code{prepare_mofa} } \examples{ #' # Simulate data dd <- make_example_data(sample_cov = seq(0,1,length.out = 100), n_samples = 100, n_factors = 4) # Create MOFA object sm <- create_mofa(data = dd$data) # Add a covariate sm <- set_covariates(sm, covariates = dd$sample_cov) sm } ================================================ FILE: man/subset_factors.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/subset.R \name{subset_factors} \alias{subset_factors} \title{Subset factors} \usage{ subset_factors(object, factors, recalculate_variance_explained = TRUE) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{factors}{character vector with the factor names, or numeric vector with the index of the factors.} \item{recalculate_variance_explained}{logical indicating whether to recalculate variance explained values. Default is \code{TRUE}.} } \value{ A \code{\link{MOFA}} object } \description{ Method to subset (or sort) factors } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Subset factors 1 to 3 model <- subset_factors(model, factors = 1:3) } ================================================ FILE: man/subset_features.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/subset.R \name{subset_features} \alias{subset_features} \title{Subset features} \usage{ subset_features(object, view, features) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{view}{character vector with the view name or integer with the view index} \item{features}{character vector with the sample names, numeric vector with the feature indices or logical vector with the samples to be kept as TRUE.} } \value{ A \code{\link{MOFA}} object } \description{ Method to subset (or sort) features } ================================================ FILE: man/subset_groups.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/subset.R \name{subset_groups} \alias{subset_groups} \title{Subset groups} \usage{ subset_groups(object, groups) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{groups}{character vector with the groups names, numeric vector with the groups indices or logical vector with the groups to be kept as TRUE.} } \value{ A \code{\link{MOFA}} object } \description{ Method to subset (or sort) groups } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Subset the first group model <- subset_groups(model, groups = 1) } ================================================ FILE: man/subset_samples.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/subset.R \name{subset_samples} \alias{subset_samples} \title{Subset samples} \usage{ subset_samples(object, samples) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{samples}{character vector with the sample names or numeric vector with the sample indices.} } \value{ A \code{\link{MOFA}} object } \description{ Method to subset (or sort) samples } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # (TO-DO) Remove a specific sample from the model (an outlier) } ================================================ FILE: man/subset_views.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/subset.R \name{subset_views} \alias{subset_views} \title{Subset views} \usage{ subset_views(object, views) } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{views}{character vector with the views names, numeric vector with the views indices, or logical vector with the views to be kept as TRUE.} } \value{ A \code{\link{MOFA}} object } \description{ Method to subset (or sort) views } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) # Subset the first view model <- subset_views(model, views = 1) } ================================================ FILE: man/summarise_factors.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/correlate_covariates.R \name{summarise_factors} \alias{summarise_factors} \title{Summarise factor values using external groups} \usage{ summarise_factors( object, df, factors = "all", groups = "all", abs = FALSE, return_data = FALSE ) } \arguments{ \item{object}{a trained \code{\link{MOFA}} object.} \item{df}{a data.frame with the columns "sample" and "level", where level is a factor with discrete group assignments for each sample.} \item{factors}{character vector with the factor name(s), or numeric vector with the index of the factor(s) to use. Default is 'all'.} \item{groups}{character vector with the groups names, or numeric vector with the indices of the groups of samples to use, or "all" to use samples from all groups.} \item{abs}{logical indicating whether to take the absolute value of the factors (default is \code{FALSE}).} \item{return_data}{logical indicating whether to return the fa instead of plotting} } \value{ A \code{\link{ggplot}} object or a \code{data.frame} if return_data is TRUE } \description{ Function to summarise factor values using a discrete grouping of samples. } ================================================ FILE: man/views_names.Rd ================================================ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/AllGenerics.R, R/set_methods.R \name{views_names} \alias{views_names} \alias{views_names<-} \alias{views_names,MOFA-method} \alias{views_names<-,MOFA,character-method} \title{views_names: set and retrieve view names} \usage{ views_names(object) views_names(object) <- value \S4method{views_names}{MOFA}(object) \S4method{views_names}{MOFA,character}(object) <- value } \arguments{ \item{object}{a \code{\link{MOFA}} object.} \item{value}{character vector with the names for each view} } \value{ character vector with the names for each view } \description{ views_names: set and retrieve view names } \examples{ # Using an existing trained model on simulated data file <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(file) views_names(model) views_names(model) <- c("viewA", "viewB") } ================================================ FILE: setup.py ================================================ import sys import os from setuptools import setup from setuptools import find_packages exec(open(os.path.join(os.path.dirname(__file__), 'mofapy2', 'version.py')).read()) def setup_package(): install_requires = ['pandas', 'scipy>=1.5.1', 'numpy', 'sklearn', 'argparse', 'h5py', 'dtw-python>=1.1.5'] metadata = dict( name = 'mofapy2', version = __version__, description = 'Multi-Omics Factor Analysis v2, a statistical framework for the integration of multi-group and multi-omics data', url = 'http://github.com/bioFAM/MOFA2', author = 'Ricard Argelaguet , Damien Arnol, Danila Bredikhin, Britta Velten ', license = 'LGPL-3.0', packages = find_packages(), install_requires = install_requires ) setup(**metadata) if __name__ == '__main__': if sys.version_info < (2,7): sys.exit('Sorry, Python < 2.7 is not supported') setup_package() ================================================ FILE: tests/testthat/barcodes.tsv ================================================ CACCGGGACGTGTA-1 CGTGTAGAGTTCAG-1 CCTGCAACACGTTG-1 CCGATAGACCTAAG-1 GATATAACACGCAT-1 TACTACTGATGTCG-1 AGCCTCACTGTCAG-1 CGGAATTGCACTAG-1 ACGTCCTGATAAGG-1 GAAACCTGGACTAC-1 TACATCACTGAACC-1 AAGTAACTCTGAAC-1 GCAAGACTACTGGT-1 ATTGATGAAGGTTC-1 TGAACCGAAAACGA-1 GAAGGTCTGAAAGT-1 TGATACCTCACTAG-1 CGCCATTGAGAGGC-1 CAGTTTACCCCAAA-1 TCCCACGATCATTC-1 CGATACGACAGGAG-1 AGCCAATGTATCTC-1 AAAGTTTGATCACG-1 TCAGAGACTCCAGA-1 ATGTTCACCGTAGT-1 AGCGCCGACAGAGG-1 CCCTCAGACACTTT-1 TTACACACGTGTTG-1 GTGTACGATCAGTG-1 CATCATACGGAGCA-1 GCTCAGCTGTCTAG-1 ACAAATTGATTCTC-1 AGTTAAACCACTTT-1 GGCGGACTCTGACA-1 CGGTAAACTCGCAA-1 GAATGCACCTTCGC-1 ATCCAGGACGCTAA-1 GTTCAACTTATGCG-1 TAGGAGCTTGCATG-1 CGTTAGGATCATTC-1 TGATACCTTATGCG-1 ATACGGACAGACTC-1 TCGGACCTATAAGG-1 GTTAGGTGCCAGTA-1 ACCAGTGAGGGATG-1 GTGATTCTGTCGAT-1 TTCAGTACTCCTAT-1 CTATTGTGGCAAGG-1 TACCATTGTGAGGG-1 CTCGCATGCTTAGG-1 GAAACAGACATTCT-1 TTAACCACTAAGGA-1 ATTGCACTGGAGCA-1 AAATGTTGAACGAA-1 TATCCAACCAGCTA-1 GGTATCGATGAACC-1 GAGCGCACGCGTAT-1 CCAACCTGTTCGCC-1 TTCAGTTGCCAAGT-1 CCTGACTGTGTCTT-1 GAACCAACCACAAC-1 ATGAGCACACAGCT-1 TGATTCTGCCGAAT-1 GAGATAGAAAAAGC-1 AATCTCTGCTTTAC-1 ACACAGACACCTGA-1 GTACCCTGTGAACC-1 CATGTTACCTGAGT-1 TCCACTCTGAGCTT-1 CAAGACTGACCTGA-1 AGCATCGAGTGAGG-1 CGTACCACGCTACA-1 CATTTGTGGGATCT-1 TCGACCTGCCGATA-1 ACGTGATGTAACCG-1 CACGGGACGTAGGG-1 CCCAGTTGGGTACT-1 ATTTCTCTCACTTT-1 TATGAATGTTTGCT-1 ATTCGACTGAATAG-1 TAGTCTTGGGACTT-1 ACTTAAGACCACAA-1 AGGAAATGAGGAGC-1 CGAGGGCTACGACT-1 GTTAGGTGCCCAAA-1 AACCTTACGCGAGA-1 TTACTCGAAGAATG-1 CTCAGCACTCTAGG-1 CGACCGGATGGAAA-1 GTCGACCTGTTCAG-1 TTTCGAACTCTCAT-1 GAAATACTTCCTCG-1 CGAGAACTAAGGCG-1 GAAGTCTGTCGCAA-1 TTCGTATGTCCTTA-1 ACTTGGGATTGACG-1 GCACCACTGTTTGG-1 AGACTGACCCTTTA-1 TCCTAAACATCGAC-1 ACTTAAGAACCACA-1 GCTAGAACGGATCT-1 TATTTCCTATTGGC-1 TAACATGACACTAG-1 TACTTGACTCCTCG-1 ACAATTGATGACTG-1 GGATGTACCAAAGA-1 ATTGGTCTGACTAC-1 AGGATGCTTTAGGC-1 TGCACAGACGACAT-1 GCCTACACCTTGAG-1 CAACCAGAGTTCAG-1 TACGATCTCACTGA-1 GGCGCATGCTCCAC-1 GACTCCTGGGTTAC-1 CATACTACCTGAAC-1 TTGAGGTGGACGGA-1 GAAAGATGCTGATG-1 TTTCAGTGTCACGA-1 CCAAGATGTTTCAC-1 AGGATGCTTTACCC-1 AGAAACGAAAGTAG-1 CACCTGACTCGTAG-1 ACTTGTACCCGAAT-1 ACACGATGACGCAT-1 CCACTGACCCGCTT-1 GGCATATGTGTGAC-1 GAAAGATGATTTCC-1 CACCGGGATTCGGA-1 AGTTATGAACAGTC-1 CCAACCTGACGTAC-1 AAGAGATGGGTAGG-1 GGGATGGATACTTC-1 GCCTCAACTCTTTG-1 TGTATCTGTTAGGC-1 TATTTCCTATTCCC-1 GATTTAGACACTCC-1 AGTACTCTCGGTAT-1 CCAGTCTGCGGAGA-1 TTAGAATGTGGTGT-1 CGATACGAACAGTC-1 GCAGGGCTAAGGGC-1 GGAACTTGAAGGTA-1 ================================================ FILE: tests/testthat/genes.tsv ================================================ ENSGXXXXXX TDRG1 ENSGXXXXXX TCTE1 ENSGXXXXXX CCDC106 ENSGXXXXXX TIGD6 ENSGXXXXXX MSANTD3-TMEFF1 ENSGXXXXXX QRSL1 ENSGXXXXXX CDK18 ENSGXXXXXX CTB-25B13.6 ENSGXXXXXX ELAVL1 ENSGXXXXXX TRPC5OS ENSGXXXXXX AC016995.3 ENSGXXXXXX RP11-899L11.1 ENSGXXXXXX GAS1 ENSGXXXXXX RP5-1077H22.1 ENSGXXXXXX LINC00566 ENSGXXXXXX RP11-383C5.3 ENSGXXXXXX RP1-59D14.1 ENSGXXXXXX RPP40 ENSGXXXXXX RP11-74C3.1 ENSGXXXXXX RGCC ENSGXXXXXX OLIG3 ENSGXXXXXX AP2B1 ENSGXXXXXX ELL3 ENSGXXXXXX CTD-2521M24.5 ENSGXXXXXX ANKLE1 ENSGXXXXXX ODF2L ENSGXXXXXX RP11-128M1.1 ENSGXXXXXX RP11-71H9.2 ENSGXXXXXX HOXB-AS3 ENSGXXXXXX ACSBG2 ENSGXXXXXX RP11-261P24.2 ENSGXXXXXX IL3 ENSGXXXXXX SLC10A4 ENSGXXXXXX AHSP ENSGXXXXXX PROSER1 ENSGXXXXXX TPRXL ENSGXXXXXX LINC00659 ENSGXXXXXX HOXC5 ENSGXXXXXX ACYP2 ENSGXXXXXX GALR3 ENSGXXXXXX TFF2 ENSGXXXXXX PIM2 ENSGXXXXXX STMN3 ENSGXXXXXX RP11-73M7.6 ENSGXXXXXX RP11-350G8.5 ENSGXXXXXX RP11-26P13.2 ENSGXXXXXX RP11-692N5.1 ENSGXXXXXX RP11-1030E3.1 ENSGXXXXXX CTC-360P9.2 ENSGXXXXXX NEURL1 ENSGXXXXXX IAH1 ENSGXXXXXX RP1-102E24.8 ENSGXXXXXX RP11-197N18.7 ENSGXXXXXX C11orf21 ENSGXXXXXX RPA4 ENSGXXXXXX RP11-271K21.11 ENSGXXXXXX BMP7 ENSGXXXXXX LINC00502 ENSGXXXXXX RP11-50B3.2 ENSGXXXXXX LINC00112 ENSGXXXXXX RP1-137K2.2 ENSGXXXXXX LIPC ENSGXXXXXX EIF2AK4 ENSGXXXXXX RP4-753F5.1 ENSGXXXXXX LRTM2 ENSGXXXXXX AC003051.1 ENSGXXXXXX RP11-87G24.6 ENSGXXXXXX RP11-573G6.8 ENSGXXXXXX COPRS ENSGXXXXXX TRMT6 ENSGXXXXXX HOXD-AS2 ENSGXXXXXX RP11-203E8.1 ENSGXXXXXX LL0XNC01-116E7.2 ENSGXXXXXX ZMYND12 ENSGXXXXXX TARSL2 ENSGXXXXXX WDR6 ENSGXXXXXX RP11-302F12.3 ENSGXXXXXX KCNH5 ENSGXXXXXX RP11-78A19.4 ENSGXXXXXX RP11-232D9.3 ENSGXXXXXX GTF2H3 ENSGXXXXXX RP11-173P15.3 ENSGXXXXXX LRRC27 ENSGXXXXXX MBTD1 ENSGXXXXXX RP11-24F11.2 ENSGXXXXXX GUCY2D ENSGXXXXXX PDS5B ENSGXXXXXX PIH1D1 ENSGXXXXXX CFLAR-AS1 ENSGXXXXXX ZNF202 ENSGXXXXXX ERRFI1 ENSGXXXXXX RPS12 ENSGXXXXXX H1FNT ENSGXXXXXX RP11-514P8.7 ENSGXXXXXX DNAJA2 ENSGXXXXXX AC073409.1 ENSGXXXXXX CCDC181 ENSGXXXXXX AC108056.1 ENSGXXXXXX RP11-626H12.1 ENSGXXXXXX RP11-127B20.3 ENSGXXXXXX PER3 ENSGXXXXXX MAGEL2 ENSGXXXXXX LCE1E ENSGXXXXXX RP1-266L20.2 ENSGXXXXXX MMP16 ENSGXXXXXX NKX2-1 ENSGXXXXXX TAOK2 ENSGXXXXXX RP11-498C9.15 ENSGXXXXXX RP11-492A10.1 ENSGXXXXXX AQP6 ENSGXXXXXX RP1-12G14.7 ENSGXXXXXX OR5B21 ENSGXXXXXX TYRP1 ENSGXXXXXX CTC-297N7.11 ENSGXXXXXX PTPLA ENSGXXXXXX RP11-53A1.2 ENSGXXXXXX GOLGA6L6 ENSGXXXXXX TKTL1 ENSGXXXXXX CEP70 ENSGXXXXXX RP11-178L8.9 ENSGXXXXXX JTB ENSGXXXXXX ZNF493 ENSGXXXXXX C4orf27 ENSGXXXXXX LAMA2 ENSGXXXXXX PYCR2 ENSGXXXXXX POLR3C ENSGXXXXXX TNNT3 ENSGXXXXXX CTD-2139B15.5 ENSGXXXXXX AP000593.7 ENSGXXXXXX OR56A4 ENSGXXXXXX RP11-944L7.5 ENSGXXXXXX RP11-293M10.1 ENSGXXXXXX RP11-160H22.3 ENSGXXXXXX LCA5 ENSGXXXXXX HOXD10 ENSGXXXXXX FCGR1A ENSGXXXXXX RP11-223C24.2 ENSGXXXXXX LINC00500 ENSGXXXXXX PLSCR5 ENSGXXXXXX FAM3A ENSGXXXXXX AC100802.3 ENSGXXXXXX N4BP2L2 ENSGXXXXXX RP11-513G19.1 ENSGXXXXXX METTL4 ENSGXXXXXX AC011518.1 ENSGXXXXXX RP11-344B5.2 ENSGXXXXXX RP11-265D19.6 ENSGXXXXXX NKX2-1-1 ENSGXXXXXX SIGLECL1 ENSGXXXXXX RP11-455F5.5 ENSGXXXXXX RP11-348J24.1 ENSGXXXXXX FRRS1L ENSGXXXXXX INTS1 ENSGXXXXXX AC073236.3 ENSGXXXXXX MSTO1 ENSGXXXXXX DGCR6L ENSGXXXXXX RP11-97C16.1 ENSGXXXXXX RP11-16P20.3 ENSGXXXXXX SMAD7 ENSGXXXXXX MARS ENSGXXXXXX RP11-179A16.2 ENSGXXXXXX ELOF1 ENSGXXXXXX LINC00654 ENSGXXXXXX RP11-440L14.3 ENSGXXXXXX RREB1 ENSGXXXXXX TBC1D19 ENSGXXXXXX FAT3 ENSGXXXXXX CTC-436K13.5 ENSGXXXXXX INTS8 ENSGXXXXXX KIAA1549L ENSGXXXXXX EFCAB7 ENSGXXXXXX SPOPL ENSGXXXXXX NAA50 ENSGXXXXXX CBLN3 ENSGXXXXXX CANT1 ENSGXXXXXX C17orf74 ENSGXXXXXX LINC00265 ENSGXXXXXX RP11-157P1.4 ENSGXXXXXX PRRT4 ENSGXXXXXX OAT ENSGXXXXXX AC145212.1 ENSGXXXXXX SLC6A6 ENSGXXXXXX WDR55 ENSGXXXXXX RAPGEF3 ENSGXXXXXX RP11-321M21.3 ENSGXXXXXX RBFOX1 ENSGXXXXXX TMEM249 ENSGXXXXXX CTD-2357A8.3 ENSGXXXXXX KCNT2 ENSGXXXXXX UBE2E1 ENSGXXXXXX RP11-162D16.2 ENSGXXXXXX CD244 ENSGXXXXXX XX-FW80269A6.1 ENSGXXXXXX RP5-1065P14.2 ENSGXXXXXX TREX2 ENSGXXXXXX RIC8B ENSGXXXXXX GTSF1 ENSGXXXXXX GS1-166A23.2 ENSGXXXXXX TAF1A ENSGXXXXXX RP11-149I9.2 ENSGXXXXXX NARS ENSGXXXXXX PRKG1 ENSGXXXXXX CT47A9 ENSGXXXXXX CDC25B ENSGXXXXXX LA16c-325D7.1 ENSGXXXXXX MOBP ENSGXXXXXX CCDC85C ENSGXXXXXX RP11-603J24.5 ENSGXXXXXX MBTPS1 ENSGXXXXXX ARL1 ENSGXXXXXX AC104135.3 ENSGXXXXXX RP1-59D14.5 ENSGXXXXXX RP11-244H18.3 ENSGXXXXXX CTD-2521M24.10 ENSGXXXXXX MID1IP1 ENSGXXXXXX RP11-262K1.1 ENSGXXXXXX MYL6 ENSGXXXXXX HOXD11 ENSGXXXXXX GTF3C6 ENSGXXXXXX PPP1R9B ENSGXXXXXX NMI ENSGXXXXXX CTD-2015A6.1 ENSGXXXXXX SPRY1 ENSGXXXXXX AC004014.3 ENSGXXXXXX RP11-110I1.6 ENSGXXXXXX HK1 ENSGXXXXXX PEX5L-AS2 ENSGXXXXXX RP11-109A6.3 ENSGXXXXXX ERI1 ENSGXXXXXX DOCK5 ENSGXXXXXX AC012506.2 ENSGXXXXXX TTC23L ENSGXXXXXX HLA-DRA ENSGXXXXXX MON1A ENSGXXXXXX RP11-667M19.10 ENSGXXXXXX AC138472.6 ENSGXXXXXX RP11-339B21.11 ENSGXXXXXX RP11-130C19.3 ENSGXXXXXX APAF1 ENSGXXXXXX BACH1 ENSGXXXXXX ATF5 ENSGXXXXXX IL1RAP ENSGXXXXXX CTD-2227C6.2 ENSGXXXXXX RARB ENSGXXXXXX RP11-465L10.10 ENSGXXXXXX FADS6 ENSGXXXXXX NDST4 ENSGXXXXXX TAF9 ENSGXXXXXX RP11-38M8.1 ENSGXXXXXX PIAS1 ENSGXXXXXX SCN9A ENSGXXXXXX CTD-3105H18.16 ================================================ FILE: tests/testthat/matrix.csv ================================================ V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13,V14,V15,V16,V17,V18,V19,V20,V21,V22,V23,V24,V25,V26,V27,V28,V29,V30,V31,V32,V33,V34,V35,V36,V37,V38,V39,V40,V41,V42,V43,V44,V45,V46,V47,V48,V49,V50 5.6677905966163795,-4.772962322864686,-10.841580908135525,-2.209881300903652,-5.786124286357204,-9.447302083519922,-2.9608701221548905,5.180268422373425,0.9757778734454816,-2.767401772182076,-6.985494772072344,-2.788594807404795,-1.5926001949818696,0.0569611711405516,-0.9120965995131036,-4.80543375707097,-2.3234423144181644,8.95609509255886,-3.4936927768203905,5.422067761524336,6.383754556270587,3.985268674703266,0.6572030533705839,1.094535937479605,-10.026315159810164,-3.6674609727917407,-2.1679147907017406,-9.691009576633661,0.2667420385134678,-2.317717569008728,-2.914704667769489,5.314974026907998,-0.21997020012709623,2.1234113415946023,-9.08547311012854,-3.0390059841327064,2.4073014944776623,0.32007278736315004,-1.8231019136195825,-0.5601161170648448,-2.5375225634632717,-4.480048558430584,7.304791020003927,-1.7565655946155458,-9.228884629768455,-1.1853010986448131,-5.696114606003398,-10.644524438514612,0.36508187279680104,-9.511060963252003 -4.903912005007914,-6.255865830401945,-1.2828594591251896,-2.6994999171645593,1.529776884849898,-0.14805958887925552,-4.92429577976289,2.3932623195918916,0.04092726272616587,-2.7530019546075204,0.61423408246408,-1.2124733858988244,-4.81186218596541,-3.48213375877448,-2.376363448809085,-5.976695335662106,0.20133142135774473,2.075791479461702,-2.1967454189313114,-1.3788860954278188,-2.0126929339463464,-9.641250225821105,-0.9809086946453254,0.956352054712345,0.2060141774783424,0.6989806526715334,-7.3964748720647,0.6457450797672339,-0.8692632648273646,-1.8350737491863878,-1.4908961736042357,0.8483323482193889,-1.3560557560546607,9.30923436841894,0.8179448312462206,-2.7156160912058596,1.6618772737520005,-2.2758791605211166,-1.2681515032665613,3.545410055854629,-1.6325673859737837,-3.7934024153830035,-2.8656158962174256,-2.1857552277735977,-1.941415445182147,-1.1045638835979548,-0.6582882956601042,-2.597403689128013,3.23273311560339,1.1954938411668325 -1.6630564425359273,1.4660716767816937,-3.3543708667169643,-4.747852390173787,5.175772089943396,3.8647270684090316,0.3716776169760889,3.446919138917524,-3.5766947142248773,-1.4419821185285617,-1.883008503777993,6.241386766249099,-1.0560629299575153,-5.988097488789856,-3.4858383619320357,-6.428117425646453,-0.8275758947296596,-1.1756116329158848,-1.4287283014493717,4.322595033687989,-1.798026011344816,-3.703995208194979,-0.5823149380497668,1.5275131409009983,-6.382582234836498,-4.521955787849564,0.7095672730843253,0.22446533629229726,-2.5628226796379034,-2.7700592934308896,-1.9004833941498762,-7.092578410827339,1.1564805951843644,-4.844473422152349,-3.3302250274741025,0.7102415224579148,-2.3461192928264167,-0.6281103326952658,1.3513274965655961,1.250559581486804,-2.2203944926057972,-7.339100532389258,-4.649385822631681,0.3473741924722748,7.725143505943829,-4.292240163442733,-0.7677091745620812,6.984631412067042,-0.22326398047643436,2.1416377744113118 0.91180136905286,0.9172208892013556,-6.341103562254801,-2.712200203241552,0.6301528084008212,-1.932886388073422,-0.49245210509387405,2.561514831311895,-0.9392122850317073,-3.3959610544843377,-2.540271367626355,0.9560560808325851,-2.389739137988473,-1.2417283427375296,-1.4758875658716923,-3.8001190366526014,-1.436789818946997,0.3907781084471065,-3.886228727232629,4.085194860330388,1.1650476460208514,1.0739843915520324,-1.5677006307394257,2.032472203741211,-5.455501634036393,-6.863536366221897,2.207695188207163,-2.7280517207880486,-1.043100425064087,-0.6195472334853985,-4.344020451346337,-2.84581109951762,1.1013788026307507,-0.1704271395614656,-6.419786313610798,0.5684252600382795,-2.6080949738437633,-1.9555675532365857,-1.6761251005513411,1.7713609613085821,-1.6089224871487207,-6.6393024269971,3.1534633633320146,2.026464312871349,-0.0807129000582767,-2.22730982505314,-2.2332694605417047,3.1726489961867665,-1.3027917575160086,-3.909438657619539 -5.650783670228126,-13.206756840062786,-15.180409784465045,-13.385459399509731,-9.986801816307642,-6.0217695193489105,-11.169679719702794,-1.0470442252466197,-9.600281398940627,-5.248414512721558,-11.250155880389261,-2.856946166105229,-11.572870683299113,-11.867274747149718,-6.408393740823626,-13.443813766610003,-10.192448420413958,-3.510886494117525,-10.094614437105244,-0.3491639512437712,-6.910476174846227,-5.173769302344667,-11.129381117196502,-6.784228081016155,-13.29421055221947,-8.163620331221738,-9.589798704784803,-10.625607654095647,-13.66332304755879,-8.735631860056195,-6.239187148874542,-4.995787869624762,-11.070241991674033,-4.679393202878272,-13.073336342891952,-15.687421253152435,-4.87452619224537,-8.563082649566036,-3.8113947357729474,-6.008152775302346,-4.00163464013357,-17.850850740771115,-3.217091919228515,-11.367912124947017,-6.875782144319771,-11.454032748697802,-5.706258592676169,-12.904687526666036,-6.968064133381645,-12.968826518259661 -2.0922015467642647,-4.460490099538217,-9.939945572272872,-9.996193948891046,-0.17845774121156804,-5.0846466868317775,-8.351213060011988,5.481742894398112,-4.040090255458912,-5.554537714934751,-6.978337419977086,-2.07807869757172,-5.928119587857869,-4.105514167443129,-3.1516011623389826,-11.64834803988012,-6.921652588262575,1.705080162230331,-3.8634208285744744,4.974499856562232,0.2867360064691953,-3.651256084005299,-6.668596991867458,0.36671699960464776,-8.018599942335182,-10.514031518846455,-3.813720907583003,-4.623013637722218,-4.931513652421778,-4.893200015123526,-6.639125633205192,-2.280572260808277,-8.637281506070742,1.322042106661276,-12.261747434790639,-6.879031940399698,-2.619125300097242,-4.8791910439254575,-2.654205098248424,6.135466920985012,-5.383512889138441,-18.2573354678096,-2.7846669331134444,-3.1137481289386018,0.6698262258492209,-8.104178112759108,-7.065200552036771,-1.5142325020837475,2.0745410602722174,-5.485779501010603 -3.5329845039997125,4.327812476500276,-2.158727213477263,-0.09061579609665885,2.639408847603761,2.747240253913329,-0.08908551029777667,5.633869994269129,-2.280101508210608,-5.722845827472765,-0.43810943171572936,7.880795242134933,-2.1521252647205356,-2.174866274412867,-2.618391606900421,2.7279733146509133,3.1242022407769428,-3.5915899450294466,-1.339939437686014,6.7321402785198865,-3.2308144793104603,-2.216180757136446,1.2486026034705615,2.8218622859578115,-0.08324338823408484,0.7454335555153223,4.633249188817489,1.601325424860412,-2.426288501845983,-0.08247447423574952,-1.3826420832755881,-5.95005489646438,7.489196554540411,1.9697005300815358,3.0350984753930756,3.561692432359125,-2.2998018067509327,0.5659200121866553,4.2712583676388105,2.6096983474401667,3.7984769010629553,-3.7791329132714306,2.0645169079688253,3.6879022590107695,10.980173234744454,-0.14391854587383157,-0.638736306704018,7.075024547566713,-4.070024418800087,2.3597356151686664 4.242078651433553,3.81418380907584,-0.6149330261662165,-1.700187019643458,3.329452777554978,-4.422578376412592,3.954766692143302,0.6524921465075579,-1.6574800731960702,-1.5884324660055968,-3.390955347501111,2.4708029600922328,3.6813002372310906,0.5468124559189754,-2.523562167986373,-3.4782467589756028,-3.74060813753078,1.5766230435227824,-1.6825359371328543,5.81661108574976,1.5901311556552282,2.5169243357594304,-0.12744826313023885,-0.8009555475228924,-8.327920673349047,-5.923960876607239,3.7112186607096813,-3.1535193998638102,-1.677590213719901,-0.8918052199379751,-3.4940466375450363,-3.0804711397762503,0.34156902781619525,-9.056160375336606,-9.928122572188766,3.2352786242992706,-3.686354991857656,1.083664769337687,-1.747246581435131,-2.8103232389492616,-2.6549203044487086,-4.253898397960296,-1.1098648258032637,4.295932036483787,1.2170544150485212,-1.3635540921589546,-2.7016041280466565,3.171891254773744,-1.761454303069832,-2.368699653538454 -11.269259583670772,-9.805389621495818,-11.320887968499255,-11.386905414612784,-1.6997200691807655,1.6036626508479046,-8.995308428567107,5.676938578862654,-6.695111639127362,-4.825077334979873,-7.054327943009575,1.2677316334300535,-4.524039014638808,-13.12597972801467,-8.610203305862875,-13.061861202259205,-3.8675535255391997,-3.708927033918332,-1.3376059893014118,-1.381297864170823,-6.382511163993435,-8.01585370141623,-5.28224395959748,-3.3508040190136095,-5.343406300539025,-2.505427194007444,-7.840981427096676,-6.567719659127713,-4.492830022277239,-5.4467297067209675,0.16726654706477262,-4.762521292152137,-3.0888282127681066,4.9242144764666556,2.6324014639350546,-10.606655789019785,0.8735099614083388,-5.879485596355457,1.3433343210956044,-0.7498585827496083,-5.001064912600257,-10.138061657199412,-5.86600472044212,-11.867849701716185,5.428672527125926,-10.295935269522102,-5.661607040830257,-7.386449148358813,2.33091183707016,-1.5150795425479426 -1.8708502192641796,-1.0563884885571009,-3.928403544208017,-2.4990378185148696,3.2853688210668346,2.6333061280601577,-4.014383369169726,3.518572660110852,0.9681058674207005,-3.6243365980491173,1.5410950037860718,-0.03231361900762989,-7.839447746136001,-1.7215279818358526,0.6914828501220225,-3.917532862537868,1.4262360972300465,0.8389166007068969,-3.9834445058766152,1.1463606742490233,0.45544485977969984,-5.10998407579212,-2.2626928951417566,5.233315789658328,-0.13830673811762528,-6.169725316920115,-2.0971189737773366,2.938753246871368,0.06069401590645551,-0.9433064127116013,-5.464040922374904,-2.360789348873897,0.15726826897342372,7.549027357816509,-2.4617085409481803,0.5152930202375566,-0.8765359214945394,-3.3221789728230275,-1.6375911103207335,8.40287194216557,-0.8794995021672488,-7.948433326720323,0.7061787909841213,2.4398810002396525,0.13885313759427498,-1.2612474017812014,0.40430097817145016,7.309883084101909,2.1259975263527027,0.21119970403906385 -6.148196274854828,-2.95573773424717,-7.434825178407012,-10.586805796335877,4.734321128347609,-6.028555144625939,-10.520026753361945,11.684858588777617,-2.2595138461826685,-12.268164762915944,-7.884292069342257,-2.7935352267846443,-4.582269067309762,-3.220204777119691,-6.062055396863398,-10.560532534801602,-6.022067183405763,0.7526999561201322,-0.087119070812129,4.7877946717498014,-0.03825496378743498,-4.0732596334147235,-7.917282282061605,0.6455669745453152,-3.3465945692784302,-12.5232774960027,-3.9100823551928805,-4.7492374439893315,-0.7801420738038134,-3.953335843115299,-9.21794747734672,-2.0109098779775474,-4.102675254376376,9.126087383371551,-5.995034045212805,-3.8672914797091895,-0.8185843424907762,-5.911562123222572,-1.5672254123413663,9.717443865848264,-6.719729951070626,-17.639824182872523,-4.25590757171175,-0.9774679972612526,6.813850405447417,-8.210699533642929,-10.97754763636377,1.5096555149277586,7.53091240164572,-1.8640342146296716 -4.977590533181835,-5.829826268650475,-9.34030317031672,-4.769669667894739,-1.0814368826739587,1.345044414826222,-5.302857249261143,5.705908727989611,-0.7166936304530749,-4.512514372451035,-3.291768140000639,-0.145882244477514,-5.124415292137225,-6.863082813357308,-3.7969131883610165,-6.125042925098645,0.9102630327400953,0.25158954514542486,-2.141836993651646,-1.1379814798985564,-0.8287667014509159,-2.775021929468083,-1.9388041888107415,1.3481505515712984,-2.7304585335087412,-3.3002356864277527,-4.371976211878159,-4.501731664649789,0.991465111919785,-2.069506851265815,-1.9069521951105837,-1.2166689160083424,3.1130229295565495,8.456547837874348,3.497355618214588,-3.739662796506711,2.3970945022352677,-3.8266247926071877,0.26643994889170775,1.6861409466985118,-2.493615089788988,-3.582814648201344,1.7334363180493697,-4.887083667737707,-0.17335993991803567,-4.000244725423498,-2.773844700631755,-2.7739000157863254,2.710966778716278,-2.2166975417986743 -6.914238282173409,-3.4180373767043593,-10.89437200313452,-7.966408124672669,-1.9705670561934086,8.299242798683316,-6.17563457366253,1.2174198207178413,-3.688658382546098,1.427523321222092,-1.3320541253060312,-0.7914809301541916,-5.502829033619394,-8.03929318643968,-0.4599451109211056,-7.536006681801844,-1.1303280140186307,-6.2362264204218185,-0.4545872471804787,-3.981219803666048,-2.476152150731508,-0.5705409866168056,-5.3341598891062985,1.425482879944565,0.6698956938859146,-6.186086373016167,-0.9299593685952302,-0.1826751427924137,-0.7160143711475266,-1.6528826038808182,0.9649385580627909,-5.1182581815666115,-5.353897735640558,2.9085349580874293,2.9653512730052545,-8.218638514409232,-0.49623116507765663,-6.292926095148688,0.5713998296490775,5.254859424947489,-3.200520909771338,-8.89458665554238,-1.2442868495395725,-8.98750326480766,4.749337487393482,-8.531021181858836,-0.4645078905037694,2.8755271754847564,2.4624292552663682,-0.66471310070299094 2.1992770749410173,7.527202100482432,-4.770751316011317,1.6408560676391277,3.1886242445082122,-4.164169037159468,6.683645413951781,3.8287613901982884,-0.4970283097351549,-4.318851976752217,-1.546792561517606,6.213221382341321,5.445942966855494,1.9071899757850608,-3.79762357126175,-0.5316674653270067,0.6546431518291499,0.206311787829903,-2.9448932955191607,10.327552126929746,1.7351836311639723,3.4643611997393546,4.303324998474329,2.911577475892535,-6.444210121867549,-3.2556409887637088,10.2756049515217,-3.5514921563855926,0.8932304632444097,3.1894308435609666,-1.526344763415603,-3.9591227439584014,8.974263468862542,-1.5192478400527119,-5.548073771220433,7.417426330442902,-4.130975754484164,2.036007146816278,0.07434827387823872,-0.9909053435968684,0.38737865944180483,-1.6687151134307592,7.947710473218518,7.349337964349928,5.149609149503878,1.2033672777786444,-2.790790202352269,5.515604891072159,-4.147905984424455,-2.493468600279579 -6.400858276836673,-3.971918848880579,-7.668317113879781,-3.8758423024220385,-3.7401091427578894,-6.39998689328078,-1.0411817600523747,-3.802585556388485,-4.836733175530718,-8.081021785221806,-3.104597719535782,-0.38538947801102874,-5.562038368256837,-4.356336565458507,-7.1223248859359,-5.688187851243359,-4.613636185653403,-6.972033894361416,-11.696921664687029,1.1367359479941803,-7.7683666599063175,-4.892471833911242,-5.3140331529419935,-3.651027556672148,-6.373100695295879,-5.759595584224411,0.18955123523791878,-5.943516426149251,-8.126071160831705,-0.004047953302063645,-6.324754705496171,-6.351637406617513,1.683864765184173,2.240639068876841,-8.17996396827435,-2.68120278472841,-8.28892160607579,-6.74119173769451,-5.7857977497467425,-6.679247951308963,-0.20679574804472806,-7.064606205684229,5.050894726517137,0.7184725714744438,-5.219595464249089,-2.8388738710978587,-1.9354359937154229,-2.605556166469805,-9.726928794814778,-9.415908848483252 -5.00764413966239,-3.4042046589189896,-3.9461648618914626,-7.7049587553100105,3.6045361159650477,-5.720587709421868,-3.595307779062502,4.621981242925948,-1.0759565274687468,-12.5588511869458,-8.744910720847516,-5.0467901482720485,-1.4763964877448155,-5.493532183449378,-9.182887051359046,-6.598496645879689,-5.971455247725868,-4.228094934297063,-1.5463437178716206,-4.832761661136431,-2.6280228675801536,3.5478323553111797,-9.706676170793974,-5.749312610660546,-4.994077028297058,-15.175688115038847,-3.5953006236892238,-10.071678368435883,1.7522547195965628,-2.374461237092486,-11.031470179998172,-4.197451416103401,4.448069277223551,1.4867712924005319,-0.6959632853343718,-0.3920674250328382,-1.6769858737842784,-7.797142653056947,-4.794798530392056,-5.789584524330861,-7.8618554262374065,-3.0290688123817544,-3.286417618379971,-0.4941388217510271,-1.059612957240962,-6.309822329774938,-8.268065019529242,0.28395979772751945,4.166944285052217,-5.12922042088373 -8.169887981541617,-4.226162948600265,-9.010959123857146,-10.346641482744616,1.0328688856150672,-2.4458461964459444,-4.119635223496607,-0.3481925406196953,-6.682328246819828,-3.9724834210316162,-4.320889894751572,-0.6979032228558194,-2.160900945271431,-7.258891050037022,-7.2696992111074374,-14.653826740362538,-7.793067281425629,-5.526431112080415,-5.510067747373918,1.4228579328457243,-5.718211271095136,-7.0144000668136375,-6.594161583802514,-2.7594422704531034,-6.865468500291676,-8.968486368982074,-1.5871718316157812,-4.223961543365697,-6.138741349903939,-2.1749813358419243,-3.344753101269456,-6.940312538925636,-7.340962173362754,0.33448718762292917,-9.395171086142838,-6.947977603414576,-6.245946014571138,-7.04127898052226,-4.440886956363049,0.43494396073359853,-6.2248576933386985,-14.901668543966684,-3.8834792549126,-4.805125420781335,2.51611478315021,-9.259892189921517,-4.835986488312402,-0.3370350945243028,-0.7883791580487348,-4.208710334810881 -6.1095423536280435,-4.0244114431552696,-4.813531964495341,-8.001096355925595,2.7111212481082547,-3.2764653825059,-6.205771621934939,4.400217348031142,-2.506325081380238,-6.851045045153516,-4.758350901209804,-2.9771705336536614,-2.601680437649217,-4.262061891581126,-5.517111183056262,-9.485707205615874,-5.22450993553377,-1.6892639573582926,-1.1756576582705702,-0.22267986203554146,-2.2853559742197054,-4.224696389289736,-6.336702975684059,-1.747373471813288,-2.655871070707856,-8.475169100852339,-4.088717319941357,-3.842762397379113,-1.2755197303288417,-2.4819075571367577,-5.34452947151569,-2.4415472311781796,-3.8762593127422496,5.001881880849014,-3.6463622977569288,-4.175838852127862,-1.4500872532604505,-5.531392629553625,-2.7583719306622485,3.078064445089545,-5.79501731494204,-10.28404686043447,-4.389809369714058,-2.801477172777651,2.2859215829826667,-6.5072484622012645,-6.2714008684864435,-0.4837179072954607,4.448586567602295,-1.7547687638453786 -5.264473936249475,-9.925503105015565,-17.201439291275932,-13.670768797931387,-2.9283221026612796,-8.295578096509029,-6.766775296578629,3.1755444659021723,-5.620557536480526,-8.820577672649588,-11.932681930233809,-5.003275835572521,-4.812427754562492,-10.718597093716221,-10.315661839609737,-18.252290003382626,-10.09241450067341,-1.5607052657880773,-8.11129202097381,-0.6574254113370506,-2.394973738771557,-0.777990997107791,-9.93607417045559,-4.96324793143947,-14.994127953882312,-16.471979254639756,-5.834959931550701,-14.83595127829961,-4.2435163100994835,-5.026797965976535,-8.921452028197322,-4.737642393247538,-4.005273896859109,0.44383536484595165,-12.179918024952961,-9.330412075862345,-3.7321932841075154,-9.84709833175664,-7.105699740168557,-5.250281235355655,-10.390029944744754,-14.26089722364572,-0.2823029602719682,-7.631175723194573,-6.75281591739793,-11.76544510656663,-9.61647939750781,-8.068228378485843,0.7382912223383209,-12.637275488933268 -1.6419037159266296,-4.291809050424256,-4.599743949138573,-2.186419664835814,-1.2239993102532616,3.1793640139671866,-5.054245204052447,3.7317909087816257,-1.2693793566803753,-0.10236659096717204,0.06951384934566818,3.086965745765499,-7.084513250445558,-4.281548636142984,0.7301303285190883,-3.022707202491465,2.280181596367356,2.9175468216997413,-2.449948349330062,2.6319251481620354,-0.5769626795790623,-7.015900287448649,0.8310053428606132,3.545215244676733,-1.9140047554257795,2.6134823156332034,-4.825861914711093,1.830226867942564,-3.115316759407578,-3.3409707111751703,0.20172158326041767,-0.49691112899118317,-1.1825453570540236,4.495036286820058,-0.02946359639185281,-3.4076347849945425,1.826022235477731,0.02451314261052745,2.408827816187874,5.0843659156036605,1.6272187170102783,-6.048282467316049,-0.7323886571355196,-2.6822246570050177,1.0128131302475136,-1.2360112048968246,1.0654130458816495,-1.0375168127997862,-0.32131993982480056,0.766374981867507 -2.870844813701693,4.915075921949868,-9.849594988932191,-4.746602974614281,0.10399526786130978,-7.550148866639983,4.8153973440193285,-4.609834686793067,-2.9120398645841643,-1.9380054040531558,-2.2643623134604254,-4.562779848916039,7.895888297602289,2.399610097936058,-5.036935903622656,-9.91350905920969,-8.421466734982607,-7.296123092325683,-4.6723937065632315,2.8560133869908073,-0.3461367479393125,6.111512322892823,-3.907273219199091,-1.8929931225848,-4.673951812908566,-13.154780974769288,11.699067808872671,-6.702620997907105,0.6168221410934339,6.110364996691527,-1.9599145323895844,-4.568570144875768,-4.17874200954972,-0.9959824606763916,-12.714935729951,-0.6063195508634975,-9.47099600630182,-6.226156401078378,-8.285894262247059,-1.241003583775519,-6.852062397072143,-8.384056899768138,7.802478992354825,1.1960645040207896,-0.7952686008027876,-5.740841933962981,-5.694198011932023,2.793629532788611,-1.9104038332661595,-8.650149752509828 -4.598712167667982,1.0883576975609301,-7.499460545299732,-3.191068042279849,-2.7869329998345895,-2.831177282394651,-2.263871672127932,1.9789433924163697,-3.81887336846963,-1.7986654850881982,-1.7703812198491322,1.5522240056370407,1.284551582272555,-0.08759919682437763,-2.2138891210854506,-3.7109044247283327,-2.5296806732698203,-3.27853789367239,-1.459138868951553,6.3287843939481085,-1.961826046454001,-1.503517573803855,-0.22226517518999622,0.6807385860734465,-0.5287654188193116,0.27033115106810135,5.003164826050384,-1.688623705240758,-3.3432315925783143,1.3955435198434287,2.3747322772209176,-1.8086976877689378,-3.489887265502772,3.7991320881328443,-3.695958235149819,-3.7966639947050362,-3.4010246708239382,-1.4196114663600066,0.7136983455329552,3.959385303527003,0.37871646574464235,-9.031326819115177,4.288251155495134,-2.2806011626789404,5.9699004184604725,-3.5499926563321376,-3.8153796059465055,-2.024002712979427,-2.753446592780553,-2.6047951745096416 0.25190543740861504,1.1606224393731681,0.8204823691278014,1.557946956398079,1.8618236911012334,4.389740551936775,-2.027256447697206,5.84849984761271,2.3078567680593314,-4.404608264834073,-0.32620491686555886,2.565180819032581,-4.800112598242837,-1.3576394561904366,0.9771110593277117,6.12593783639822,5.133063611872048,0.3361276806887896,1.53056820118509,-0.6707531208069584,0.7559529466841337,2.070743898970901,-0.15278163574300319,2.753987334208381,2.676872997273189,-0.6333680210705219,-1.0281132720688908,1.3387390995146744,2.666514741837366,-1.333944664605149,-3.2644051272711683,-0.988179511094408,8.171470049328486,3.3396466445396986,8.120464551173065,3.559053931729218,3.1886111446927647,0.40264453528475064,3.7129186505300322,2.190767541614462,2.6505216231021125,3.1566281978885478,0.6695642990945805,2.417256255166494,3.697397985858652,1.7206746174769878,0.7290212463870382,5.172388149575215,1.3370653048183436,2.6908671830235127 -1.279728156089312,4.421589607955573,-0.9912908835298233,-0.33869386945915553,9.816368191274156,5.060434260931503,2.9209213403668004,8.608503453703175,0.35900691687030983,-3.7206884921783416,1.0688042992118079,9.439107773461526,0.906951750537603,-3.881543244187998,-3.750846845941137,-3.2781850433666735,4.841242099321505,2.371467315015006,0.44116661364278376,7.120513928857955,0.6516547240031509,-5.2062202623684115,4.859785965318508,5.9166303832775196,-3.6761359979797703,-1.1654936388671355,2.4185536958099068,2.295784194517002,2.828330290123453,-0.36358567646137935,-1.2265052141325077,-5.466083758297529,10.174093635084152,2.7807538350802385,2.7844011995578786,7.554917195142502,0.7816250619494827,2.432566110070697,3.582334087294785,4.655129885186527,-0.5621205440843833,-1.858636363094898,-1.4842252250174566,5.275960081375719,10.90343077060764,0.21961888014321418,-0.1836603295798087,10.85730149495244,2.803958237311144,6.603973347175566 -0.029233728639695533,-1.5159574463882497,-0.8078791734745221,-2.0036516878452755,2.1628435537610313,-3.47706307854045,-3.3269425455309634,8.063282905392402,-0.39462586682510437,-8.257912342341996,-5.384763680901254,2.908631489504872,-3.084546815413728,-2.6442930790109465,-3.9359884076992637,0.05309507338391706,0.2157654809185925,3.189643458561418,-0.34959316373019145,4.143039387847075,-0.09690828264457085,-1.4225192623938785,-1.0731664746790417,-0.2736873302880909,-4.524000782033003,-1.931743833935334,-3.54646767396689,-3.827078483566049,-1.183467361820779,-3.891694908697267,-5.490985944676526,-0.8471497947469484,5.7934856460999615,1.2948233188392755,0.13511934605509723,1.7621692686432515,1.668289860401938,0.5402975821346947,2.2034630731244755,-0.614973248160692,-0.06858833059184444,-2.6284800800809376,-1.9186289613944134,2.1130468318629205,2.979638437692591,-0.543163106976386,-4.357488265814592,-0.9404520255738337,0.6259134875022698,-0.41514404583527065 -2.425552214783761,-5.450374997964338,-10.762951401209614,-7.610316016080095,0.6029749516031115,0.9114940706789696,-0.4329810289328784,0.01602300915867305,-3.5255620189560117,-0.22885689930091457,-4.047844195229571,0.6098029112760627,-2.069907322533945,-9.4994342122451,-5.665460899584012,-13.73689759479869,-3.615707006799343,-0.3273398254069444,-5.491592034035366,-0.9180738233993484,-1.2754139936382112,-2.9566201058860733,-2.695957282122637,-0.7562506629879615,-11.142266608670173,-8.392844928645834,-3.183422328311761,-6.38322842882747,-1.696369566497882,-2.8247534964054086,-2.3145635233257353,-5.263445143909542,-0.9803728642558753,-2.589364533928876,-6.392202430058328,-4.253535493691393,-1.9104107480723134,-4.630916691598316,-3.7025674453044846,-3.679735781915117,-6.67094424865177,-6.459033603962853,-1.4820010556884973,-5.157747621452045,-3.424945051757896,-6.906439804730328,-1.6908382670586495,-1.1134981386300549,0.5115177903076635,-4.731187285413622 5.712976621401008,4.11426232698083,-5.072817048632197,0.9802629344304328,2.5476051385134335,-0.026631723943411956,0.7895563964730787,5.524075057499319,4.440086318291591,-0.09269290307049749,0.3681671139435241,-0.942218723817218,0.26170622381053965,3.7943204680514944,3.5424983003256116,-0.5677167605314595,2.3951473548636337,5.798023455788057,0.9009135471249153,4.112306943016095,8.350653042990153,5.8302122502412645,2.2293622391229952,6.943931691341856,-1.2942861905649519,-6.409016448209117,4.368995962876447,-0.1498025623315793,6.697341198410188,1.859475766987238,-2.358358989041268,2.3982325073625237,2.444470478830812,4.140003028886492,-3.0062136387934064,4.120954647872868,2.049366449998594,0.9104407401454679,-0.5001609637740483,8.309429776750468,-1.8806300043314388,-2.3484624090430697,6.327200360530032,4.093539085798107,-0.6234871388307119,0.8768919751884388,-1.498687518748683,6.020430837078351,5.1593225918272125,-0.9530657325669405 -7.040761596698446,-4.196037148516803,-8.377231588566865,-9.453276688657368,-2.571400883574737,-0.43368281655500596,-6.949353013225078,0.44346780677526154,-6.197581744202307,-7.1383759795687025,-6.574524467603235,-1.5373804035590666,-7.677176709161656,-7.703569489743624,-5.113563525880314,-6.331090725604002,-6.101478380806049,-8.50447666999284,-5.1179092085113185,-2.1323697537276676,-6.551317164183004,-0.6513038112914145,-10.424498475516064,-4.1379512435088674,-4.1814092526850155,-10.560637153071712,-2.3255112195715677,-4.940582692525461,-6.871753814105796,-3.9905185884317413,-6.971560890657577,-7.645635374861211,-3.914711032585516,-1.9869878850040947,-4.1060632162118225,-7.271309837439166,-5.30066536329661,-8.291099309709615,-2.2881841391217432,-1.986640560669674,-2.912356721767098,-11.923188844871184,-2.759979918478274,-4.790596541570249,2.082719047344838,-8.700928983262077,-4.134418394205061,0.5783457496112947,-3.466058779777512,-6.017830330859811 -1.8660364900693285,-4.490083149521396,-1.3806351218242576,-4.29973352316924,0.5639477823831307,-0.8331844114206173,-3.0537618425892563,2.77513903539264,-3.3698613013530974,-3.126549908692906,-4.3616985528913945,2.921568389953934,-3.1029905403309392,-6.148359042125595,-4.223789421982114,-4.57697535062531,-1.9624507662459347,1.0351393112438858,-1.672559206771762,1.3673403577820906,-3.0002524649968096,-5.084562937379233,-1.8544106239762677,-2.4581935582300565,-6.213975675726214,-0.284260778921893,-5.8121597904744515,-2.911960724127121,-4.633050771920309,-4.990229238264126,-2.2745656646398675,-2.660739902679604,-0.08574393074342052,-3.2389576943652276,-1.9346850439115424,-2.751237476991633,0.17660560815735138,-0.49454754003042445,1.1169740032301094,-3.3224869380179656,-1.246790848431182,-4.178995265675768,-5.8172323394504435,-2.5783828203257575,1.401788931675355,-3.062702538955925,-1.9511650348166403,-3.280891937192886,-1.013917289290502,-0.38288503269775087 -10.044935644029398,-12.102053783124964,-13.620254071396609,-13.466873491342342,-4.094223722502415,-3.6610126282941913,-13.283817110470984,6.094547338911014,-7.333623792689982,-11.657165962541553,-10.639547797558702,-1.4737927876326438,-12.706318500183151,-12.844031925976292,-8.88707905030483,-12.463757748796851,-6.699040967778458,-3.9281638407404538,-7.459036466330505,-0.5316443611991035,-7.367323351663802,-7.715771865622549,-11.143268456720836,-4.25064803221302,-8.81102933121733,-9.508316312992651,-10.513820537101607,-9.134364075802168,-9.155707544103159,-8.146623588857429,-8.848912907097933,-5.924633029106532,-3.936477104154731,4.871009932345924,-4.822606652214283,-11.678971189461752,-2.3222886589422616,-9.45136751104754,-1.8418532175145257,-1.1233808505240273,-4.4207701273313855,-16.762571526347276,-4.232634571247212,-8.810756552676166,-0.27501013943005614,-10.937914045463629,-7.605930117759539,-7.356840597864626,-1.6433626138137702,-8.004133167080035 -3.379577106141671,-3.6839515753391603,-9.732749870398173,-6.974386196786922,-4.128491673821321,-6.906079243705532,-2.51822379310126,-5.316626291914871,-4.553487884674181,0.49691182671855827,-2.333793952412946,-5.508747639706794,-0.5785443931833376,-0.8022516545336842,-2.5889209377061553,-12.481862980870122,-8.890731508402805,-2.972983443127238,-6.94193837777372,1.6448355460776507,-1.9399966669439506,-3.040788842137508,-5.151935549706077,-2.090357592331916,-5.696630655528355,-7.638857340619408,1.079438898899786,-4.279820467168019,-5.573983964622187,0.6889857687413207,-1.432688023802304,-1.6204880111917883,-12.313479551249234,0.5484298082323511,-15.08543002241588,-7.994803626644673,-6.966157505676064,-6.072258239305662,-7.393620762002926,1.1743922163918643,-5.137269362033961,-13.74280471728881,2.4623329925949644,-4.041628196882175,-5.899374144767545,-6.448116334001494,-3.7079322176093688,-4.779620645270024,-2.5872400298237026,-8.749884697495727 -2.9832372644834617,-4.276535791001166,-9.241940640184726,-7.8615796224567855,-1.0409120758399888,-2.818066912960285,-6.654412082847156,2.142090242772108,-3.0172158790453825,-5.152356026574752,-4.42656168619172,-2.887711878418511,-7.4898103546362655,-4.082288574962955,-2.4124703506631278,-9.059154681289362,-5.086844312866846,-1.2191351127227499,-5.826033083699944,1.321392473542716,-1.2118054668462381,-2.881078817968595,-6.993434053536195,0.39971108169467007,-5.545165686986355,-10.539793094422883,-2.9879331675777,-3.4676372608674666,-4.209725852100943,-3.1060010224959975,-6.970010970828006,-3.3047867285309507,-5.806321294695086,2.6516361908046426,-9.280212324805124,-5.585623305189814,-3.5349239595579403,-6.293921072440583,-3.9040410148107356,4.064750233786827,-3.975719758149685,-14.044939439609927,-0.22985410133814632,-2.173284372763215,-2.1609137802031624,-6.4097441317071455,-4.066480978136914,0.5035510378047323,0.004239186582957188,-6.04843052748459 -6.928023697699993,-1.2441922249573771,0.3960459416627179,-0.6905560205891976,2.15388159665837,0.6676406000250887,-3.3040096323382424,-0.5219856551542866,-1.548533080174244,-3.578952446246117,4.512494449133173,1.523861271274849,-6.16000537414732,-0.23769584520246648,-1.0509950685903278,-2.678458116738057,0.6759894621421823,-3.0188475481359687,-5.442367096065299,2.509342916710107,-5.355115990398296,-12.246096260626876,-0.8775964671531084,2.8251252390145707,3.688033875855386,1.6098597919380822,-1.4545635472820362,5.92386884393516,-4.777700992855304,0.8484847761959315,-1.8826632193348045,-2.9212269121550922,-1.9041713297746958,9.904593320031898,-2.0065314630767186,-0.428931492620491,-3.980695121991596,-2.672358959645028,-1.5160696553131996,6.874996625704347,2.483004983356441,-7.989619971283295,-0.004431450562042416,2.7145745626862294,2.4498690777434766,0.5348526763581201,1.6716554847910878,4.292289420951891,-2.69166879564497,1.8819692313654557 -0.6317923607564113,0.03523751601222169,-6.326568795927896,-1.9542105980758304,-0.43395107390538235,4.699754580573758,-2.450328393386727,2.9132301789249784,-0.6785367636219446,1.3819297914146824,0.7534949856317827,2.4193703128389803,-3.973236636378671,-2.4458287543493813,1.9010428316712464,-2.38589905863922,2.0765952670114727,0.4693623017091224,-1.0765706084779874,2.5342213872339316,1.2454039794822007,-1.2972400457555688,0.6217824524535396,4.649225244436869,-0.6832361337802433,-1.2292550129967246,0.9359841387285533,1.9145272329067862,-0.18363415218283374,-0.7260918687376887,0.7917121798615392,-1.7744801407109192,-0.9103660640008506,2.7783309661288254,-0.14478391269709223,-1.822425437378505,0.43750555395452984,-0.6592896858582911,1.8091580906546,6.3570439895515305,0.5215549354430022,-5.720900589903688,2.0722104262326684,-1.7040003591656392,3.126065709873898,-2.087067547930699,0.9107756185539502,3.5638209487246777,0.4855513913048075,0.41099581467174984 -6.693787299933747,-9.599823301348533,-8.91128662367198,-10.419422209872435,-4.943475085522362,-3.686448933347482,-6.599545378466568,1.1105970568587356,-6.456234337899636,-5.929715633008902,-10.62176979956369,-2.753029492509664,-3.1758766523653694,-11.017771074269245,-8.741760528577334,-9.320593811496718,-7.459346703694812,-4.899974622598003,-2.498238960869684,-4.826376948908846,-5.9885617167013825,0.1314066798955467,-8.818598325052593,-8.71422291054778,-8.31173320476884,-6.647175365476363,-7.362285998174858,-11.522291999359245,-5.450360584837054,-5.891195911916901,-3.65942686726952,-4.004215315469639,-2.9036362131930575,-3.9931988768655766,-1.2560943996176195,-10.082262184305417,-0.9655006718112458,-6.712249336708442,-1.707592738009667,-9.882500647981345,-5.712125488513234,-5.58312878595676,-5.2062662124484005,-10.486664939249511,-1.9272358251724844,-9.560293984328311,-6.563878023491003,-10.796247405635805,-1.0491664040841138,-7.2256069922630575 -6.947195647640122,-6.897478103632185,-7.883298688393742,-7.528743460425029,4.361473692495339,0.34214450286487663,-1.0860531401875617,2.022635161721583,-1.2816197141803958,-9.356636294547878,-5.143222047123476,-1.7692832760573425,-4.773879866176475,-11.651232988289463,-10.466662703100342,-11.678528039888443,-2.4041970531629078,-4.155387921195758,-6.630018512626031,-7.187289507886158,-4.546721483157627,-2.7196621514749877,-6.8508963830510385,-3.1729515179479524,-8.47886966292451,-13.747841116164365,-6.161263884086727,-8.97271494284716,1.579683984744698,-2.3396678611293087,-9.316616551363655,-7.420693797361372,8.504576353107423,3.6314759494827467,1.0287230023203882,-0.15034640652889064,-1.6006515732295936,-8.841380834668492,-5.914366312506525,-7.900969271289894,-8.069895110665318,-0.563308810831282,-2.035362560779738,-2.0983958912491647,-4.846512834480008,-6.025057325296851,-3.049982761760855,2.043921433268447,2.439471526226406,-4.64377445704373 8.046187463791929,2.513588500727353,-6.620705359050154,1.9868801668275884,2.480236160004778,-14.704044860483513,10.68119995635415,-2.6058404470749004,4.176335497491387,0.6755996406823614,-0.8846871986937084,-5.004053715580204,11.869851274547885,6.8183989494366966,-3.8436328431131614,-11.296231263314143,-4.909822845389117,9.009777350219668,-5.8560592105612,6.471929532973272,8.900679733331435,4.386672470724631,5.503265045675352,1.6013166538592674,-12.321732594263736,-8.601332401183932,7.066521333668345,-9.590743997040812,6.527834391685503,6.3572212952793805,-1.902126729851517,4.716259487273763,1.9627427899704248,1.166848466256417,-18.00685328277566,6.506972080249308,-3.3526680176859163,0.7804360936939836,-10.600886910791418,-3.780484903495655,-8.238020326106469,1.0429876247064964,11.376908296510853,6.967264116546871,-14.277836029325274,2.379085734108223,-4.640325886707037,-3.9996595577027225,2.0536096424440182,-9.588518758643751 -7.015334223560936,-3.025109080175101,-8.577230476483665,-11.893330158245142,0.5302487050553091,-5.928487490905601,-3.815455372533766,-0.9870547565279084,-8.085940542742367,-9.223478288063397,-8.845804165125658,-1.7072551196109909,-3.336189181926893,-7.7309155934463245,-9.609255622089611,-12.441649899469867,-11.010421971310492,-8.95776132809522,-7.4075428556414415,0.5537713322854088,-7.431481816035163,-1.5751884413375523,-11.576178717130166,-6.708331137126665,-10.128759093386924,-15.589061630752722,-0.5029515821182654,-8.401363473322846,-8.407313517161157,-3.621986125624831,-9.515565534101412,-10.087985505405493,-5.119554523608317,-6.312240974077174,-12.470777343011326,-5.880218304647321,-9.402656963981512,-9.326910711994856,-6.107252860803644,-5.564213699436985,-6.872931585119818,-15.440824504006105,-4.423298587951023,-2.4719205556624515,1.4781107997702507,-10.61247859172624,-7.170046741784502,0.9541528835877313,-4.195396402974122,-8.408847273158523 -10.88889458406532,-3.1822606628085657,-6.81077944560293,-6.393014613314544,-1.5138302241199495,2.992118916008733,-4.641607713085715,-2.3221136876740216,-5.438049764742652,-4.214244833970522,-0.3941463509632684,-0.17932229188214324,-6.202331028761344,-6.578787469410011,-4.461645988014444,-6.058976578424353,-2.915262849273735,-10.630953455856462,-5.659834953589491,-2.7341692076746513,-8.626376767161597,-5.996402411937139,-6.693486903072883,-1.7771794858945387,1.2281193467344118,-4.825747049497695,-0.27632463082253067,-0.07846471031873561,-5.764303574162556,-0.19346552425217284,-2.2203142573558865,-7.717889909838677,-2.8916839651982666,4.667463304688882,0.12816351927268416,-5.924923150294725,-5.858578434758434,-7.817432980665817,-2.237897343826556,0.7302885969434538,-0.8075034617051269,-9.127139665794427,-0.39386829636259446,-4.2702924480961,3.6391827822092417,-6.2039189524587695,-0.4818667158297443,2.628288069460074,-4.01622767412357,-2.4304219549927373 3.171804673223744,10.210296451822872,-1.2823983596631652,3.3529704553233857,2.349209364846774,-5.210173251472921,3.5980406330101977,0.5271145498838843,1.1024348492331593,-2.340297712094894,3.4035119792009314,1.4637238441892502,2.100677553869573,9.956653203322897,3.037503202438935,2.6113947811093383,-0.4651870736394783,-0.28202551476414506,-3.487250993243367,11.213171798410817,3.1540857029459684,1.909259217862679,2.257867871163225,6.090130114930284,1.3502370280506808,-3.906854735389399,12.266827587548704,4.027843716724438,-0.5231881955161677,5.485956744660566,-2.6079302722183417,-1.0097459676669966,-0.6216157075098145,3.003034034911749,-10.97257916692792,6.351433015623735,-6.797329826636816,1.1288483594897911,-2.164956785629034,9.419953557399818,2.7579956811764763,-8.581348548695116,9.40515073797441,10.73924217550124,3.8286891792745474,3.0702146954667437,-1.1744024340099117,9.765224231468483,-3.975559026080253,-2.049236024934567 2.076042783334671,7.277480273907379,3.3574901319247985,6.497728403729722,2.959085318799209,-0.8529894413199075,2.346909499425578,0.9105739690297994,3.5901049993125946,2.0910857366403026,8.195060945929473,1.9230762319431363,1.6476303438826994,10.158985210509014,5.793197058444499,4.2537908074455295,4.133148327131987,3.5296207099471593,-0.15229707562809758,8.620345057252203,3.7313745383435624,-4.270566011799207,6.6811856595320975,8.287544317135147,7.0210622812754835,5.439390817944272,7.23588539623631,9.20168742169335,1.8259429220993386,5.567149451904252,2.823527492247716,3.4326533792800276,-1.0608635574365752,9.410067450877293,-3.288351580974595,5.870628993304685,-1.478900709905655,4.128457100783841,0.5810268779371776,12.962045633317034,4.821117463119503,-3.2997265204869417,6.8258077326901585,8.458541267444247,3.5674521763605958,6.337803454218854,2.449629812146418,7.068883386483478,-0.10599995601759171,4.096745120562474 -3.762593497992012,1.8805692783434047,-11.086203340862465,-3.8943279249983815,-0.014845803271620495,2.9168706479749207,-2.976299847013826,7.774610783934082,-2.5858790892085692,-0.9028849346287502,-1.4322108205563397,5.281862410967542,-0.12740615799631616,-3.0424114181678212,-1.2364473490073162,-5.018195011248153,1.530745480796058,-0.23671964408764712,0.5877740379450543,8.230907399310368,1.0859324663312258,-1.5304689475734496,2.4260188504715967,5.524980972527206,-1.5473693205183452,-0.21380046511169026,4.951890797899668,-0.4180563955983123,0.14637670099448874,0.3272052918179542,3.3606582104892624,-2.67661907314657,0.2731628176608587,6.133120327190376,0.34299721703911645,-2.3786377425122787,0.1236845248893732,-0.05582643788257946,4.086987732352867,8.94071985077262,-0.04669962019787777,-9.668424699396956,4.418343885449603,-2.950022992318261,10.55974343934477,-4.354992817205261,-3.2369713040746855,2.3787619686413803,1.1320994131384934,0.7481709286922094 3.2103352406722054,0.13005246939556758,-1.4554469701813426,-0.8684130200716456,5.987990813448036,2.990811987701159,1.3242978743215015,4.315611444414213,0.9037889034048079,5.2646991777731955,2.6340097183377766,4.8652591183455645,1.686417319591796,-1.3709618425297703,1.1578616475272896,-8.239605727956112,2.097531269782538,9.08837080755869,0.9562252935429252,6.631286788797967,4.893689854734893,-7.656767054239764,6.519488161463312,6.479050410160541,-5.240089175491322,2.1903645260014057,-1.8091690603194746,3.7035565943712854,1.8231165080457592,-1.413638584651508,3.2971798524835525,1.0169190215098816,-2.234369586535126,1.044925453956788,-4.926978705200638,1.6378295546831185,2.8195335813122417,4.2935286463389595,1.4530978648464214,7.841574743438371,-2.4603153838153173,-5.059941609280764,-3.317383007290497,0.6875189803722772,2.7084122204127175,-0.04950338307058477,1.1279717342918256,3.0654298495956773,5.440047569663381,5.190695234104885 -7.554209466594839,-8.285723067026689,-11.88858718523325,-7.753897725207418,-3.0338982638054137,3.772305810884731,-10.072413451176933,4.111733696381326,-3.730319499012949,1.2981757775119025,-0.18632357115582932,0.3072519429063408,-8.230187693277497,-7.104812433266381,-0.38285325186443103,-11.752627784073221,-0.644880932424017,1.163875436755526,-2.99454820607997,2.1498267612809254,-1.8240244237768706,-11.61683498531475,-1.4085356976011258,4.259302960154287,-1.0532891934365072,1.029946939878847,-6.34105866471295,1.401182404534639,-4.106276232427856,-3.2438742701791803,2.7890326458504835,-0.5215281818636646,-9.270879746773058,11.18180644327341,-1.8991454161661918,-10.671815595185713,1.2773611142381056,-3.7622117482462123,0.9790853741743673,10.938106389242192,-1.6730989213233025,-14.753444497747479,-1.0284626239043955,-9.329361199353292,2.2810364525869966,-6.6557521739543075,-1.2586779889393924,-4.165491653345672,2.739887005856594,-0.28438718867147506 -3.4748276217737555,-1.2403165519346562,-5.59081246813985,-5.767307738348996,0.999564501231382,-1.6049391628152931,-5.578764055871952,4.343716308476247,-4.625797664050756,-3.597892396217396,-2.1716616855981146,3.704241470787067,-5.219690238810669,-2.7764743855002685,-1.981431777749128,-7.071632115883489,-2.929297265488367,0.29271729032495086,-3.952604294045526,8.361926532739473,-2.4058084636622117,-8.58194223254891,-1.919647309433491,2.4605729366494034,-4.402384539778496,-2.127146594391939,-1.0331599922803938,1.2438611158161708,-7.062766244928589,-3.2393529478165446,-2.515601594134307,-3.787045383521278,-5.773612066287463,2.139513025404549,-8.761476853898115,-3.8062127849191767,-3.6020772324201955,-1.4602742927665586,0.6377664572831163,7.437850190445136,-0.09107134117772064,-15.936521374386581,-2.2149945231553425,-0.25275891809139706,6.098054156546235,-4.3548586050973315,-3.0309603325747974,1.7367968405537764,-2.011790890473515,-0.7220492596927999 -11.184875949806285,-9.43792539916453,-4.798132125820539,-7.991659963690572,-2.1707194855707996,-5.897521094808245,-9.319170345491445,1.9101009639360575,-5.729568081948696,-14.371188169308398,-6.725970928189548,-1.488754614816321,-11.402210877740396,-8.544061414130374,-9.59363568873102,-5.975230484124075,-5.272280797526939,-6.944312567141145,-9.083495943213515,-2.038989438920838,-10.8710427609914,-9.838648002435624,-10.237338210577699,-6.115233890672722,-3.982798203550255,-6.000770960437904,-8.952053647325906,-6.133563888880784,-9.752461553370514,-5.071240561362407,-10.291386584444144,-6.140480260433251,0.9019984820206988,6.46842919679397,-2.4666324059201736,-6.076719492448624,-4.902827277767846,-8.756027623775426,-3.45502312100792,-4.9952992543495665,-0.9472987798294491,-9.980185051345394,-3.291799342239423,-2.408045534926377,-1.7690300957960927,-5.388671030869511,-5.021163098907956,-5.0005314578970665,-5.493560845509165,-6.408349750076943 -1.6137093962438924,0.38113061302706563,-4.720133762521122,-1.359260679812205,1.1769475068147592,2.2151504480670976,-0.8066168708124679,4.788201574085052,-1.2519760419300228,-4.83445998785843,-1.9452320545297754,5.049147285600128,-4.083023959272304,-4.359290585449762,-2.6495793197628332,-0.2686610114610475,2.4249985852029607,-1.0145489868409805,-2.863087655404946,3.3140265758392236,-1.4844452404606643,-1.0790638852027499,0.02578698779278632,2.169714873191377,-3.5440462351935715,-2.1584825319222816,0.7110751536319544,-1.4675177944922795,-1.270611277968246,-1.5665707867396494,-3.0122243682824834,-4.510865008781342,7.017720382071136,1.3205238945998075,1.5707607965927375,1.813594988192136,-0.619746313348146,-0.7972037060173973,2.0881324378713897,0.08961678626595293,1.2863088858547007,-2.4251420194867332,2.019215140896403,1.4656053694279536,4.076809194919521,-1.0008994547441477,-0.49804405099258986,3.877234469929896,-2.4646659677840193,-0.5432036758861605 -5.959477879495048,-8.792386223375445,-7.1272141567206235,-6.6040402792740025,-5.322518059456642,2.3867081035888136,-7.80968099209353,-0.4985574997025304,-3.338164642828461,-5.120279598253152,-4.9896060637413875,-3.1362206046949237,-10.95841489508213,-9.262891976776546,-3.1144497759801455,-3.1100356656969796,-2.1133370829496743,-5.8238626436216085,-4.930317503434487,-7.895103193817922,-5.732974281330749,-0.9128236107280389,-8.936985011121804,-3.989429703321507,-2.175178974450451,-6.159901239847494,-7.851617245085208,-4.854952799102254,-4.566667601431585,-4.799404616009593,-5.665843523731415,-4.112826487928211,-0.6251324233159857,1.1962409659034414,2.840275437489472,-7.87958904039661,-0.5501359852253952,-7.617320687376932,-1.5003374099446243,-4.660154539043272,-1.3050773930512836,-3.7306004811099163,-1.9651592802841742,-7.199689532906659,-4.5180823139637445,-5.798155397543122,-0.7251649180207322,-3.7931754261618487,-2.4216524556726884,-5.618987348605578 -2.409211530987373,-6.1467465572758,-6.502185486427029,-6.635103755559682,0.7650415881797575,-1.7067020678962346,-0.18674355019841726,0.16246726849120097,-4.398336384342725,-0.9615527768343748,-4.877566190351484,2.093808183929491,-0.3460875724004071,-8.992717582184985,-7.057681590856988,-12.318740900896193,-4.233980170724056,0.9421268411285872,-4.456979682963339,0.606727012319842,-2.7904274123570025,-5.565848123513017,-1.4676330688128136,-3.0883755202105574,-11.627155224369316,-3.7855109767178594,-5.092765837498278,-6.523532062540322,-3.916189011152562,-3.998310235699043,-1.336461464791528,-4.180934288062561,-0.9372986238010661,-4.549129845289676,-6.326111050372278,-3.944504316861045,-1.4639131922661188,-2.332471345537931,-2.399708334338221,-6.471751683744684,-5.412609783980106,-4.941353131128884,-4.471925406906852,-4.722596846147836,-2.4293445403171416,-5.527796790749497,-2.3218856465868165,-4.788428206550718,-0.721246243474372,-3.3609774734356783 -0.46050425965767294,-3.0375668739612975,-4.751382860596547,-5.700331676077348,-0.816845970578663,-9.322393044301712,-6.195877047962728,7.955814846326355,-3.9611596187200666,-9.827567071407083,-8.981356885649443,2.2468782741329347,-3.9250872679384594,-2.4923844404290567,-5.384392497964638,-4.174732016377709,-4.857708640846796,3.2826540764196093,-3.162610222274547,8.89269554224684,-1.1823370473591472,-3.2040688586903934,-3.4396242021456023,-1.9829416380527158,-8.93448538292246,-3.630863163338045,-3.4855790877364408,-6.53848309641122,-6.811268959899394,-5.61713909760091,-6.7661208565227415,-1.2356032309611735,-0.8153907992496192,-0.8769045401116946,-9.108924001567877,-2.623768147155987,-1.6540569346777336,-0.5362972397865051,0.8058778357527141,-0.03335346821492946,-0.8464052587955307,-11.94577218737174,-1.9423708026682567,0.7257033288290673,2.6282435725933104,-3.598838730467087,-7.931795886606168,-5.400471481285752,-2.4337778558295584,-4.899075864262878 -10.085954847938853,-4.738014766141009,-8.607776073649543,-9.670039163765134,-2.088029692510925,-3.410446604535756,-6.322336013541976,-1.798918580581874,-4.079578636386769,-8.070369541930246,-5.548532742103587,-8.111106531920349,-3.8117217351629757,-5.193234683295481,-6.614370239808706,-8.784285796667398,-8.165781842357058,-10.794166529130996,-4.1375744344855,-7.062754719190012,-6.163840903656885,1.3936869707787738,-12.322476916174477,-5.785722725754818,-0.08020013658673486,-14.077582069454971,-1.2072361280922657,-6.988758582689389,-2.1335810926823253,-0.05740459463700337,-6.995285315564806,-5.271025108506153,-5.036540287257658,4.561791381725529,-2.5686756084601297,-7.518318619035343,-5.580525766092526,-11.631909573253218,-6.629542650428977,-1.9991170754141625,-6.651488402110453,-9.180822257790538,-0.37504932857638806,-5.607972700954413,-1.6227674407456623,-9.208588871272974,-6.2074747284424,-1.007670968266313,0.7331565809229557,-7.476992917718296 -3.2452265263788864,-1.7939758130270467,-3.105345286675024,-7.03202663812565,2.4667089055840643,-8.843406655008637,-4.640825058048544,-0.11888450574244969,-4.003498913585898,-5.884796201026271,-2.688343278163879,-2.851450553261056,-3.3962484984846126,0.7636555739477052,-3.705570951152397,-11.015095799977507,-8.664702527102426,-0.45348500204741304,-6.616643139410356,6.047902738900974,-2.6208982947224997,-8.576500146448716,-6.047872552551064,-0.7833957863553878,-5.46965985240872,-8.890002566020463,-1.5271535297295498,-0.8503198061173671,-7.291517118050111,-1.732921487857014,-7.28611335546254,-2.804904642300095,-10.430379920896225,1.9044524292473919,-17.10481880719342,-3.5095772375963588,-7.427353761238864,-4.73597888583779,-6.307087966052337,5.5071359099262525,-4.221088871805229,-18.211322657134232,-3.108270322725895,2.5258845472497273,-0.8352183775524712,-4.80668641513234,-5.384607944059866,1.0150417555182933,-1.075206643009459,-4.603909842159341 -10.286420778490715,-6.837569081454224,-1.259291568473524,-5.875683725942936,2.084852576774099,-1.3638527948430144,-7.367316465147879,-0.3078520229902133,-4.722175939312509,-5.832978578598273,1.3500797285539137,0.7481391759881117,-9.279356906069578,-5.027663837082593,-4.727276244096021,-9.506672010674016,-3.2206814522671903,-3.12608372328598,-7.800774525329498,1.7324576506878917,-8.652535357420062,-18.326269713203352,-4.4857604828792805,-0.1212045009025291,-0.5173610365763337,-0.284611887598851,-7.667500591030159,3.5959522294724704,-9.242962291755418,-2.8937340126398357,-4.179859543041255,-4.515305106059172,-6.318252259649952,9.299372821946616,-5.918788988491402,-5.163147211368155,-4.923178386816412,-5.240322021834099,-3.0843179800026475,5.2383475987538946,-0.25078385961039107,-14.046578139389917,-5.61498220267874,-0.8678107272997422,1.1829675248763754,-3.3420644007830362,-0.48828465586268655,0.1991294072771298,-2.5238684326217524,0.428123130130824 -8.992456343343694,-13.873519566127671,-8.83402243325979,-15.164517052750952,-4.041433971832255,-11.343257345036358,-13.850806813786352,1.6705098195745007,-8.172051333500686,-15.448700607312588,-13.525017893911066,-6.9196178249616604,-13.272063443743873,-10.711284033325336,-10.890789415311056,-13.609237173036453,-13.115092839195164,-5.123744200403326,-10.209055857695876,-3.229952821456267,-9.389039392504817,-7.387962520054891,-17.071807756566624,-10.01971165746181,-11.235183065642346,-15.244741180777662,-13.592919086266535,-11.932250070959103,-12.722518089032777,-9.542579273741499,-15.346015844606935,-5.774117531831898,-8.480278077126679,-0.20967629907309648,-12.979397763563574,-12.350206501244037,-6.122116454152545,-12.198224765010137,-7.2885896703785,-6.544011963016835,-6.936898070847747,-18.827161785381342,-8.247414671954271,-6.401900606925696,-6.9996164725163466,-11.487846820562986,-9.970697071224082,-9.838733633417311,-3.2812843451140776,-12.32601221920471 -2.816729221012802,5.32658948590529,-4.17297829354328,1.3896004742434953,9.45407218290934,4.0300163271892,4.472479939572938,5.618494041222016,2.541757536858791,-1.4238794346917523,6.00872919185986,5.974128500064455,2.2607585605917717,0.055198927001653436,-2.0587951406254863,-6.245765434799246,5.351214037521221,2.131048590924205,-2.1092172519136314,7.1967678066018,2.248599033256326,-7.033783511737824,6.7430749908284815,9.482318233728002,-0.19750585710996926,-1.8136525032920416,6.087705763421892,4.3620730393364315,5.706848637426043,4.76190924491573,0.7732134565096416,-3.3818448203114895,7.9246992377184355,11.763349680265005,0.29540174551283327,7.948242177895633,-0.9395306940349508,0.7050029965541406,-0.33940673521130404,10.082857104773606,-1.3413090462615749,-3.104922370234064,5.344853316198958,6.522811771693078,6.983427785024245,1.596799000349297,0.9932082667462532,12.115513981538287,3.914090959431277,5.1693802774170585 -7.864604452316811,-4.623511301082251,-4.045510338692232,-6.972166328688149,-1.8826774510121664,-7.726752056648331,-7.109022331149229,3.027375932708688,-7.612544836800958,-9.415861223809284,-6.024073709142415,2.9377560334956927,-5.242542433634374,-4.438357512837138,-7.409879013035931,-6.342151121871667,-6.276622001300887,-3.5041771808422757,-6.09843269838465,7.389065244930707,-8.276319714571887,-10.692873861250858,-4.979908645738722,-4.006758081021091,-5.475610168029414,-0.19063793162985432,-3.608296387858229,-3.424042104027832,-11.934355957357031,-4.432094943725913,-4.379460819492369,-4.702885037762384,-4.524526537297897,1.7292803927126101,-8.502634777832176,-5.958857996187328,-5.801724670846991,-3.2029277151546585,-0.24540957392456816,-0.49357256247566483,0.7131540504146306,-15.033183981328747,-3.5214417965178617,-1.46582281628464,4.9869887816396945,-4.953278976257737,-5.985852382797814,-5.6325400600222855,-6.611923367653348,-3.940958264799418 -4.053753198312593,-11.526198119379059,-11.090657180756834,-12.316181681683801,-3.976212293642832,-2.675293382326384,-9.45057293485513,-2.8784782597009766,-6.364706337961671,-2.917620593304255,-6.177637640049521,-4.815106564669545,-12.561853563006038,-9.771806569770243,-3.6494807288508446,-15.639128468039523,-9.034205392367996,-1.6951913575835618,-10.370779278352197,-2.8560601524108384,-4.611434237207671,-7.993625914931831,-11.024825650701212,-3.212003982164852,-11.00774364985153,-12.191268399785251,-10.444981233882867,-5.1123883570680695,-10.141805796142073,-7.316276061132063,-8.454056839051416,-5.02279181110328,-12.616253812219833,-2.5234257621658154,-14.91098321742231,-12.073116398856655,-5.068227842491491,-9.276114554792002,-6.9166833115957775,-0.639586099063324,-6.195428123943477,-18.11734598049635,-5.695223528319456,-7.356561648091347,-9.123876801909384,-9.914284348990623,-2.563239184429099,-4.537046235200306,-2.362049233088529,-9.399415241369502 -5.973967522542111,-5.405942541304203,-6.39579021526364,-4.83270556305396,2.2557023292631984,-3.0326755386667306,1.6430316703521006,-3.4106416184035426,-2.3323049555942226,-5.554788319034531,-1.5001388407097949,-1.3803036756132447,-2.697990957621532,-7.198388548216324,-8.652147968944394,-11.885511156972434,-3.7185100517455627,-3.857064945638153,-9.788259454767475,-3.133620226053657,-5.383185594260257,-6.631777404275649,-3.9054880662300233,-2.638480930389196,-8.234091836858692,-8.872245070656216,-3.032358491755991,-6.015622318639029,-2.309430780553856,0.14806267880968288,-6.071045165035338,-6.186141298698014,3.527248315966438,3.042659813778463,-6.0618416550265835,-0.3886962099357221,-5.345321748051242,-7.024381888759757,-7.778662919677963,-7.0337246394336015,-5.581076923445491,-2.949195571241771,0.8575411282702676,0.037048154454485116,-7.317169525826941,-3.4952217958170655,-0.9476047916763606,0.1553649915443328,-2.485002034940421,-5.916940229075898 -3.5815676098547353,-0.3950564427029083,-3.5659798033531236,-3.901778050655931,0.9944671842954499,0.8644061439445274,1.0489068160047654,-3.8486782654757756,-4.337249951373425,-1.4660948970268044,-0.3524971963082617,2.0849745463257237,-2.746074032333341,-4.530629573254332,-3.6038827229508503,-6.038285104556519,-3.207387511776126,-5.613842679555842,-6.415488402934749,0.5836426863304026,-5.184018757107788,-4.138572009355928,-3.3717181780173173,-1.3459795232494887,-4.980338314839072,-5.335167644901004,1.12122426491232,-0.4944124642270785,-5.569029320076479,-0.6694281120901733,-3.0076399603298873,-7.3529999649953,-1.4153180689012386,-4.033653508750057,-6.492358171981054,-1.3136134677511213,-6.426306435910752,-3.974548118866,-3.204972236345562,-2.8151489814257435,-1.3900789132850524,-6.641191903254074,-1.2312773011503624,0.31502437692216445,0.44772937997567974,-3.566970083902769,0.882491298249322,4.219682202038037,-5.249685058920397,-2.739769720253792 0.6083766679735495,-1.574207940021655,-4.928751380104611,-0.04455786573300097,2.7247405509223874,-0.5641060293211912,3.4983583769610402,1.622253462578048,1.0268899423306448,-0.40305576926932063,0.4527790782204637,2.570643515077615,0.8532718435682599,-3.185532724137283,-3.3722953241319726,-6.690472604396377,1.6866917829349255,4.034210981977018,-4.059534933605225,2.4368760205493403,1.5989166106486916,-3.627952094024936,3.889719571232392,2.7878605019322307,-6.850967414366674,-1.5052277448678106,-0.47059579726336465,-2.8914349322422472,2.1290480018017233,0.6382874191238885,-0.4354404586554783,-1.0475781104565813,5.749400196250916,3.083735574014192,-2.6846568295130386,2.932873128652986,0.35439336022064905,0.2503451607622419,-2.0554267229801866,-1.4576717183303542,-2.537317021286793,0.963514960958467,3.219313188535848,1.45981041573731,-3.5724270448809916,0.5478722969213495,0.356643414173029,0.2857750830175465,0.6745474735124015,-1.3292036101694817 -8.90713942337615,-3.7554500769686694,-6.754638460515207,-4.976138763794888,0.5544146479626733,7.072238594705798,-7.844168263210881,8.736975681481828,-1.9024550826866966,-6.809061606910573,-2.6009380009667034,3.3785026773822686,-7.7077878572661,-8.626363692989337,-3.525463798616503,-1.226720006318153,3.6418112281727324,-4.225500623647959,0.7938106400528626,-1.58061333952493,-4.099528063209988,-3.6802420966555,-3.144363836538071,1.902471552472913,2.7437250209924016,-1.1507963929864642,-3.8430866552842877,-0.5820745018172369,0.1678574966125379,-2.9716329950507063,-1.6357291947049586,-4.412078964956937,5.681686767530697,9.600323502063864,11.564707915546913,-3.281887753746872,2.9524020390200656,-3.900077135368891,4.833588952152607,4.429054618880993,0.6211499903423371,-3.85685545255036,-1.2531270103012755,-5.067707425608918,9.138033003034533,-4.559900898456317,-1.9557218029966785,2.396164395540878,2.477650132713112,2.7326605176924694 -13.634960459695323,-10.421269659248777,-9.697133745681736,-11.897614869167343,-0.5110302445002843,-5.531588073075317,-8.202833466863439,3.6961839264792853,-6.273470407365006,-11.294560972989444,-8.664655283788871,-3.1442273354480035,-3.538810637498657,-11.312006721796845,-12.935959221330204,-14.433688113597878,-7.590703203169191,-6.4469846568628775,-4.5385456969568105,-3.4240539906096976,-8.757360433823079,-7.915699410740648,-9.111283769740062,-7.106998287741096,-5.777038322244193,-7.932351557523775,-7.86919599957583,-10.4086385903429,-4.603162987948088,-3.728757918241413,-5.377363757470933,-5.2280381854123545,-1.329774551177968,7.634239190804035,-0.7089000906143736,-8.780486131041402,-2.5797944198211162,-9.494370754574513,-3.9394063045507908,-5.039021395439556,-7.389528554110284,-9.648217955126052,-4.65130305539671,-8.493250687578517,0.8371491816223955,-10.027045220475268,-8.7313608356483,-8.407919417828001,1.5019687175344163,-5.451801773899619 3.335596982410327,8.437807683541246,-0.670812651516228,4.293725849159362,4.62808752308888,7.686736343130516,4.4187275493372695,4.009084186230692,4.078722931899185,6.817964167316108,6.207632766956438,3.8859934169228274,5.851087394276057,4.571137373305591,5.740019576367855,2.5736855664498766,6.50863997217803,3.231710640437384,6.289123360972438,4.486220324376345,7.068808934976632,3.705065488802358,7.861450858684627,8.895467415337567,5.476996808810816,2.8622879408330437,8.639543365365546,6.672104830238277,8.543281651250087,4.790865753393792,6.877652762905953,1.6979768578036467,2.5926229472504194,4.0164188875524545,5.298161258185286,5.535215832011583,3.381604812922075,4.765492270763841,4.2395140464015775,11.055810223499565,1.0911625972075323,2.4096308427256607,4.6191226946959,2.6810772090052426,8.51860742838192,2.5509693750951103,2.643119093943485,9.621553575207242,5.896975104732698,7.249994262720576 -2.158010534213452,-2.427268160027485,-8.201976624702613,-7.364153933224479,-6.0565145599511885,-1.6084761064123996,-8.454389213284513,2.7863080161281895,-5.4532844450668,-4.912109623061697,-7.542429428513923,-0.7711587911175217,-7.596634832530524,-3.878957640375365,-0.5988215047820331,-1.0128002116573223,-4.9662278387214505,-4.561287798872339,-2.2712475407364003,1.9889382285575787,-2.721580425750468,3.311731170791008,-8.239218827715487,-2.4373450539307386,-3.2804719626509584,-6.525751230425007,-0.8331111237231885,-4.016910270452575,-7.31938223185478,-4.892232233470024,-4.806103208116551,-3.739261977085688,-6.2237078709762175,-4.160087695175469,-4.958059384570919,-8.056838465141952,-3.003075395793309,-4.4517600582997785,1.258144893766416,1.2023584202017847,0.2915117314782981,-12.72412853733964,-0.7508995182146758,-4.760757900213946,3.275046938242803,-7.012117114299776,-4.610287366044299,-2.15843538405114,-4.124351666767309,-6.201030018979541 1.322492546162537,-3.6840452286458256,-4.883037570054301,-4.637892054781712,-1.9692875608603495,-7.615914752648039,-4.236139993783813,-0.19987357474912493,-1.3275802811719055,-3.560329452448331,-3.985862631781782,-4.725420962718958,-4.393448398191093,0.4852614394215994,-0.9399585571074849,-6.583380202885288,-5.9992670882280414,2.3722303883738682,-5.368116066207754,2.172362875585203,0.9736679247847224,-1.493751511901182,-5.043964251001055,-0.894062377415235,-6.034716582760114,-7.875932129078185,-3.0082177636717864,-3.8792935090668323,-3.9953486777739244,-2.328710608481174,-6.442084140975963,0.6950045542388184,-7.0192921090345,0.17976473452781372,-12.714220215699031,-3.885125680617923,-3.1681404568208467,-3.589342284431942,-5.176119237959892,1.6518666533072937,-3.336817422437928,-10.565302282110462,0.40872659420156543,0.2637532807258176,-6.985192067472285,-3.067516248091323,-3.9407540385082687,-3.274444012788762,-0.6617294601354327,-7.11404059390768 -15.286535144178803,-6.519121901704113,-10.555853389881271,-9.969391578421302,3.992042347956597,0.7405758311335129,-4.656525483304414,4.544238147696028,-6.881048832812058,-5.596489976575812,-1.821363532077509,4.343677010083991,-1.0933704300215927,-11.390094990084165,-11.521964563782044,-17.949301720539825,-3.222001874934429,-4.929650815124685,-4.727141837446396,3.4669564138853577,-8.560445348617176,-16.452562172850076,-1.4136273998041897,-0.19935812289672383,-4.749549189132131,-1.3346871397493558,-3.4948436367223836,-2.8769632972264736,-4.487122495775209,-1.1463222101518147,1.4459669683536243,-7.463764627820022,-1.2513047726588382,11.451976046018077,-0.7982746945010553,-6.101451727095413,-3.3132534533699296,-5.837104256698291,-1.174276626488636,3.033692994850375,-5.312653467472748,-12.94983117356348,-3.6535753425489084,-6.733131884790879,8.72676903982495,-8.569701809240282,-4.680254882728522,-1.6972585996283236,1.4821929260303346,1.2081347289296795 -10.69179829741132,-0.6253060626441068,-8.139633346227754,-7.576665047168744,-1.328612186708466,-3.9656967754563643,-5.607157732342566,0.5833333931569409,-4.517307591589591,-10.849567614514218,-4.566670685531744,-4.241377917195824,-3.907399725973984,-2.8063067281647207,-6.499838060623312,-4.817511656040364,-6.2747980883582,-11.993391947076518,-4.786501418980497,-1.4872451564376172,-7.043041506610672,0.6651967546786184,-10.395213430412715,-3.5916040919811065,1.582159677831699,-11.826779733986855,3.277462717885294,-4.86096839248186,-3.5911390392941898,1.2908144961703374,-7.141092324108061,-6.784945529624967,-1.6853988808991698,6.527337441477599,-2.3660133884837258,-4.4357259023923605,-7.428906576567887,-10.035408505362868,-4.397731706433075,0.8101115378455115,-2.9720820127875616,-11.059181745737654,2.8399797376956526,-1.4832931498142363,3.9401858454764516,-7.267779081432085,-6.417711743742028,2.574716742442561,-2.5479377191151737,-6.384940900926946 -7.935448600210236,-4.85115407654922,-6.118155852302122,-7.445841504887703,4.69445752047382,3.571464139968706,-4.392490195639987,2.286532959546379,-0.7889088550893466,-4.1289448517187815,-0.799288269596124,-3.1568946327760545,-4.277143278078024,-7.31098525662761,-4.890859719572984,-11.32311745450883,-1.9415703173193277,-3.482632933419988,-2.2177240679936383,-5.607236273958332,-2.6037819690461643,-5.390060142372762,-5.723758420932352,0.5781384039018924,-0.774569898329041,-10.023511425889769,-4.741136480904549,-1.5784069035931343,2.3218597166373898,-0.9048790032685043,-4.462314256529328,-4.374871930649975,-0.8059107027109633,8.19018992781231,1.4481449797011283,-3.042205385353898,-0.513598794696073,-7.546452527366029,-3.9570124400528295,3.546679391253792,-6.9383129663870085,-6.2095440396027115,-3.6568961386911494,-3.795702631569623,0.40790466717350693,-6.397987180824883,-2.1509953039854617,4.33916070029095,6.509100474507605,0.23869995295883206 -0.936168249703647,-4.2001540690058405,-10.601886138872064,-3.9298753496282615,0.7842261872812455,2.7782004785195156,-1.4205354654342948,4.198132148900821,0.8866382125933043,-0.9108051169084672,-1.835244771014941,-0.006743548601560165,-3.299504979487276,-6.492297376093685,-2.4025919327992282,-8.55490554937997,1.5052485743633304,2.8464081377681665,-2.963840858802458,-1.0356831508711624,2.612196347915468,-0.635199594964561,0.0695095529989227,3.556759319230447,-6.044092942489971,-6.240994480932775,-2.5746190678120837,-4.4391043933775425,3.700510671588064,-1.0825747085292143,-1.7587093514210181,-1.3807995847368044,3.749567905843403,5.386089949811943,0.17246197887844916,-1.3485016642847993,2.370194589380066,-3.037141350619949,-1.5142842901741458,1.6081852438219013,-4.514166841458233,-2.176110784005661,3.092787560647923,-3.308838984547929,-3.3130322327816124,-3.4527170007551597,-0.9779189350427924,0.4237072106538926,3.991955840138081,-2.662141929626336 -9.10871451073854,-9.909731976419565,-8.53351010701348,-8.768842983662894,1.744364061926817,-3.3948338223462238,-5.15217484575674,2.189052205429258,-4.421230426792009,-8.479802582199298,-4.526621492927047,0.012909380198054368,-6.936549779114789,-10.773004178797645,-10.189054504357056,-14.929508084942492,-4.310382978165381,-1.601414234968976,-8.978588214162658,-0.7235233325839583,-6.730995392405331,-12.51902617512536,-5.199327674992448,-2.4742692096194325,-9.221248349074614,-6.768977577621099,-8.830981731891445,-6.39980110518586,-5.409818139220484,-4.011653256041741,-6.526716155619278,-5.6303885041073,0.694977767222628,6.830265812491261,-5.382303554644421,-4.847833488539608,-3.0290811137297915,-7.174535907124568,-4.956193435335653,-3.1718357605791883,-5.568810747301724,-9.576168877281933,-3.3667677004969994,-3.6792009793757527,-3.584256926801501,-6.239906624698822,-3.8037305097858933,-3.8640009096745933,-0.5859042337855684,-4.621763426106328 -5.443164862667945,-10.316906332716995,-9.60366141203787,-8.9176477729131,-4.834247073646126,-5.6413701401342475,-3.4767490035995676,-10.119273911194918,-4.693075947288248,-2.5573551315836194,-3.279891907507761,-8.87005573675074,-7.843736506517106,-6.8890121286071615,-5.4601213958771915,-14.832933978456401,-9.868070257887307,-6.063372270983299,-13.063126978947663,-7.724043232861963,-6.29130859586575,-5.196798847739515,-10.86709942018541,-5.970834103818813,-8.801503725029832,-13.34931674019462,-6.485144703546523,-7.1281192852151865,-7.512467622482566,-1.8505842787589761,-7.954857315597954,-4.82038651124717,-9.445022778806667,-0.755528278459761,-14.38497933419088,-9.272453995486602,-8.178364509523355,-11.522131755681611,-12.1622368425924,-7.223968407942709,-7.131578629058638,-10.208662269003046,-0.23798683544347865,-5.21926379305456,-15.885812571019395,-7.35852373124673,-1.0362278409659955,-5.3395799256707095,-4.7158513906012685,-12.58269893725782 1.255766002879084,-1.5113879751616937,-3.878116244189829,-0.5741484536796778,3.7585271507119495,-0.7130116699554683,-0.7922397078319174,4.419313820231582,2.6297804276976757,0.6343164296184551,2.0579512256356614,0.2633819146125042,-0.8414455607050664,0.36689149888499784,0.43387206508008136,-7.189175176677443,1.6937177824046388,7.614179784631681,-1.4946295805823013,4.2949028873898865,4.667983191126636,-6.031406247080584,3.8200595814732514,6.119628109103813,-2.83757699765293,-1.0703616274532806,-1.9876866117879293,1.1905899585580042,3.232090807083566,0.3551179514108911,-0.1800229099354005,2.6586991208793087,-0.4625231018257949,8.417054903322002,-4.272733544530895,1.3746552301596167,2.2503655029242338,0.9319946344572021,-1.4372602977015199,8.146640846075233,-2.6594290440365107,-4.645231428308381,1.798232574866713,1.5704051333157754,-2.0450183406028475,0.5667615993924209,-0.6392051762300239,1.2433216118986798,5.400398312645244,1.152732031137753 2.1611685934471616,0.010804808026578905,-0.10998423856517414,-1.3376352272148289,0.8303278499819969,-5.593235660166272,-3.301737662798353,4.630807486634525,0.7456883506122245,-4.967423547836372,-3.3900353958531744,-1.5367188306344304,-1.74472222665272,2.4763967876876,-0.19443426333400063,-0.05170188994989666,-1.9935345392421517,3.6974254362835035,-0.006444923052492557,3.8604946964798623,2.4003370494954948,0.6211262184396721,-1.8875903349707013,0.47802386538343367,-1.8920061148950973,-3.5439297909517844,-1.3344115421187497,-1.8314122572505822,-0.5234209062404014,-1.8008261198916213,-4.841692221635926,2.0026308011039564,-1.1174298258615962,1.524560433495988,-4.892945889489397,0.5215602641149054,0.26609277285836286,0.11113965109548685,-0.4569261066098628,3.253955000308916,-0.7930816705288671,-5.221738597101137,-0.267105851821114,2.750196036536411,-0.2583132734297112,-0.18813956312001062,-4.1965963390274945,-0.7736236091493834,1.6814896852049168,-1.8553921067558448 0.6300058556089572,1.5296700056324892,1.5590171491832634,0.3361083692525337,4.476363689871827,-6.178463682952254,-0.2564521235394456,4.043216199361571,-0.7749595311683108,-4.944878745013207,0.41062776714671356,4.400909287099413,-1.3625174043875616,2.632631841644263,-2.0619311483940943,-3.0653650402597026,-0.7291446739888593,4.981290028619727,-4.241709437658833,10.712166147820902,-0.17897871693418121,-9.437613617417892,2.7614655264439083,3.2182247987229458,-4.350056531335546,1.1706083627445063,-0.029484443803964422,2.1422662963607486,-4.438731087174852,-0.8887952017866936,-3.5553295185504386,-0.5982863050389136,0.2389296672832184,3.3969389795210576,-9.712655054991297,3.786191922680552,-3.0072146080858406,2.43869249366537,-0.5762202073627374,5.2184626206832325,1.4118457119225263,-8.798830119128171,-0.2351666154383505,7.281224939536419,2.5304763145398814,2.2286594492406793,-2.0163866340739047,2.272531011185459,-2.2219258632979653,0.7088648774129065 -5.79711194524251,-6.612031715176755,-5.422644681895455,-7.517593715938296,-3.746038451615285,-4.366032249222548,-6.8983242559834475,-1.4889607144976775,-5.517785495780723,-1.0843534719282375,-3.494808588840317,-2.931936138190216,-3.326269093419892,-3.6415938185838135,-3.0821947947550283,-9.579832767478274,-7.037535355492072,-2.081882373673837,-3.421813581862636,0.875691219374973,-4.4038090026660806,-7.389803197813846,-5.237439687488392,-3.4193988063894563,-3.1067686942595687,-1.5783483416657376,-4.85593532108818,-2.3769858066807013,-7.513083347600554,-3.059112325760563,-0.37837379117458003,-0.9397872698592381,-11.733707531735059,1.374701197331372,-8.146590360125154,-9.784173419652973,-3.234143685551074,-4.248651112739375,-2.605944052421142,1.5053422481182006,-2.831525666847011,-12.736345145595475,-3.9793140034380015,-6.645118496056785,-0.9742876879048122,-6.3747178621116145,-3.798198588503527,-7.497609877912785,-1.2531461232800667,-4.002697329554693 4.501795227079165,-3.797856722269149,-4.917340717069216,-4.142392850621615,0.24295194437565593,-1.2682927562397468,-3.1949060307042743,2.4778838976085273,-1.0073531312905908,2.210621765444139,-2.3681712607776335,0.6226398815184304,-4.296014127688362,-2.513008137413566,1.3641365464246793,-8.098387858281129,-2.2137268480555394,7.69377623895491,-2.8486199466710196,4.4947029632415605,3.8231714977478326,-4.195032278837573,0.004832131057723288,2.8998001597846104,-8.727497263081611,-3.307802658685026,-5.000772538506325,-0.7473195809191235,-2.9090942091686296,-4.661790015214612,-2.2053262015251227,0.8479579522048293,-6.4897419297511565,-3.020384115060815,-10.725495677330795,-3.3387927820646204,0.7966901616225841,0.7008457086365731,-1.0236673108862275,4.302474441543135,-2.828836439068322,-9.718872369271185,-2.9827617335108654,-1.4128147041070411,-3.804840500933106,-2.689596211480693,-0.8888987565023758,-1.576983819637632,1.7629967134840139,-2.140642113332695 2.968487780515906,6.389653357413825,-4.058617080066083,-1.7666573858686148,3.699493756413152,-5.639845307755697,3.292624929860595,4.581820174139961,-1.2726283414179487,-3.291494161904586,-3.5291274893984994,3.2358306200884357,5.646705685731791,2.5731611015992626,-2.7361253840658755,-3.6797692241158675,-3.032282393459801,1.7809234751577359,-0.3857227936761996,10.079912152650035,3.22059557264244,3.5456291234206683,1.4331560429139771,1.8364601829488372,-6.676904790292362,-5.991789813196137,7.8423239180084625,-3.403218915357356,0.2661016411372694,1.1385674821342344,-2.273119509351617,-2.3860834605976997,1.2899355283228486,-3.698342506055302,-9.446690510827146,3.9550476955507623,-3.6010395208448704,1.4469067879950281,-0.5143388684759985,2.2449135351577687,-2.4413618940939554,-7.166272292989753,2.9878754729597525,5.0618964234308725,5.746839410576374,-1.7433963828683647,-5.300666029151062,4.202513128067871,-0.3461384406327255,-2.1514643775710236 -6.2673755787042555,-4.019575069338683,-4.579993821466237,-6.429867255647408,3.0901548872272717,-1.4374335932824212,-6.641463688528069,2.923425603128485,-2.1037169780074354,-6.974022955191634,-1.1157815599102976,-1.5327048728962374,-8.01122023528172,-3.6852540803446447,-3.460790591648249,-8.728804765137829,-3.096446922156906,-1.6222280798864954,-5.649445283449058,1.1589525200902926,-3.4201071247840233,-9.242266642738025,-5.768521735018762,1.53558453673564,-1.8166828523231342,-7.8085903771845695,-4.566656512815283,0.4907050647387128,-3.7580083124150954,-2.232156786388717,-7.134239853835378,-3.8077526467273235,-3.6698716254087813,8.109257898230476,-5.9656537477594505,-2.9322167755860598,-3.397592869702807,-5.9997349372584194,-3.6529486436288003,6.599203709014336,-3.004717066858839,-13.074791625190327,-2.571398998282058,0.41514383585917725,0.5342436538462094,-4.382187533257807,-2.8005007175058667,3.666622319388285,1.386792987784996,-1.5707168316102655 -4.208424669527474,-5.908040816225524,-7.316164323952265,-5.304792828441856,-0.8524281716783049,-1.8698296713016416,-6.894789514239907,8.36796673048242,-3.7528162710783657,-9.048749654566736,-6.1875010362041785,5.163207345010622,-8.621447179948877,-7.989218669581615,-5.748038033599747,-4.4387810516321995,0.08933448864790128,1.5455502780387027,-4.965067810842413,5.4567165183089585,-3.6759063231877844,-7.3770748066317795,-2.0519801457274305,0.3697565230805533,-7.40795869704556,-0.896836662118573,-6.187855172907137,-4.504725101052121,-6.098439915510005,-5.946546863782664,-5.167054253782234,-3.697150087308268,4.342080755255587,4.037672115912088,-1.240947929437005,-2.9595535574124905,0.2831407812456286,-1.720054429504684,2.5887181337325043,0.04646132487663568,0.8353110506837851,-8.658927074462532,-1.134911960326436,-1.7174607769200247,3.11183929028161,-3.3399188427660413,-3.8562039897609703,-3.3573883746662556,-2.9035202918282503,-2.5704661699583955 0.21380924706806548,-4.106652459992706,-4.827738193353805,-4.580126923696446,-1.178820719520894,-9.09797335248272,-3.651198457841796,1.8472101298698806,-2.828386321190484,-8.814523069107153,-6.241591079439768,-0.6848051324017099,-6.483809414062534,-2.352594841671538,-4.7699364119901455,-5.462372314945073,-5.264266771308365,1.5111060242124683,-8.411792745857642,4.601065347478929,-2.0890200984562326,-3.629158996560051,-5.07842685823857,-2.1376878826950696,-9.817801539898994,-7.559221014522796,-3.837710197693899,-6.1226327207439555,-6.999434121635661,-4.020201542928028,-9.596414402186793,-2.542715731268743,-0.566393580134799,-1.1080838786713954,-12.458614006158747,-1.5480433649865768,-4.516254101389014,-3.431902566790592,-4.218193517683515,-2.91284734793052,-1.6302428425149518,-10.037433562628477,0.40392414595533754,2.751063569510663,-5.586989810205397,-2.3535665918986495,-4.531527830331861,-3.041307293275281,-4.834587450975379,-8.076209535597702 -2.5157972441760164,3.861474539275994,-6.081922962926324,-2.253121573481353,0.29707061666426576,-1.1522546344396358,-0.9051680072598539,2.1248229452009855,-3.2350383825212243,-1.729142418449925,0.3151992402200592,3.743696386857375,-0.6433392033977932,1.0365648867725423,-0.5746413336971554,-3.30354794653822,-1.372842337341135,-2.340741405562945,-3.202013166064814,8.911629299134223,-1.1160391537995116,-3.0691120272962413,0.48163201671133593,3.6494991029366974,-1.3901527364289348,-1.6617587419838227,6.455116106207098,1.7337328048055052,-3.950409785808194,1.3855390133991596,0.13477646604786364,-3.7656748144149104,-2.8060292011784034,2.461498055938796,-6.976065687468839,-0.6247919731575923,-5.056967479207937,-0.7254947432454417,0.40071790852841027,7.093522934962321,1.2734591907551764,-11.600663504556925,4.046426069287369,2.0483322137722255,7.0900851700854535,-2.296922635725412,-1.9117115883652815,4.531976969177347,-3.6128649728616824,-1.2297345663783765 -1.4709527332060666,-2.2859336381213025,-6.366740161078279,-3.8668413995401605,2.6578017394064126,-3.2506450254491632,-1.2173520772363242,5.932485189954048,-1.8421482456853,-5.971358665246308,-4.010443873205928,3.8435862800281413,-2.1707564635734062,-4.817370432940268,-5.681423206694016,-7.448644588561232,-0.9268428271123073,2.973271157872109,-4.365028901978009,6.001790025978922,-0.4869610394021109,-5.041136545545907,0.3303504974603957,1.4075421989437409,-8.723260378056946,-3.819521114654818,-1.8764461725979036,-4.561530535150908,-1.9993294415304947,-2.5295737301750263,-4.0612639906541554,-3.072107721333424,4.171042524918059,2.4192510412023562,-5.177943102833455,0.7405968696225859,-0.9624313364176484,-0.9139972490284108,-0.5641809967761837,0.016098440644816137,-2.385613190236393,-6.535643254631346,0.42409625517214655,1.1791367569119149,1.2574325875342034,-2.4049640605737292,-3.6825695993974445,-0.25044441910494686,-0.39131684489191354,-2.3258562786012233 -4.502972136126779,-8.341294558406757,-6.229225120514656,-4.5280194773049525,5.114406631935327,-0.6075487950445222,0.9034456708753881,-0.6469614678117717,-0.3539215148551651,0.8149451107872594,1.9968122893822866,-0.01555696409508344,-1.0653015041685312,-7.558568524007801,-6.48617708868817,-18.08052876822756,-1.212043695133324,5.016749715435796,-7.111995952976846,-0.6457888180052342,-1.089824177015394,-14.577386982048528,2.3331701201261374,2.129587007067722,-8.911404064518747,-2.6447259920243713,-7.664303362763219,-2.2806965471569853,0.4866922734585051,-0.6297186964903381,-0.3714064653258531,-1.4009365374710714,-0.5050836364851707,8.253642577312648,-6.407347010637617,-1.4415315676354763,-0.15856665033966322,-3.068769004211492,-6.266264845547968,0.15689609202492516,-7.277012853502568,-4.1746778846906984,-2.214132640562297,-2.439913138710221,-7.647812580997913,-2.53409408616761,0.5141172584262828,-2.432003281096029,4.245307732274421,-0.6118758022007285 -2.01479033082884,-4.530998618071512,-1.6047836765115693,-3.2515119836660276,-2.1241944946230356,-1.4549963793386531,-1.8705631821635107,2.9052083710968155,-4.168747767680897,-7.077551278290954,-7.739486652307728,4.390206817595272,-3.7939704528260463,-8.41939285836812,-6.759654080824167,0.7303805589920522,-1.110602174879513,-2.3386670317784115,-2.49684894205888,-0.5820883478205349,-5.487302820045784,0.06538912313821693,-3.29492859019282,-5.982291401553539,-7.660156358707505,-0.37381631674627597,-5.121784158755415,-7.321598540873365,-5.563156144627303,-5.7806391009918325,-4.271500626844832,-4.783404854900069,7.255436362918928,-7.0678084864709,2.6710939726535132,-1.4753842424823231,-0.028990241251812904,-0.9767542471251933,2.5132843297926173,-11.67627904917873,0.7829338873184309,1.7622472787799983,-3.857855204946766,-2.4431902792777813,1.050594116035797,-2.455733847751699,-2.351489081913167,-5.197188421036548,-5.4675070848599026,-3.088960973339876 -9.952855521047445,-10.728312270872031,-9.923188577961927,-8.392247091897614,-4.099297467851235,-4.351192164322134,-9.727309905092048,3.9746062214064306,-4.698692418768775,-4.73933674560476,-4.756072850188566,-2.1134290969418568,-4.7213837308511355,-7.254953356168473,-6.420500981342299,-11.932520514437357,-4.31637147131138,-0.2866056530938419,-3.3795095619556346,0.8309916242757529,-4.802970818840804,-10.786433317925862,-3.5672504110204,-2.1951930448978714,-3.041917625252888,0.8565557967311681,-8.144505384869698,-5.349543879158311,-5.135970335998552,-3.3746527253860528,0.49765923946381907,0.4417090769274985,-6.368402363414072,11.09908900070949,-1.8374484352650158,-10.680783051375847,0.4645481662700477,-4.9104574272728785,-1.0228818111549787,2.383252120701208,-3.286172525012196,-11.142411743918798,-1.6542791639197705,-9.441947417261055,-0.3539808572536334,-6.731589662978496,-5.824192462520308,-11.820196243727187,1.45611632525604,-3.663612060425426 -7.834492650183964,-6.8881359897353684,-8.83520715690916,-9.08754280056328,-4.516252265442722,-10.50581975718381,-8.727389920891166,-1.1440273322753844,-5.288395362744433,-10.487653820062238,-6.490168349051586,-6.879295100841678,-7.8109295258525835,-2.7052809015666597,-6.244232178881896,-9.451076044146205,-9.762351311998303,-5.89394256028522,-9.307704769348172,0.2496942905741535,-6.326460051544639,-4.9722475324982165,-11.2009670834969,-4.880416898028658,-4.343743440975308,-10.932580443262555,-3.672921832858218,-6.840317757325597,-8.660704647298775,-2.158029484781523,-9.404193861652569,-3.2130395360900614,-8.279977268406997,5.680426131327992,-12.36229968335578,-8.45353142308068,-7.619863983439451,-9.787749624109443,-7.453136198393764,-0.16004684956609294,-3.7775576828477666,-16.238099075038726,1.1119026615684853,-2.4633457550777704,-5.390922555094757,-7.072969848179285,-7.187921316593306,-5.681605178746404,-4.175886068868282,-10.85890907775043 -4.836175332908319,-8.872051589936289,-6.478268280740753,-7.37354742684208,-4.164052645398226,-7.1607218406966435,-6.910209336288325,-2.0423420690749703,-2.9749892063767955,-3.5927821682568877,-4.6161762610129315,-7.543324633998639,-4.370720951327358,-3.2938837788371584,-3.954037998828343,-10.308511603056942,-7.6292224895786696,-1.0506523802108,-5.046318564204038,-3.217481244246015,-2.8644285646461656,-4.585844215260735,-7.316651675948672,-4.39970240418158,-3.9513893045651054,-6.287684840720005,-6.900190166543439,-5.938957289815281,-4.476297787294335,-2.4565286744436117,-4.147270818641489,0.881499599606903,-9.342151355524338,4.198586671644002,-8.291029751803187,-8.954058066276037,-2.398758826455889,-6.672518440983416,-6.1454934582760075,-1.0581601445582796,-5.243846081348415,-9.474199880937094,-1.6621501412400452,-5.956852803896696,-8.193603062261856,-5.811102475520174,-4.784538976795078,-9.153336735772017,0.6768566629724766,-7.458773394162623 -1.2061910210670717,-1.1998295108607935,-8.914576395451874,-1.544942997349061,-2.4707614895159264,4.702579835387483,-3.919296951080721,5.233759438109315,0.9593269522102859,-1.677033800029153,-1.046390461823699,0.6628361994775973,-5.642804900282866,-3.271670951990915,1.4134429983828247,0.37292349663824353,3.7714260548006777,-0.17282006207688028,-0.6316340159535154,-0.22326778301527872,1.9679018481015031,2.9591682142086144,-0.5797693519280196,4.408109324045217,0.6723185056647005,-2.779170689758742,0.4561600771054529,-1.1915471886981468,2.619012805088464,-0.464366161518015,-0.7801584429501296,-0.7260782169293813,3.8389481925559914,6.489864067630375,5.103508530181092,-1.7040519597231143,2.6149672047302706,-2.109148979366802,2.3015832067870914,4.890277528393632,0.7336072254012764,-1.9489518115242501,5.553694187550029,-2.6905009070400907,1.4095211882748864,-1.8818125330230702,-0.2299516593965265,1.9398140352895379,1.3902069600112532,-1.5466305901397863 -1.3264536777337446,-13.643661499224615,-15.80461366322815,-11.922101219064949,-5.701289288610631,-1.2154430790011679,-11.164457715456958,4.152070053745815,-3.9743827153532325,-3.3108804386082005,-9.328269290818701,-3.515449961227486,-12.931584854378759,-12.205801152945433,-3.2574900215198728,-14.255182678003536,-5.10251657086217,3.91685730540621,-7.097571971680653,-2.346449659335458,0.08352588002787709,-3.658959974882622,-8.073894569873968,-0.9748543517163748,-13.07459411624668,-10.453217368021564,-12.58374297207006,-9.123274325536995,-5.409969400958742,-9.090058749917507,-7.0147960507022615,-1.6140304159709946,-6.739614110040031,0.271500762250116,-8.550090802390539,-12.091500075251945,1.5943887645927517,-6.970336736958036,-2.9369629426359607,-0.3742877828145428,-6.336230887995097,-13.711090364621684,-2.969548914570818,-10.335062951561321,-9.228748152210246,-9.671485210499712,-4.21659619470492,-8.742997433779191,1.5546610315769556,-9.4371360617993 2.36709846081605,1.8816606141599448,-6.042322411888497,-3.7550099663624414,0.6360195680437222,-2.464474359055439,1.674302073335208,0.9975626685243519,-2.51914985841374,-3.5301070652330755,-5.016768959595438,1.7956407390766405,-0.7495604529874544,-2.793389769954437,-3.0731144052589263,-3.885671122491626,-3.421336696540603,-1.355157288736061,-4.149865328336146,3.5182339680174755,0.3073446283942982,4.483855175074107,-3.042899315060937,-0.7653486882585594,-9.093448954569315,-9.484486808273026,3.296791366735499,-5.320647349366821,-2.3211501117607667,-1.6435229966319955,-5.575577521652009,-5.320076815354164,1.9320117075447452,-7.720632627444835,-8.305476333756815,0.9617655147082416,-4.2160147548422815,-2.1601181463871173,-2.0889916521228438,-3.6581951459556317,-2.6620796562349014,-5.606045459509255,1.4542722535532346,2.027856700193256,-0.04438363867231865,-3.554933788697373,-2.701343043919742,3.5094014916771314,-3.3468095190198106,-5.482309579285571 -10.792990442998128,-10.561868460566117,-7.94673320053642,-12.476433343961151,-1.0074610649535496,-1.7841051492125446,-11.186400296916924,3.617494867461283,-8.101294461370614,-9.950128020565222,-8.100799876993918,0.45895156829093603,-11.170946746818359,-12.547313971134123,-8.990833934575672,-12.034915079635885,-6.712108140155016,-5.250727736163098,-6.74730143621714,-0.8178784792750109,-9.591068875411281,-11.326235646343896,-10.332010840368406,-5.052637649736278,-7.413340763007742,-7.079790039051511,-10.658479648368075,-5.388194123010794,-10.614561798710191,-8.028937022492375,-7.731677355931504,-7.5980728639909065,-4.932185062433102,1.837450321933659,-4.6535689011196375,-10.023499581028693,-3.641362822174825,-8.236341176249747,-1.5842373687236349,-1.8221225755079529,-3.7485397372887372,-15.818295648234432,-8.681618964761784,-7.173264842067294,2.318919048561198,-9.922448003505624,-5.42432323370785,-4.338747461413809,-2.2994620206168372,-4.209888365021787 -0.9416431902264577,-4.569555958545556,-4.849911994869059,-2.875484810554096,0.5572747460922333,0.598635607130009,-0.3743205395996257,-2.1065126521432718,-2.061183096012539,-0.8333027913253973,0.3363003677204095,1.9491431071909011,-6.727688095264013,-5.585104678984554,-2.3456657828933505,-7.761537354545881,-0.7899346674650367,0.8954394580276135,-8.640180443733177,0.8352084898737151,-2.5386423044897803,-7.9479659880523,-1.0665926468547657,1.3572251936168158,-7.7834071285812785,-3.5905307667939015,-4.240839701319502,-0.5682438999617252,-4.858268998750793,-2.47181098878469,-3.9342274837663185,-4.243348499969317,-0.13633878314721448,-0.2229358550425704,-7.548953630512071,-1.2510052143592043,-3.1756692617899107,-2.7610820849536473,-3.59185250131873,-1.401514547279658,-1.243542545127152,-5.855897961685897,-0.38581895837865665,0.4173427965650867,-5.740723415497311,-1.4959719647522833,2.350185273214139,1.3296776554130614,-3.775602655299647,-3.33234180027792 -7.832393834532059,-8.20950987354011,-7.564997132853279,-8.976224619993685,1.1441208368292863,-5.325228606568661,-8.162677134239631,7.92601798808248,-5.541120559139344,-12.420147724834154,-8.14807602601159,3.2019603831099737,-8.164137764122572,-9.8711038832172,-10.05769846485106,-10.281171738272382,-3.974109862387682,-0.16378310550684283,-6.622285775292121,4.588228378469127,-6.386182613666553,-11.01146771812145,-5.316073743456009,-2.4839980682371308,-9.54665319657833,-4.776765676318185,-8.482381212808246,-6.872322179781343,-7.754255739560097,-6.6365147319894655,-7.8998744643775884,-5.364854897298837,2.0267689712743397,5.00808165675333,-4.78297174021389,-4.50678234226201,-1.9719618866644066,-4.687026799394231,-0.5192401761443601,-1.502478610587691,-2.70133727009783,-12.565358242728982,-4.468671810209432,-2.3865776071430966,2.572607349479169,-6.126310900852659,-6.740958101786881,-4.59571863601505,-1.8408838879762008,-3.811109539017784 -3.3733001618770437,-4.38500537494418,-10.071602099128427,-6.4558216313991945,-2.3584881897691328,0.9852673737707847,-4.4826278085669555,4.885104466906043,-5.614556731064311,-5.350197665012782,-6.3529739851663,5.892122722390028,-7.2778004364475954,-9.707647716982459,-5.1567021215345354,-5.564179840202246,-1.0561722318558093,-1.6411112500236453,-5.557749024449377,4.313904919721065,-4.102546094100093,-3.638531293927914,-2.7911481089338492,-0.41654470656144404,-9.450646286442153,-3.113970779085679,-2.942390629445111,-5.140090830289733,-6.930140630490574,-5.534916454037467,-3.523069273590404,-6.634574647790856,2.6895983415665157,-2.503044699577685,-2.832288580137093,-4.328735231599238,-1.8095376149939597,-2.564360932057756,2.1875017710388023,-2.615676553373659,0.09520889832589396,-8.900310118188099,-0.5999015173576114,-3.7068855333897317,3.458883793674098,-5.522962509962214,-2.360229848150789,-1.5557101525868258,-5.19073330717997,-4.091443438090275 -1.9759362917396341,-2.2021744521004107,-8.04136978589775,-8.065605198110301,-0.87321764942676094,-8.904381937996625,-4.182872707768179,-0.11659425741774632,-5.641297804644903,-5.571450977124004,-5.380918052897367,-1.38118066605266,-3.9718246976164364,-1.5395379694871845,-4.304012037054972,-11.195803814999591,-9.051843844030085,-1.139874677229102,-8.236559794197982,7.1967333825137905,-2.5255855957376707,-5.0743419229108975,-6.22774141827025,-1.4286098196702008,-9.562257799446183,-9.879829225217213,-0.02126756292520393,-4.315771676730654,-8.83099651784064,-2.6787353436093206,-7.040117427759796,-4.29225502695169,-9.137397637459838,-2.060939567728503,-18.444281054730574,-5.062311639509432,-8.042761685758467,-4.935180054193347,-5.584522853049824,2.148380887151658,-3.847381670076449,-18.65132906987398,-0.40804473613866366,0.63385802049214,-1.596565650507356,-6.300899111588449,-5.7906547826299,-1.0505269800151003,-4.278463978438629,-8.115712913408997 -7.860455098941947,-6.687421059673293,-10.638338024054724,-7.16818520456826,-4.957987282474862,-0.7321030496265892,-7.837840666467003,0.54092230792347,-4.706779754578103,-2.9775045435928007,-2.661244542540781,-1.789198025507455,-7.152582866095494,-5.767705681414842,-2.9042489652114027,-8.26878888086107,-3.515888212372145,-4.1041522972854905,-5.410467800387857,0.12501373743650568,-4.689117313288163,-6.299110536952627,-5.1410570420432284,-0.47450003590882317,-1.7183179915878404,-2.8846321356511613,-3.3606061224784143,-2.5474335253646445,-5.849771342301109,-2.005733620410158,-1.0913729695789005,-2.6980451154884104,-6.484274351922346,6.933631355084792,-3.489003175973037,-9.237607804407974,-2.744763666251141,-6.142689041342478,-1.72120901008262,3.348233454720911,-1.341887928478711,-12.012614088608714,1.2909755629784416,-6.885629498833452,-0.3488679658980577,-6.388741355678908,-2.6443142051363218,-4.658756105124133,-2.162196919302546,-4.979866543116972 -10.434084014531352,-4.596377970922135,-9.587333797789015,-9.17889156110062,2.0255597319438086,4.351759990539731,-4.288967941488349,-0.3657349249330446,-7.334395139283663,-7.913211701391685,-2.231504571565604,4.618767383220906,-11.709038483402132,-12.425194221654825,-7.762843776610596,-10.849617449992444,-2.7949936894780354,-9.616738038833159,-11.550991773206304,0.014037696168998304,-10.433888855495193,-10.758601383723718,-7.775642460101415,-0.467541081275582,-7.296876455785572,-9.925696648605228,-3.0159042201447352,-1.193789181232005,-9.406025469521774,-3.80086939337795,-8.05590278662723,-13.62355479826415,1.3720425990172789,1.0719230509586029,-5.038358771053037,-3.8836430760929566,-8.198731726703706,-9.066495530775564,-3.189411582340635,-1.2737831676086693,-1.7819778843303102,-13.850333990466941,-2.752864799084785,-1.456789551913746,3.3305911508072903,-7.670492925272728,0.32146019381253765,7.643723578894147,-6.863875631590329,-3.6203646220047787 0.7009645813140973,-0.08957617846298427,2.029401229326742,0.7249907589441537,7.5793403967688935,-2.335052019959528,1.848001504396791,1.5436733699180776,4.53768846155344,-0.6566207063454489,3.8038382604632375,-3.0323353341352,2.0411155344142613,2.884747608387702,-0.6943178340832457,-6.828309667250407,0.3816137103942374,5.964210762023095,-0.7640351287105149,0.34420399520828315,4.017499276206561,-5.642464007628051,2.290299971548671,4.063814992468142,-0.16878044823741556,-3.9301820795622016,-1.8954644305774413,1.9368757830451846,5.843634160275415,2.450520721819704,-2.4513966203353394,2.837815733071716,0.35255957586687403,8.766871323582084,-3.8480858883479536,4.693406857686588,1.0682027361068198,-0.3346321675338745,-4.795985458443051,6.085972870077269,-4.781773295795198,-0.6765759493124204,-0.34852404027865447,4.778817703272558,-4.188492709262564,2.021734360111013,-0.5229924581039325,4.0254973380048185,7.66723221438881,2.4331306329307507 -5.23428939274357,-7.489455447527511,-5.723117777247468,-6.91894387793148,-0.16575672211754017,-3.8365693413735213,-5.341313582353548,1.809637674447214,-4.9604545581826445,-7.284187612439753,-4.846005184666801,1.7131600576463006,-8.022121370018164,-7.966768724449542,-6.843834439655261,-9.472036324677436,-4.113058763534158,-0.6662108666820292,-8.2537376558295,2.2655709200152696,-5.912266391355055,-10.261386446905808,-4.785932947793937,-2.325719378465661,-8.944279590811684,-4.306547630802495,-7.4222355556572595,-4.299265530060368,-8.335182854510915,-5.280988948735072,-6.531765829709002,-5.021413370872644,-1.147038541294768,1.2908245210577707,-7.637223662650424,-4.488354701012506,-3.568010219387758,-4.488928623064392,-2.6693494847329333,-2.6682928331957494,-2.058943673825312,-10.664082379827667,-3.7234126528893006,-1.774405066900609,-2.208048472944516,-4.590487113542438,-2.8310072594529365,-3.397722437226123,-3.9034027121470953,-4.567915075519506 -11.227956250461123,-6.969391744704872,-17.195634065988394,-12.936468251053054,-6.546904327077589,-1.5026583732574934,-13.493483127633937,0.884953810684844,-7.3108864645470915,-6.33756213239298,-4.780882698006287,-5.202115212019121,-12.596876770237824,-6.396349995364554,-2.6305724783685362,-12.306296307331657,-7.807620911075107,-8.84508531946014,-8.478907676365326,0.6049999860020168,-6.124545903080627,-5.707795507858052,-12.13511331191023,0.1259106644747594,-1.3545405214594481,-11.905426364883544,-1.9417343585938402,-2.8393360582099683,-8.986209558300244,-2.6322466587142843,-5.812191676627872,-5.90205954121471,-13.395599513636162,9.172621892793536,-9.565112344430862,-14.0499929468297,-7.092359390612618,-11.97664469744173,-4.375290441659982,9.354995931368236,-3.2262600809718163,-24.317197844671842,2.2919945462978704,-8.293044639811516,1.1060875517723994,-11.812943676249521,-5.5607817700573605,-1.1359159917595711,-2.677895006026074,-9.125884928480481 ================================================ FILE: tests/testthat/matrix.mtx ================================================ %%MatrixMarket matrix coordinate integer general % 252 142 1059 42 1 1 43 1 1 87 1 1 92 1 37 217 1 1 233 1 1 92 2 11 160 2 1 233 2 6 92 3 10 121 3 1 190 3 1 210 3 1 217 3 2 233 3 22 92 4 21 121 4 1 123 4 1 166 4 2 173 4 1 20 5 1 92 5 74 9 6 1 51 6 1 92 6 5 95 6 1 156 6 1 182 6 1 217 6 2 92 7 25 95 7 1 121 7 2 183 7 1 190 7 1 217 7 5 221 7 1 233 7 65 241 7 1 92 8 5 121 8 1 190 8 1 219 8 1 69 9 1 92 9 21 121 9 2 159 9 1 215 9 1 217 9 4 226 9 1 233 9 7 39 10 1 92 10 14 156 10 1 190 10 1 201 10 1 217 10 10 219 10 1 233 10 16 248 10 3 20 11 1 51 11 1 54 11 1 92 11 16 107 11 1 121 11 1 122 11 1 142 11 1 217 11 11 229 11 1 233 11 12 92 12 11 121 12 1 217 12 4 233 12 8 75 13 1 76 13 1 92 13 30 95 13 1 121 13 3 125 13 1 150 13 1 156 13 1 217 13 4 233 13 3 20 14 1 92 14 19 95 14 1 121 14 1 182 14 1 210 14 1 9 15 1 20 15 1 26 15 1 35 15 1 42 15 1 43 15 2 76 15 1 92 15 26 121 15 1 204 15 2 217 15 3 219 15 1 18 16 1 92 16 11 142 16 1 153 16 1 169 16 1 180 16 1 204 16 1 210 16 1 217 16 5 20 17 1 92 17 12 121 17 1 217 17 2 219 17 1 233 17 3 39 18 1 92 18 33 125 18 2 142 18 1 156 18 1 160 18 1 221 18 1 248 18 1 20 19 1 42 19 1 92 19 26 156 19 1 217 19 1 250 19 1 92 20 23 121 20 1 156 20 1 173 20 1 229 20 1 233 20 1 43 21 2 70 21 1 92 21 17 100 21 1 118 21 1 125 21 1 173 21 1 190 21 1 201 21 1 204 21 2 210 21 1 217 21 31 219 21 1 221 21 1 233 21 16 248 21 1 22 22 1 92 22 26 217 22 2 248 22 1 43 23 1 92 23 5 209 23 1 233 23 19 51 24 1 76 24 1 92 24 24 204 24 1 209 24 1 221 24 1 9 25 2 42 25 1 92 25 35 156 25 1 180 25 1 217 25 4 6 26 1 39 26 1 51 26 1 75 26 1 88 26 1 92 26 25 95 26 1 201 26 1 207 26 1 217 26 3 221 26 1 42 27 1 92 27 14 169 27 1 217 27 2 221 27 1 233 27 12 75 28 1 88 28 1 92 28 33 210 28 1 217 28 2 219 28 1 221 28 1 233 28 1 92 29 16 180 29 1 217 29 2 233 29 1 39 30 1 42 30 1 51 30 1 92 30 10 233 30 10 248 30 1 9 31 1 92 31 15 190 31 1 217 31 1 233 31 1 92 32 21 121 32 1 162 32 1 201 32 1 217 32 13 221 32 1 233 32 8 248 32 1 20 33 1 70 33 1 92 33 40 121 33 1 142 33 1 217 33 2 226 33 1 20 34 1 51 34 1 92 34 19 142 34 1 217 34 1 219 34 1 221 34 2 26 35 1 51 35 1 54 35 1 92 35 30 107 35 1 121 35 1 159 35 1 162 35 2 190 35 1 215 35 1 217 35 2 219 35 1 233 35 1 248 35 1 26 36 4 42 36 1 92 36 6 180 36 1 217 36 2 221 36 1 50 37 1 51 37 1 88 37 1 92 37 7 156 37 1 182 37 1 190 37 1 201 37 1 217 37 5 233 37 12 9 38 1 88 38 1 92 38 35 121 38 1 125 38 1 160 38 1 217 38 2 219 38 1 248 38 1 6 39 2 43 39 1 88 39 3 92 39 8 121 39 1 156 39 1 183 39 1 233 39 3 92 40 11 153 40 1 160 40 1 165 40 1 217 40 5 233 40 25 20 41 1 42 41 2 92 41 24 118 41 1 121 41 2 142 41 1 175 41 1 217 41 1 92 42 5 136 42 1 217 42 1 233 42 3 3 43 1 6 43 1 92 43 9 190 43 1 217 43 2 233 43 12 54 44 1 88 44 2 92 44 18 121 44 1 123 44 1 125 44 1 136 44 1 142 44 1 156 44 2 183 44 1 197 44 1 217 44 8 233 44 6 248 44 1 42 45 1 63 45 1 92 45 25 121 45 2 217 45 1 219 45 1 92 46 22 121 46 1 217 46 2 20 47 2 76 47 1 92 47 7 121 47 1 233 47 3 20 48 1 51 48 1 54 48 1 92 48 10 156 48 2 217 48 4 43 49 1 92 49 11 121 49 2 136 49 1 215 49 1 217 49 5 221 49 1 233 49 2 20 50 1 26 50 1 43 50 2 92 50 20 142 50 1 180 50 1 217 50 3 233 50 1 248 50 1 250 50 1 51 51 1 69 51 1 92 51 22 121 51 1 125 51 1 196 51 1 217 51 2 248 51 1 92 52 9 121 52 1 142 52 1 165 52 1 217 52 1 233 52 1 76 53 1 92 53 5 121 53 1 156 53 1 217 53 1 221 53 1 233 53 4 39 54 1 92 54 12 144 54 1 215 54 1 217 54 4 221 54 1 233 54 12 92 55 3 136 55 1 233 55 3 42 56 1 92 56 12 233 56 8 92 57 5 123 57 1 156 57 1 174 57 1 192 57 1 201 57 1 217 57 6 233 57 4 51 58 1 88 58 1 92 58 40 95 58 1 121 58 1 217 58 4 233 58 5 20 59 1 50 59 1 92 59 19 125 59 1 217 59 2 39 60 1 43 60 2 92 60 6 121 60 1 142 60 1 217 60 1 221 60 1 233 60 16 22 61 1 92 61 14 156 61 1 209 61 1 9 62 2 42 62 2 92 62 50 121 62 1 156 62 1 162 62 1 226 62 2 43 63 1 63 63 1 92 63 25 156 63 1 162 63 2 215 63 1 217 63 4 221 63 1 233 63 1 22 64 1 39 64 2 42 64 1 88 64 1 92 64 15 121 64 1 190 64 1 217 64 2 219 64 1 226 64 1 233 64 16 240 64 1 88 65 1 92 65 7 140 65 1 182 65 1 201 65 1 217 65 2 234 65 1 92 66 1 217 66 1 233 66 2 20 67 1 92 67 28 219 67 1 233 67 1 76 68 1 92 68 20 142 68 1 217 68 1 233 68 1 54 69 2 92 69 8 121 69 1 140 69 1 217 69 2 233 69 7 248 69 2 9 70 1 35 70 1 54 70 1 92 70 12 95 70 1 125 70 1 190 70 1 233 70 18 20 71 2 92 71 25 121 71 1 142 71 1 201 71 1 215 71 1 217 71 3 219 71 1 233 71 2 18 72 1 75 72 1 92 72 4 217 72 1 6 73 1 92 73 21 140 73 1 142 73 1 217 73 2 219 73 1 233 73 32 92 74 10 217 74 3 226 74 1 233 74 3 248 74 1 50 75 1 54 75 1 63 75 1 92 75 20 122 75 1 182 75 1 215 75 1 217 75 10 226 75 1 233 75 7 248 75 1 9 76 2 92 76 27 121 76 1 122 76 1 204 76 1 217 76 3 248 76 1 18 77 1 81 77 1 88 77 1 92 77 13 217 77 4 250 77 1 92 78 15 101 78 1 121 78 1 217 78 10 221 78 1 233 78 12 248 78 1 6 79 1 50 79 1 92 79 23 121 79 1 162 79 1 204 79 1 217 79 11 233 79 4 51 80 2 54 80 1 88 80 1 92 80 4 136 80 1 217 80 3 233 80 1 20 81 1 42 81 1 92 81 6 215 81 1 217 81 1 92 82 15 204 82 1 42 83 1 56 83 1 92 83 18 95 83 1 142 83 1 162 83 1 207 83 1 217 83 1 51 84 1 92 84 10 217 84 1 221 84 1 233 84 14 26 85 1 92 85 6 95 85 1 140 85 1 201 85 1 215 85 3 217 85 12 221 85 1 233 85 8 51 86 1 54 86 1 92 86 3 121 86 1 217 86 6 92 87 12 126 87 1 136 87 1 159 87 1 160 87 1 182 87 1 201 87 1 217 87 4 219 87 1 233 87 32 9 88 1 20 88 1 51 88 1 76 88 1 83 88 1 88 88 1 92 88 21 125 88 1 142 88 1 182 88 1 209 88 1 215 88 1 217 88 2 233 88 1 90 89 1 92 89 21 121 89 1 142 89 1 156 89 1 210 89 1 217 89 3 220 89 1 248 89 1 20 90 1 92 90 26 121 90 2 217 90 1 233 90 1 234 90 1 25 91 1 26 91 1 39 91 2 42 91 1 88 91 1 92 91 22 121 91 4 142 91 1 215 91 1 217 91 9 233 91 16 20 92 1 42 92 1 92 92 11 100 92 1 142 92 1 156 92 1 180 92 1 217 92 3 226 92 1 233 92 1 35 93 1 92 93 42 121 93 2 125 93 1 217 93 4 6 94 2 43 94 1 92 94 8 121 94 1 217 94 5 221 94 1 233 94 22 39 95 1 76 95 1 92 95 16 121 95 1 173 95 2 180 95 1 190 95 1 201 95 1 209 95 1 210 95 1 215 95 3 217 95 9 219 95 2 221 95 2 226 95 1 233 95 21 248 95 1 9 96 1 22 96 1 92 96 12 121 96 1 136 96 2 160 96 1 215 96 1 217 96 7 233 96 5 9 97 1 136 97 1 217 97 2 20 98 2 22 98 1 42 98 1 45 98 1 92 98 33 142 98 2 201 98 1 215 98 1 217 98 3 22 99 1 42 99 1 92 99 16 95 99 1 121 99 1 217 99 1 20 100 2 92 100 31 201 100 2 217 100 1 233 100 1 22 101 1 88 101 1 92 101 23 95 101 1 121 101 1 125 101 2 156 101 1 204 101 1 217 101 4 221 101 1 51 102 2 92 102 4 217 102 2 233 102 12 20 103 2 51 103 1 92 103 28 121 103 1 153 103 1 162 103 1 201 103 1 217 103 2 233 103 12 92 104 12 22 105 1 92 105 7 100 105 1 122 105 1 156 105 1 197 105 1 217 105 2 220 105 1 22 106 1 92 106 23 140 106 1 217 106 3 233 106 15 18 107 1 70 107 1 92 107 20 162 107 1 172 107 1 217 107 3 233 107 34 9 108 1 22 108 1 92 108 8 140 108 1 192 108 1 217 108 3 219 108 1 233 108 1 248 108 1 42 109 1 92 109 4 95 109 1 140 109 1 142 109 2 162 109 1 215 109 1 217 109 10 226 109 1 248 109 1 20 110 1 42 110 1 43 110 1 51 110 1 75 110 2 92 110 35 140 110 1 217 110 2 9 111 1 20 111 2 39 111 1 92 111 15 121 111 2 136 111 1 162 111 1 169 111 1 217 111 8 220 111 1 221 111 1 233 111 17 241 111 1 26 112 1 92 112 26 217 112 4 230 112 1 233 112 18 248 112 1 9 113 1 20 113 1 26 113 1 51 113 1 92 113 27 126 113 1 201 113 2 217 113 2 233 113 2 9 114 1 20 114 1 92 114 22 173 114 1 201 114 2 217 114 1 219 114 1 20 115 2 39 115 1 43 115 1 51 115 1 92 115 28 142 115 1 173 115 1 180 115 1 233 115 1 6 116 1 9 116 1 20 116 1 92 116 21 175 116 1 204 116 1 217 116 2 229 116 1 63 117 1 70 117 1 92 117 17 121 117 2 175 117 1 199 117 1 217 117 15 219 117 1 221 117 1 233 117 8 6 118 1 92 118 13 121 118 1 162 118 2 217 118 2 233 118 8 20 119 1 92 119 10 162 119 1 169 119 1 173 119 1 217 119 4 225 119 1 233 119 1 9 120 1 22 120 1 92 120 8 140 120 1 192 120 1 217 120 3 219 120 1 233 120 1 248 120 1 92 121 6 233 121 18 20 122 1 92 122 29 95 122 6 125 122 1 217 122 1 219 122 1 233 122 2 6 123 1 42 123 1 88 123 1 92 123 17 121 123 1 210 123 1 217 123 1 26 124 1 42 124 1 88 124 1 92 124 16 121 124 1 142 124 1 156 124 1 210 124 1 217 124 5 250 124 1 54 125 1 63 125 1 92 125 12 217 125 1 233 125 3 22 126 1 35 126 1 76 126 1 88 126 2 92 126 23 121 126 1 142 126 1 162 126 1 165 126 1 201 126 2 204 126 1 210 126 1 217 126 14 219 126 1 233 126 3 250 126 1 20 127 1 22 127 1 42 127 1 70 127 1 88 127 1 92 127 28 121 127 1 162 127 1 173 127 1 204 127 1 217 127 4 233 127 1 241 127 1 51 128 1 88 128 1 92 128 11 95 128 2 156 128 1 190 128 1 217 128 9 39 129 1 42 129 1 92 129 14 126 129 1 136 129 2 215 129 1 217 129 6 226 129 1 233 129 3 9 130 1 27 130 1 92 130 22 121 130 1 136 130 1 160 130 1 173 130 1 215 130 2 217 130 2 248 130 1 250 130 1 35 131 1 92 131 8 125 131 1 140 131 1 217 131 1 233 131 9 248 131 1 9 132 1 92 132 18 209 132 1 215 132 2 20 133 2 54 133 1 92 133 21 100 133 1 121 133 8 175 133 1 190 133 2 201 133 1 204 133 1 215 133 1 217 133 13 219 133 4 221 133 1 226 133 1 229 133 1 233 133 5 248 133 1 39 134 1 43 134 1 92 134 11 95 134 2 123 134 1 156 134 1 217 134 3 51 135 2 92 135 4 217 135 2 233 135 12 63 136 1 92 136 13 121 136 3 125 136 1 182 136 1 190 136 1 217 136 3 219 136 1 221 136 1 233 136 14 248 136 1 20 137 1 42 137 1 88 137 2 92 137 10 217 137 2 248 137 1 6 138 3 9 138 4 20 138 2 39 138 2 51 138 2 69 138 2 88 138 2 92 138 101 121 138 7 123 138 1 125 138 1 140 138 1 156 138 2 162 138 8 173 138 5 190 138 5 201 138 1 210 138 1 215 138 1 217 138 29 219 138 3 220 138 1 221 138 3 229 138 1 233 138 85 248 138 3 70 139 1 92 139 13 121 139 1 173 139 1 201 139 1 217 139 2 20 140 2 51 140 1 70 140 1 92 140 10 121 140 1 136 140 1 156 140 2 201 140 1 217 140 10 233 140 26 92 141 9 190 141 1 217 141 3 92 142 8 217 142 3 233 142 17 ================================================ FILE: tests/testthat/test_create_model.R ================================================ context("Creating the model from different objects") library(MOFA2) test_that("a model can be created from a list of matrices", { m <- as.matrix(read.csv('matrix.csv')) expect_warning(create_mofa(list("view1" = m))) # no feature names provided rownames(m) <- paste("feature", seq_len(nrow(m)), paste = "", sep = "") expect_is(create_mofa(list("view1" = m)), "MOFA") expect_error(create_mofa(m)) }) test_that("a model can be created from a list of sparse matrices", { skip_if_not_installed("Matrix") library(Matrix) # Generate a sparse matrix m <- matrix(rnorm(100 * 5), ncol = 5) %*% t(matrix(rnorm(5 * 50), ncol = 5)) m[sample(1:nrow(m), 100, replace = TRUE), sample(1:ncol(m), 100, replace = TRUE)] <- 0 m <- Matrix(m, sparse = TRUE) # Set feature names rownames(m) <- paste("feature_", seq_len(nrow(m)), paste = "", sep = "") # Set sample names colnames(m) <- paste("sample_", seq_len(ncol(m)), paste = "", sep = "") # Test if a sparse matrix can be imported to the MOFA expect_is(create_mofa(list("view1" = m)), "MOFA") }) test_that("a model can be created from a Seurat object", { skip_if_not_installed("Seurat") skip_if_not_installed("SeuratObject") library(Seurat) library(Matrix) m <- readMM('matrix.mtx') genes <- read.delim('genes.tsv', sep='\t', header=FALSE, stringsAsFactors=FALSE)[,2] cells <- read.delim('barcodes.tsv', sep='\t', header=FALSE, stringsAsFactors=FALSE)[,1] colnames(m) <- cells rownames(m) <- genes srt <- SeuratObject::CreateSeuratObject(m) # only for testing purpose, should use scale.data expect_is(create_mofa(srt, features = genes, layer = "counts"), "MOFA") }) test_that("a list of matrices per view is split correctly into a nested list of matrices according to samples groups", { n_groups <- 3 # Create view 1 m <- as.matrix(read.csv('matrix.csv')) rownames(m) <- paste("feature", seq_len(nrow(m)), paste = "", sep = "") colnames(m) <- paste("sample", seq_len(ncol(m)), paste = "", sep = "") # Add second view m2 <- m[1:(nrow(m)/3),] rownames(m2) <- paste("view2", rownames(m2), sep = "_") # Define multiple groups samples_groups <- sample(x = paste0("group", 1:n_groups), replace = TRUE, size = ncol(m)) # Split the data data_split <- .split_data_into_groups(list("view1" = m, "view2" = m2), samples_groups) # Check group assignments for (g in 1:n_groups) { g_name <- paste0("group", g) expect_equal(colnames(data_split[[1]][[g_name]]), colnames(m)[which(samples_groups == g_name)]) expect_equal(colnames(data_split[[2]][[g_name]]), colnames(m)[which(samples_groups == g_name)]) } }) ================================================ FILE: tests/testthat/test_load_model.R ================================================ context("Loading the model") library(MOFA2) test_that("a pre-trained model can be loaded from disk", { filepath <- system.file("extdata", "model.hdf5", package = "MOFA2") expect_is(load_model(filepath), "MOFA") }) ================================================ FILE: tests/testthat/test_plot.R ================================================ context("Making plots") library(MOFA2) filepath <- system.file("extdata", "model.hdf5", package = "MOFA2") test_mofa2 <- load_model(filepath) # Data plots test_that("plot data overview works", { expect_silent(p <- plot_data_overview(test_mofa2)) }) test_that("plot data heatmap", { expect_silent(p <- plot_data_heatmap(test_mofa2, view = 1, factor = 1, silent = TRUE)) }) # yields an error and I do not know why # test_that("plot data scatter", { # expect_silent(p <- plot_data_scatter(test_mofa2, view = 1, factor = 1)) # }) test_that("plot data ASCII in terminal", { expect_error(plot_ascii_data(test_mofa2), NA) }) # Plotting weights test_that("plot weights heatmap", { expect_silent(p <- plot_weights_heatmap(test_mofa2, view = 1, silent = TRUE)) }) test_that("plot weights", { # For multiple factors expect_silent(p <- plot_weights(test_mofa2, view = 1, factors = 1:2)) # For one factor expect_silent(p <- plot_weights(test_mofa2, factors = 1)) }) test_that("plot top weights", { expect_silent(p <- plot_top_weights(test_mofa2, view = 1, factors = 1)) }) # Plotting factor values test_that("plot factor values", { expect_silent(p <- plot_factor(test_mofa2)) }) test_that("plot factor values", { expect_silent(p <- plot_factors(test_mofa2, factors = 1:2)) }) test_that("plot factors correlation", { expect_error({plot_factor_cor(test_mofa2); dev.off()}, NA) }) ================================================ FILE: tests/testthat/test_prepare_model.R ================================================ context("Prepare the model from different objects") library(MOFA2) test_that("a MOFA model can be prepared from a list of matrices", { m <- as.matrix(read.csv('matrix.csv')) # Set feature names rownames(m) <- paste("feature_", seq_len(nrow(m)), paste = "", sep = "") # Set sample names colnames(m) <- paste("sample_", seq_len(ncol(m)), paste = "", sep = "") mofa_model <- create_mofa(list("view1" = m)) model_opts <- get_default_model_options(mofa_model) model_opts$num_factors <- 10 expect_is(prepare_mofa(mofa_model, model_options = model_opts), "MOFA") }) test_that("a model can be created from a list of sparse matrices", { skip_if_not_installed("Matrix") # Generate a sparse matrix m <- matrix(rnorm(100 * 5), ncol = 5) %*% t(matrix(rnorm(5 * 50), ncol = 5)) m[sample(1:nrow(m), 100, replace = TRUE), sample(1:ncol(m), 100, replace = TRUE)] <- 0 library(Matrix) m <- Matrix(m, sparse = TRUE) # Set feature names rownames(m) <- paste("feature_", seq_len(nrow(m)), paste = "", sep = "") # Set sample names colnames(m) <- paste("sample_", seq_len(ncol(m)), paste = "", sep = "") # Initialise a model mofa_model <- create_mofa(list("view1" = m)) model_opts <- get_default_model_options(mofa_model) model_opts$num_factors <- 10 # Test if a sparse matrix can be used to prepare the MOFA model for training expect_is(prepare_mofa(mofa_model, model_options = model_opts), "MOFA") }) test_that("a model can be created from a Seurat object", { skip_if_not_installed("Seurat") skip_if_not_installed("SeuratObject") library(Seurat) library(Matrix) m <- readMM('matrix.mtx') genes <- read.delim('genes.tsv', sep='\t', header=FALSE, stringsAsFactors=FALSE)[,2] cells <- read.delim('barcodes.tsv', sep='\t', header=FALSE, stringsAsFactors=FALSE)[,1] colnames(m) <- cells rownames(m) <- genes srt <- SeuratObject::CreateSeuratObject(m) mofa_model <- create_mofa(srt, features = genes, layer = "counts") model_opts <- get_default_model_options(mofa_model) model_opts$num_factors <- 10 # Test if a Seurat object can be used to prepare the MOFA model for training expect_is(prepare_mofa(mofa_model, model_options = model_opts), "MOFA") }) ================================================ FILE: tests/testthat.R ================================================ library(testthat) library(MOFA2) test_check("MOFA2") # setwd("/Users/rargelaguet/mofa/MOFA2/tests/testthat") ================================================ FILE: vignettes/MEFISTO_temporal.Rmd ================================================ --- title: "Illustration of MEFISTO on simulated data with a temporal covariate" author: - name: "Britta Velten" affiliation: "German Cancer Research Center, Heidelberg, Germany" email: "b.velten@dkfz-heidelberg.de" date: "`r Sys.Date()`" output: BiocStyle::html_document: toc_float: true vignette: > %\VignetteIndexEntry{MEFISTO on simulated data (temporal)} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, message=FALSE, warning=FALSE} library(MOFA2) library(tidyverse) library(pheatmap) ``` # Temporal data: Simulate an example data set To illustrate the MEFISTO method in MOFA2 we simulate a small example data set with 4 different views and one covariates defining a timeline using `make_example_data`. The simulation is based on 4 factors, two of which vary smoothly along the covariate (with different lengthscales) and two are independent of the covariate. ```{r} set.seed(2020) # set number of samples and time points N <- 200 time <- seq(0,1,length.out = N) # generate example data dd <- make_example_data(sample_cov = time, n_samples = N, n_factors = 4, n_features = 200, n_views = 4, lscales = c(0.5, 0.2, 0, 0)) # input data data <- dd$data # covariate matrix with samples in columns time <- dd$sample_cov rownames(time) <- "time" ``` Let's have a look at the simulated latent temporal processes, which we want to recover: ```{r} df <- data.frame(dd$Z, t(time)) df <- gather(df, key = "factor", value = "value", starts_with("simulated_factor")) ggplot(df, aes(x = time, y = value)) + geom_point() + facet_grid(~factor) ``` # MEFISTO framework Using the MEFISTO framework is very similar to using MOFA2. In addition to the omics data, however, we now additionally specify the time points for each sample. If you are not familiar with the MOFA2 framework, it might be helpful to have a look at [MOFA2 tutorials](https://biofam.github.io/MOFA2/tutorials.html) first. ## Create a MOFA object with covariates To create the MOFA object we need to specify the training data and the covariates for pattern detection and inference of smooth factors. Here, `sample_cov` is a matrix with samples in columns and one row containing the time points. The sample order must match the order in data columns. Alternatively, a data frame can be provided containing one `sample` columns with samples names matching the sample names in the data. First, we start by creating a standard MOFA model. ```{r} sm <- create_mofa(data = dd$data) ``` Now, we can add the additional temporal covariate, that we want to use for training. ```{r, message=FALSE, warning=FALSE} sm <- set_covariates(sm, covariates = time) sm ``` We now successfully created a MOFA object that contains 4 views, 1 group and 1 covariate giving the time point for each sample. ## Prepare a MOFA object Before training, we can specify various options for the model, the training and the data preprocessing. If no options are specified, the model will use the default options. See also `get_default_data_options`, `get_default_model_options` and `get_default_training_options` to have a look at the defaults and change them where required. For illustration, we only use a small number of iterations. Importantly, to activate the use of the covariate for a functional decomposition (MEFISTO) we now additionally to the standard MOFA options need to specify `mefisto_options`. For this you can just use the default options (`get_default_mefisto_options`), unless you want to make use of advanced options such as alignment across groups. ```{r, message=FALSE, warning=FALSE} data_opts <- get_default_data_options(sm) model_opts <- get_default_model_options(sm) model_opts$num_factors <- 4 train_opts <- get_default_training_options(sm) train_opts$maxiter <- 100 mefisto_opts <- get_default_mefisto_options(sm) sm <- prepare_mofa(sm, model_options = model_opts, mefisto_options = mefisto_opts, training_options = train_opts, data_options = data_opts) ``` ## Run MOFA Now, the MOFA object is ready for training. Using `run_mofa` we can fit the model, which is saved in the file specified as `outfile`. If none is specified the output is saved in a temporary location. ```{r, warning=FALSE, message=FALSE} outfile = file.path(tempdir(),"model.hdf5") sm <- run_mofa(sm, outfile, use_basilisk = TRUE) ``` ## Down-stream analysis ### Variance explained per factor Using `plot_variance_explained` we can explore which factor is active in which view. `plot_factor_cor` shows us whether the factors are correlated. ```{r, fig.width=5, fig.height=4} plot_variance_explained(sm) r <- plot_factor_cor(sm) ``` ### Relate factors to the covariate The MOFA model has learnt scale parameters for each factor, which give us an indication of the smoothness per factor along the covariate (here time) and are between 0 and 1. A scale of 0 means that the factor captures variation independent of time, a value close to 1 tells us that this factor varys very smoothly along time. ```{r} get_scales(sm) ``` In this example, we find two factors that are non-smooth and two smooth factors. Using `plot_factors_vs_cov` we can plot the factors along the time line, where we can distinguish smooth and non smooth variation along time. ```{r} plot_factors_vs_cov(sm, color_by = "time") ``` For more customized plots, we can extract the underlying data containing the factor and covariate values for each sample. ```{r} df <- plot_factors_vs_cov(sm, color_by = "time", legend = FALSE, return_data = TRUE) head(df) ``` We can compare the above plots to the factors that were simulated above and find that the model recaptured the two smooth as well as two non-smooth patterns in time. Note that factors are invariant to the sign, e.g. Factor 4 is the negative of the simulated factor but we can simply multiply the factors and its weights by -1 to obtain exactly the simulated factor. ### Exploration of weights As with standard MOFA, we can now look deeper into the meaning of these factors by exploring the weights or performing feature set enrichment analysis. ```{r, fig.width=5, fig.height=4} plot_weights(sm, factors = 4, view = 1) plot_top_weights(sm, factors = 3, view = 2) ``` In addition, we can take a look at the top feature values per factor along time and see that their patterns are in line with the pattern of the corresponding Factor (here Factor 3). ```{r} plot_data_vs_cov(sm, factor=3, features = 2, color_by = "time", dot_size = 1) ``` ### Interpolation Furthermore, we can interpolate or extrapolate a factor to new values. Here, we only show the mean of the prediction, to obtain uncertainties you need to specify the new values before training in `get_default_mefisto_options(sm)$new_values`. ```{r} sm <- interpolate_factors(sm, new_values = seq(0,1.1,0.01)) plot_interpolation_vs_covariate(sm, covariate = "time", factors = "Factor3") ```
**Session Info** ```{r} sessionInfo() ```
================================================ FILE: vignettes/downstream_analysis.Rmd ================================================ --- title: "MOFA+: downstream analysis in R" author: - name: "Ricard Argelaguet" affiliation: "European Bioinformatics Institute, Cambridge, UK" email: "ricard@ebi.ac.uk" - name: "Britta Velten" affiliation: "German Cancer Research Center, Heidelberg, Germany" email: "b.velten@dkfz-heidelberg.de" date: "`r Sys.Date()`" output: BiocStyle::html_document: toc_float: true vignette: > %\VignetteIndexEntry{Downstream analysis: Overview} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- # Introduction In the MOFA2 R package we provide a wide range of downstream analysis to visualise and interpret the model output. Here we provide a brief description of the main functionalities. This vignette is made of simulated data and we do not highlight biologically relevant results. Please see our [tutorials](https://biofam.github.io/MOFA2/tutorials.html) for real use cases. # Load libraries ```{r, message=FALSE} library(ggplot2) library(MOFA2) ``` # Load trained model ```{r } filepath <- system.file("extdata", "model.hdf5", package = "MOFA2") model <- load_model(filepath) ``` ## Overview of data The function `plot_data_overview` can be used to obtain an overview of the input data. It shows how many views (rows) and how many groups (columns) exist, what are their corresponding dimensionalities and how many missing information they have (grey bars). ```{r} plot_data_overview(model) ``` # Add metadata to the model The metadata is stored as a data.frame object in `model@samples_metadata`, and it requires at least the column `sample`. The column `group` is required only if you are doing multi-group inference. The number of rows must match the total number of samples in the model (`sum(model@dimensions$N)`). Let's add some artificial metadata... ```{r } Nsamples = sum(get_dimensions(model)[["N"]]) sample_metadata <- data.frame( sample = samples_names(model)[[1]], condition = sample(c("A","B"), size = Nsamples, replace = TRUE), age = sample(1:100, size = Nsamples, replace = TRUE) ) samples_metadata(model) <- sample_metadata head(samples_metadata(model), n=3) ``` # Variance decomposition The first step in the MOFA analysis is to quantify the amount of variance explained ($R^2$) by each factor in each data modality. The variance explained estimates are stored in the hdf5 file and loaded in `model@cache[["variance_explained"]]`: ```{r } # Total variance explained per view head(get_variance_explained(model)$r2_total[[1]]) # Variance explained for every factor in per view head(get_variance_explained(model)$r2_per_factor[[1]]) ``` Variance explained estimates can be plotted using `plot_variance_explained(model, ...)`. Options: * **factors**: character vector with a factor name(s), or numeric vector with the index(es) of the factor(s). Default is "all". * **x**: character specifying the dimension for the x-axis ("view", "factor", or "group"). * **y**: character specifying the dimension for the y-axis ("view", "factor", or "group"). * **split_by**: character specifying the dimension to be faceted ("view", "factor", or "group"). * **plot_total**: logical value to indicate if to plot the total variance explained (for the variable in the x-axis) In this case we have 5 active factors that explain a large amount of variation in both data modalities. ```{r} plot_variance_explained(model, x="view", y="factor") ``` The model explains ~70% of the variance in both data modalities. ```{r} plot_variance_explained(model, x="view", y="factor", plot_total = TRUE)[[2]] ``` # Visualisation of Factors The MOFA factors capture the global sources of variability in the data. Mathematically, each factor ordinates cells along a one-dimensional axis centered at zero. The value per se is not important, only the relative positioning of samples matters. Samples with different signs manifest opposite “effects” along the inferred axis of variation, with higher absolute value indicating a stronger effect. Note that the interpretation of factors is analogous to the interpretation of the principal components in PCA. ## Visualisation of factors one at a time Factors can be plotted using `plot_factor` (for beeswarm plots of individual factors) or `plot_factors` (for scatter plots of factor combinations). ```{r } plot_factor(model, factor = 1:3, color_by = "age", shape_by = "condition" ) ``` Adding more options ```{r} p <- plot_factor(model, factors = c(1,2,3), color_by = "condition", dot_size = 3, # change dot size dodge = TRUE, # dodge points with different colors legend = FALSE, # remove legend add_violin = TRUE, # add violin plots, violin_alpha = 0.25 # transparency of violin plots ) # The output of plot_factor is a ggplot2 object that we can edit p <- p + scale_color_manual(values=c("A"="black", "B"="red")) + scale_fill_manual(values=c("A"="black", "B"="red")) print(p) ``` ## Visualisation of combinations of factors Scatter plots ```{r, message=FALSE} plot_factors(model, factors = 1:3, color_by = "condition" ) ``` # Visualisation of feature weights The weights provide a score for how strong each feature relates to each factor. Features with no association with the factor have values close to zero, while features with strong association with the factor have large absolute values. The sign of the weight indicates the direction of the effect: a positive weight indicates that the feature has higher levels in the cells with positive factor values, and vice versa. Weights can be plotted using `plot_weights` or `plot_top_weights` ```{r } plot_weights(model, view = "view_0", factor = 1, nfeatures = 10, # Number of features to highlight scale = TRUE, # Scale weights from -1 to 1 abs = FALSE # Take the absolute value? ) ``` ```{r } plot_top_weights(model, view = "view_0", factor = 1, nfeatures = 10 ) ``` # Visualisation of covariation patterns in the input data Instead of looking at weights, it is useful to observe the coordinated heterogeneity that MOFA captures in the original data. This can be done using the `plot_data_heatmap` and `plot_data_scatter` function. ## Heatmaps Heatmap of observations. Top features are selected by its weight in the selected factor. By default, samples are ordered according to their corresponding factor value. ```{r} plot_data_heatmap(model, view = "view_1", # view of interest factor = 1, # factor of interest features = 20, # number of features to plot (they are selected by weight) # extra arguments that are passed to the `pheatmap` function cluster_rows = TRUE, cluster_cols = FALSE, show_rownames = TRUE, show_colnames = FALSE ) ``` ## Scatter plots Scatter plots of observations vs factor values. It is useful to add a linear regression estimate to visualise if the relationship between (top) features and factor values is linear. ```{r} plot_data_scatter(model, view = "view_1", # view of interest factor = 1, # factor of interest features = 5, # number of features to plot (they are selected by weight) add_lm = TRUE, # add linear regression color_by = "condition" ) ``` ## Non-linear dimensionality reduction The MOFA factors are linear (as in Principal Component analysis), so each one captures limited amount of information, but they can be used as input to other methods that learn compact nonlinear manifolds, e.g. t-SNE or UMAP. Run UMAP or t-SNE ```{r } set.seed(42) model <- run_umap(model) model <- run_tsne(model) ``` Plot (nothing too interesting in this simulated data set) ```{r } plot_dimred(model, method = "TSNE", # method can be either "TSNE" or "UMAP" color_by = "condition", dot_size = 5 ) ``` # Other functionalities ## Renaming dimensions The user can rename the dimensions of the model ```{r} views_names(model) <- c("Transcriptomics", "Proteomics") factors_names(model) <- paste("Factor", 1:get_dimensions(model)$K, sep=" ") ``` ```{r} views_names(model) ``` ## Extracting data for downstream analysis The user can extract the feature weights, the data and the factors to generate their own plots. Extract factors ```{r} # "factors" is a list of matrices, one matrix per group with dimensions (nsamples, nfactors) factors <- get_factors(model, factors = "all") lapply(factors,dim) ``` Extract weights ```{r} # "weights" is a list of matrices, one matrix per view with dimensions (nfeatures, nfactors) weights <- get_weights(model, views = "all", factors = "all") lapply(weights,dim) ``` Extract data ```{r} # "data" is a nested list of matrices, one matrix per view and group with dimensions (nfeatures, nsamples) data <- get_data(model) lapply(data, function(x) lapply(x, dim))[[1]] ``` For convenience, the user can extract the data in long data.frame format: ```{r} factors <- get_factors(model, as.data.frame = TRUE) head(factors, n=3) ``` ```{r} weights <- get_weights(model, as.data.frame = TRUE) head(weights, n=3) ``` ```{r} data <- get_data(model, as.data.frame = TRUE) head(data, n=3) ```
**Session Info** ```{r} sessionInfo() ```
================================================ FILE: vignettes/getting_started_R.Rmd ================================================ --- title: "MOFA2: training a model in R" author: - name: "Ricard Argelaguet" affiliation: "European Bioinformatics Institute, Cambridge, UK" email: "ricard@ebi.ac.uk" - name: "Britta Velten" affiliation: "German Cancer Research Center, Heidelberg, Germany" email: "b.velten@dkfz-heidelberg.de" date: "`r Sys.Date()`" output: BiocStyle::html_document: toc_float: true vignette: > %\VignetteIndexEntry{MOFA2: How to train a model in R} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- This vignette contains a detailed tutorial on how to train a MOFA model using R. A concise template script can be found [here](https://github.com/bioFAM/MOFA2/blob/87e615bf0d49481821bd03cb32baa5d2e66ad6d8/inst/scripts/template_script.R). Many more examples on application of MOFA to various multi-omics data sets can be found [here](https://biofam.github.io/MOFA2/tutorials.html). # Load libraries ```{r message=FALSE} library(data.table) library(MOFA2) ``` # Is MOFA the right method for my data? MOFA (and factor analysis models in general) are useful to uncover variation in complex data sets that contain multiple sources of heterogeneity. This requires a **relatively large sample size (at least ~15 samples)**. In addition, MOFA needs the multi-modal measurements to be derived **from the same samples**. It is fine if you have samples that are missing some data modality, but there has to be a significant degree of matched measurements. # Preprocessing the data ## Normalisation Proper normalisation of the data is critical. **The model can handle three types of data**: continuous (modelled with a gaussian likelihood), small counts (modelled with a Poisson likelihood) and binary measurements (modelled with a bernoulli likelihood). Non-gaussian likelihoods give non-optimal results, we recommend the user to apply data transformations to obtain continuous measurements. For example, for count-based data such as RNA-seq or ATAC-seq we recommend size factor normalisation + variance stabilisation (i.e. a log transformation). ## Feature selection It is strongly recommended that you select **highly variable features (HVGs) per assay** before fitting the model. This ensures a faster training and a more robust inference procedure. Also, for data modalities that have very different dimensionalities we suggest a stronger feature selection fort he bigger views, with the aim of reducing the feature imbalance between data modalities. # Create the MOFA object To create a MOFA object you need to specify three dimensions: samples, features and view(s). Optionally, a group can also be specified for each sample (no group structure by default). MOFA objects can be created from a wide range of input formats, including: - **a list of matrices**: this is recommended for relatively simple data. - **a long data.frame**: this is recommended for complex data sets with multiple views and/or groups. - **MultiAssayExperiment**: to connect with Bioconductor objects. - **Seurat**: for single-cell genomics users. See [this vignette](https://raw.githack.com/bioFAM/MOFA2/master/MOFA2/vignettes/scRNA_gastrulation.html) ## List of matrices A list of matrices, where each entry corresponds to one view. Samples are stored in columns and features in rows. Let's simulate some data to start with ```{r} data <- make_example_data( n_views = 2, n_samples = 200, n_features = 1000, n_factors = 10 )[[1]] lapply(data,dim) ``` Create the MOFA object: ```{r message=FALSE} MOFAobject <- create_mofa(data) ``` Plot the data overview ```{r} plot_data_overview(MOFAobject) ``` In case you are using the multi-group functionality, the groups can be specified using the `groups` argument as a vector with the group ID for each sample. Keep in mind that the multi-group functionality is a rather advanced option that we discourage for beginners. For more details on how the multi-group inference works, read the [FAQ section](https://biofam.github.io/MOFA2/faq.html) and [check this vignette](https://raw.githack.com/bioFAM/MOFA2/master/MOFA2/vignettes/scRNA_gastrulation.html). ```{r message=FALSE} N = ncol(data[[1]]) groups = c(rep("A",N/2), rep("B",N/2)) MOFAobject <- create_mofa(data, groups=groups) ``` Plot the data overview ```{r} plot_data_overview(MOFAobject) ``` ## Long data.frame A long data.frame with columns `sample`, `feature`, `view`, `group` (optional), `value` might be the best format for complex data sets with multiple omics and potentially multiple groups of data. Also, there is no need to add rows that correspond to missing data: ```{r } filepath <- system.file("extdata", "test_data.RData", package = "MOFA2") load(filepath) head(dt) ``` Create the MOFA object ```{r } MOFAobject <- create_mofa(dt) print(MOFAobject) ``` Plot data overview ```{r out.width = "80%"} plot_data_overview(MOFAobject) ``` # Define options ## Define data options - **scale_groups**: if groups have different ranges/variances, it is good practice to scale each group to unit variance. Default is `FALSE` - **scale_views**: if views have different ranges/variances, it is good practice to scale each view to unit variance. Default is `FALSE` ```{r } data_opts <- get_default_data_options(MOFAobject) head(data_opts) ``` ## Define model options - **num_factors**: number of factors - **likelihoods**: likelihood per view (options are "gaussian", "poisson", "bernoulli"). Default is "gaussian". - **spikeslab_factors**: use spike-slab sparsity prior in the factors? Default is `FALSE`. - **spikeslab_weights**: use spike-slab sparsity prior in the weights? Default is `TRUE`. - **ard_factors**: use ARD prior in the factors? Default is `TRUE` if using multiple groups. - **ard_weights**: use ARD prior in the weights? Default is `TRUE` if using multiple views. Only change the default model options if you are familiar with the underlying mathematical model. ```{r } model_opts <- get_default_model_options(MOFAobject) model_opts$num_factors <- 10 head(model_opts) ``` ## Define training options - **maxiter**: number of iterations. Default is 1000. - **convergence_mode**: "fast" (default), "medium", "slow". For exploration, the fast mode is sufficient. For a final model, consider using medium" or even "slow", but hopefully results should not change much. - **gpu_mode**: use GPU mode? (needs [cupy](https://cupy.chainer.org/) installed and a functional GPU). - **verbose**: verbose mode? ```{r } train_opts <- get_default_training_options(MOFAobject) head(train_opts) ``` # Build and train the MOFA object Prepare the MOFA object ```{r message=FALSE} MOFAobject <- prepare_mofa( object = MOFAobject, data_options = data_opts, model_options = model_opts, training_options = train_opts ) ``` Train the MOFA model. Remember that in this step the `MOFA2` R package connets with the `mofapy2` Python package using `reticulate`. This is the source of most problems when running MOFA. See our [FAQ section](https://biofam.github.io/MOFA2/faq.html) if you have issues. The output is saved in the file specified as `outfile`. If none is specified, the output is saved in a temporary location. ```{r} outfile = file.path(tempdir(),"model.hdf5") MOFAobject.trained <- run_mofa(MOFAobject, outfile, use_basilisk=TRUE) ``` If everything is successful, you should observe an output analogous to the following: ``` ###################################### ## Training the model with seed 1 ## ###################################### Iteration 1: time=0.03, ELBO=-52650.68, deltaELBO=837116.802 (94.082647669%), Factors=10 (...) Iteration 9: time=0.04, ELBO=-50114.43, deltaELBO=23.907 (0.002686924%), Factors=10 ####################### ## Training finished ## ####################### Saving model in `/var/folders/.../model.hdf5...`r outfile`. ``` # Downstream analysis This finishes the tutorial on how to train a MOFA object from R. To continue with the downstream analysis, follow [this tutorial](https://raw.githack.com/bioFAM/MOFA2_tutorials/master/R_tutorials/downstream_analysis.html)
**Session Info** ```{r} sessionInfo() ```